In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
%matplotlib inline

In [2]:
# Read both competition splits; `pet_id` becomes the row index of each frame.
TRAIN_CSV = 'Dataset/train.csv'
TEST_CSV = 'Dataset/test.csv'
train_data = pd.read_csv(TRAIN_CSV, index_col='pet_id')
test_data = pd.read_csv(TEST_CSV, index_col='pet_id')

In [15]:
# Preview the first training rows.
# NOTE(review): the saved output below carries execution count 15 and already
# contains the engineered columns (color_type_encoded, total_days, rec_area, ...),
# so it was rendered after the later feature cells ran, not at this position.
train_data.head()

Unnamed: 0_level_0,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category,color_type_encoded,issue_year,issue_month,issue_day,listing_year,listing_month,listing_day,total_days,length(cm),rec_area
pet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
ANSL_69903,2016-07-10,2016-09-21 16:25:00,2.0,Brown Tabby,0.8,7.78,13,9,0.0,1,18,2016,7,10,2016,9,21,73,80.0,622.4
ANSL_66892,2013-11-21,2018-12-27 17:47:00,1.0,White,0.72,14.19,13,9,0.0,2,53,2013,11,21,2018,12,27,1862,72.0,1021.68
ANSL_69750,2014-09-28,2016-10-19 08:24:00,,Brown,0.15,40.9,15,4,2.0,4,15,2014,9,28,2016,10,19,752,15.0,613.5
ANSL_71623,2016-12-31,2019-01-25 18:30:00,1.0,White,0.62,17.82,0,1,0.0,2,53,2016,12,31,2019,1,25,755,62.0,1104.84
ANSL_57969,2017-09-28,2017-11-19 09:38:00,2.0,Black,0.5,11.06,18,4,0.0,1,2,2017,9,28,2017,11,19,52,50.0,553.0


In [4]:
# Preview the first test rows (raw columns only in the saved output).
test_data.head()

Unnamed: 0_level_0,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2
pet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ANSL_75005,2005-08-17 00:00:00,2017-09-07 15:35:00,0.0,Black,0.87,42.73,0,7
ANSL_76663,2018-11-15 00:00:00,2019-05-08 17:24:00,1.0,Orange Tabby,0.06,6.71,0,1
ANSL_58259,2012-10-11 00:00:00,2018-04-02 16:51:00,1.0,Black,0.24,41.21,0,7
ANSL_67171,2015-02-13 00:00:00,2018-04-06 07:25:00,1.0,Black,0.29,8.46,7,1
ANSL_72871,2017-01-18 00:00:00,2018-04-26 13:42:00,1.0,Brown,0.71,30.92,0,7


In [5]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the colour names. The encoder is fitted on the colours of BOTH
# splits: LabelEncoder.transform raises ValueError for a label it did not see
# during fit, so fitting on the training rows alone breaks as soon as the test
# set contains a colour that never occurs in training. When every test colour
# is also present in training, the resulting codes are unchanged.
encoder = LabelEncoder()
encoder.fit(pd.concat([train_data.color_type, test_data.color_type]))

train_data['color_type_encoded'] = encoder.transform(train_data.color_type)
test_data['color_type_encoded'] = encoder.transform(test_data.color_type)

In [6]:
# Seed NumPy's global RNG once. The train_test_split calls further down pass no
# random_state, so their shuffles draw from this global generator.
SEED = 42
np.random.seed(SEED)

In [None]:
# Length-to-height ratio. The 'length(cm)' column is only created by a later
# cell, so the centimetre length is derived from 'length(m)' here; reading
# 'length(cm)' at this point raises KeyError on a fresh kernel.
train_data['lbyh'] = (train_data['length(m)'] * 100) / train_data['height(cm)']

In [7]:
# Convert the two date columns of each split to pandas datetimes.
for frame in (train_data, test_data):
    for date_col in ('issue_date', 'listing_date'):
        frame[date_col] = pd.to_datetime(frame[date_col])

In [8]:
# Calendar components of the training issue date.
train_issue = train_data['issue_date'].dt
train_data['issue_year'] = train_issue.year
train_data['issue_month'] = train_issue.month
train_data['issue_day'] = train_issue.day

In [9]:
# Calendar components of the training listing date.
train_listing = train_data['listing_date'].dt
train_data['listing_year'] = train_listing.year
train_data['listing_month'] = train_listing.month
train_data['listing_day'] = train_listing.day

In [10]:
# Calendar components of the test issue date.
test_issue = test_data['issue_date'].dt
test_data['issue_year'] = test_issue.year
test_data['issue_month'] = test_issue.month
test_data['issue_day'] = test_issue.day

In [11]:
# Calendar components of the test listing date.
test_listing = test_data['listing_date'].dt
test_data['listing_year'] = test_listing.year
test_data['listing_month'] = test_listing.month
test_data['listing_day'] = test_listing.day

In [12]:
# Whole days elapsed between issue and listing, stored as int64 for each split.
for frame in (train_data, test_data):
    elapsed = frame.listing_date - frame.issue_date
    whole_days = np.array(elapsed).astype('timedelta64[D]')
    frame['total_days'] = whole_days.astype('int64')

In [13]:
# Express the length in centimetres so it shares a unit with 'height(cm)'.
CM_PER_M = 100
train_data['length(cm)'] = train_data['length(m)'].mul(CM_PER_M)
test_data['length(cm)'] = test_data['length(m)'].mul(CM_PER_M)

In [14]:
# Length x height product (both in cm) for each split.
for frame in (train_data, test_data):
    frame['rec_area'] = frame['length(cm)'].mul(frame['height(cm)'])

In [None]:
# Re-inspect the training frame now that the engineered columns exist.
train_data.head()

In [None]:
# Re-inspect the test frame now that the engineered columns exist.
test_data.head()

In [None]:
# Columns fed to the models, in the order the estimators will see them.
# NOTE(review): the saved preview shows a missing 'condition' value, and no
# visible cell imputes it; tree estimators in older scikit-learn releases
# reject NaN input — confirm the installed version accepts it.
features = [
    'condition', 'length(cm)', 'height(cm)', 'rec_area', 'X1', 'X2', 'color_type_encoded',
    'issue_year', 'issue_month', 'issue_day',
    'listing_year', 'listing_month', 'listing_day',
    'total_days',
]
X = np.array(train_data.loc[:, features])
# Two targets are modelled separately.
y_pet = np.array(train_data.pet_category)
y_breed = np.array(train_data.breed_category)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows to validate the pet-category model.
pet_split = train_test_split(X, y_pet, test_size=0.20)
X_pet_train, X_pet_valid, y_pet_train, y_pet_valid = pet_split

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fixed random_state keeps the tree's tie-breaking reproducible.
TREE_SEED = 13
pet_classifier = DecisionTreeClassifier(random_state=TREE_SEED)

In [None]:
# Fit on the 80% training part only, so the next cell can score on unseen rows.
pet_classifier.fit(X_pet_train, y_pet_train)

In [None]:
# Mean accuracy of the pet-category tree on the held-out 20%.
pet_classifier.score(X_pet_valid, y_pet_valid)

In [None]:
# Refit on every training row; this is the model later applied to the test set.
pet_classifier.fit(X, y_pet)

In [None]:
from sklearn.model_selection import cross_val_predict

# Out-of-fold pet-category predictions for the training rows. Predicting with a
# tree on the very rows it was fitted on essentially reproduces y_pet, which
# leaks the target into the breed model and inflates its validation score; at
# test time the breed model would then see a much noisier feature. With
# cross_val_predict every row is predicted by a model that never saw it.
# cross_val_predict fits clones, so the fully fitted `pet_classifier` is untouched.
predicted_pets = cross_val_predict(pet_classifier, X, y_pet, cv=5)

In [None]:
# Base features plus the pet prediction as an extra column.
# `assign` builds a new frame instead of writing into a slice of train_data
# (which triggers SettingWithCopyWarning). Filtering the column list keeps the
# cell re-runnable after 'predicted_pets' has been appended to `features`.
base_columns = [name for name in features if name != 'predicted_pets']
X_new = train_data[base_columns].assign(predicted_pets=predicted_pets)

In [None]:
# Record the stacked column name; guarded so re-running the cell does not add
# the same name twice.
if 'predicted_pets' not in features:
    features.append('predicted_pets')

In [None]:
# Replace X with the stacked matrix (base features + predicted_pets) as a NumPy array.
X = np.array(X_new)

In [None]:
# Display the stacked training matrix for a quick visual check.
X

In [None]:
# Hold out 20% of the stacked rows to validate the breed-category model.
breed_split = train_test_split(X, y_breed, test_size=0.20)
X_breed_train, X_breed_valid, y_breed_train, y_breed_valid = breed_split

In [None]:
# Breed-category tree, configured with the same random_state as the pet tree.
breed_classifier = DecisionTreeClassifier(random_state=13)

In [None]:
# Fit on the 80% training part of the stacked matrix.
breed_classifier.fit(X_breed_train,y_breed_train)

In [None]:
# Mean accuracy of the breed-category tree on the held-out 20%.
breed_classifier.score(X_breed_valid, y_breed_valid)

In [None]:
# Refit on every stacked training row; this model is later applied to the test set.
breed_classifier.fit(X, y_breed)

In [None]:
from sklearn.model_selection import cross_val_predict

# Out-of-fold breed predictions for the training rows. In-sample predictions
# from a tree fitted on these same rows are close to a copy of y_breed, which
# leaks the target into the second pet model. cross_val_predict fits clones,
# so the fully fitted `breed_classifier` is left as it is.
predicted_breeds = cross_val_predict(breed_classifier, X, y_breed, cv=5)

In [None]:
# Base columns for the second pet model (the breed prediction is added next).
new_features = [
    'condition', 'length(cm)', 'height(cm)', 'rec_area', 'X1', 'X2', 'color_type_encoded',
    'issue_year', 'issue_month', 'issue_day',
    'listing_year', 'listing_month', 'listing_day',
    'total_days',
]

In [None]:
# Base features plus the breed prediction as an extra column.
# `assign` returns a fresh frame, avoiding the SettingWithCopyWarning that
# column assignment on a slice of train_data raises. The column filter and the
# guarded append make the cell safe to re-run.
base_columns = [name for name in new_features if name != 'predicted_breeds']
X = train_data[base_columns].assign(predicted_breeds=predicted_breeds)
if 'predicted_breeds' not in new_features:
    new_features.append('predicted_breeds')

In [None]:
# Second-stage pet-category tree, trained on base features + predicted_breeds.
new_pet_classifier = DecisionTreeClassifier(random_state=13)

In [None]:
# Fresh 80/20 split of the breed-augmented frame for the second-stage pet model.
pet_split = train_test_split(X, y_pet, test_size=0.20)
X_pet_train, X_pet_valid, y_pet_train, y_pet_valid = pet_split

In [None]:
# Fit the second-stage pet model on the 80% training part.
new_pet_classifier.fit(X_pet_train, y_pet_train)

In [None]:
# Mean accuracy of the second-stage pet model on the held-out 20%.
new_pet_classifier.score(X_pet_valid, y_pet_valid)

In [None]:
# Refit on all rows of the breed-augmented frame before predicting the test set.
new_pet_classifier.fit(X, y_pet)

In [None]:
# Reset `features` to the base columns (drops the 'predicted_pets' entry
# appended earlier) before building the test matrix.
features = [
    'condition', 'length(cm)', 'height(cm)', 'rec_area', 'X1', 'X2', 'color_type_encoded',
    'issue_year', 'issue_month', 'issue_day',
    'listing_year', 'listing_month', 'listing_day',
    'total_days',
]

In [None]:
# Test matrix with the same base columns, in the same order, as the training matrix.
X_test = np.array(test_data[features])


In [None]:
# First-stage pet-category predictions for the test rows.
pets = pet_classifier.predict(X_test)

In [None]:
# Test-side counterpart of the stacked training frame: base features plus the
# first-stage pet prediction. `assign` avoids writing into a slice of test_data
# (SettingWithCopyWarning); the column filter keeps the cell re-runnable after
# 'predicted_pets' has been appended to `features`.
base_columns = [name for name in features if name != 'predicted_pets']
X_new = test_data[base_columns].assign(predicted_pets=pets)

In [None]:
# Record the stacked column name; guarded so re-running the cell does not add
# the same name twice.
if 'predicted_pets' not in features:
    features.append('predicted_pets')

In [None]:
# Breed predictions for the test rows. `breed_classifier` was fitted on a plain
# NumPy array, so the frame is converted to an array as well; passing a
# DataFrame to an estimator fitted without feature names triggers a
# feature-name mismatch warning in scikit-learn.
breeds = breed_classifier.predict(np.array(X_new))

In [None]:
# Reset `new_features` to the base columns before building the test frame for
# the second-stage pet model.
new_features = ['condition', 'length(cm)', 'height(cm)','rec_area', 'X1', 'X2', 'color_type_encoded',
           'issue_year', 'issue_month', 'issue_day', 'listing_year', 'listing_month', 'listing_day', 'total_days']


In [None]:
# Test frame for the second-stage pet model: base features plus the predicted
# breed. `assign` returns a fresh frame (no SettingWithCopyWarning); the column
# filter and guarded append keep the cell re-runnable.
base_columns = [name for name in new_features if name != 'predicted_breeds']
X = test_data[base_columns].assign(predicted_breeds=breeds)
if 'predicted_breeds' not in new_features:
    new_features.append('predicted_breeds')

In [None]:
# Second-stage pet-category predictions for the test rows.
new_pets = new_pet_classifier.predict(X)

In [None]:
# Assemble the stacked-model predictions, indexed by pet_id.
submission_columns = {
    'pet_id': test_data.index,
    'breed_category': breeds,
    'pet_category': new_pets,
}
submission = pd.DataFrame(submission_columns).set_index('pet_id', drop=True)

In [None]:
# Write the stacked-model predictions to the working directory.
submission.to_csv("submission_3.csv")

In [None]:
# Load a previously saved submission for comparison.
# NOTE(review): no cell in view writes submission_1.csv (only submission_3.csv
# and submission_2.csv are written), so this presumably depends on a file left
# over from an earlier session — confirm it exists before a Restart & Run All.
submission1 = pd.read_csv("submission_1.csv", index_col='pet_id')

In [None]:
# Preview the first rows of the loaded submission.
submission1.head()

In [None]:
# Reduced feature set: the six calendar columns are dropped, total_days is kept.
features_1 = [
    'condition', 'length(cm)', 'height(cm)', 'rec_area',
    'X1', 'X2', 'color_type_encoded', 'total_days',
]
X = np.array(train_data.loc[:, features_1])

In [None]:
# Test matrix restricted to the same reduced feature set.
X_test = np.array(test_data[features_1])

In [None]:
# Independent 80/20 splits for the two targets (pet first, then breed, so the
# global RNG is consumed in the same order as before).
pet_split = train_test_split(X, y_pet, test_size=0.20)
breed_split = train_test_split(X, y_breed, test_size=0.20)
X_pet_train, X_pet_valid, y_pet_train, y_pet_valid = pet_split
X_breed_train, X_breed_valid, y_breed_train, y_breed_valid = breed_split

In [None]:
# Refit the existing pet_classifier on the reduced-feature training part; this
# replaces the fit it held from the earlier full-feature experiment.
pet_classifier.fit(X_pet_train, y_pet_train)

In [None]:
# Mean accuracy of the reduced-feature pet model on its held-out 20%.
pet_classifier.score(X_pet_valid, y_pet_valid)

In [None]:
# Refit the existing breed_classifier on the reduced-feature training part
# (no stacked pet prediction in this experiment).
breed_classifier.fit(X_breed_train,y_breed_train)

In [None]:
# Mean accuracy of the reduced-feature breed model on its held-out 20%.
breed_classifier.score(X_breed_valid, y_breed_valid)

In [None]:
# Refit the pet model on every reduced-feature training row before predicting.
pet_classifier.fit(X, y_pet)

In [None]:
# Refit the breed model on every reduced-feature training row before predicting.
breed_classifier.fit(X,y_breed)

In [None]:
# Breed predictions for the test rows from the reduced-feature model.
breeds = breed_classifier.predict(X_test)

In [None]:
# Pet predictions for the test rows from the reduced-feature model.
pets = pet_classifier.predict(X_test)

In [None]:
# Assemble the reduced-feature predictions, indexed by pet_id.
submission_columns = {
    'pet_id': test_data.index,
    'breed_category': breeds,
    'pet_category': pets,
}
submission = pd.DataFrame(submission_columns).set_index('pet_id', drop=True)

In [None]:
# Write the reduced-feature predictions to the working directory.
submission.to_csv("submission_2.csv")

In [None]:
# Inputs for the iterative stacking experiment.
features = ['condition', 'length(cm)', 'height(cm)','rec_area', 'X1', 'X2', 'color_type_encoded',
           'issue_year', 'issue_month', 'issue_day', 'listing_year', 'listing_month', 'listing_day', 'total_days']
# Explicit copies: the next cell adds prediction columns to both frames, and
# writing into a bare slice of train_data raises SettingWithCopyWarning.
X_pet = train_data[features].copy()
X_breed = train_data[features].copy()

y_pet = train_data['pet_category']
y_breed = train_data['breed_category']

In [None]:
def _stacking_round(classifier, X_own, y_own, X_other, column, scores):
    """Run one half-round of the mutual stacking experiment.

    Scores `classifier` on a fresh 80/20 split of (X_own, y_own) and appends the
    validation accuracy to `scores`, then refits it on every row and writes its
    predictions for those rows into `X_other[column]`.
    """
    X_fit, X_held_out, y_fit, y_held_out = train_test_split(X_own, y_own, test_size=0.20)
    classifier.fit(X_fit, y_fit)
    scores.append(classifier.score(X_held_out, y_held_out))
    classifier.fit(X_own, y_own)
    # NOTE(review): these are in-sample predictions (same rows as the fit just
    # above), so the stacked column is likely close to the true label and the
    # scores recorded in later rounds are optimistic.
    X_other[column] = classifier.predict(X_own)


# One bootstrap round followed by this many refinement rounds.
N_REFINEMENT_ROUNDS = 500

# Work on private copies so the new columns are not written into slices.
X_pet = X_pet.copy()
X_breed = X_breed.copy()

pets_score = []
breeds_score = []
# The same estimator object is now used for scoring AND for the stacked
# prediction. Previously the score came from `breed_classifier`, a differently
# named object leaked from an earlier cell, while `breeds_classifier` produced
# the predictions.
breeds_classifier = DecisionTreeClassifier(random_state=13)
pets_classifier = DecisionTreeClassifier(random_state=13)

for _ in range(1 + N_REFINEMENT_ROUNDS):
    # Breed first, then pet: keeps the order in which the global RNG is consumed.
    _stacking_round(breeds_classifier, X_breed, y_breed, X_pet, 'predicted_breed', breeds_score)
    _stacking_round(pets_classifier, X_pet, y_pet, X_breed, 'predicted_pet', pets_score)
    


In [None]:
# Validation accuracy of the pet-category model across stacking rounds.
# Title and axis labels let the figure stand alone; the trailing semicolon
# suppresses the matplotlib object repr in the cell output.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(pets_score)
ax.set(title='Pet-category validation accuracy per round', xlabel='Round', ylabel='Accuracy');

In [None]:
# Validation accuracy of the breed-category model across stacking rounds.
# Title and axis labels let the figure stand alone; the trailing semicolon
# suppresses the matplotlib object repr in the cell output.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(breeds_score)
ax.set(title='Breed-category validation accuracy per round', xlabel='Round', ylabel='Accuracy');

In [None]:
# Index of the first round that reached the best pet-category validation score.
int(np.argmax(pets_score))

In [None]:
# Index of the first round that reached the best breed-category validation score.
int(np.argmax(breeds_score))

In [None]:
# Display every recorded pet-category validation score.
pets_score

In [None]:
# Display every recorded breed-category validation score.
breeds_score

In [None]:
# Distinct values taken by X1 in the training data.
train_data['X1'].unique()

In [None]:
# Distinct values taken by X2 in the training data.
train_data['X2'].unique()