In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import missingno as msno
import seaborn as sns
import pandas_profiling

In [2]:
train_df = pd.read_csv('Dataset/train.csv')
soln_df = pd.read_csv('Dataset/test.csv')

In [3]:
train_df.drop(['pet_id'], axis = 1, inplace = True)
main_test = soln_df.drop(['pet_id'], axis = 1)

In [4]:
def pre_process(df):
    df.condition.fillna(3, inplace=True)
    df['time_to_shelter'] = (pd.to_datetime(df.listing_date) - pd.to_datetime(df.issue_date)).dt.days
    df.drop(['issue_date','listing_date'], axis = 1, inplace=True)
    
    top_colors = [
                     'Black',
                     'White',
                     'Brown',
                     'Brown Tabby',
                     'Tan',
                     'Blue',
                     'Orange Tabby',
                     'Red',
                     'Brown Brindle',
                     'Tricolor',
                     'Blue Tabby',
                     'Tortie',
                     'Calico',
                     'Gray',
                     'Chocolate',
                     'Torbie',
                     'Cream Tabby',
                     'Sable',
                     'Cream',
                     'Fawn',
                     'Yellow',
                     'Buff',
                     'Lynx Point',
                     'Blue Merle'
                ]
    
    def reduce_color(color):
        if color in top_colors:
            return color
        return 'Other'
    
    df['color_mod'] = df.color_type.apply(reduce_color)
    df.drop(['color_type'], axis = 1, inplace=True)
    df.drop(['length(m)','height(cm)'], axis = 1, inplace= True)
    
    return df

In [5]:
train_df = pre_process(train_df)
main_test = pre_process(main_test)

In [6]:
train_y1 = train_df.loc[:,['breed_category']]
train_y2 = train_df.loc[:,['pet_category']]

In [7]:
train_X = train_df.drop(['breed_category','pet_category'], axis=1)

In [8]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [9]:
ct = ColumnTransformer([("color", OneHotEncoder(sparse=False, drop='first'), [4])], remainder = 'passthrough')
ct.fit(train_X)

ColumnTransformer(n_jobs=None, remainder='passthrough', sparse_threshold=0.3,
                  transformer_weights=None,
                  transformers=[('color',
                                 OneHotEncoder(categories='auto', drop='first',
                                               dtype=<class 'numpy.float64'>,
                                               handle_unknown='error',
                                               sparse=False),
                                 [4])],
                  verbose=False)

In [10]:
train_X = ct.transform(train_X)
main_test = ct.transform(main_test)

In [11]:
print(train_X.shape)
print(main_test.shape)

(18834, 28)
(8072, 28)


In [12]:
from sklearn.model_selection import train_test_split

X_train_breed, X_test_breed, y_train_breed, y_test_breed = train_test_split(train_X, train_y1, test_size = 0.2)
X_train_pet, X_test_pet, y_train_pet, y_test_pet = train_test_split(train_X, train_y2, test_size = 0.2)

In [15]:
from imblearn.over_sampling import RandomOverSampler

os = RandomOverSampler()

X_train_breed, y_train_breed = os.fit_resample(X_train_breed, y_train_breed)
X_train_pet, y_train_pet = os.fit_resample(X_train_pet, y_train_pet)

Using TensorFlow backend.


In [17]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from keras.layers import Dropout
from sklearn.metrics import confusion_matrix 

In [18]:
X_train_breed.shape

(21480, 28)

In [19]:
y_train_breed.shape

(21480, 1)

In [20]:
def breed_model():

    model = Sequential()
    model.add(Dense(20, input_dim=28, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(15,  activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(5,  activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(3, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [21]:
def pet_model():

    model = Sequential()
    model.add(Dense(20, input_dim=28, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(15,  activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(5,  activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(4, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [22]:
estimator = KerasClassifier(build_fn=breed_model, epochs=100, batch_size=5, verbose=1)
estimator.fit(X_train_breed,pd.get_dummies(pd.DataFrame(y_train_breed).astype(str)).values)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.callbacks.History at 0x7fc0e4395110>

In [23]:
estimator_pet = KerasClassifier(build_fn=pet_model, epochs=100, batch_size=5, verbose=1)
estimator_pet.fit(X_train_pet,pd.get_dummies(pd.DataFrame(y_train_pet).astype(str)).values)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.callbacks.History at 0x7fc0e4b51f50>

In [24]:
breed_preds = estimator.predict(X_test_breed)



In [25]:
pet_preds = estimator_pet.predict(X_test_pet)



In [28]:
confusion_matrix(y_test_breed, breed_preds)

array([[1839,    0,    1],
       [ 379, 1276,    2],
       [   0,    1,  269]])

In [29]:
confusion_matrix(y_test_pet, pet_preds)

array([[   7,    0,    2,    4,    0],
       [  54,  962,  379,    4,    0],
       [ 182,  132, 1844,    2,    0],
       [   0,    0,    0,    0,    0],
       [  69,    4,   26,   96,    0]])

In [33]:
breed_preds = estimator.predict(main_test)
pet_preds = estimator_pet.predict(main_test)



In [34]:
submission = pd.DataFrame()

In [35]:
submission['pet_id'] = soln_df.pet_id

In [36]:
submission['breed_category'] = breed_preds

In [37]:
submission['pet_category'] = pet_preds

In [38]:
submission.breed_category = submission.breed_category.astype('int64')

In [39]:
submission

Unnamed: 0,pet_id,breed_category,pet_category
0,ANSL_75005,1,2
1,ANSL_76663,0,1
2,ANSL_58259,0,2
3,ANSL_67171,0,2
4,ANSL_72871,0,2
...,...,...,...
8067,ANSL_66809,0,0
8068,ANSL_59041,1,2
8069,ANSL_60034,1,2
8070,ANSL_58066,2,0


In [40]:
submission.to_csv('submission_3.csv',index=False)