## Python loading

In [30]:
import sys

import pandas as pd
from pandas import DataFrame, Series

import numpy as np

import matplotlib.pyplot as plt
import matplotlib

#import xgboost as xgb

from sklearn import tree
from sklearn.ensemble import RandomForestClassifier 

%matplotlib inline

In [31]:
train = pd.read_csv("../../data/train.csv", index_col=0)
test = pd.read_csv("../../data/test.csv", index_col=0)

train.head(2)

Unnamed: 0_level_0,Name,DateTime,OutcomeType,OutcomeSubtype,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color
AnimalID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A671945,Hambone,2014-02-12 18:22:00,Return_to_owner,,Dog,Neutered Male,1 year,Shetland Sheepdog Mix,Brown/White
A656520,Emily,2013-10-13 12:44:00,Euthanasia,Suffering,Cat,Spayed Female,1 year,Domestic Shorthair Mix,Cream Tabby


## Data fitting

In [32]:
train.SexuponOutcome.value_counts()

Neutered Male    9779
Spayed Female    8820
Intact Male      3525
Intact Female    3511
Unknown          1093
Name: SexuponOutcome, dtype: int64

In [33]:
train = pd.read_csv("../../data/train.csv", index_col=0)
test = pd.read_csv("../../data/test.csv", index_col=0)

def categorising(dataset):
    opt = dataset.copy()

    opt["Intact"] = opt.SexuponOutcome.replace("Intact Male", "Yes")
    opt.Intact = opt.Intact.replace("Neutered Male", "No")
    opt.Intact = opt.Intact.replace("Spayed Female", "No")
    opt.Intact = opt.Intact.replace("Intact Female", "Yes")
    opt.Intact = opt.Intact.replace("Unknown", np.nan)

    opt.Intact = opt.Intact.astype("category")
    opt["IntactValue"] = opt.Intact.cat.codes

    opt.SexuponOutcome = opt.SexuponOutcome.replace("Neutered Male", "Male")
    opt.SexuponOutcome = opt.SexuponOutcome.replace("Intact Male", "Male")
    opt.SexuponOutcome = opt.SexuponOutcome.replace("Spayed Female", "Female")
    opt.SexuponOutcome = opt.SexuponOutcome.replace("Intact Female", "Female")
    opt.SexuponOutcome = opt.SexuponOutcome.replace("Unknown", np.nan)

    opt.SexuponOutcome = opt.SexuponOutcome.astype("category")
    opt["SexuponOutcomeValue"] = opt.SexuponOutcome.cat.codes
    
    opt.Color = opt.Color.astype("category")
    opt["ColorValue"] = opt.Color.cat.codes

    return opt

train = categorising(train)
test = categorising(test)

opt = train.copy()

opt.Intact.value_counts()

No     18599
Yes     7036
dtype: int64

In [34]:


opt.Color.value_counts()

opt[(opt.Color == "Blue") & (opt.AnimalType == "Dog")].Breed.value_counts()

Pit Bull Mix                     34
Weimaraner Mix                    4
Weimaraner                        3
Labrador Retriever Mix            3
Blue Lacy Mix                     3
Chihuahua Shorthair Mix           2
Pit Bull                          2
Chinese Sharpei                   1
Pit Bull/Weimaraner               1
Great Dane/Pit Bull               1
Australian Kelpie Mix             1
Chinese Sharpei/Great Dane        1
Blue Lacy/Australian Kelpie       1
Pit Bull/Flat Coat Retriever      1
Australian Shepherd/Chow Chow     1
Chow Chow                         1
Cane Corso Mix                    1
Great Dane Mix                    1
Staffordshire Mix                 1
Australian Kelpie/Blue Lacy       1
Standard Schnauzer Mix            1
Name: Breed, dtype: int64

In [35]:
opt.AnimalType = opt.AnimalType.astype("category")
opt.AnimalType.value_counts()

Dog    15595
Cat    11134
dtype: int64

In [36]:
opt.OutcomeType = opt.OutcomeType.astype("category")
opt.OutcomeType.value_counts()


Adoption           10769
Transfer            9422
Return_to_owner     4786
Euthanasia          1555
Died                 197
dtype: int64

## Prediction

In [46]:
#create and train the random forest
    #multi-core CPUs can use: rf = RandomForestClassifier(n_estimators=100, n_jobs=2)
rf = RandomForestClassifier(n_estimators=10, n_jobs=4)

d_train = opt[["SexuponOutcomeValue", "IntactValue", "ColorValue"]].values

#d_train
d_target = opt.OutcomeType.cat.codes.values

In [47]:
d_train
#d_train.reshape(-1, 1)

array([[  1,   0, 130],
       [  0,   0, 167],
       [  1,   0,  86],
       ..., 
       [  1,   0, 353],
       [  1,   1,   5],
       [  1,   1, 111]], dtype=int16)

In [48]:
rf.fit(d_train, d_target)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=4,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [60]:
def pred(row):   
    pred = rf.predict([[row['SexuponOutcomeValue'], row['IntactValue'], row['ColorValue']]])[0]
    predTitle = opt.OutcomeType.cat.categories[pred]
    return predTitle

#test["OutcomeType"] = test.apply(lambda row: pred(row), axis=1)

myTest = test.head(50).copy()
myTest["OutcomeType"] = myTest.apply(lambda row: pred(row), axis=1)

myTest.head(5)

Unnamed: 0_level_0,Name,DateTime,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color,Intact,IntactValue,SexuponOutcomeValue,ColorValue,OutcomeType
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Summer,2015-10-12 12:15:00,Dog,Female,10 months,Labrador Retriever Mix,Red/White,Yes,1,0,185,Transfer
2,Cheyenne,2014-07-26 17:59:00,Dog,Female,2 years,German Shepherd/Siberian Husky,Black/Tan,No,0,0,28,Return_to_owner
3,Gus,2016-01-13 12:20:00,Cat,Male,1 year,Domestic Shorthair Mix,Brown Tabby,No,0,1,79,Euthanasia
4,Pongo,2013-12-28 18:12:00,Dog,Male,4 months,Collie Smooth Mix,Tricolor,Yes,1,1,229,Return_to_owner
5,Skooter,2015-09-24 17:59:00,Dog,Male,2 years,Miniature Poodle Mix,White,No,0,1,238,Adoption
6,Beau,2015-06-23 11:17:00,Dog,Male,3 years,Beagle Mix,Brown/White,No,0,1,95,Return_to_owner
7,Bobo,2014-03-12 09:45:00,Cat,Male,13 years,Domestic Medium Hair Mix,Brown Tabby/White,No,0,1,84,Adoption
8,Abby,2014-06-25 08:27:00,Cat,Female,6 months,Domestic Shorthair Mix,Brown Tabby,No,0,0,79,Adoption
9,Ruby Grace,2014-11-12 18:05:00,Dog,Female,3 months,Cairn Terrier,Black/Cream,No,0,0,23,Return_to_owner
10,Ruby,2014-04-07 17:41:00,Dog,Female,1 year,Pit Bull Mix,Brown/White,No,0,0,95,Adoption
