# Shelter Animal Outcomes 11 

## Bagging 

In [1]:
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn import cross_validation
import pandas as pd

In [2]:
df_train = pd.read_csv('../Shelter_train.csv')
df_test = pd.read_csv('../Shelter_test.csv')

In [3]:
x = df_train.ix[:, :-1]
y = df_train.ix[:, -1]
df_test = df_test.drop('ID', 1)

### Logistic Regression

In [4]:
clf = BaggingClassifier(LogisticRegression(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(clf, x, y, scoring="log_loss")

array([-1.04228698, -1.03388775, -1.02656614])

The bagging classifier seems to be doing worse than the individual Logistic Regression model which had a score of
[-0.97484274, -0.96763943, -0.9647996 ]. 

In [5]:
%timeit clf.fit(x, y)

1 loop, best of 3: 840 ms per loop


In [6]:
clf = clf.fit(x, y)
predictions = clf.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()

Unnamed: 0,Adoption,Died,Euthanasia,Return_to_owner,Transfer
1,0.307511,0.007824,0.055138,0.187988,0.441539
2,0.454759,0.004691,0.040908,0.239664,0.259978
3,0.546731,0.006263,0.032967,0.165094,0.248945
4,0.381165,0.006338,0.04548,0.201305,0.365713
5,0.53349,0.003683,0.032833,0.238712,0.191283


In [7]:
output.to_csv('../submission-Bagging-LogisticRegression.1.0.csv', index_label = 'ID')

### SVC

In [8]:
svc = BaggingClassifier(SVC(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(svc, x, y, scoring="log_loss")

array([-8.1003993 , -8.14571116, -7.73783385])

In [9]:
%timeit svc.fit(x, y)

1 loop, best of 3: 26.5 s per loop


In [10]:
clf = svc.fit(x, y)
predictions = clf.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()

Unnamed: 0,Adoption,Died,Euthanasia,Return_to_owner,Transfer
1,0.4,0.0,0.0,0.0,0.6
2,1.0,0.0,0.0,0.0,0.0
3,0.8,0.0,0.0,0.0,0.2
4,0.2,0.0,0.0,0.0,0.8
5,1.0,0.0,0.0,0.0,0.0


In [11]:
output.to_csv('../submission-Bagging-SVC.1.0.csv', index_label = 'ID')

### Decision Tree

In [12]:
decisionTree = BaggingClassifier(DecisionTreeClassifier(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(decisionTree, x, y, scoring="log_loss")

array([-0.94326021, -0.9617784 , -0.93770676])

In [13]:
%timeit decisionTree.fit(x, y)

10 loops, best of 3: 47.9 ms per loop


In [14]:
clf = decisionTree.fit(x, y)
predictions = clf.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()

Unnamed: 0,Adoption,Died,Euthanasia,Return_to_owner,Transfer
1,0.320629,0.00398,0.059626,0.19567,0.420095
2,0.462455,0.002366,0.043648,0.287777,0.203754
3,0.522407,0.004714,0.03761,0.100372,0.334897
4,0.305996,0.005094,0.072303,0.211643,0.404964
5,0.437375,0.002003,0.047601,0.299223,0.213798


In [15]:
output.to_csv('../submission-Bagging-DecisionTree.1.0.csv', index_label = 'ID')

### KNN

In [16]:
knn = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(decisionTree, x, y, scoring="log_loss")

array([-0.94996338, -0.9414747 , -0.96176933])

In [17]:
%timeit knn.fit(x, y)

1 loop, best of 3: 343 ms per loop


In [18]:
knn = decisionTree.fit(x, y)
predictions = knn.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()

Unnamed: 0,Adoption,Died,Euthanasia,Return_to_owner,Transfer
1,0.290128,0.004905,0.081111,0.198852,0.425003
2,0.485573,0.001971,0.040703,0.276404,0.195349
3,0.520471,0.005207,0.038347,0.112737,0.323239
4,0.271061,0.00627,0.091708,0.241826,0.389136
5,0.453361,0.002222,0.043926,0.297309,0.203182


In [19]:
output.to_csv('../submission-Bagging-KNN.1.0.csv', index_label = 'ID')

### Naive Bayes

In [20]:
gaussianNB = BaggingClassifier(GaussianNB(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(decisionTree, x, y, scoring="log_loss")

array([-0.94672725, -0.94512211, -0.9854242 ])

In [21]:
%timeit gaussianNB.fit(x, y)

10 loops, best of 3: 54.2 ms per loop


In [22]:
gaussianNB = decisionTree.fit(x, y)
predictions = gaussianNB.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()

Unnamed: 0,Adoption,Died,Euthanasia,Return_to_owner,Transfer
1,0.281955,0.006378,0.065493,0.188658,0.457517
2,0.481469,0.001358,0.037355,0.276645,0.203173
3,0.563319,0.002952,0.029153,0.156778,0.247799
4,0.303255,0.003835,0.083779,0.209596,0.399535
5,0.444225,0.001426,0.040759,0.298665,0.214924


In [23]:
output.to_csv('../submission-Bagging-Naive-Bayes.1.0.csv', index_label = 'ID')