**Modeling Animal Shelter Outcomes (Model Tweaks)**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from keras.models import load_model
from sklearn.metrics import precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay, classification_report, accuracy_score, f1_score
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import uniform


Import the data and drop the outcome-related columns

In [None]:
# Colab-only step: attach Google Drive at the standard mount point.
from google.colab import drive

drive_root = '/content/drive'
drive.mount(drive_root)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the main shelter dataset from the mounted Drive folder.
main_csv_path = '/content/drive/MyDrive/animal_shelter_outcomes/datasets/main.csv'
df = pd.read_csv(main_csv_path)

In [None]:
# Peek at the first rows to check the load and see which columns are available.
df.head()

Unnamed: 0,animal_id_in,animal_type,color,breed,intake_type,outcome_type,intake_condition,outcome_subtype,datetime_in,datetime_out,year_in,month_in,day_in,year_out,month_out,day_out,prev_adoption,prev_transfer,prev_ret_to_owner,prev_rto_adopt,prev_disposal,prev_missing,prev_relocate,age_upon_outcome,age_upon_intake,sex_upon_intake,sex_upon_outcome,is_named_in,is_named_out,found_location,name_in,name_out
0,A006100,Dog,yellow white,spinone italiano mix,Public Assist,Return to Owner,Normal,Unknown,2014-12-19 10:21:00,2014-12-20 16:35:00,2014,12,Friday,2014,12,Saturday,0,0,1,0,0,0,0,7.0,7.0,Neutered Male,Neutered Male,1,1,8700 Research Blvd in Austin (TX),Scamp,Scamp
1,A006100,Dog,yellow white,spinone italiano mix,Public Assist,Return to Owner,Normal,Unknown,2014-03-07 14:26:00,2014-03-08 17:10:00,2014,3,Friday,2014,3,Saturday,0,0,0,0,0,0,0,6.0,6.0,Neutered Male,Neutered Male,1,1,8700 Research in Austin (TX),Scamp,Scamp
2,A047759,Dog,tricolor,dachshund,Owner Surrender,Transfer,Normal,Partner,2014-04-02 15:55:00,2014-04-07 15:12:00,2014,4,Wednesday,2014,4,Monday,0,0,0,0,0,0,0,1.0,1.0,Neutered Male,Neutered Male,1,1,Austin (TX),Oreo,Oreo
3,A134067,Dog,brown white,sheepdog shetland,Public Assist,Return to Owner,Injured,Unknown,2013-11-16 09:02:00,2013-11-16 11:54:00,2013,11,Saturday,2013,11,Saturday,0,0,0,0,0,0,0,1.0,1.0,Neutered Male,Neutered Male,1,1,12034 Research Blvd in Austin (TX),Bandit,Bandit
4,A141142,Dog,white black,retriever labrador bull pit,Stray,Return to Owner,Aged,Unknown,2013-11-16 14:46:00,2013-11-17 11:40:00,2013,11,Saturday,2013,11,Sunday,0,0,0,0,0,0,0,1.0,1.0,Spayed Female,Spayed Female,1,1,Austin (TX),Bettie,Bettie


In [None]:
# Fields recorded at outcome time (presumably removed so they cannot leak the
# target -- confirm intent), plus identifier, name, location and timestamp columns.
columns_to_drop = [
    'animal_id_in',
    'outcome_subtype',
    'year_out',
    'month_out',
    'day_out',
    'datetime_out',
    'name_out',
    'is_named_out',
    'sex_upon_outcome',
    'age_upon_outcome',
    'name_in',
    'found_location',
    'datetime_in',
]
df = df.drop(columns=columns_to_drop)

In [None]:
# Check which columns remain after the drop.
df.head()

Unnamed: 0,animal_type,color,breed,intake_type,outcome_type,intake_condition,year_in,month_in,day_in,prev_adoption,prev_transfer,prev_ret_to_owner,prev_rto_adopt,prev_disposal,prev_missing,prev_relocate,age_upon_intake,sex_upon_intake,is_named_in
0,Dog,yellow white,spinone italiano mix,Public Assist,Return to Owner,Normal,2014,12,Friday,0,0,1,0,0,0,0,7.0,Neutered Male,1
1,Dog,yellow white,spinone italiano mix,Public Assist,Return to Owner,Normal,2014,3,Friday,0,0,0,0,0,0,0,6.0,Neutered Male,1
2,Dog,tricolor,dachshund,Owner Surrender,Transfer,Normal,2014,4,Wednesday,0,0,0,0,0,0,0,1.0,Neutered Male,1
3,Dog,brown white,sheepdog shetland,Public Assist,Return to Owner,Injured,2013,11,Saturday,0,0,0,0,0,0,0,1.0,Neutered Male,1
4,Dog,white black,retriever labrador bull pit,Stray,Return to Owner,Aged,2013,11,Saturday,0,0,0,0,0,0,0,1.0,Spayed Female,1


In [None]:
# (rows, columns) of the frame at this point.
df.shape

(127144, 19)

Consolidate the outcome type categories to see whether it makes a difference in modeling

In [None]:
# Row count per outcome label before any consolidation.
df['outcome_type'].value_counts()

Adoption           57116
Transfer           37056
Return to Owner    21827
Euthanasia          8581
Died                1134
Rto-Adopt            758
Disposal             584
Missing               66
Relocate              22
Name: outcome_type, dtype: int64

In [None]:
# Merge the raw outcome labels into broader classes. Each key is the label that
# is kept; its list holds the raw labels folded into it.
outcome_groups = {
    'Adoption': ['Adoption'],
    'Transfer': ['Transfer', 'Relocate'],
    'Return to Owner': ['Return to Owner', 'Rto-Adopt'],
    'Died': ['Euthanasia', 'Died', 'Disposal'],
    'Missing': ['Missing'],
}

# Flatten to a raw-label -> consolidated-label lookup for Series.map.
# Any label absent from the lookup would become NaN.
outcome_lookup = {
    raw_label: group_label
    for group_label, raw_labels in outcome_groups.items()
    for raw_label in raw_labels
}
df['outcome_type'] = df['outcome_type'].map(outcome_lookup)

In [None]:
# Class counts after the consolidation above.
df['outcome_type'].value_counts()

Adoption           57116
Transfer           37078
Return to Owner    22585
Died               10299
Missing               66
Name: outcome_type, dtype: int64

In [None]:
# One-hot encode the categorical predictors; drop_first=True omits the first
# level of each column so the dummies are not perfectly collinear.
categorical_columns = [
    'animal_type',
    'color',
    'breed',
    'intake_type',
    'intake_condition',
    'year_in',
    'month_in',
    'day_in',
    'sex_upon_intake',
]
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

In [None]:
# Inspect the one-hot encoded frame.
df.head()

Unnamed: 0,outcome_type,prev_adoption,prev_transfer,prev_ret_to_owner,prev_rto_adopt,prev_disposal,prev_missing,prev_relocate,age_upon_intake,is_named_in,animal_type_Cat,animal_type_Dog,animal_type_Livestock,animal_type_Other,color_agouti cream,color_apricot,color_black,color_black buff,color_black sable,color_black silver,color_black smoke,color_blue,color_blue black,color_blue brindle black,color_blue calico,color_blue calico tabby,color_blue chocolate tick,color_blue cream,color_blue cream buff,color_blue cream calico,color_blue cream tabby,color_blue cream tiger,color_blue fawn,color_blue gray,color_blue gray merle,color_blue gray smoke,color_blue liver,color_blue merle,color_blue merle black,color_blue merle cream,...,intake_type_Wildlife,intake_condition_Behavior,intake_condition_Feral,intake_condition_Injured,intake_condition_Medical,intake_condition_Normal,intake_condition_Nursing,intake_condition_Other,intake_condition_Pregnant,intake_condition_Sick,intake_condition_Space,year_in_2014,year_in_2015,year_in_2016,year_in_2017,year_in_2018,year_in_2019,year_in_2020,year_in_2021,month_in_2,month_in_3,month_in_4,month_in_5,month_in_6,month_in_7,month_in_8,month_in_9,month_in_10,month_in_11,month_in_12,day_in_Monday,day_in_Saturday,day_in_Sunday,day_in_Thursday,day_in_Tuesday,day_in_Wednesday,sex_upon_intake_Intact Male,sex_upon_intake_Neutered Male,sex_upon_intake_Spayed Female,sex_upon_intake_Unknown
0,Return to Owner,0,0,1,0,0,0,0,7.0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
1,Return to Owner,0,0,0,0,0,0,0,6.0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,Transfer,0,0,0,0,0,0,0,1.0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
3,Return to Owner,0,0,0,0,0,0,0,1.0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0
4,Return to Owner,0,0,0,0,0,0,0,1.0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0


Create the training and test sets

In [None]:
# Features are every column except the target.
X = df.drop(columns=['outcome_type'])

y = df['outcome_type']

# Stratify so each outcome class keeps its proportion in both splits.
# random_state pins the shuffle so the split, and therefore the scores below,
# are the same on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [None]:
# Learn the scaling statistics from the training features only, then apply
# the same fitted scaler to both splits.
ss = StandardScaler().fit(X_train)

X_train_sc, X_test_sc = ss.transform(X_train), ss.transform(X_test)

Try an extra-trees classifier on the cleaned-up data

In [None]:
# Extra-trees ensemble with default hyperparameters. random_state fixes the
# randomized tree construction so the reported scores can be reproduced.
model = ExtraTreesClassifier(random_state=42)

In [None]:
# Train the extra-trees model on the scaled training features.
model.fit(X_train_sc, y_train)



ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100,
                     n_jobs=None, oob_score=False, random_state=None, verbose=0,
                     warm_start=False)

In [None]:
# Predicted labels for the held-out rows. NOTE(review): `predictions` is not
# referenced again in the cells shown here.
predictions = model.predict(X_test_sc)

In [None]:
# Mean accuracy on each split; a large gap between the two indicates overfitting.
et_train_score = model.score(X_train_sc, y_train)
et_test_score = model.score(X_test_sc, y_test)
print(f'training score: {et_train_score}')
print(f'testing score: {et_test_score}')

training score: 0.9933408838272615
testing score: 0.6746051720883408


Try a random forest

In [None]:
# Random forest with default hyperparameters. random_state fixes the bootstrap
# sampling and feature selection so the reported scores can be reproduced.
rf = RandomForestClassifier(random_state=42)

In [None]:
# Train the random forest on the scaled training features.
rf.fit(X_train_sc, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
# Predicted labels for the held-out rows. NOTE(review): `preds` is not
# referenced again in the cells shown here.
preds = rf.predict(X_test_sc)

In [None]:
# Mean accuracy on each split; a large gap between the two indicates overfitting.
rf_train_score = rf.score(X_train_sc, y_train)
rf_test_score = rf.score(X_test_sc, y_test)
print(f'training score: {rf_train_score}')
print(f'testing score: {rf_test_score}')

training score: 0.9935401329725875
testing score: 0.6898949222928333


Try a binary outcome type (adopted vs. not adopted)

In [None]:
# Class counts before collapsing the target to two classes.
df['outcome_type'].value_counts()

Adoption           57116
Transfer           37078
Return to Owner    22585
Died               10299
Missing               66
Name: outcome_type, dtype: int64

In [None]:
# Collapse the outcome classes into a binary target: adopted vs. everything else.
# The two target labels are also listed as identity keys so that re-running this
# cell leaves the column unchanged. Without them a second run would turn every
# value into NaN, because Series.map yields NaN for values missing from the dict.
df['outcome_type'] = df['outcome_type'].map({
    'Adoption': 'Adopted',
    'Transfer': 'Not Adopted',
    'Return to Owner': 'Not Adopted',
    'Died' : 'Not Adopted',
    'Missing' : 'Not Adopted',
    'Adopted': 'Adopted',
    'Not Adopted': 'Not Adopted'
})

In [None]:
# Class balance of the binary target.
df['outcome_type'].value_counts()

Not Adopted    70028
Adopted        57116
Name: outcome_type, dtype: int64

In [None]:
# Rebuild features and target from the current frame so the split uses the
# current contents of outcome_type.
X = df.drop(columns=['outcome_type'])

y = df['outcome_type']

# Stratify to keep the class ratio equal in both splits. random_state pins the
# shuffle so the split and the reported scores are reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [None]:
# Refit the scaler on the new training split only, then apply it to both splits.
ss = StandardScaler().fit(X_train)

X_train_sc, X_test_sc = ss.transform(X_train), ss.transform(X_test)

In [None]:
# Fresh random forest for the binary target. random_state fixes the bootstrap
# sampling and feature selection so the reported scores can be reproduced.
rf = RandomForestClassifier(random_state=42)

In [None]:
# Train the random forest on the scaled training features.
rf.fit(X_train_sc, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
# Predicted labels for the held-out rows. NOTE(review): `preds` is not
# referenced again in the cells shown here.
preds = rf.predict(X_test_sc)

In [None]:
# Mean accuracy on each split; a large gap between the two indicates overfitting.
rf_train_score = rf.score(X_train_sc, y_train)
rf_test_score = rf.score(X_test_sc, y_test)
print(f'training score: {rf_train_score}')
print(f'testing score: {rf_test_score}')

training score: 0.9946307598733195
testing score: 0.7487887749323602
