# Import Libraries

In [29]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Feature Selection

In [30]:
# Column groups requested from the raw shelter CSV.
#   target   - the raw length-of-stay column the label is later derived from
#   features - candidate predictor columns (the model cell below only uses
#              age, breed_1 and condition; primary_color is carried along)
#   filters  - columns consulted when filtering rows
# NOTE: 'condition' intentionally appears in both `features` and `filters`.
target = [
    'time_in_shelter',
]
features = [
    'age',
    'breed_1',
    'condition',
    'primary_color',
]
filters = [
    'intake_type',
    'condition',
    'outcome',
]

# Import Data

In [31]:
# Read only the columns named in the feature / target / filter lists.
data = pd.read_csv(
    './datasets/cleaned_data/shelters_with_stats.csv',
    usecols=[*features, *target, *filters],
)

# Filters

In [32]:
# Shorten the column names used throughout the rest of the notebook.
data = data.rename(
    columns={
        'condition': 'cond',
        'breed_1': 'breed',
        'time_in_shelter': 'time',
        'primary_color': 'color',
    }
)

In [33]:
# Parse the length of stay as a timedelta and keep only its whole-day component.
# NOTE(review): this cell is not safe to re-run on its own — once 'time' holds
# day counts, pd.to_timedelta would treat those numbers as nanoseconds.
data = data.assign(
    time=lambda frame: pd.to_timedelta(frame['time']).dt.days
)

In [34]:
# Keep rows with a stay of at least 7 and fewer than 200 days, excluding
# 'euth_request' intakes, the listed outcomes, and 'neonatal' condition.
data = data.loc[
    lambda frame: (
        frame['time'].ge(7)
        & frame['time'].lt(200)
        & frame['intake_type'].ne('euth_request')
        & ~frame['outcome'].isin(['return_owner', 'missing', 'disposal', 'transfer'])
        & frame['cond'].ne('neonatal')
    )
]

In [35]:
# Merge condition labels: pregnant -> nursing; medical and sick -> injured.
data['cond'] = data['cond'].replace({
    'pregnant': 'nursing',
    'medical': 'injured',
    'sick': 'injured',
})

# Binary label: 1 when the stay exceeded 20 days, otherwise 0.
data['target'] = (data['time'] > 20).astype(int)

# Drop the columns that were only needed for filtering / labelling, then
# discard every row that still has a missing value.
data = data.drop(columns=['intake_type', 'outcome', 'time']).dropna()

# Export Data

In [36]:
# Persist the prepared modelling table (row index is not written).
data.to_csv(
    './datasets/cleaned_data/model_data.csv',
    index=False,
)

# Model

In [37]:
# Separate the binary label from the remaining columns.
X, y = data.drop(columns=['target']), data['target']

In [38]:
# Build the model matrix from the age, breed and condition columns, letting
# pd.get_dummies expand the categorical ones into indicator columns.
# The columns are selected from `data` rather than from `X` itself so this
# cell can be re-run safely: the previous `X = f(X)` form raised a KeyError on
# a second execution because X no longer contained the raw columns.
X = pd.get_dummies(data[['age', 'breed', 'cond']])

In [39]:
# Reproducible train/test split that preserves the class balance of `y`
# in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=123,
)

In [40]:
# Random forest with a fixed seed so the fit is reproducible.
rfc = RandomForestClassifier(
    n_estimators=300,
    max_depth=200,
    min_samples_split=10,
    random_state=123,
)
# Left as a bare expression so the notebook still displays the fitted estimator.
rfc.fit(X_train, y_train)

RandomForestClassifier(max_depth=200, min_samples_split=10, n_estimators=300,
                       random_state=123)

In [41]:
# (rows, columns) of the full feature matrix produced by pd.get_dummies,
# i.e. before it is divided into train and test partitions.
X.shape

(13120, 132)

# Score

In [42]:
# Mean accuracy on the training partition, then on the held-out partition
# (one value per line).
print(
    rfc.score(X_train, y_train),
    rfc.score(X_test, y_test),
    sep='\n',
)

0.6531504065040651
0.6387195121951219
