# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Feature Selection

In [2]:
# Label column the classifier is trained to predict.
target = ['at_risk']

# Predictor columns requested from the full modelling dataset.
features = [
    'age',
    'breed_1',
    'condition',
    'primary_color',
]

# Currently disabled list of extra columns:
# filters = ['intake_type', 'condition', 'outcome']

# Import Data

In [3]:
# Read only the columns needed for modelling (predictors plus the label).
data = pd.read_csv(
    './datasets/working_data/model_data_full.csv',
    usecols=features + target,  # + filters
)

In [4]:
# Peek at the first two rows to confirm which columns were loaded.
data.head(n=2)

Unnamed: 0,condition,age,primary_color,breed_1,at_risk
0,normal,senior,black,dachshund,1
1,aged,senior,tan,chihuahua,0


# Rename & Encode Columns

In [5]:
# Switch to the shorter column names used by the rest of the notebook.
# Rebinding `data` (instead of mutating in place) yields the same frame.
data = data.rename(
    columns={
        'condition': 'cond',
        'breed_1': 'breed',
        'at_risk': 'target',
        'primary_color': 'color',
    }
)

In [6]:
# Show the first two rows again to verify the renamed columns.
data.head(n=2)

Unnamed: 0,cond,age,color,breed,target
0,normal,senior,black,dachshund,1
1,aged,senior,tan,chihuahua,0


In [7]:
# Count missing values per column.
data.isna().sum(axis=0)

cond      0
age       0
color     0
breed     0
target    0
dtype: int64

In [9]:
# Binary-encode the condition column: 1 for 'normal', 0 for every other value.
# The dtype guard makes re-running this cell a no-op: once the column is
# numeric, comparing it with 'normal' again would overwrite every row with 0.
if not pd.api.types.is_numeric_dtype(data['cond']):
    data['cond'] = np.where(data['cond'] == 'normal', 1, 0)

# Export Data

In [16]:
# Write the prepared frame to disk; the row index is not written out.
data.to_csv(
    './datasets/cleaned_data/model_data.csv',
    index=False,
)

# Model

In [17]:
# Separate the predictor columns from the label column.
X = data.drop(columns='target')
y = data['target']

In [18]:
# One-hot encode the categorical predictors. With handle_unknown='ignore',
# categories not seen during fitting are encoded as all zeros at transform time.
# The source columns are read from `data` rather than `X`: `X` is overwritten
# with the encoded matrix below, so indexing `X` by column name would fail if
# this cell were run a second time.
# NOTE(review): 'color' is loaded and exported but not encoded here - confirm
# that leaving it out of the model is intentional.
encoded_cols = ['age', 'breed', 'cond']
raw_values = np.array(data[encoded_cols])  # plain ndarray (no column labels)
ohe = OneHotEncoder(handle_unknown='ignore').fit(raw_values)
X = ohe.transform(raw_values)

In [19]:
# Seeded, label-stratified split using the default test size.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=123,
)

In [20]:
# Random forest with a fixed seed so repeated fits give the same model.
rfc = RandomForestClassifier(
    n_estimators=300,
    max_depth=200,
    min_samples_split=10,
    random_state=123,
)
# Left as the last expression so the notebook displays the fitted estimator.
rfc.fit(X_train, y_train)

RandomForestClassifier(max_depth=200, min_samples_split=10, n_estimators=300,
                       random_state=123)

# Score

In [21]:
# Score on the training split first, then on the held-out split, one per line.
print(rfc.score(X_train, y_train), rfc.score(X_test, y_test), sep='\n')

0.6537533049775837
0.6320689655172413


Save the model and encoder for the web app.

In [22]:
# Serialise the fitted classifier.
with open("./models/model_rfc.pkl", "wb") as model_file:
    pickle.dump(rfc, model_file)

# Serialise the fitted encoder so inputs can be transformed the same way later.
with open("./models/encode_data.pkl", "wb") as encoder_file:
    pickle.dump(ohe, encoder_file)

[Run Next](https://github.com/gwoodstock/project4/blob/main/9_models_GPU.ipynb): XGB & Neural Net.