# Part 3: Model Selection and Evaluation

#### Libraries

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlite3

#Preprocessing and Feature Selection
from sklearn import preprocessing
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

#Classification Models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

#YellowBrick Visualizers
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.classifier import ClassificationReport
from yellowbrick.model_selection import FeatureImportances
from sklearn.metrics import accuracy_score

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Modeling

#### Select Features and Target

In [None]:
# Names of the predictor columns selected for modeling.
feature_cols = [
    'day',
    'tod_num',
    'temperature',
    'uv_index',
    'weekday',
    'bg_cat',
]

In [None]:
# Target labels are read from df_upsampled; predictors are read from scaler_df.
# NOTE(review): both frames are created outside this section — confirm they
# have the same rows in the same order, otherwise features and labels will
# be misaligned.
target = df_upsampled['crime_rate_cat']
features = scaler_df[feature_cols]

#### Split-Test-Train

In [None]:
# Class labels handed to the Yellowbrick visualizers.
# Series.unique() returns values in order of first appearance, whereas
# Yellowbrick expects `classes` ordered like the sorted classes the
# estimator discovers during fit(), so sort them explicitly.
classes = sorted(target.unique())

#### Split Test and Training Datasets

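Standardize AFTER the split: fit the scaler on the training set only and reuse it to transform the test set. Otherwise, statistics from the test data leak into training.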

In [None]:
# Hold out 20% of the rows for evaluation.
# The split is made on the `features` / `target` objects selected above
# (the previous `X` / `y` names are not defined anywhere in this section).
# NOTE(review): the target comes from an up-sampled frame; if up-sampling
# duplicated rows before this split, copies of the same row can end up in
# both train and test — confirm against the preprocessing notebook.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2
)

In [None]:
# StandardScaler is reached through the already-imported `preprocessing`
# module (the bare name `StandardScaler` is never imported in this notebook).
scaler = preprocessing.StandardScaler()

# Learn the mean/variance from the training data only, then apply that same
# transformation to the test data. Re-fitting on X_test would scale the two
# sets with different statistics and leak test information into evaluation.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### Model

- K-Nearest Neighbors Classifier
- Gradient Boosting Classifier
- Decision Tree Classifier
- Random Forest Classifier
- Extra Trees Classifier
- Bagging Classifier with Decision Tree Classifier Estimator (Default value)
- Bagging Classifier with Random Forest Classifier Estimator

In [None]:
# [display name, unfitted estimator] pairs; every estimator uses its defaults.
# The wrapped estimator of BaggingClassifier is passed positionally: the
# keyword was renamed from `base_estimator` to `estimator` in scikit-learn
# 1.2 (old name removed in 1.4), and the first positional argument works
# with both spellings.
models_list = [
    ['KNN', KNeighborsClassifier()],
    ['GradientBoosting', GradientBoostingClassifier()],
    ['DecisionTreeClassifier', DecisionTreeClassifier()],
    ['RandomForestClassifier', RandomForestClassifier()],
    ['ExtraTreesClassifier', ExtraTreesClassifier()],
    ['BC_Decision', BaggingClassifier(DecisionTreeClassifier())],
    ['BC_RMF', BaggingClassifier(RandomForestClassifier())],
]

In [None]:
# Maps display name -> fitted estimator for every model that trained cleanly.
models_dict = {}

for label, estimator in models_list:
    try:
        fitted = estimator.fit(X_train, y_train)
    except Exception as err:
        # Best effort: report the failure and continue with the next model.
        print("{} failed; {}.".format(label, err))
    else:
        models_dict[label] = fitted
        print("{} fitted".format(label))

In [None]:
# Print the hold-out accuracy of each fitted model, one line per model.
for name in models_dict:
    predictions = models_dict[name].predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(name, " ", accuracy)

#### Classification Reports

In [None]:
# Draw a Yellowbrick classification report for each fitted model, scored on
# the hold-out set. `classes=None` leaves the label names to Yellowbrick.
# Only the estimators are needed here, so iterate over the dict values.
for model in models_dict.values():
    cr = ClassificationReport(model, classes=None)
    cr.score(X_test, y_test)
    # show() replaces poof(), which is deprecated since Yellowbrick 1.0.
    cr.show()

#### Confusion Matrix

In [None]:
# Draw a Yellowbrick confusion matrix for each fitted model, scored on the
# hold-out set and labelled with `classes`.
# Only the estimators are needed here, so iterate over the dict values.
for model in models_dict.values():
    cm = ConfusionMatrix(model, classes=classes)
    cm.score(X_test, y_test)
    # show() replaces poof(), which is deprecated since Yellowbrick 1.0.
    cm.show()