# Overview
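Let's create an overview of the models we have so far. For each model, we'll apply grouped 10-fold cross-validation (folds are grouped by the `Test` column, so rows from the same test never end up in both the training and the test split) to get the average accuracy and its standard deviation. We'll also test several different feature combinations.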

In [20]:
from sklearn import metrics
from sklearn.model_selection import GroupKFold
from util import get_preprocessed_dataset, get_winners_only, fit_predict_print_wp, get_label_columns, get_embed_column_names
import pandas as pd
import numpy as np



# Feature sets under comparison, as (group name, column names) pairs. The group name
# doubles as the prefix of that group's `<name>_mean` / `<name>_std` result columns.
feature_groups = [
    ('manual', get_label_columns()),
    ('extra', [*get_label_columns(), 'NumWordsDiff', 'AvgWordLengthDiff', 'MaxWordLengthDiff']),
    ('embed', get_embed_column_names()),
    ('all', [
        *get_label_columns(),
        'NumWordsDiff', 'AvgWordLengthDiff', 'MaxWordLengthDiff',
        *get_embed_column_names(),
    ]),
]

# Result columns: a mean and a standard deviation per feature group, in group order.
column_names = [
    f"{group_name}_{statistic}"
    for group_name, _ in feature_groups
    for statistic in ('mean', 'std')
]

# Empty results dataframe: `model_name` followed by the per-group score columns
# (manual_mean, manual_std, extra_mean, extra_std, embed_mean, embed_std, all_mean, all_std).
evaluation_df = pd.DataFrame(columns=['model_name', *column_names])

def evaluate_model(model_, model_name, fit_predict_wp_kwargs=None, n_splits=10):
    """Cross-validate ``model_`` on every feature group and record the scores.

    For each ``(group_name, group_features)`` pair in the module-level
    ``feature_groups``, the preprocessed dataset is split with ``GroupKFold``
    (grouped on the ``Test`` column), every fold is scored with
    ``fit_predict_print_wp``, and the mean, standard deviation, minimum and
    maximum of the fold scores are printed.

    Once all groups have been scored, their means and standard deviations are
    written to the row of the module-level ``evaluation_df`` whose ``model_name``
    equals ``model_name``. The row is added if it does not exist and overwritten
    otherwise, so re-running a notebook cell does not create duplicate rows.

    Parameters
    ----------
    model_ : object
        Model handed unchanged to ``fit_predict_print_wp`` for every fold.
    model_name : str
        Label used in the printed summary and stored in the ``model_name`` column.
    fit_predict_wp_kwargs : dict, optional
        Extra keyword arguments forwarded to ``fit_predict_print_wp``.
    n_splits : int, default 10
        Number of ``GroupKFold`` folds.

    Raises
    ------
    KeyError
        If a feature group names a column that is missing from the preprocessed
        dataset. Nothing is written to ``evaluation_df`` in that case.
    """
    if fit_predict_wp_kwargs is None:
        fit_predict_wp_kwargs = {}

    df = get_preprocessed_dataset()
    x = df.drop(['Winner'], axis=1)
    y = df  # Full frame on purpose: winners are taken per fold, after the split
    groups = df['Test']

    # GroupKFold does not shuffle, so the folds are identical for every feature
    # group; compute them once instead of once per group.
    folds = list(GroupKFold(n_splits=n_splits).split(x, y, groups))

    row = {'model_name': model_name}
    for group_name, group_features in feature_groups:
        scores = []
        for train_index, test_index in folds:
            x_train, x_test = x.iloc[train_index], x.iloc[test_index]
            y_test = y.iloc[test_index]

            x_train_features = x_train[group_features]
            y_goal = get_winners_only(y_test)

            # NOTE(review): only the training frame is narrowed to group_features; the
            # test frames are passed with all columns. Confirm against the signature
            # of fit_predict_print_wp that this is the intended argument order.
            accuracy = fit_predict_print_wp(model_, x_train_features, y_test, x_test, y_goal, silent=True, **fit_predict_wp_kwargs)

            scores.append(accuracy)

        mean = np.mean(scores)
        std = np.std(scores)

        print(f"{model_name}[{group_name}] - \tMean: {mean:.3f}, Std: {std:.3f}, Min {np.min(scores):.3f}, Max {np.max(scores):.3f}")
        row[f"{group_name}_mean"] = mean
        row[f"{group_name}_std"] = std

    # Upsert one row per model. Assigning a scalar to a whole column (as before) never
    # creates a row in an empty frame and would overwrite every model's scores otherwise.
    is_this_model = (evaluation_df['model_name'] == model_name).to_numpy()
    matching_labels = evaluation_df.index[is_this_model]
    row_label = matching_labels[0] if len(matching_labels) else len(evaluation_df)
    evaluation_df.loc[row_label] = [row.get(column, np.nan) for column in evaluation_df.columns]


## Random
The first classifier we tried, used as a baseline.

In [21]:
# Baseline run: score the random predictor on every feature group via evaluate_model.
from util import get_random_predictor_model
model = get_random_predictor_model()
evaluate_model(model, 'Random')

Random[manual] - 	Mean: 0.419, Std: 0.041, Min 0.319, Max 0.484


KeyError: "['NumWordsDiff', 'AvgWordLengthDiff', 'MaxWordLengthDiff'] not in index"

## Naive Bayes

In [19]:
# Score the Naive Bayes model on every feature group via evaluate_model.
from util import get_naive_bayes_model_wp
model = get_naive_bayes_model_wp()
evaluate_model(model, 'Naive Bayes')

Naive Bayes[manual] - 	Mean: 0.625, Std: 0.051, Min 0.538, Max 0.703
Naive Bayes[extra] - 	Mean: 0.625, Std: 0.051, Min 0.538, Max 0.703
Naive Bayes[embed] - 	Mean: 0.625, Std: 0.051, Min 0.538, Max 0.703
Naive Bayes[all] - 	Mean: 0.625, Std: 0.051, Min 0.538, Max 0.703
