# Classification model(s):
## Preprocess data and train model(s)

#### Import necessary modules

In [1]:
import os
import numpy as np
import pandas as pd

from sklearn import metrics

from preprocessing import partition_dataset
#from api_requests import get_rank, get_matches 
#from composite_stats import composite_player_stats
#from combine_and_expand_match_data import combine_match_dfs
#from graphs import rank_order

### Make folder paths for data (and potentially a new subfolder)

In [2]:
def make_folder(folder):
    """Create the directory ``folder`` (and any missing parents) if it is absent.

    Prints a confirmation message only when the path did not exist beforehand;
    does nothing when the path already exists.

    Args:
        folder: Path of the directory that should exist after the call.
    """
    if not os.path.exists(folder):
        # exist_ok=True keeps this safe if the directory appears between the
        # existence check above and the creation call (e.g. a parallel run),
        # which would otherwise raise FileExistsError.
        os.makedirs(folder, exist_ok=True)
        print("The new directory is created!")

In [3]:
# Relative output/input folders; the trailing slash is kept because the
# paths are prefixed directly onto file names.
data_path, plot_path, model_path = 'data/', 'plots/', 'model/'

# Make sure each folder exists before it is used.
for folder in (data_path, plot_path, model_path):
    make_folder(folder)

## Preprocess data

In [4]:
# Read the cleaned dataset from the data folder.
full_df = pd.read_csv(data_path + 'cleaned_total_list.csv')

# Column names handed to partition_dataset as the feature set.
features_list = [
    'KD',
    'HS_perc',
    'avg_ability_usage',
    'avg_dmg_rec',
    'avg_spent',
    'avg_loadout',
    'level',
    'avg_assists',
]

# partition_dataset is a project helper whose four return values are unpacked here.
# NOTE(review): presumably a train/test split of features and labels — confirm in preprocessing.py.
X_train, X_test, y_train, y_test = partition_dataset(full_df, features_list)

# Training a KNN Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# (n_neighbors, accuracy on X_test/y_test) pairs, one per value tried.
neighbors_accuracy = []

# Sweep the odd neighbour counts 1, 3, ..., 23.
for neighbors in range(1, 25, 2):
    # Build a fresh classifier for this neighbour count and fit it on the training data.
    knn = KNeighborsClassifier(n_neighbors=neighbors).fit(X_train, y_train)

    # Predict X_test and measure how often the prediction matches y_test.
    y_pred = knn.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)

    # Record the result, then print the neighbour count as a progress marker.
    neighbors_accuracy.append((neighbors, accuracy))
    print(neighbors)

In [6]:
# Show every (n_neighbors, accuracy) pair stored in neighbors_accuracy.
print("KNN Accuracy:\n", neighbors_accuracy)

KNN Accuracy:
 [(1, 0.07948735743863788), (3, 0.07922132000832813), (5, 0.08263353922317071), (7, 0.08508571031993893), (9, 0.08783861938140514), (11, 0.08989751775510677), (13, 0.09074189742522035), (15, 0.09160941078492608), (17, 0.09245379045503968), (19, 0.09344853910750225), (21, 0.09482499363823536), (23, 0.09501006315497258)]


The KNN accuracy is very low (under 10% for every value of k tested). I need to either:
1. Change my features
2. Change to a different model

# Training a Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# (max_depth, accuracy on X_test/y_test) pairs, one per depth tried.
depth_accuracy = []

# Try every max_depth from 1 through 11.
for depth in range(1, 12):
    # Build the forest for this depth (random_state fixed at 3) and fit it on the training data.
    rf = RandomForestClassifier(max_depth=depth, random_state=3).fit(X_train, y_train)

    # Predict X_test and measure how often the prediction matches y_test.
    y_pred = rf.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)

    # Record the result, then print the depth as a progress marker.
    depth_accuracy.append((depth, accuracy))
    print(depth)

In [8]:
# Show every (max_depth, accuracy) pair currently stored in depth_accuracy.
# NOTE(review): depth_accuracy is also assigned by the Decision Tree cell, so this
# only reflects the random forest if that cell has not been run since.
print("Random Forest Accuracy:\n", depth_accuracy)

Random Forest Accuracy:
 [(1, 0.09369144284821987), (2, 0.10129085987924213), (3, 0.10799962986096652), (4, 0.11187452286765216), (5, 0.11370208434543225), (6, 0.11476623406667129), (7, 0.11577254956392995), (8, 0.11606172068383186), (9, 0.11706803618109052), (10, 0.11760011104171005), (11, 0.11871052814213338)]


# Training a Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# (max_depth, accuracy on X_test/y_test) pairs, one per depth tried.
depth_accuracy = []

# Try every max_depth from 1 through 11.
for depth in range(1, 12):
    # Build the tree for this depth (random_state fixed at 3) and fit it on the training data.
    dt = DecisionTreeClassifier(max_depth=depth, random_state=3).fit(X_train, y_train)

    # Predict X_test and measure how often the prediction matches y_test.
    y_pred = dt.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)

    # Record the result, then print the depth as a progress marker.
    depth_accuracy.append((depth, accuracy))
    print(depth)

In [10]:
# Show every (max_depth, accuracy) pair currently stored in depth_accuracy.
print("Decision Tree Accuracy:\n", depth_accuracy)

Decision Tree Accuracy:
 [(1, 0.09895435723043468), (2, 0.10163786522312443), (3, 0.10732875286279409), (4, 0.10967682235639763), (5, 0.11288083836491082), (6, 0.11301964050246374), (7, 0.11455803086034191), (8, 0.11578411640872603), (9, 0.11341291322553033), (10, 0.11377148541420871), (11, 0.11148125014458556)]


# Training an SVC

In [None]:
from sklearn.svm import SVC

# Fit a support vector classifier, constructed with no arguments, on the training data.
svc = SVC().fit(X_train, y_train)

# Predict X_test and report how often the prediction matches y_test.
y_pred = svc.predict(X_test)
svc_accuracy = metrics.accuracy_score(y_test, y_pred)
print("SVC Accuracy:", svc_accuracy)