# Imports

In [8]:
import numpy as np
import pandas as pd
import constants as const
import functions as func

from pandas import DataFrame


from sklearn.neighbors import LocalOutlierFactor
from sklearn.covariance import EllipticEnvelope

from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectFromModel

from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score

from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.pipeline import Pipeline

import warnings
warnings.filterwarnings("ignore")

# Load datasets

In [9]:
# Treated ranked dataset, read from the path configured in `const`.
ranked_dataset = pd.read_csv(const.path_treated_dataset)

# One "complete_dataset.csv" per route, loaded in the order of `const.rotas_names`.
route_csv_paths = [
    f"{const.path_routes_datasets}{rota_name}/complete_dataset.csv"
    for rota_name in const.rotas_names
]
rotas_dataframe_array = [pd.read_csv(csv_path) for csv_path in route_csv_paths]

# Seleção de características

In [10]:
# Fit an ExtraTreesClassifier per route to predict "win" and save, for every
# numeric attribute, how much it contributes to that prediction.
# NOTE(review): the classifier is created without random_state, so the saved
# importances can differ between runs.

for rota_name in const.rotas_names:
    dataframe = pd.read_csv(const.path_routes_datasets + rota_name + "/" + "complete_dataset.csv")
    model = ExtraTreesClassifier()
    model = model.fit(dataframe.get(const.atributos_numericos), dataframe["win"])

    # Pair each attribute NAME with its importance. The previous version put the
    # whole `dataframe` object in the "nome" column instead of the name.
    data = [
        [name, feature_importance]
        for name, feature_importance in zip(const.atributos_numericos, model.feature_importances_)
    ]
    features_selected_dataframe = pd.DataFrame(data=data, columns=["nome", "importancia"])
    features_selected_dataframe.to_csv(const.path_tabelas_rotas + rota_name + "/features importance by ExtraTreesClassifier.csv")

In [11]:
# For every route: select features with SelectFromModel(LinearSVC), classify
# with a RandomForest, record the F1 score on the held-out rows, and save the
# selected attributes together with the forest's importance for each of them.
array_selection_features = []

for dataframe, rota_name in zip(rotas_dataframe_array, const.rotas_names):
    # The first `trainsize` rows train the pipeline; the remaining rows test it.
    trainsize = 700
    trainX, trainY = dataframe.get(const.atritubos_numericos_normalizados)[:trainsize], dataframe["win"][:trainsize]
    testX, testY = dataframe.get(const.atritubos_numericos_normalizados)[trainsize:], dataframe["win"][trainsize:]
    testsize = len(testX)

    pipeline = Pipeline([("feature_selection", SelectFromModel(LinearSVC())), ("classification", RandomForestClassifier(random_state=1))])
    pipeline.fit(trainX, trainY)
    predict = pipeline.predict(testX)
    # Despite the variable name, this is the F1 score (not plain accuracy).
    acuracia = f1_score(testY, predict)

    # Use the selector's own mask to recover which columns reached the
    # classifier. Re-deriving it with `abs(coef) > threshold_` disagrees with
    # SelectFromModel (which keeps importance >= threshold) on ties, and would
    # then misalign attribute names with the forest's importances below.
    # NOTE(review): names come from const.atributos_numericos while the model is
    # trained on const.atritubos_numericos_normalizados; this assumes both lists
    # are aligned one-to-one. Confirm in `constants`.
    selector = pipeline.named_steps["feature_selection"]
    array_atributes = [
        atribute
        for atribute, is_selected in zip(const.atributos_numericos, selector.get_support())
        if is_selected
    ]
    array_feature_importance = [round(value, 4) for value in pipeline.named_steps["classification"].feature_importances_]
    array_atributes_importance = [[atribute, importance] for atribute, importance in zip(array_atributes, array_feature_importance)]

    array_selection_features.append([rota_name, trainsize, testsize, "LinearSVC", "RandomForestClassifier", acuracia])

    # The index column is written on purpose: the scoring cell reads these rows
    # positionally (name at index 1, importance at index 2).
    dataframe_route_atributes = pd.DataFrame(data=array_atributes_importance, columns=["atributo", "importancia"])
    dataframe_route_atributes.to_csv(const.path_tabelas_rotas + rota_name + "/selected_features.csv")

dataframe_feature_selection = pd.DataFrame(data=array_selection_features, columns=["route", "train size", "test size", "Model selection", "Model Classification", "f1 score accuracy"])
dataframe_feature_selection.to_csv(const.path_table_files + "feature_selection.csv")
dataframe_feature_selection

Unnamed: 0,route,train size,test size,Model selection,Model Classification,f1 score accuracy
0,adc,700,374,LinearSVC,RandomForestClassifier,0.746667
1,sup,700,428,LinearSVC,RandomForestClassifier,0.70098
2,mid,700,355,LinearSVC,RandomForestClassifier,0.664634
3,jg,700,460,LinearSVC,RandomForestClassifier,0.695096
4,top,700,343,LinearSVC,RandomForestClassifier,0.651466


In [12]:
# Compute each player's score relative to their position.

def calculate_score(dict):
    """Return the weighted score of one player row.

    `dict` is a mapping-like row (the notebook passes each row of
    `ranked_dataset` via `DataFrame.apply(..., axis=1)`). Its "position" value
    selects the feature rows in the module-level `dict_selected_features`.
    For every such feature the row's "norm_<feature>" value is multiplied by
    the feature's importance and by `const.atributos_pesos_dict[<feature>]`,
    and the products are added up.

    Each feature row is read positionally: index 1 is the feature name and
    index 2 its importance (index 0 is presumably the index column saved in
    the CSV; verify against the file that is loaded).

    Note: the parameter name shadows the builtin `dict`; it is kept so the
    function's signature does not change.
    """
    total = 0
    for feature_row in dict_selected_features[dict["position"]]:
        name, importance = feature_row[1], feature_row[2]
        # Same left-to-right product as before, accumulated in feature order.
        total += dict["norm_" + name] * importance * const.atributos_pesos_dict[name]
    return total

# Route name -> rows of that route's "selected_features.csv" as an array
# (read back exactly as saved, including the CSV's leading index column).
dict_selected_features = {
    rota_name: pd.read_csv(f"{const.path_tabelas_rotas}{rota_name}/selected_features.csv").values
    for rota_name in const.rotas_names
}

# Add (or overwrite) the per-row "score" column on the ranked dataset.
ranked_dataset["score"] = ranked_dataset.apply(calculate_score, axis=1)

In [13]:
# Group rows by match ("gameCreation") and keep only groups with exactly
# 10 rows, each ordered by the "position" column.
matchs = [
    match_rows.sort_values("position")
    for _, match_rows in ranked_dataset.groupby("gameCreation")
    if len(match_rows) == 10
]

# Cross-validate each model on predicting the match outcome.
# Features: the ten "score" values of a match, in position order.
# Target: the "win" value of the first row of that sorted match.
# NOTE(review): rows that share a position keep whatever order sort_values
# leaves them in, so which side each feature column (and the target) refers to
# depends on the input row order. Confirm this is intended.
X_values = []
Y_values = []
for match_rows in matchs:
    X_values.append(match_rows["score"])
    Y_values.append(match_rows["win"].values[0])

model_random_forest = RandomForestClassifier(n_jobs=10, random_state=9)
model_mlp_classifiyer = MLPClassifier(random_state=9)
model_knn_classifiyer = KNeighborsClassifier()

# 5-fold cross-validation, run in the same order as the models are listed.
scores_random_forest, scores_mlp_classifiyer, scores_knn_classifiyer = (
    cross_val_score(model, X_values, Y_values, cv=5)
    for model in (model_random_forest, model_mlp_classifiyer, model_knn_classifiyer)
)

# Mean and standard deviation of the fold scores, per model.
score_random_forest, cross_random_forest_std = scores_random_forest.mean(), scores_random_forest.std()
score_mlp_classifiyer, cross_mlp_classifiyer_std = scores_mlp_classifiyer.mean(), scores_mlp_classifiyer.std()
score_knn_classifiyer, cross_knn_classifiyer_std = scores_knn_classifiyer.mean(), scores_knn_classifiyer.std()

data = [
    ["random_forest", score_random_forest, cross_random_forest_std],
    ["mlp_classifiyer", score_mlp_classifiyer, cross_mlp_classifiyer_std],
    ["knn_classifiyer", score_knn_classifiyer, cross_knn_classifiyer_std],
]

cross_validation_dataframe = pd.DataFrame(data=data, columns=["nome modelo", "cross val score", "cross val std"])
cross_validation_dataframe.to_csv(const.path_models_crossvalidation_score)

In [14]:
# Importance of each route for the match result, taken from the RandomForest
# model's `feature_importances_` after fitting it on all matches.
model_random_forest.fit(X_values, Y_values)
raw_importances = model_random_forest.feature_importances_

# Split the importances into consecutive chunks of two values.
# Presumably each pair holds the two players of one position, because every
# match was sorted by "position" before its scores were taken. Verify.
feature_importances = []
for start in range(0, len(raw_importances), 2):
    feature_importances.append(raw_importances[start:start + 2])

# Labels are hard-coded in alphabetical order; zip stops at the shorter input.
_rotas_labels = ["adc", "jg", "mid", "sup", "top"]
data = []
for rota_label, rota_score in zip(_rotas_labels, feature_importances):
    data.append([rota_label, rota_score.mean()])

dataframe_routes_importance = pd.DataFrame(data=data, columns=["rota", "importância"])
dataframe_routes_importance.to_csv(const.path_routes_importance)

In [16]:
# Display the per-route importance table (columns "rota" and "importância").
dataframe_routes_importance

Unnamed: 0,rota,importância
0,adc,0.181041
1,jg,0.127626
2,mid,0.064475
3,sup,0.051088
4,top,0.075769
