O processo a seguir representa o que foi aplicado no arquivo "with_all_columns.py". A única diferença em relação aos outros processos é a remoção de uma ou duas colunas; por isso, não é interessante recriar todo o arquivo para uma única alteração. Caso vá executar todo o notebook, fique atento aos dados que serão utilizados: as células de remoção de colunas abaixo são alternativas e não devem ser executadas todas em sequência.

### Primeiramente vamos realizar todos os imports necessários dos nossos modelos e bibliotecas auxiliares

- pandas: Irá nos auxiliar a manipular o dataset
- random: Irá nos auxiliar na escolha dos modelos "democráticos"
- sklearn: Irá fornecer uma interface para diferentes modelos de aprendizado de máquina

In [None]:
import pandas as pd
import random as rn
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
import warnings

### Vamos definir uma variável global para poder reproduzir os resultados posteriormente

In [None]:
# Shared seed: passed to random.seed, DataFrame.sample and MLPClassifier below
# so that those steps give the same result on every run.
RANDOM_STATE: int = 30

### Alterar a seed da biblioteca random para a nossa seed escolhida

In [None]:
# Seed Python's `random` module (used later to pick the members of the mixed
# voting ensemble) with the notebook-wide seed.
rn.seed(RANDOM_STATE)
# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')

### Medidas tomadas, vamos ler o nosso dataset e começar o trabalho

In [None]:
# Load the dataset from the parent directory. Later cells use column 0 as the
# target and all remaining columns as features.
df = pd.read_csv('../clean_dataset.csv')

#### Remove a coluna damage do dataset

In [None]:
# Variant 1: drop only the 'damange' column. Run at most one of the three
# column-removal cells; a second drop of the same label raises KeyError.
# NOTE(review): the heading says "damage" but the label is 'damange' —
# confirm against the CSV header.
df = df.drop(columns=['damange'])

#### Remove a coluna games do dataset

In [None]:
# Variant 2: drop only the 'games' column. Run at most one of the three
# column-removal cells; a second drop of the same label raises KeyError.
df = df.drop(columns=['games'])

#### Remove as colunas games e damage do dataset

In [None]:
# Variant 3: drop both 'damange' and 'games'. This raises KeyError if either
# single-column removal cell was already executed on this DataFrame.
# NOTE(review): the heading says "damage" but the label is 'damange' —
# confirm against the CSV header.
df = df.drop(columns=['damange', 'games'])

### Vamos separar 80% dos dados restantes para treino e deixar 20% para teste

In [None]:
# Draw a seeded random sample of 80% of the rows to train on.
train_df = df.sample(frac=0.8, random_state=RANDOM_STATE)

# Column 0 is the target; every remaining column is a feature.
y_train = train_df.iloc[:, 0]
x_train = train_df.iloc[:, 1:]

In [None]:
# Hold out exactly the rows that were NOT sampled into train_df.
# Sampling df a second time with the same random_state does not produce the
# complement of the training sample: the rows overlap with train_df, so the
# scores below would be measured on data the models had already seen.
# Dropping by index relies on df having unique index labels, which is the
# case for the default index produced by pd.read_csv.
test_df = df.drop(train_df.index)

# Column 0 is the target; every remaining column is a feature.
x_test = test_df.iloc[:, 1:]
y_test = test_df.iloc[:, 0]

## Com os dados separados vamos instanciar nossos modelos

### Modelos Lineares

In [None]:
# Multinomial logistic regression fitted with the L-BFGS solver.
# NOTE(review): max_iter is left at its default here, while the logistic
# regressions used inside the voting ensembles pass max_iter=1000 — confirm
# the difference is intentional.
logistic_regression = LogisticRegression(solver='lbfgs', multi_class='multinomial')

### Modelos baseados em árvores

In [None]:
# Two single-tree models with default hyper-parameters: a regular decision
# tree and an extremely randomized tree.
tree_classifier, extra_tree_classifier = DecisionTreeClassifier(), ExtraTreeClassifier()

### Modelos baseados na técnica de ensemble

In [None]:
# Stand-alone ensemble models. All use library defaults except the extra-trees
# forest, which is built with 70 trees.
gradient_boost_classifier = GradientBoostingClassifier()
random_forest_classifier = RandomForestClassifier()
bagging_classifier = BaggingClassifier()
extra_trees_classifier = ExtraTreesClassifier(n_estimators=70)
adaboost_classifier = AdaBoostClassifier()

### Modelos baseados em redes neurais

In [None]:
# Multi-layer perceptron limited to 700 training iterations and seeded with
# the notebook-wide RANDOM_STATE.
mlp_classifier = MLPClassifier(max_iter=700, random_state=RANDOM_STATE)

### Todos os modelos anteriores não requeriam parâmetros, poderiam ser dados, mas não eram necessários. Os modelos a seguir necessitam de alguns parâmetros e aqui nós construímos eles. Esses parâmetros são os submodelos que farão parte da democracia

In [None]:

# (label, estimator) pairs grouped by model family; each list is later passed
# as the `estimators` argument of a VotingClassifier.
tree_estimators = [('tc', DecisionTreeClassifier()), ('etc', ExtraTreeClassifier())]
boost_estimators = [
    ('adac', AdaBoostClassifier()), ('etsc', ExtraTreesClassifier(n_estimators=70)),
    ('bc', BaggingClassifier()), ('rfc', RandomForestClassifier()), ('gbc', GradientBoostingClassifier())]
# Despite the "nb" name, this group pairs an MLP with Gaussian naive Bayes.
nb_estimators = [('mc', MLPClassifier()), ('gnb', GaussianNB())]
# Mixed committee: one logistic regression, one randomly chosen tree model
# (relabelled 'te'), four of the five boost_estimators and both nb_estimators
# in random order. The picks come from the `random` module, so they depend on
# that generator's state and on the order of the calls below.
mix_estimators = [
    ('le', LogisticRegression(solver='lbfgs', max_iter=1000, multi_class='multinomial')),
    ('te', rn.choice(tree_estimators)[1]), *rn.sample(boost_estimators, 4), *rn.sample(nb_estimators, 2)]
# Full committee: a logistic regression plus every estimator listed above.
all_estimators = [('lgr',  LogisticRegression(solver='lbfgs', max_iter=1000, multi_class='multinomial')),
                  *tree_estimators, *boost_estimators, *nb_estimators]

### Modelos baseados na técnica de ensemble

In [None]:
# One voting ensemble per estimator group. Only the boost group uses soft
# voting; the others keep VotingClassifier's default voting mode.
voting_classifier_all = VotingClassifier(all_estimators)
voting_classifier_mix = VotingClassifier(mix_estimators)
voting_classifier_nb = VotingClassifier(nb_estimators)
voting_classifier_boost = VotingClassifier(boost_estimators, voting='soft')
voting_classifier_tree = VotingClassifier(tree_estimators)

### Com todos nossos modelos instanciados, podemos treiná-los

In [None]:
# Train the logistic regression on the training split. The call stays the
# cell's final expression so the notebook still echoes the fitted estimator.
logistic_regression.fit(X=x_train, y=y_train)

In [None]:
# Train both single-tree models on the training split. The last call stays a
# bare expression so the notebook still echoes the fitted estimator.
tree_classifier.fit(X=x_train, y=y_train)
extra_tree_classifier.fit(X=x_train, y=y_train)

In [None]:
# Train every stand-alone ensemble model on the training split.
adaboost_classifier.fit(x_train, y_train)
extra_trees_classifier.fit(x_train, y_train)
bagging_classifier.fit(x_train, y_train)
# random_forest_classifier was instantiated alongside the other ensembles but
# was never fitted, which left it unusable for prediction or scoring.
random_forest_classifier.fit(x_train, y_train)
gradient_boost_classifier.fit(x_train, y_train)

In [None]:
# Train the multi-layer perceptron on the training split. The call stays the
# cell's final expression so the notebook still echoes the fitted estimator.
mlp_classifier.fit(X=x_train, y=y_train)

In [None]:
# Train the voting ensembles in the same order as before.
for ensemble in (voting_classifier_tree, voting_classifier_boost,
                 voting_classifier_nb, voting_classifier_mix):
    ensemble.fit(x_train, y_train)
# Kept as the cell's final bare expression so the notebook still echoes it.
voting_classifier_all.fit(x_train, y_train)

### Após treinar nossos modelos vamos avaliar as acurácias com o método score de cada um

In [None]:
# Score the fitted logistic regression on x_test / y_test and show the result
# as the cell's output.
logistic_regression_score = logistic_regression.score(x_test, y_test)
f'Logistic Regression Model Score: {logistic_regression_score}'

In [None]:
# Report the score of both single-tree models on x_test / y_test.
# A notebook cell only echoes its last bare expression, so the first score
# was computed and silently discarded; print every line explicitly.
print(f'Tree Model Score: {tree_classifier.score(x_test, y_test)}')
print(f'Extra Tree Model Score: {extra_tree_classifier.score(x_test, y_test)}')

In [None]:
# Report the score of the stand-alone ensemble models on x_test / y_test.
# A notebook cell only echoes its last bare expression, so the first three
# scores were computed and silently discarded; print every line explicitly.
# NOTE(review): no score is reported for random_forest_classifier; add one
# here once that model is fitted.
print(f'AdaBoost Model Score: {adaboost_classifier.score(x_test, y_test)}')
print(f'Extra Trees Model Score: {extra_trees_classifier.score(x_test, y_test)}')
print(f'Bagging Model Score: {bagging_classifier.score(x_test, y_test)}')
print(f'Gradient Boost Model Score: {gradient_boost_classifier.score(x_test, y_test)}')

In [None]:
# Score the fitted multi-layer perceptron on x_test / y_test and show the
# result as the cell's output.
mlp_score = mlp_classifier.score(x_test, y_test)
f'MultiLayer Perceptron Model Score: {mlp_score}'

In [None]:
# Report the score of every voting ensemble on x_test / y_test.
# A notebook cell only echoes its last bare expression, so the first four
# scores were computed and silently discarded; print every line explicitly.
print(f'VotingClassifier Tree Models Score: {voting_classifier_tree.score(x_test, y_test)}')
print(f'VotingClassifier Boost Models Score: {voting_classifier_boost.score(x_test, y_test)}')
print(f'VotingClassifier Naive Bayes Models Score: {voting_classifier_nb.score(x_test, y_test)}')
print(f'VotingClassifier Mixed Models Score: {voting_classifier_mix.score(x_test, y_test)}')
print(f'VotingClassifier All Models Score: {voting_classifier_all.score(x_test, y_test)}')

### Avaliações dos modelos utilizando cross-validation

#### Logistic Regression cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the logistic regression on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(LogisticRegression(solver='lbfgs', multi_class='multinomial'),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Decision Tree cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the decision tree on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(DecisionTreeClassifier(),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Extra Tree cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the single extra tree on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(ExtraTreeClassifier(),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Voting Tree cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the tree voting ensemble on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh ensemble is built for every metric.
    fold_scores = cross_val_score(VotingClassifier(estimators=tree_estimators),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### AdaBoost cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of AdaBoost on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(AdaBoostClassifier(),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Extra Trees cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the 70-tree extra-trees forest on the full
# dataset (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(ExtraTreesClassifier(n_estimators=70),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Bagging cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the bagging classifier on the full dataset
# (column 0 is the target, the remaining columns are the features).
# Two fixes over the original cell:
#   * the recall line passed scoring='recall_macro ' (trailing space), which
#     is not a valid scorer name and raised ValueError;
#   * a notebook cell only echoes its last bare expression, so Accuracy, F1
#     and Precision were never shown; each metric is now printed.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(BaggingClassifier(),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Random Forest cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the random forest on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(RandomForestClassifier(),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Gradient cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of gradient boosting on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(GradientBoostingClassifier(),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Voting Boost cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the soft-voting boost ensemble on the full
# dataset (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh ensemble is built for every metric.
    fold_scores = cross_val_score(VotingClassifier(estimators=boost_estimators, voting='soft'),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### MLP cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the seeded multi-layer perceptron on the full
# dataset (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(MLPClassifier(max_iter=700, random_state=RANDOM_STATE),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Gaussian cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of Gaussian naive Bayes on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh estimator is built for every metric.
    fold_scores = cross_val_score(GaussianNB(),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### NaiveBayes Voting cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the nb_estimators voting ensemble on the full
# dataset (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh ensemble is built for every metric.
    fold_scores = cross_val_score(VotingClassifier(estimators=nb_estimators),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### Mixed Voting cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the mixed voting ensemble on the full dataset
# (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh ensemble is built for every metric.
    fold_scores = cross_val_score(VotingClassifier(estimators=mix_estimators),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")

#### All Voting cross-validation F1, Precision and Recall

In [None]:
# 10-fold cross-validation of the all-models voting ensemble on the full
# dataset (column 0 is the target, the remaining columns are the features).
# A notebook cell only echoes its last bare expression, so Accuracy, F1 and
# Precision were computed and then discarded; print each metric explicitly.
features, target = df.iloc[:, 1:], df.iloc[:, 0]
for label, scoring in (('Accuracy  ', None), ('F1        ', 'f1_macro'),
                       ('Precision ', 'precision_macro'), ('Recall    ', 'recall_macro')):
    # scoring=None uses the estimator's default scorer, as the original
    # accuracy line did; a fresh ensemble is built for every metric.
    fold_scores = cross_val_score(VotingClassifier(estimators=all_estimators),
                                  features, target, cv=10, scoring=scoring)
    print(f"{label}: {round(fold_scores.mean() * 100, 2)}%")