In [22]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
from imblearn.over_sampling import RandomOverSampler,SMOTE
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
#import tensorflow as tf
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC


In [3]:
cols = ["Começou", "Jogada 1", "Jogada 2","Jogada 3", "Jogada 4", "Jogada 5", "Jogada 6","Jogada 7", "Jogada 8",
        "Jogada 9",'M00','M01','M02','M10','M11','M12','M20','M21','M22',"Terminou na Rodada","Ganhou"]  
   
tabuleiro_cols = ['M00','M01','M02','M10','M11','M12','M20','M21','M22']

jogadas_list_cols = ["Jogada 1", "Jogada 2","Jogada 3", "Jogada 4", "Jogada 5", "Jogada 6",
                        "Jogada 7", "Jogada 8","Jogada 9"]
        

In [4]:
data =  pd.read_csv('jogadas.csv',names=cols)

In [5]:
print(len(data[data['Ganhou']=='Ganhou']))
print(len(data[data['Ganhou']=='Empate']))
print(len(data[data['Ganhou']=='Perdeu']))


9926
2752
9926


In [6]:
for i in cols:
    print(f'Coluna: {i}\nValores {data[i].unique()}\nTipo: {data[i].dtype}')

Coluna: Começou
Valores [0 1]
Tipo: int64
Coluna: Jogada 1
Valores ['Linha: 1 Coluna: 1' 'Linha: 0 Coluna: 2' 'Linha: 2 Coluna: 1'
 'Linha: 1 Coluna: 0' 'Linha: 2 Coluna: 2' 'Linha: 2 Coluna: 0'
 'Linha: 1 Coluna: 2' 'Linha: 0 Coluna: 1' 'Linha: 0 Coluna: 0']
Tipo: object
Coluna: Jogada 2
Valores ['Linha: 2 Coluna: 1' 'Linha: 2 Coluna: 0' 'Linha: 0 Coluna: 2'
 'Linha: 2 Coluna: 2' 'Linha: 0 Coluna: 1' 'Linha: 1 Coluna: 0'
 'Linha: 1 Coluna: 1' 'Linha: 1 Coluna: 2' 'Linha: 0 Coluna: 0']
Tipo: object
Coluna: Jogada 3
Valores ['Linha: 1 Coluna: 0' 'Linha: 1 Coluna: 1' 'Linha: 0 Coluna: 1'
 'Linha: 0 Coluna: 0' 'Linha: 2 Coluna: 2' 'Linha: 2 Coluna: 0'
 'Linha: 1 Coluna: 2' 'Linha: 2 Coluna: 1' 'Linha: 0 Coluna: 2']
Tipo: object
Coluna: Jogada 4
Valores ['Linha: 1 Coluna: 2' 'Linha: 1 Coluna: 0' 'Linha: 2 Coluna: 0'
 'Linha: 2 Coluna: 1' 'Linha: 0 Coluna: 0' 'Linha: 2 Coluna: 2'
 'Linha: 0 Coluna: 1' 'Linha: 1 Coluna: 1' 'Linha: 0 Coluna: 2']
Tipo: object
Coluna: Jogada 5
Valores ['Linha: 

In [7]:
def scale_dataset(dataframe,y_column,oversample = False):
    X_cols = [col for col in dataframe.columns if col != y_column]
    X = dataframe[X_cols].values
    y = dataframe[y_column].values
    
    #scaler = StandardScaler()
    #Scale the number, so we dont have  huge discrepancy between columns, it affects the model
    #X = scaler.fit_transform(X)
    
    #the difference between the len of the values must not be huge, so we have to scale it
    #oversample the one that has the least, taking more of the less class
    
    if oversample:
        ros = RandomOverSampler()
        X,y = ros.fit_resample(X,y)
        #smote = SMOTE(random_state=42)
        #X,y = smote.fit_resample(X,y)
        
    # concat 2 arrays, y is only one dimension so we have to make it 2 
    #in this function using -1 is the same as len(y)
    #same as concat in pandas put hstack in numpay
    df = np.hstack((X,np.reshape(y,(-1,1))))
    
    return df,X,y

In [8]:
data.head(5)

Unnamed: 0,Começou,Jogada 1,Jogada 2,Jogada 3,Jogada 4,Jogada 5,Jogada 6,Jogada 7,Jogada 8,Jogada 9,...,M01,M02,M10,M11,M12,M20,M21,M22,Terminou na Rodada,Ganhou
0,0,Linha: 1 Coluna: 1,Linha: 2 Coluna: 1,Linha: 1 Coluna: 0,Linha: 1 Coluna: 2,Linha: 2 Coluna: 2,Linha: 0 Coluna: 0,Linha: 0 Coluna: 2,Linha: 0 Coluna: 1,Linha: 2 Coluna: 0,...,Computer Played,Opponent Played,Opponent Played,Opponent Played,Computer Played,Opponent Played,Computer Played,Opponent Played,10,Perdeu
1,1,Linha: 0 Coluna: 2,Linha: 2 Coluna: 0,Linha: 1 Coluna: 1,Linha: 1 Coluna: 0,Linha: 0 Coluna: 0,Linha: 0 Coluna: 1,Linha: 2 Coluna: 1,Linha: 1 Coluna: 2,Linha: 2 Coluna: 2,...,Opponent Played,Computer Played,Opponent Played,Computer Played,Opponent Played,Opponent Played,Computer Played,Computer Played,10,Ganhou
2,1,Linha: 2 Coluna: 1,Linha: 0 Coluna: 2,Linha: 0 Coluna: 1,Linha: 2 Coluna: 0,Linha: 2 Coluna: 2,Linha: 1 Coluna: 2,Linha: 0 Coluna: 0,Linha: 1 Coluna: 1,0,...,Computer Played,Opponent Played,Nobody Played,Opponent Played,Opponent Played,Opponent Played,Computer Played,Computer Played,9,Perdeu
3,0,Linha: 1 Coluna: 0,Linha: 2 Coluna: 2,Linha: 0 Coluna: 1,Linha: 2 Coluna: 0,Linha: 2 Coluna: 1,Linha: 0 Coluna: 0,Linha: 1 Coluna: 1,0,0,...,Opponent Played,Nobody Played,Opponent Played,Opponent Played,Nobody Played,Computer Played,Opponent Played,Computer Played,8,Perdeu
4,0,Linha: 2 Coluna: 2,Linha: 0 Coluna: 2,Linha: 1 Coluna: 0,Linha: 2 Coluna: 1,Linha: 0 Coluna: 0,Linha: 2 Coluna: 0,Linha: 0 Coluna: 1,Linha: 1 Coluna: 1,0,...,Opponent Played,Computer Played,Opponent Played,Computer Played,Nobody Played,Computer Played,Computer Played,Opponent Played,9,Ganhou


In [9]:
## Double data
## In this game we can double our data, if we invert the Started columns and the Winerr Column. then change the positions columns
## In case of tie, change the Started column and the position columns
## map_quem_jogou = {'Computer Played':2, 'Nobody Played': 0, 'Opponent Played':1}
#map_quem_jogou_invert =  {'Computer Played':'Opponent Played', 'Nobody Played': 'Nobody Played', 'Opponent Played':'Computer Played'}  
## quem comecou dict original
### self.dict = {'Player': 0,'Computer':1}
#who_started_invert = {1: 0,0:1}
## map_column_ganhou = {'Ganhou': 2,'Perdeu': 0,'Empate': 1}
#map_column_ganhou_invert = {'Ganhou': 'Perdeu','Perdeu': 'Ganhou','Empate': 'Empate'}
#copy_to_double_data = data.copy()
#
#for line_index in range(len(copy_to_double_data)):
#        
#    # Invert Winner Column
#    quem_ganhou_value = copy_to_double_data.loc[line_index,'Ganhou']
#    copy_to_double_data.loc[line_index,'Ganhou'] = map_column_ganhou_invert[quem_ganhou_value]
#    # Invert the Started Column
#    quem_comecou_value = copy_to_double_data.loc[line_index,'Começou']
#    copy_to_double_data.loc[line_index,'Começou'] = who_started_invert[quem_comecou_value]
#    # Invert Position Data
#    for tab_col in tabuleiro_cols:
#        position_value  = copy_to_double_data.loc[line_index,tab_col]
#        copy_to_double_data.loc[line_index,tab_col] = map_quem_jogou_invert[position_value]
#         
#copy_to_double_data.to_csv('jogadas_inv.csv',index=False,header=False)
#
#
## Jogar este codigo no tiktak
#


In [10]:
# Join the data, invert and original

#df_label_to_encode = data.copy().reset_index(drop=True)
#df_inv_label_to_encode = copy_to_double_data.copy().reset_index(drop=True)
#df_label_encoder = 0
#df_label_encoder = pd.concat([df_label_to_encode,df_inv_label_to_encode],axis=0)
df_label_encoder = data.copy().reset_index(drop=True)


encoder = LabelEncoder()
map_column_values = {'0': 0,
                     0:0,
 'Linha: 0 Coluna: 0': 1,
 'Linha: 0 Coluna: 1': 2,
 'Linha: 0 Coluna: 2': 3,
 'Linha: 1 Coluna: 0': 4,
 'Linha: 1 Coluna: 1': 5,
 'Linha: 1 Coluna: 2': 6,
 'Linha: 2 Coluna: 0': 7,
 'Linha: 2 Coluna: 1': 8,
 'Linha: 2 Coluna: 2': 9}

map_column_ganhou = {
    'Ganhou': 2,
    'Perdeu': 0,
    'Empate': 1
}

map_quem_jogou = {'Computer Played':2, 'Nobody Played': 0, 'Opponent Played':1}  




for i in jogadas_list_cols:
    df_label_encoder[i] = df_label_encoder[i].map(map_column_values)

for i in tabuleiro_cols:
    df_label_encoder[i] = df_label_encoder[i].map(map_quem_jogou)
    
df_label_encoder['Ganhou'] = df_label_encoder['Ganhou'].map(map_column_ganhou)
df_label_encoder.fillna(int(0),inplace=True)

#df_label_encoder['Ganhou'] = encoder.fit_transform(df_label_encoder['Ganhou'])
#map_column_ganhou = []
#map_column_ganhou.append(dict(zip(encoder.classes_, range(len(encoder.classes_)))))
#print('predic df',self.pred_dataset_encoded)
#print('DF ENC',self.df_encoded)


In [38]:
map_column_ganhou['Ganhou']

2

In [11]:
## Plota Gráficos para melhor compreenção

import plotly.express as px
import pandas as pd

# Iterando sobre as colunas (exceto a última) para plotar os histogramas
for i in df_label_encoder['Começou'].unique():
    comecou = 'Computer' if i == 1 else 'Player'
    for label in cols[1:-1]:
        # Criando um dataframe temporário para a coluna em questão
        temp_df = df_label_encoder[df_label_encoder['Começou']==i][[label, 'Ganhou']].copy()
        temp_df['Resultado'] = temp_df['Ganhou'].map({map_column_ganhou['Ganhou']: 'Ganhou',
                                                      map_column_ganhou['Empate']: 'Empate',
                                                      map_column_ganhou['Perdeu']: 'Perdeu'})

        # Usando plotly para criar o histograma com densidade
        fig = px.histogram(temp_df, x=label, color='Resultado', 
                           histnorm='probability density',
                           barmode='overlay',  # Sobreposição das barras
                           opacity=0.7,  # Transparência das barras
                           color_discrete_map={'Ganhou': 'blue', 'Empate': 'red', 'Perdeu': 'green'})
        
        # Adicionando título e rótulos
        fig.update_layout(title=f'Distribuição de {label}, Quando Começou: {i}, {comecou} Jogou',
                          xaxis_title=label,
                          yaxis_title='Probabilidade',
                          legend_title='Resultado')
        comecou = 'Computer' if comecou == 'Player' else 'Player'

        # Exibindo o gráfico
        fig.show()


In [12]:
print(len(data[data['Começou'] == 0]))
len(data[data['Começou'] == 1])

11302


11302

In [13]:
#train_lb_array, X_train_lb_encoder, y_train_lb_encoder = scale_dataset(train_lb_encoder,'Jogada 6',oversample=True)
#test_lb_array, X_test_lb_encoder, y_test_lb_encoder = scale_dataset(test_lb_encoder,'Jogada 6',oversample=False)


In [14]:
#jogada_a_analisar = 'Jogada 5'
#numero = int(jogada_a_analisar[-1])
#colunas_para_tirar = test_lb_encoder.columns[numero+1:-1].to_list()
#test_lb_encoder[test_lb_encoder.columns.drop(colunas_para_tirar)].head()


In [15]:
#jogada_a_analisar = 'Jogada 5'
#numero = int(jogada_a_analisar[-1])
#colunas_para_tirar = test_lb_encoder.columns[numero+1:-2].to_list()
#colunas_para_tirar

In [16]:
#df_label_encoder['Terminou na Rodada'] = 0
#
#for i in range(len(df_label_encoder)):
#    cont = 0
#    for col in cols[1:-1]:
#        if df_label_encoder.loc[i,col] != 0:
#            cont +=1
#        elif df_label_encoder.loc[i,col] == 0:
#            break
#    df_label_encoder.loc[i,'Terminou na Rodada'] = cont
#df_label_encoder.head(1)
        
    

In [17]:
print(df_label_encoder.loc[9,'Começou'])
df_label_encoder.head(10)

1


Unnamed: 0,Começou,Jogada 1,Jogada 2,Jogada 3,Jogada 4,Jogada 5,Jogada 6,Jogada 7,Jogada 8,Jogada 9,...,M01,M02,M10,M11,M12,M20,M21,M22,Terminou na Rodada,Ganhou
0,0,5,8,4,6,9,1,3,2,7,...,2,1,1,1,2,1,2,1,10,0
1,1,3,7,5,4,1,2,8,6,9,...,1,2,1,2,1,1,2,2,10,2
2,1,8,3,2,7,9,6,1,5,0,...,2,1,0,1,1,1,2,2,9,0
3,0,4,9,2,7,8,1,5,0,0,...,1,0,1,1,0,2,1,2,8,0
4,0,9,3,4,8,1,7,2,5,0,...,1,2,1,2,0,2,2,1,9,2
5,0,7,8,4,1,6,9,5,0,0,...,0,0,1,1,1,1,2,2,8,0
6,1,5,2,1,6,3,4,7,0,0,...,1,2,1,2,1,2,0,0,8,2
7,0,7,2,9,6,8,0,0,0,0,...,2,0,0,0,2,1,1,1,6,0
8,0,6,3,7,8,1,4,5,2,9,...,2,2,2,1,1,1,2,1,10,0
9,1,2,3,6,1,4,9,5,0,0,...,2,1,2,2,2,0,0,1,8,2


In [18]:
df_label_encoder.head(2)

Unnamed: 0,Começou,Jogada 1,Jogada 2,Jogada 3,Jogada 4,Jogada 5,Jogada 6,Jogada 7,Jogada 8,Jogada 9,...,M01,M02,M10,M11,M12,M20,M21,M22,Terminou na Rodada,Ganhou
0,0,5,8,4,6,9,1,3,2,7,...,2,1,1,1,2,1,2,1,10,0
1,1,3,7,5,4,1,2,8,6,9,...,1,2,1,2,1,1,2,2,10,2


In [11]:
train_lb_encoder = df_label_encoder.sample(frac=1)
test_lb_encoder = train_lb_encoder.sample(frac=1)
test_pct = 0.2
test_lb_encoder = test_lb_encoder[test_lb_encoder['Ganhou']==map_column_ganhou['Ganhou']].sample(frac=test_pct)
train_lb_encoder.drop(test_lb_encoder.index.to_list(),inplace=True)


In [20]:
for i in range(9):
    jogada_a_analisar = f'Jogada {i+1}'
    y_test = test_lb_encoder[f"{jogada_a_analisar}"]
    print(f"Rodada {i+1} - Distribuição de Classes no Conjunto de Teste:")
    print(y_test.value_counts())
    print()


Rodada 1 - Distribuição de Classes no Conjunto de Teste:
Jogada 1
7    241
4    236
9    230
5    224
2    222
3    222
1    214
6    206
8    190
Name: count, dtype: int64

Rodada 2 - Distribuição de Classes no Conjunto de Teste:
Jogada 2
2    237
5    231
1    227
6    226
7    225
3    217
4    211
8    210
9    201
Name: count, dtype: int64

Rodada 3 - Distribuição de Classes no Conjunto de Teste:
Jogada 3
8    238
7    234
4    234
5    228
2    222
3    221
1    212
9    205
6    191
Name: count, dtype: int64

Rodada 4 - Distribuição de Classes no Conjunto de Teste:
Jogada 4
6    250
9    234
1    225
8    222
5    221
2    217
7    216
4    209
3    191
Name: count, dtype: int64

Rodada 5 - Distribuição de Classes no Conjunto de Teste:
Jogada 5
5    254
4    226
6    225
9    222
1    220
3    216
2    214
7    210
8    198
Name: count, dtype: int64

Rodada 6 - Distribuição de Classes no Conjunto de Teste:
Jogada 6
0    244
7    222
8    204
9    200
1    193
2    192
6    190
5

In [34]:
cols

['Começou',
 'Jogada 1',
 'Jogada 2',
 'Jogada 3',
 'Jogada 4',
 'Jogada 5',
 'Jogada 6',
 'Jogada 7',
 'Jogada 8',
 'Jogada 9',
 'M00',
 'M01',
 'M02',
 'M10',
 'M11',
 'M12',
 'M20',
 'M21',
 'M22',
 'Terminou na Rodada',
 'Ganhou']

['Começou',
 'Jogada 1',
 'Jogada 2',
 'Jogada 3',
 'Jogada 4',
 'Jogada 5',
 'Jogada 6',
 'Jogada 7',
 'Jogada 8',
 'Jogada 9',
 'M00',
 'M01',
 'M02',
 'M10',
 'M11',
 'M12',
 'M20',
 'M21',
 'M22',
 'Terminou na Rodada',
 'Ganhou',
 'Jogada 2',
 'Jogada 3',
 'Jogada 4',
 'Jogada 5',
 'Jogada 6',
 'Jogada 7',
 'Jogada 8',
 'Jogada 9']

In [16]:
verificacao = np.array([[1,0,0,0,0,0,0,0,0,0,1,2]])

In [15]:
train_lb_encoder_verificacao = df_label_encoder.sample(frac=1)
jogada_a_analisar = f'Jogada 1'
numero = int(jogada_a_analisar[-1])
colunas_para_tirar = jogadas_list_cols[numero:]
train_lb_array_ver, X_train_lb_encoder_ver, y_train_lb_encoder_ver = scale_dataset(train_lb_encoder_verificacao[train_lb_encoder_verificacao.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=True)
knn_model = KNeighborsClassifier(n_neighbors=1)
#print(self.X_train)
knn_model.fit(X_train_lb_encoder_ver,y_train_lb_encoder_ver)
y_pred = knn_model.predict(verificacao)
y_pred_pos = [k for k, v in map_column_values.items() if v == y_pred]
print(y_pred_pos,y_pred)

NameError: name 'verificacao' is not defined

['Linha: 2 Coluna: 0']

In [19]:
models = {}

In [14]:
colunas_para_tirar = jogadas_list_cols[2:]
train_lb_encoder.columns.drop(colunas_para_tirar)

Index(['Começou', 'Jogada 1', 'Jogada 2', 'M00', 'M01', 'M02', 'M10', 'M11',
       'M12', 'M20', 'M21', 'M22', 'Terminou na Rodada', 'Ganhou'],
      dtype='object')

In [20]:
## KNN model
# Tirando as colunas das jogadas seguintes o resultado piora
# Retirando as colunas que coloqui posteriormente de posição na matriz o resultado piora
# 'M00','M01','M02','M10','M11','M12','M20','M21','M22',"Terminou na Rodada"
# filtrando apenas pelas vitórias, o que teoricamente poderia ajudar a apenas ter vitórias não ajuda, mas dependendo do numero de vizinhos pode ajudar
# https://www.statology.org/sklearn-classification-report/
dict_knn_per_nei = {} 
for nigb in range(5,6):
    dict_knn = {}
    print(f'Num Neighbors {nigb}')
    for i in range(9):
        jogada_a_analisar = f'Jogada {i+1}'
        numero = int(jogada_a_analisar[-1])
        colunas_para_tirar = jogadas_list_cols[numero:]
        test_lb_encoder = test_lb_encoder[test_lb_encoder['Ganhou']==map_column_ganhou['Ganhou']]
        train_lb_array, X_train_lb_encoder, y_train_lb_encoder = scale_dataset(train_lb_encoder[train_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=True)
        test_lb_array, X_test_lb_encoder, y_test_lb_encoder = scale_dataset(test_lb_encoder[test_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=False)
        knn_model = KNeighborsClassifier(n_neighbors=nigb)
        knn_model.fit(X_train_lb_encoder,y_train_lb_encoder)
        y_pred = knn_model.predict(X_test_lb_encoder)
        #print(y_test_lb_encoder,y_pred)
        print(f'Rodada Analisada {i+1}')
        #print(classification_report(y_test_lb_encoder,y_pred,zero_division=0))
        a = classification_report(y_test_lb_encoder,y_pred,zero_division=0,output_dict=True)
        dict_knn[jogada_a_analisar] = a
    dict_knn_per_nei[f'Número de Vizinhos: {nigb}'] = dict_knn
models['KNN'] = dict_knn

    

Num Neighbors 5
Rodada Analisada 1
Rodada Analisada 2
Rodada Analisada 3
Rodada Analisada 4
Rodada Analisada 5
Rodada Analisada 6
Rodada Analisada 7
Rodada Analisada 8
Rodada Analisada 9


In [24]:
# Decision Tree Classifier
jogada_a_analisar = f'Jogada {1}'
numero = int(jogada_a_analisar[-1])
colunas_para_tirar = jogadas_list_cols[numero:]
test_lb_encoder = test_lb_encoder[test_lb_encoder['Ganhou']==map_column_ganhou['Ganhou']]
train_lb_array, X_train_lb_encoder, y_train_lb_encoder = scale_dataset(train_lb_encoder[train_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=True)
test_lb_array, X_test_lb_encoder, y_test_lb_encoder = scale_dataset(test_lb_encoder[test_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=False)
   
dtc = DecisionTreeClassifier()
dtc.fit(X_train_lb_encoder,y_train_lb_encoder)
y_pred_dtc = dtc.predict(X_test_lb_encoder)
print(f"Decision Tree Classification: \n{classification_report(y_test_lb_encoder,y_pred_dtc)}")



Decision Tree Classification: 
              precision    recall  f1-score   support

           1       0.23      0.29      0.25       206
           2       0.29      0.39      0.33       221
           3       0.24      0.26      0.25       199
           4       0.24      0.28      0.26       211
           5       0.27      0.23      0.25       247
           6       0.26      0.38      0.31       178
           7       0.28      0.18      0.22       252
           8       0.30      0.27      0.28       241
           9       0.27      0.15      0.19       230

    accuracy                           0.26      1985
   macro avg       0.27      0.27      0.26      1985
weighted avg       0.27      0.26      0.26      1985



In [26]:
# Random Forest Classifier
jogada_a_analisar = f'Jogada {1}'
numero = int(jogada_a_analisar[-1])
colunas_para_tirar = jogadas_list_cols[numero:]
test_lb_encoder = test_lb_encoder[test_lb_encoder['Ganhou']==map_column_ganhou['Ganhou']]
train_lb_array, X_train_lb_encoder, y_train_lb_encoder = scale_dataset(train_lb_encoder[train_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=True)
test_lb_array, X_test_lb_encoder, y_test_lb_encoder = scale_dataset(test_lb_encoder[test_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=False)
   
rfc = RandomForestClassifier()
rfc.fit(X_train_lb_encoder,y_train_lb_encoder)
y_pred_rfc = rfc.predict(X_test_lb_encoder)


print(f"Decision Tree Classification: \n{classification_report(y_test_lb_encoder,y_pred_rfc)}")



Decision Tree Classification: 
              precision    recall  f1-score   support

           1       0.20      0.20      0.20       206
           2       0.30      0.32      0.31       221
           3       0.28      0.27      0.28       199
           4       0.24      0.24      0.24       211
           5       0.26      0.23      0.25       247
           6       0.28      0.43      0.34       178
           7       0.24      0.18      0.21       252
           8       0.29      0.30      0.29       241
           9       0.28      0.24      0.26       230

    accuracy                           0.26      1985
   macro avg       0.26      0.27      0.26      1985
weighted avg       0.26      0.26      0.26      1985



In [51]:
classes = sorted(set(y_train_lb_encoder) | set(y_test_lb_encoder))
classes

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [52]:
## Naive Baynes
dict_nb = {}

for i in range(9):
    jogada_a_analisar = f'Jogada {i+1}'
    numero = int(jogada_a_analisar[-1])
    colunas_para_tirar = [] #jogadas_list_cols[numero:]

    #test_lb_encoder = test_lb_encoder[test_lb_encoder['Ganhou']==map_column_ganhou['Ganhou']]

    train_lb_array, X_train_lb_encoder, y_train_lb_encoder = scale_dataset(train_lb_encoder[train_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=True)
    test_lb_array, X_test_lb_encoder, y_test_lb_encoder = scale_dataset(test_lb_encoder[test_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=False)

    nb_model = GaussianNB()
    nb_model = nb_model.fit(X_train_lb_encoder,y_train_lb_encoder)
    y_pred = nb_model.predict(X_test_lb_encoder)
    #print(y_test_lb_encoder,y_pred)
    print(f'Rodada Analisada {i+1}')
    #print(classification_report(y_test_lb_encoder,y_pred,zero_division=0))
    a = classification_report(y_test_lb_encoder,y_pred,zero_division=0,output_dict=True)
    dict_nb[jogada_a_analisar] = a
models['Naive Baynes'] = dict_nb


Rodada Analisada 1
Rodada Analisada 2
Rodada Analisada 3
Rodada Analisada 4
Rodada Analisada 5
Rodada Analisada 6
Rodada Analisada 7
Rodada Analisada 8
Rodada Analisada 9


In [53]:
## Logistic Regression
dict_lr = {}


for i in range(9):
    jogada_a_analisar = f'Jogada {i+1}'
    numero = int(jogada_a_analisar[-1])
    colunas_para_tirar = []#jogadas_list_cols[numero:]
    
    test_lb_encoder = test_lb_encoder[test_lb_encoder['Ganhou']==map_column_ganhou['Ganhou']]
    
    train_lb_array, X_train_lb_encoder, y_train_lb_encoder = scale_dataset(train_lb_encoder[train_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=True)
    test_lb_array, X_test_lb_encoder, y_test_lb_encoder = scale_dataset(test_lb_encoder[test_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=False)
    
    
    lg_model = LogisticRegression()
    lg_model = lg_model.fit(X_train_lb_encoder,y_train_lb_encoder)
    y_pred = lg_model.predict(X_test_lb_encoder)
    #print(y_test_lb_encoder,y_pred)
    print(f'Rodada Analisada {i+1}')
    #print(classification_report(y_test_lb_encoder,y_pred))
    a = classification_report(y_test_lb_encoder,y_pred,zero_division=0,output_dict=True)
    dict_lr[jogada_a_analisar] = a

models['Logistic Regression'] = dict_lr
    


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Rodada Analisada 1



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Rodada Analisada 2



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Rodada Analisada 3



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Rodada Analisada 4



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Rodada Analisada 5



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Rodada Analisada 6



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Rodada Analisada 7



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



Rodada Analisada 8
Rodada Analisada 9



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



In [54]:
## SVM

dict_svm = {}

for i in range(9):
    jogada_a_analisar = f'Jogada {i+1}'
    numero = int(jogada_a_analisar[-1])
    colunas_para_tirar = [] #jogadas_list_cols[numero:]

    #test_lb_encoder = test_lb_encoder[test_lb_encoder['Ganhou']==map_column_ganhou[0]['Ganhou']]

    train_lb_array, X_train_lb_encoder, y_train_lb_encoder = scale_dataset(train_lb_encoder[train_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=True)
    test_lb_array, X_test_lb_encoder, y_test_lb_encoder = scale_dataset(test_lb_encoder[test_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=False)

    svm_model = SVC()
    svm_model = svm_model.fit(X_train_lb_encoder,y_train_lb_encoder)
    y_pred = svm_model.predict(X_test_lb_encoder)
    #print(y_test_lb_encoder,y_pred)
    #print(y_test_lb_encoder,y_pred)
    print(f'Rodada Analisada {i+1}')
    #print(classification_report(y_test_lb_encoder,y_pred))
    a = classification_report(y_test_lb_encoder,y_pred,zero_division=0,output_dict=True)
    dict_svm[jogada_a_analisar] = a

models['SVM'] = dict_svm


Rodada Analisada 1
Rodada Analisada 2
Rodada Analisada 3
Rodada Analisada 4
Rodada Analisada 5
Rodada Analisada 6
Rodada Analisada 7
Rodada Analisada 8
Rodada Analisada 9


In [55]:
models['KNN']['Jogada 1']['macro avg']

{'precision': 0.5398062509496268,
 'recall': 0.5397757452338179,
 'f1-score': 0.538243352379907,
 'support': 3914.0}

In [56]:
########### Arrumar NN, problema na features, colunas, algumas n tem certos valores, isso se da pelo label incoding manuel, fazer um one hot tbm de test

In [57]:
def train_model(X_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs):
    input_shape = X_train.shape[1]
    num_classes = len(np.unique(y_train_lb_encoder))
    nn_model = tf.keras.Sequential(
        [tf.keras.layers.Dense(num_nodes,activation='relu',input_shape=(input_shape,)),
         tf.keras.layers.Dropout(dropout_prob),
         tf.keras.layers.Dense(num_nodes,activation='relu'),
         tf.keras.layers.Dropout(dropout_prob),
         tf.keras.layers.Dense(num_classes, activation='softmax') # Saída multiclasse
        ]
    )
    
    nn_model.compile(optimizer=tf.keras.optimizers.Adam(lr),loss='sparse_categorical_crossentropy',metrics=['accuracy'])
    history = nn_model.fit(
        X_train, y_train, epochs=epochs, batch_size=batch_size,verbose=0,validation_split=0.2
        )
    return nn_model, history



In [58]:
X_train_lb_encoder.shape

(231960, 20)

In [59]:
num_classes = len(np.unique(y_train_lb_encoder))
num_classes
print("Unique labels in y_train_lb_encoder:", np.unique(y_train_lb_encoder))

[0, num_classes - 1]

Unique labels in y_train_lb_encoder: [0 1 2 3 4 5 6 7 8 9]


[0, 9]

In [60]:
jogada_a_analisar = 'Jogada 5'
numero = int(jogada_a_analisar[-1])
colunas_para_tirar = test_lb_encoder.columns[numero+1:-1].to_list()

#test_lb_encoder = test_lb_encoder[test_lb_encoder['Ganhou']==map_column_ganhou[0]['Ganhou']]

train_lb_array, X_train_lb_encoder, y_train_lb_encoder = scale_dataset(train_lb_encoder[train_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=True)
test_lb_array, X_test_lb_encoder, y_test_lb_encoder = scale_dataset(test_lb_encoder[test_lb_encoder.columns.drop(colunas_para_tirar)],jogada_a_analisar,oversample=False)


num_nodes = 16
dropout_prob = 0.1
lr = 0.001
batch_size = 128
epochs = 32

nn_model,history = train_model(X_train=X_train_lb_encoder,y_train=y_train_lb_encoder,num_nodes=num_nodes,
            dropout_prob=dropout_prob,lr=lr,batch_size=batch_size,epochs=epochs)


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



InvalidArgumentError: Graph execution error:

Detected at node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "c:\Users\vinic\anaconda3\envs\myenv\lib\runpy.py", line 197, in _run_module_as_main

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\runpy.py", line 87, in _run_code

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelapp.py", line 701, in start

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\asyncio\windows_events.py", line 321, in run_forever

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\asyncio\base_events.py", line 601, in run_forever

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\asyncio\base_events.py", line 1905, in _run_once

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\asyncio\events.py", line 80, in _run

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelbase.py", line 534, in dispatch_queue

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelbase.py", line 523, in process_one

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelbase.py", line 429, in dispatch_shell

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\ipykernel\ipkernel.py", line 429, in do_execute

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code

  File "C:\Users\vinic\AppData\Local\Temp\ipykernel_13780\3901817920.py", line 17, in <module>

  File "C:\Users\vinic\AppData\Local\Temp\ipykernel_13780\416793002.py", line 14, in train_model

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 320, in fit

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 121, in one_step_on_iterator

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 108, in one_step_on_data

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 54, in train_step

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\trainers\trainer.py", line 359, in _compute_loss

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\trainers\trainer.py", line 327, in compute_loss

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\trainers\compile_utils.py", line 611, in __call__

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\trainers\compile_utils.py", line 652, in call

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\losses\loss.py", line 60, in __call__

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\losses\losses.py", line 27, in call

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\losses\losses.py", line 1870, in sparse_categorical_crossentropy

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\ops\nn.py", line 1559, in sparse_categorical_crossentropy

  File "c:\Users\vinic\anaconda3\envs\myenv\lib\site-packages\keras\src\backend\tensorflow\nn.py", line 671, in sparse_categorical_crossentropy

Received a label value of 9 which is outside the valid range of [0, 9).  Label values: 2 1 9 7 1 7 8 9 3 7 3 2 9 5 7 6 5 8 4 6 4 2 6 9 8 3 1 5 7 9 8 6 1 3 9 6 3 4 6 7 6 9 7 3 7 2 3 6 9 5 7 6 9 4 4 1 7 4 2 3 5 1 9 5 5 6 9 4 3 8 6 9 6 2 5 8 4 4 2 4 7 5 7 9 4 6 8 1 1 8 3 8 9 5 4 7 4 4 1 6 6 2 7 9 2 3 1 2 4 8 5 8 5 3 2 1 8 4 4 4 7 8 1 9 3 3 1 6
	 [[{{node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_one_step_on_iterator_1433]

In [None]:
# Previsão usando o modelo treinado
# Previsão usando o modelo treinado
y_pred_proba = nn_model.predict(X_test_lb_encoder)  # Probabilidades para cada posição (1-9)
y_pred = np.argmax(y_pred_proba, axis=1) + 1  # Adiciona 1 porque as classes são de 1 a 9

print(classification_report(y_test_lb_encoder,y_pred))

## Relatório dos Modelos

In [61]:
def reduce_dict(report,model):
    """
    Compara os modelos fornecidos usando classification_report e retorna um resultado formatado.
    """
    results = {}
    #print(report[model])
        # Guardar as métricas principais
    results[model] = {
            "Precision (Macro)": report["macro avg"]["precision"],
            "Recall (Macro)": report["macro avg"]["recall"],
            "F1-Score (Macro)": report["macro avg"]["f1-score"],
            "Accuracy": report["accuracy"],
            "Full Report": report  # Inclui o relatório completo para a saída bonita
        }

    return results

In [62]:
def print_comparison(dict_models,jogada):
    """
    Exibe os resultados dos modelos em um formato similar ao classification_report.
    """
    result = {}
    
    for model in dict_models:
        result[model] = reduce_dict(report=dict_models[model][jogada],model=model)

    #print(result)

    print("\nModel Comparison Report\n")
    print("{:<20} {:<15} {:<15} {:<15} {:<15}".format("Model", "Precision (Macro)", "Recall (Macro)", "F1-Score (Macro)", "Accuracy"))
    print("-" * 80)
    
    #return result.items()
    for model, metrics in result.items():
        print("{:<20} {:<15.2f} {:<15.2f} {:<15.2f} {:<15.2f}".format(
            model,
            metrics[model]["Precision (Macro)"],
            metrics[model]["Recall (Macro)"],
            metrics[model]["F1-Score (Macro)"],
            metrics[model]["Accuracy"]
        ))


In [63]:
result = {}
for model in dict_knn_per_nei:
    result[model] = reduce_dict(report=dict_knn_per_nei[model][f'Jogada 1'],model=model)
result

{'Número de Vizinhos: 10': {'Número de Vizinhos: 10': {'Precision (Macro)': 0.5398062509496268,
   'Recall (Macro)': 0.5397757452338179,
   'F1-Score (Macro)': 0.538243352379907,
   'Accuracy': 0.5403679100664283,
   'Full Report': {'1': {'precision': 0.5992438563327032,
     'recall': 0.744131455399061,
     'f1-score': 0.6638743455497382,
     'support': 426.0},
    '2': {'precision': 0.5067567567567568,
     'recall': 0.5306603773584906,
     'f1-score': 0.5184331797235023,
     'support': 424.0},
    '3': {'precision': 0.49061032863849763,
     'recall': 0.4940898345153664,
     'f1-score': 0.4923439340400471,
     'support': 423.0},
    '4': {'precision': 0.532520325203252,
     'recall': 0.5435684647302904,
     'f1-score': 0.5379876796714579,
     'support': 482.0},
    '5': {'precision': 0.5160550458715596,
     'recall': 0.5269320843091335,
     'f1-score': 0.5214368482039398,
     'support': 427.0},
    '6': {'precision': 0.4702842377260982,
     'recall': 0.4396135265700483,

## Relatório

In [71]:
# comparação entre os diferentes modelos
for i in range(9):
    print(f'\nRodada {i+1}')
    print_comparison(models,f'Jogada {i+1}')



Rodada 1

Model Comparison Report

Model                Precision (Macro) Recall (Macro)  F1-Score (Macro) Accuracy       
--------------------------------------------------------------------------------
KNN                  0.53            0.53            0.52            0.53           
Naive Baynes         0.28            0.25            0.25            0.25           
Logistic Regression  0.33            0.34            0.33            0.34           
SVM                  0.70            0.70            0.70            0.70           

Rodada 2

Model Comparison Report

Model                Precision (Macro) Recall (Macro)  F1-Score (Macro) Accuracy       
--------------------------------------------------------------------------------
KNN                  0.46            0.46            0.46            0.46           
Naive Baynes         0.20            0.18            0.18            0.18           
Logistic Regression  0.22            0.23            0.22            0.23       

In [72]:
## KNN por número de vizinhos
for rod in range(9):
    print(f'\nJogada {rod+1}')
    print_comparison(dict_knn_per_nei,f'Jogada {rod+1}')
    


Jogada 1

Model Comparison Report

Model                Precision (Macro) Recall (Macro)  F1-Score (Macro) Accuracy       
--------------------------------------------------------------------------------
Número de Vizinhos: 1 0.89            0.89            0.89            0.89           
Número de Vizinhos: 2 0.69            0.67            0.67            0.67           
Número de Vizinhos: 3 0.53            0.53            0.52            0.53           
Número de Vizinhos: 4 0.52            0.52            0.52            0.52           
Número de Vizinhos: 5 0.55            0.54            0.54            0.55           
Número de Vizinhos: 6 0.55            0.54            0.54            0.54           
Número de Vizinhos: 7 0.55            0.54            0.54            0.54           
Número de Vizinhos: 8 0.54            0.54            0.54            0.54           
Número de Vizinhos: 9 0.54            0.54            0.54            0.54           
Número de Vizinhos: 1

In [73]:
valores = [1,-5**(0.5),1]
x = np.roots(valores)
print(f'Valor X1 = {x[0]}\nValor X2 = {x[1]}')


Valor X1 = 1.618033988749895
Valor X2 = 0.6180339887498948


In [74]:
for i in x:
    print(f'Teste com o valor de X: {i}') 
    print((i+(1/i))**2)

Teste com o valor de X: 1.618033988749895
5.000000000000001
Teste com o valor de X: 0.6180339887498948
5.000000000000001


In [75]:
for i in x:
    print(f'Para o valor X = {i}')
    print((i**3+(1/i**3))/5**(0.5))

Para o valor X = 1.618033988749895
2.0
Para o valor X = 0.6180339887498948
2.0000000000000004


In [3]:
a = [[1,2],[3,4]] 

In [4]:
a[0][1]

2

In [6]:
not False

True