INSTALAÇÃO PYCARET

In [1]:
!pip install pycaret

Collecting pycaret
  Downloading pycaret-3.2.0-py3-none-any.whl (484 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.7/484.7 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting category-encoders>=2.4.0 (from pycaret)
  Downloading category_encoders-2.6.3-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/81.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting deprecation>=2.1.0 (from pycaret)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Collecting kaleido>=0.2.1 (from pycaret)
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
Collecting matplotlib<=3.6,>=3.3.0 (from pycaret)
  Downloading matplotlib-3.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.8/1

IMPORTAÇÃO DE BIBLIOTECAS

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from pycaret.classification import *

CARREGAMENTO DO CONJUNTO DE DADOS

In [4]:
url = 'csgo_data/csgo_round_snapshots.xlsx'
csgo_data = pd.read_excel(url)

DEFINIÇÃO DA VARIÁVEL ALVO

In [5]:
target_variable = 'round_winner'

REMOVENDO A COLUNA MAP

In [6]:
csgo_data.drop(['map'], axis=1, inplace=True)

SEPARAÇÃO DE FEATURES E TAGS

In [7]:
X = csgo_data.drop(target_variable, axis=1)
y = csgo_data[target_variable]

DIVISÃO DO CONJUNTO DE DADOS

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


DEFINIÇÃO DE TRANSFORMAÇÕES PARA DADOS NUMÉRICOS E CATEGÓRICOS

In [9]:
numeric_features = X.select_dtypes(include=['number']).columns
categorical_features = X.select_dtypes(include=['object']).columns

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder())
])

# Aplicar transformações
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])


TRANSFORMANDO DADOS DE TREINO E TESTE

In [10]:
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

CONFIGURANDO O AMBIENTE DO PYCARET

In [11]:
clf1 = setup(data=pd.concat([X_train, y_train], axis=1), target=target_variable, session_id=42)

Unnamed: 0,Description,Value
0,Session id,42
1,Target,round_winner
2,Target type,Binary
3,Target mapping,"CT: 0, T: 1"
4,Original data shape,"(91807, 93)"
5,Transformed data shape,"(91807, 93)"
6,Transformed train set shape,"(64264, 93)"
7,Transformed test set shape,"(27543, 93)"
8,Numeric features,92
9,Preprocess,True


COMPARANDO MODELOS E AVALIANDO DESEMPENHO

In [12]:
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8469,0.9296,0.8469,0.8473,0.8469,0.6938,0.6941,15.739
rf,Random Forest Classifier,0.8424,0.9258,0.8424,0.8428,0.8424,0.6848,0.6851,12.778
xgboost,Extreme Gradient Boosting,0.7887,0.8868,0.7887,0.7892,0.7888,0.5776,0.578,2.624
dt,Decision Tree Classifier,0.7799,0.7819,0.7799,0.7799,0.7799,0.5596,0.5596,1.586
lightgbm,Light Gradient Boosting Machine,0.7747,0.8739,0.7747,0.7764,0.7745,0.5498,0.5512,3.964
gbc,Gradient Boosting Classifier,0.752,0.8552,0.752,0.7566,0.7514,0.5051,0.509,18.303
ridge,Ridge Classifier,0.7471,0.0,0.7471,0.7479,0.7471,0.4944,0.495,0.517
lda,Linear Discriminant Analysis,0.7471,0.8406,0.7471,0.7479,0.7471,0.4944,0.495,2.257
ada,Ada Boost Classifier,0.7413,0.8395,0.7413,0.7444,0.7408,0.4833,0.4859,4.888
lr,Logistic Regression,0.7394,0.8274,0.7394,0.7406,0.7394,0.4792,0.4801,12.727


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

AVALIANDO O MODELO NO CONJUNTO DE TESTE

In [13]:
evaluate_model(best_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

FAZENDO PREVISÕES NO CONJUNTO DE TESTE

In [14]:
predictions = predict_model(best_model, data=X_test)

IDENTIFICANDO A COLUNA DE PREVISÕES DINAMICAMENTE

In [15]:
prediction_column = predictions.columns[predictions.columns.str.contains('Label', case=False)].tolist()
if not prediction_column:
    raise ValueError("Nenhuma coluna de previsão encontrada. Verifique a estrutura da saída do PyCaret.")
else:
    prediction_column = prediction_column[0]


AVALIANDO PREVISÕES DE MELHOR TREINAMENTO

In [16]:
rf_accuracy_pycaret = accuracy_score(y_test, predictions[prediction_column])
print(f"Melhor Modelo (PyCaret) Accuracy: {rf_accuracy_pycaret:.2f}")
print("Melhor Modelo (PyCaret) Classification Report:")
print(classification_report(y_test, predictions[prediction_column]))

Melhor Modelo (PyCaret) Accuracy: 0.85
Melhor Modelo (PyCaret) Classification Report:
              precision    recall  f1-score   support

          CT       0.85      0.86      0.85     15053
           T       0.86      0.85      0.86     15550

    accuracy                           0.85     30603
   macro avg       0.85      0.85      0.85     30603
weighted avg       0.85      0.85      0.85     30603



=======================================ETAPA 2=======================================

IMPORTAÇÃO DE BIBLIOTECAS

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from pycaret.classification import *


CARREGAMENTO DO CONJUNTO DE DADOS

In [18]:
url = 'csgo_data/csgo_round_snapshots.xlsx'
csgo_data = pd.read_excel(url)

DIVISÃO DO CONJUNTO DE DADOS

In [19]:
X_train, X_test, y_train, y_test = train_test_split(
    csgo_data.drop('round_winner', axis=1),
    csgo_data['round_winner'],
    test_size=0.25,
    random_state=42
)

CONFIGURANDO O AMBIENTE DO PYCARET

In [20]:
clf1 = setup(data=pd.concat([X_train, y_train], axis=1), target='round_winner', session_id=42)

Unnamed: 0,Description,Value
0,Session id,42
1,Target,round_winner
2,Target type,Binary
3,Target mapping,"CT: 0, T: 1"
4,Original data shape,"(91807, 94)"
5,Transformed data shape,"(91807, 101)"
6,Transformed train set shape,"(64264, 101)"
7,Transformed test set shape,"(27543, 101)"
8,Numeric features,92
9,Categorical features,1


COMPARANDO MODELOS E AVALIANDO DESEMPENHO

In [21]:
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.859,0.9371,0.859,0.8594,0.859,0.7181,0.7183,17.34
rf,Random Forest Classifier,0.8508,0.9317,0.8508,0.8512,0.8508,0.7016,0.7019,13.955
xgboost,Extreme Gradient Boosting,0.7968,0.8907,0.7968,0.7973,0.7968,0.5937,0.5941,2.871
dt,Decision Tree Classifier,0.7852,0.7865,0.7852,0.7852,0.7852,0.5702,0.5702,1.989
lightgbm,Light Gradient Boosting Machine,0.7792,0.8759,0.7792,0.7806,0.7791,0.5587,0.5599,5.16
gbc,Gradient Boosting Classifier,0.7554,0.8565,0.7554,0.759,0.755,0.5117,0.5147,19.947
ridge,Ridge Classifier,0.7455,0.0,0.7455,0.7465,0.7454,0.4913,0.492,1.023
lda,Linear Discriminant Analysis,0.7455,0.8414,0.7455,0.7466,0.7455,0.4914,0.4921,2.454
ada,Ada Boost Classifier,0.7449,0.8402,0.7449,0.7477,0.7446,0.4907,0.493,5.589
lr,Logistic Regression,0.7425,0.8303,0.7425,0.7441,0.7423,0.4855,0.4868,14.655


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

AVALIANDO MODELO NO CONJUNTO DE TESTES

In [22]:
evaluate_model(best_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

FAZER PREVISÕES NO CONJUNTO DE TESTES

In [23]:
predictions = predict_model(best_model, data=X_test)

IDENTIFICANDO A COLUNA DE PREVISÃO DE MANEIRA DINAMICA

In [24]:
prediction_column = predictions.columns[predictions.columns.str.contains('Label', case=False)].tolist()
if not prediction_column:
    raise ValueError("Nenhuma coluna de previsão encontrada. Verifique a estrutura da saída do PyCaret.")
else:
    prediction_column = prediction_column[0]

AVALIANDO A PREVISÃO

In [25]:
rf_accuracy_pycaret = accuracy_score(y_test, predictions[prediction_column])
print(f"Melhor Modelo (PyCaret) Accuracy: {rf_accuracy_pycaret:.2f}")
print("Melhor Modelo (PyCaret) Classification Report:")
print(classification_report(y_test, predictions[prediction_column]))


Melhor Modelo (PyCaret) Accuracy: 0.87
Melhor Modelo (PyCaret) Classification Report:
              precision    recall  f1-score   support

          CT       0.86      0.87      0.86     15053
           T       0.87      0.86      0.87     15550

    accuracy                           0.87     30603
   macro avg       0.87      0.87      0.87     30603
weighted avg       0.87      0.87      0.87     30603



CRIANDO PIPELINE

In [26]:
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', best_model)
])


TREINANDO PIPELINE NO CONJUNTO DE TREINO

In [27]:
pipeline.fit(X_train, y_train)

FAZENDO PREVISÕES NO CONJUNTO DE TESTES

In [28]:
predictions_pipeline = pipeline.predict(X_test)

MOSTRANDO PREVISÕES

In [29]:
print("Predições do Pipeline:")
print(predictions_pipeline)

Predições do Pipeline:
['CT' 'T' 'CT' ... 'CT' 'T' 'T']


CALCULANDO E EXIBINDO MÉTRICAS

In [30]:
from sklearn.metrics import accuracy_score, classification_report

# Calcular e exibir métricas
accuracy = accuracy_score(y_test, predictions_pipeline)
classification_report_result = classification_report(y_test, predictions_pipeline)

print(f"Acurácia: {accuracy:.2f}")
print("Relatório de Classificação:")
print(classification_report_result)

Acurácia: 0.88
Relatório de Classificação:
              precision    recall  f1-score   support

          CT       0.87      0.88      0.88     15053
           T       0.88      0.87      0.88     15550

    accuracy                           0.88     30603
   macro avg       0.88      0.88      0.88     30603
weighted avg       0.88      0.88      0.88     30603

