# Treinamento Básico - RandomForestClassifier | K-Nearest Neighbors | Support Vector Machine
Este notebook treina três modelos simples (K-Nearest Neighbors, Random Forest e Support Vector Machine) usando o arquivo `features_v3.csv`.

In [6]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import Pipeline

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

import pickle



def hex_to_rgb(h):
    """Convert a hex colour string such as ``"#1a2b3c"`` into ``[r, g, b]``.

    Args:
        h: Colour written as three two-digit hex pairs, with or without a
            ``#``. Surrounding whitespace is ignored.

    Returns:
        A list of three ints, one per channel. ``[0, 0, 0]`` is returned as a
        best-effort fallback when ``h`` is not a string (for example ``None``
        or the float NaN pandas uses for an empty cell) or when it does not
        contain three parseable hex pairs.
    """
    # Non-string inputs have no ``.replace``; treat them as "no colour"
    # instead of letting an AttributeError escape.
    if not isinstance(h, str):
        return [0, 0, 0]

    digits = h.strip().replace("#", "")
    try:
        return [int(digits[start:start + 2], 16) for start in (0, 2, 4)]
    except ValueError:
        # Raised for strings that are too short or contain non-hex characters.
        return [0, 0, 0]


# Load the feature table from the author's local path and discard the
# 'imagem' column, which is not used for modelling.
df = pd.read_csv("/home/chines/Documentos/sistemas-inteligentes-rg/features_v3.csv")
df = df.drop(columns=['imagem'])

# 'classe' is the target; every remaining column is a feature.
y = df['classe']
X = df.drop(columns=['classe'])

# Expand each hex colour column into three numeric columns
# (<column>_r, <column>_g, <column>_b), then drop the original text columns.
for hex_col in ('color1_hex', 'color2_hex', 'color3_hex'):
    channels = np.array([hex_to_rgb(value) for value in X[hex_col]])
    for position, suffix in enumerate(('r', 'g', 'b')):
        X[f'{hex_col}_{suffix}'] = channels[:, position]

X = X.drop(columns=['color1_hex', 'color2_hex', 'color3_hex'])

# Encode the class labels as integers; `le` is kept so the mapping can be
# saved together with the trained models.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 25% of the rows for testing, with a fixed seed for reproducibility.
# NOTE(review): the split is not stratified; consider `stratify=y` if the
# classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X.reset_index(drop=True), y, random_state=42, test_size=0.25
)



def treinar_modelo(nome, pipeline):
    """Fit, evaluate and persist one classification pipeline.

    The pipeline is fitted on the module-level ``X_train``/``y_train`` and
    scored on ``X_test``/``y_test``: accuracy, classification report and
    confusion matrix are printed. A 5-fold cross-validation over the full
    ``X``/``y`` is printed as well. Finally the fitted pipeline is pickled to
    ``modelo_rg_{nome}.pkl`` and the module-level label encoder ``le`` to
    ``label_encoder.pkl``, both relative to the current working directory.

    Args:
        nome: Short model identifier, used in the printed banner and in the
            output file name.
        pipeline: Estimator exposing ``fit`` and ``predict``.

    Returns:
        None. Results are printed and written to disk.
    """
    print("\n" + "=" * 60)
    print(f" Treinando modelo: {nome}")
    print("=" * 60)

    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    print(f'\nAcurácia: {accuracy_score(y_test, y_pred)*100:.2f}%')
    print('\nRelatório de Classificação:')
    print(classification_report(y_test, y_pred))
    print('\nMatriz de Confusão:')
    print(confusion_matrix(y_test, y_pred))

    # Cross-validation runs over the whole dataset, not only the train split.
    scores = cross_val_score(pipeline, X, y, cv=5)
    print("\nValidação Cruzada (5-fold):")
    print("Scores:", scores)
    print("Média:", scores.mean())

    # Context managers make sure both files are flushed and closed right
    # away instead of whenever the garbage collector reclaims the handles.
    with open(f'modelo_rg_{nome}.pkl', 'wb') as model_file:
        pickle.dump(pipeline, model_file)
    # The encoder file is rewritten on every call with the same `le`.
    with open('label_encoder.pkl', 'wb') as encoder_file:
        pickle.dump(le, encoder_file)

    print(f"\nModelo salvo como modelo_rg_{nome}.pkl")
    print("\n======================================================")



# K-Nearest Neighbors: features are standardised first, then 5 neighbours
# vote with distance-based weights.
pipeline_knn = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('clf', KNeighborsClassifier(weights='distance', n_neighbors=5)),
])

# Random forest: no scaling step; 400 trees capped at depth 12, with
# balanced class weights and a fixed seed.
pipeline_rf = Pipeline(steps=[
    (
        'clf',
        RandomForestClassifier(
            class_weight='balanced',
            random_state=42,
            max_depth=12,
            n_estimators=400,
        ),
    ),
])

# Support Vector Machine: standardised features, RBF kernel, and probability
# estimates enabled.
pipeline_svm = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('clf', SVC(probability=True, gamma='scale', C=2, kernel='rbf')),
])



# Train, evaluate and save each model in turn: KNN, random forest, then SVM.
for model_name, model_pipeline in (
    ("knn", pipeline_knn),
    ("random_forest", pipeline_rf),
    ("svm", pipeline_svm),
):
    treinar_modelo(model_name, model_pipeline)



 Treinando modelo: knn

Acurácia: 100.00%

Relatório de Classificação:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00        24
           2       1.00      1.00      1.00        29

    accuracy                           1.00        57
   macro avg       1.00      1.00      1.00        57
weighted avg       1.00      1.00      1.00        57


Matriz de Confusão:
[[ 4  0  0]
 [ 0 24  0]
 [ 0  0 29]]

Validação Cruzada (5-fold):
Scores: [0.97777778 0.93333333 1.         0.97777778 1.        ]
Média: 0.9777777777777779

Modelo salvo como modelo_rg_knn.pkl


 Treinando modelo: random_forest

Acurácia: 100.00%

Relatório de Classificação:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00        24
           2       1.00      1.00      1.00        29

    accuracy                   