# Treinamento Básico - GradientBoostingClassifier
Este notebook treina um modelo simples usando o arquivo `features_v3.csv`.

In [2]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder


def hex_to_rgb(h):
    """Convert an ``RRGGBB`` / ``#RRGGBB`` colour value into ``[r, g, b]``.

    Parameters
    ----------
    h : object
        Value to convert. It is passed through ``str()``, stripped of
        surrounding whitespace and then of any leading ``#`` characters.

    Returns
    -------
    list of int
        The three channel values, each in ``0..255``. ``[0, 0, 0]`` is
        returned as a fallback whenever the cleaned text is not exactly six
        ASCII hexadecimal digits (for example ``None``, NaN, ``'#fff'``).
    """
    text = str(h).strip().lstrip('#')

    # Validate the digits explicitly instead of relying on int(..., 16) to
    # raise: int() also accepts a sign or padding inside a two-character
    # slice ("+f", "-1", " a"), which would yield bogus or negative channels.
    if len(text) != 6 or any(ch not in '0123456789abcdefABCDEF' for ch in text):
        return [0, 0, 0]

    return [int(text[pos:pos + 2], 16) for pos in (0, 2, 4)]

# Location of the feature table. The original absolute path is kept as the
# default; set the RG_FEATURES_CSV environment variable to run this notebook
# on another machine without editing the cell.
features_csv = os.environ.get(
    'RG_FEATURES_CSV',
    '/home/chines/Documentos/sistemas-inteligentes-rg/features_v3.csv',
)
df = pd.read_csv(features_csv)

print("Linhas carregadas:", len(df))
print("Colunas:", df.columns.tolist())

# Columns removed before the feature matrix is built. errors="ignore" already
# skips names that are absent from the file, so no membership filter is needed.
drop_cols = ['imagem', 'tem_qrcode', 'tem_digital']
df = df.drop(columns=drop_cols, errors="ignore")


# Separate the target column from the predictors.
y = df['classe']
X = df.drop(columns=['classe'])

hex_cols = ['color1_hex', 'color2_hex', 'color3_hex']

# Expand every hex colour column that is present into three numeric channel
# columns named <col>_r, <col>_g and <col>_b.
for col in hex_cols:
    if col not in X.columns:
        continue
    channels = np.array([hex_to_rgb(value) for value in X[col]])
    for position, suffix in enumerate('rgb'):
        X[f'{col}_{suffix}'] = channels[:, position]

# The raw hex strings are dropped once their channels have been extracted.
X = X.drop(columns=hex_cols, errors="ignore")

# Remaining missing values are filled with 0.
X = X.fillna(0)


# Fit the label encoder on the class column, then replace the labels with
# their integer codes.
le = LabelEncoder().fit(y)
y = le.transform(y)

# Hold out 25% of the rows for evaluation; the seed is fixed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.25)

# Hyper-parameters of the gradient boosting classifier, gathered in one place.
gb_params = dict(
    learning_rate=0.05,
    n_estimators=300,
    max_depth=3,
    subsample=0.9,
    random_state=42,
)

# Single-step pipeline wrapping the classifier, fitted on the training split.
model = Pipeline(steps=[('clf', GradientBoostingClassifier(**gb_params))])
model.fit(X_train, y_train)


# Predict on the held-out split and report the metrics.
y_pred = model.predict(X_test)

print(f'Acurácia: {accuracy_score(y_test, y_pred)*100:.2f}%')

# Each report is printed as a heading followed by the metric's output.
for heading, metric in (
    ('\nRelatório de Classificação:', classification_report),
    ('\nMatriz de Confusão:', confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))

# Persist the fitted pipeline and the label encoder. The files are opened in
# `with` blocks so each handle is flushed and closed as soon as the dump
# finishes, instead of being left open until garbage collection.
with open('modelo_rg.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('label_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(le, encoder_file)


Linhas carregadas: 58
Colunas: ['imagem', 'classe', 'tem_qrcode', 'tem_digital', 'color1_hex', 'color2_hex', 'color3_hex', 'tem_nome', 'tem_nome_social', 'tem_nacionalidade', 'qtd_palavras', 'qtd_numeros', 'brilho_medio', 'contraste', 'bordas_normalizado', 'area_foto_normalizada']
Acurácia: 86.67%

Relatório de Classificação:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.83      0.83      0.83         6
           2       0.83      0.83      0.83         6

    accuracy                           0.87        15
   macro avg       0.89      0.89      0.89        15
weighted avg       0.87      0.87      0.87        15


Matriz de Confusão:
[[3 0 0]
 [0 5 1]
 [0 1 5]]
