# 🌾 Classificação de Grãos de Trigo com Scikit-Learn

Notebook final da atividade - FASE 04/CTWP/Cap11

In [None]:
# 📦 Importação de bibliotecas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

## 📥 Carregamento do Dataset

In [None]:
# Read the seeds dataset from the CSV file located next to the notebook.
# Later cells expect this frame to expose a "classe" column (the target label).
df = pd.read_csv(filepath_or_buffer='seeds_dataset.csv')
# Show the first rows as the cell output (5 is the pandas default).
df.head(n=5)

## 🔎 Análise Exploratória

In [None]:
# Display pandas' default descriptive statistics for the loaded DataFrame.
df.describe()

In [None]:
# Switch seaborn to its "whitegrid" theme; this applies to the figures drawn
# from this point on.
sns.set(style="whitegrid")

# One histogram per DataFrame column, all with the same binning and colour.
hist_kwargs = dict(bins=15, figsize=(15, 10), color='steelblue')
df.hist(**hist_kwargs)

# Adjust subplot spacing, then render.
plt.tight_layout()
plt.show()

In [None]:
# Boxplots of the DataFrame columns drawn on a single 12x6 figure.
boxplot_size = (12, 6)
plt.figure(figsize=boxplot_size)
sns.boxplot(data=df)
plt.title("Boxplots dos atributos")
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation=45)
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between DataFrame columns.
plt.figure(figsize=(10, 8))
correlations = df.corr()
sns.heatmap(data=correlations, annot=True, cmap="Blues")
plt.title("Correlação entre atributos")
plt.show()

In [None]:
# Scatter-plot matrix of the DataFrame, coloured by the "classe" column, with
# histograms on the diagonal.
sns.pairplot(
    data=df,
    hue="classe",
    diag_kind="hist",
    palette="tab10",
)
# y=1.02 puts the figure title slightly above the top edge of the grid.
plt.suptitle("Gráfico de Dispersão - Seeds Dataset", y=1.02)
plt.show()

## 🧹 Pré-processamento

In [None]:
# Separate the feature columns from the target label.
X = df.drop("classe", axis=1)
y = df["classe"]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/std of the held-out rows leak into the training features and make the
# test metrics optimistic. The same random_state on the same number of rows
# yields the same train/test row partition as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with any cell that still reads X_scaled:
# the full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

## 🤖 Treinamento Inicial dos Modelos

In [None]:
# Fit three baseline classifiers on the training split and keep each one's
# predictions for the test split. No hyperparameters are set here apart from
# the random forest's seed.

# k-nearest neighbours
knn = KNeighborsClassifier().fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Support vector machine
svm = SVC().fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Random forest, with a fixed seed
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

## 📊 Avaliação Inicial dos Modelos

In [None]:
def avaliar(nome, y_true, y_pred):
    """Print an evaluation report for one model's predictions.

    Prints, in order: the model name, the accuracy score, the
    classification report, the confusion matrix, and a blank separator.

    Args:
        nome: Label identifying the model in the printed report.
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model for the same samples.

    Returns:
        None. The report is written to standard output.
    """
    print(f"Modelo: {nome}")

    accuracy = accuracy_score(y_true, y_pred)
    print("Acurácia:", accuracy)

    report = classification_report(y_true, y_pred)
    print(report)

    matrix = confusion_matrix(y_true, y_pred)
    print("Matriz de Confusão:\n", matrix)

    # Separator between consecutive reports.
    print("\n")

# Print the report of each baseline classifier against the shared test labels.
for model_label, predictions in (
    ("KNN", y_pred_knn),
    ("SVM", y_pred_svm),
    ("Random Forest", y_pred_rf),
):
    avaliar(model_label, y_test, predictions)

## 🔧 Otimização com GridSearchCV (SVM)

In [None]:
# Hyperparameter candidates for the SVM: every combination of C, kernel and
# gamma below is evaluated.
param_grid_svm = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf', 'poly'],
    gamma=['scale', 'auto'],
)

# Exhaustive search with 5-fold cross-validation on the training split,
# ranking candidates by accuracy.
# NOTE(review): X_train is already standardized before this cell, so the
# scaling statistics are shared across the CV folds; putting the scaler and
# the SVC in a Pipeline would keep each validation fold fully unseen.
grid_svm = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid_svm,
    scoring='accuracy',
    cv=5,
)
grid_svm.fit(X_train, y_train)

# Predict the test split with the best estimator found by the search.
best_svm = grid_svm.best_estimator_
y_pred_best_svm = best_svm.predict(X_test)

avaliar("SVM Otimizado", y_test, y_pred_best_svm)
print("Melhores parâmetros:", grid_svm.best_params_)