<a href="https://colab.research.google.com/github/Welberth77/Projeto-IA/blob/main/student-performance-repo/notebooks/analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Instalar dependências

!pip install pandas numpy scikit-learn matplotlib joblib

# 2. Imports principais

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score, roc_auc_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import joblib
import requests, zipfile, io

# 3. Automatic dataset download
# Fetches the zipped dataset over HTTP, then reads 'student-mat.csv'
# (semicolon-separated) from the in-memory archive into `df`.

print("Baixando dataset do UCI...")
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00320/student.zip'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
r = requests.get(url, timeout=30)
# Fail here with the HTTP status instead of a confusing BadZipFile further
# down when the server answers with an error page.
r.raise_for_status()
# Context managers close the archive and the member handle once the CSV
# has been parsed.
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    with z.open('student-mat.csv') as csv_file:
        df = pd.read_csv(csv_file, sep=';')

print("\nDataset carregado com sucesso!")
df.head()


In [None]:
# 4. Data preparation

# Binary target: 1 when the final grade G3 is at least 10, otherwise 0.
df['pass'] = df['G3'].ge(10).astype(int)

numeric_features = ['age', 'absences', 'G1', 'G2']
categorical_features = ['sex', 'school', 'address', 'famsize', 'Pstatus']

# One-hot encode the selected columns; drop_first=True drops the first
# level of every encoded column.
feature_columns = numeric_features + categorical_features
X = pd.get_dummies(df[feature_columns], drop_first=True)

# Regression target (G3) and classification target (the pass flag).
y_reg, y_clf = df['G3'], df['pass']

# A single split call keeps the features and both targets aligned on the
# same train/test rows; the split is stratified on the pass flag.
(X_train, X_test,
 yreg_train, yreg_test,
 yclf_train, yclf_test) = train_test_split(
    X, y_reg, y_clf,
    stratify=y_clf,
    test_size=0.2,
    random_state=42,
)


In [None]:
# 5. Linear regression

# fit() returns the estimator itself, so construction and training chain.
lr = LinearRegression().fit(X_train, yreg_train)
pred_reg = lr.predict(X_test)

# Score on the held-out split; RMSE is the square root of the MSE.
mse = mean_squared_error(yreg_test, pred_reg)
r2, rmse = r2_score(yreg_test, pred_reg), np.sqrt(mse)

print("\nRegressão Linear")
print("R²:", r2)
print("RMSE:", rmse)


In [None]:
# 6. Logistic regression

clf = LogisticRegression(max_iter=1000).fit(X_train, yclf_train)

# Hard labels for accuracy / confusion matrix / report, plus the second
# predict_proba column as the score fed to the AUC.
pred_clf = clf.predict(X_test)
probs = clf.predict_proba(X_test)[:, 1]

acc = accuracy_score(yclf_test, pred_clf)
auc = roc_auc_score(yclf_test, probs)

print("\nRegressão Logística")
for label, value in (("Accuracy:", acc), ("AUC:", auc)):
    print(label, value)

matrix = confusion_matrix(yclf_test, pred_clf)
print("Confusion Matrix:\n", matrix)

report = classification_report(yclf_test, pred_clf)
print("\nClassification Report:\n", report)


In [None]:
# 7. Simple visualization

# Scatter of the test-split targets (x) against the linear-regression
# predictions (y), drawn through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(yreg_test, pred_reg)
ax.set_xlabel("Real")
ax.set_ylabel("Predito")
ax.set_title("Regressão Linear — Notas")
ax.grid(True)
plt.show()


In [None]:
# 8. Save models (optional)

# Each fitted estimator is written under a relative file name, i.e. into
# the current working directory.
for filename, model in (
    ("linear_regression.joblib", lr),
    ("logistic_regression.joblib", clf),
):
    joblib.dump(model, filename)

print("\nModelos salvos!")


In [None]:
import joblib
from pprint import pprint

# Inspect the artifact stored at ../models/linear_regression.joblib.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts you produced yourself.
m = joblib.load('../models/linear_regression.joblib')

# The artifact may be a mapping-style bundle or a bare estimator; a bare
# estimator (which is what the save cell in this notebook dumps) has no
# .keys(), so fall back to pretty-printing the object itself.
# NOTE(review): confirm what src/train_models.py actually writes.
if hasattr(m, "keys"):
    print(m.keys())
else:
    pprint(m)


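**Note:** the last code cell loads `../models/linear_regression.joblib`. Run `python src/train_models.py --data data/student-mat.csv --output models` from the project root before executing that cell so the file exists; the other cells download the dataset themselves and do not depend on it.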