In [1]:
!pip -q install fastapi uvicorn scikit-learn joblib pyngrok

# Creación de directorios y alojamiento de modelo

In [4]:
import pandas as pd, joblib
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# -------- Paths --------
BASE = Path.cwd()

# Use the project layout (data/raw/) when the CSV is there; otherwise expect it
# directly in the working directory.
_nested_raw = BASE.joinpath("data", "raw", "data_limpia.csv")
RAW = _nested_raw if _nested_raw.exists() else BASE.joinpath("data_limpia.csv")

MODEL = BASE.joinpath("deployment", "fastapi_app", "model.joblib")
MODEL.parent.mkdir(parents=True, exist_ok=True)  # create the target folder if needed

# -------- Data + features --------
grade_cols = ["Curricular units 1st sem (grade)",
              "Curricular units 2nd sem (grade)"]
cat_vars = ["Gender", "Scholarship holder", "International",
            "Debtor", "Displaced", "Marital status"]

df = pd.read_csv(RAW)
# Two derived columns: row-wise mean of the semester grades and the sum of
# approved units over both semesters.
df = df.assign(
    mean_grade=df[grade_cols].mean(axis=1),
    total_approved=df["Curricular units 1st sem (approved)"]
    + df["Curricular units 2nd sem (approved)"],
)
# One-hot encode the categorical columns, dropping the first level of each.
df = pd.get_dummies(df, columns=cat_vars, drop_first=True)
# Binary label: 1 when the lower-cased Target text contains "drop", else 0.
df = df.assign(
    Target_bin=df["Target"].str.lower().str.contains("drop").astype(int)
)

y = df["Target_bin"]
X = df.drop(columns=["Target", "Target_bin"])

# Stratified 80/20 split with a fixed seed.
X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# -------- ORIGINAL model --------
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_tr, y_tr)
y_pred = rf.predict(X_te)
print(classification_report(y_te, y_pred))

joblib.dump(rf, MODEL, compress=3)
print("✔ Modelo guardado en", MODEL)

              precision    recall  f1-score   support

           0       0.88      0.95      0.91       601
           1       0.86      0.74      0.80       284

    accuracy                           0.88       885
   macro avg       0.87      0.84      0.86       885
weighted avg       0.88      0.88      0.88       885

✔ Modelo guardado en /content/deployment/fastapi_app/model.joblib


In [5]:
import pandas as pd, joblib
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

BASE = Path("/content")                 # adjust when working from another location
RAW = BASE.joinpath("data_limpia.csv")
# main.py is written to /content and loads model.joblib from its own directory,
# so this is the file the API serves.
MODEL = BASE.joinpath("model.joblib")   # ← will be overwritten

# 1. Data + the same feature engineering as the other training cell
df = pd.read_csv(RAW)

sem_grades = df[["Curricular units 1st sem (grade)",
                 "Curricular units 2nd sem (grade)"]]
approved_1st = df["Curricular units 1st sem (approved)"]
approved_2nd = df["Curricular units 2nd sem (approved)"]
df["mean_grade"] = sem_grades.mean(axis=1)
df["total_approved"] = approved_1st.add(approved_2nd)

cat_vars = ["Gender", "Scholarship holder", "International",
            "Debtor", "Displaced", "Marital status"]
df = pd.get_dummies(df, columns=cat_vars, drop_first=True)

# Binary label: True where the lower-cased Target text contains "drop".
is_dropout = df["Target"].str.lower().str.contains("drop")
df["Target_bin"] = is_dropout.astype(int)

y = df["Target_bin"]
X = df.drop(columns=["Target", "Target_bin"])

# Stratified 80/20 split with a fixed seed.
X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 2. ORIGINAL model: 100 trees
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_tr, y_tr)
report = classification_report(y_te, rf.predict(X_te))
print(report)

joblib.dump(rf, MODEL, compress=3)
print("✔ Modelo guardado en", MODEL)

              precision    recall  f1-score   support

           0       0.88      0.95      0.91       601
           1       0.86      0.74      0.80       284

    accuracy                           0.88       885
   macro avg       0.87      0.84      0.86       885
weighted avg       0.88      0.88      0.88       885

✔ Modelo guardado en /content/model.joblib


# Escritura main.py

In [6]:
%%writefile /content/main.py
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List, Dict, Any
import joblib, pandas as pd
from pathlib import Path

model = joblib.load(Path(__file__).with_name("model.joblib"))

app = FastAPI(
    title="Student Dropout Prediction API",
    description="Devuelve etiqueta (0/1) y probabilidad de abandono",
    version="1.0.0"
)

class Record(BaseModel):
    inputs: List[Dict[str, Any]]

@app.post("/predict")
async def predict(data: Record):
    df = pd.DataFrame(data.inputs)
    proba = model.predict_proba(df)[:, 1]
    preds = (proba >= 0.5).astype(int)
    return {"predictions": preds.tolist(),
            "prob_dropout": proba.round(3).tolist()}

Writing /content/main.py


# Activación uvicorn

In [7]:
import subprocess, time, psutil, signal, re, os, socket

APP_DIR = "/content"                        # directory main.py was written to
PORT = 8000
LOG_PATH = os.path.join(APP_DIR, "uvicorn.log")
STARTUP_TIMEOUT_S = 30

# Stop stale uvicorn processes (avoids "port already in use").
stale = []
for p in psutil.process_iter(['pid', 'cmdline']):
    cmdline = p.info['cmdline'] or []       # None when psutil could not read it
    if p.pid != os.getpid() and 'uvicorn' in ' '.join(cmdline):
        try:
            p.terminate()                   # sends SIGTERM
            stale.append(p)
        except psutil.NoSuchProcess:
            pass                            # exited between listing and terminate
# Give the old servers a moment to release the port before starting a new one.
psutil.wait_procs(stale, timeout=5)

# Send server output to a log file: an undrained subprocess.PIPE would block the
# server once the pipe buffer fills. The child keeps its own copy of the handle,
# so the parent can close the file right after spawning.
with open(LOG_PATH, "w") as log_file:
    uvicorn_proc = subprocess.Popen(
        ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", str(PORT)],
        cwd=APP_DIR,                        # "main:app" is resolved relative to cwd
        stdout=log_file, stderr=subprocess.STDOUT,
    )

# Wait until the port accepts connections instead of sleeping a fixed time, and
# fail loudly if the server dies during startup.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    if uvicorn_proc.poll() is not None:
        raise RuntimeError(
            f"uvicorn exited with code {uvicorn_proc.returncode}; see {LOG_PATH}"
        )
    try:
        with socket.create_connection(("127.0.0.1", PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() > deadline:
            raise TimeoutError(
                f"uvicorn did not open port {PORT} within {STARTUP_TIMEOUT_S}s; see {LOG_PATH}"
            )
        time.sleep(0.2)

print("✅ Uvicorn PID:", uvicorn_proc.pid)

✅ Uvicorn PID: 1460


# Apertura de túnel en ngrok para ver el despliegue

In [8]:
import os
from getpass import getpass
from pyngrok import ngrok, conf

# Keep the ngrok token out of the notebook: read it from the NGROK_AUTHTOKEN
# environment variable, or prompt for it (input is not echoed) when unset.
conf.get_default().auth_token = (
    os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
)

# Expose the local server on port 8000 through an HTTP tunnel.
public_url = ngrok.connect(8000, "http").public_url
print("🌐 URL pública:", public_url)
print("📄 Swagger docs:", public_url + "/docs")

🌐 URL pública: https://e2e8d71ba494.ngrok-free.app
📄 Swagger docs: https://e2e8d71ba494.ngrok-free.app/docs


In [9]:
import requests, json

# Payload: the first two held-out rows, one feature dict per row.
sample = {"inputs": [X_te.iloc[0].to_dict(),
                     X_te.iloc[1].to_dict()]}

# requests has no default timeout; without one a stalled tunnel hangs the cell.
resp = requests.post(f"{public_url}/predict", json=sample, timeout=30)
print("Status :", resp.status_code)
# Surface 4xx/5xx as an HTTPError instead of failing later while parsing the body.
resp.raise_for_status()
print("Respuesta:", resp.json())

Status : 200
Respuesta: {'predictions': [0, 1], 'prob_dropout': [0.05, 0.55]}


# Muestra

In [10]:
# Display the request payload that was posted to /predict.
sample

{'inputs': [{'Application mode': 1,
   'Application order': 2,
   'Course': 9670,
   'Daytime/evening attendance\t': 1,
   'Previous qualification': 1,
   'Previous qualification (grade)': 131.0,
   'Nacionality': 1,
   "Mother's qualification": 1,
   "Father's qualification": 19,
   "Mother's occupation": 4,
   "Father's occupation": 7,
   'Admission grade': 135.9,
   'Educational special needs': 0,
   'Tuition fees up to date': 1,
   'Age at enrollment': 18,
   'Curricular units 1st sem (credited)': 0,
   'Curricular units 1st sem (enrolled)': 6,
   'Curricular units 1st sem (evaluations)': 6,
   'Curricular units 1st sem (approved)': 6,
   'Curricular units 1st sem (grade)': 13.166666666666666,
   'Curricular units 1st sem (without evaluations)': 0,
   'Curricular units 2nd sem (credited)': 0,
   'Curricular units 2nd sem (enrolled)': 6,
   'Curricular units 2nd sem (evaluations)': 8,
   'Curricular units 2nd sem (approved)': 6,
   'Curricular units 2nd sem (grade)': 12.428571428571