# 📊 Análisis de Churn desde dos fuentes de datos
Este notebook entrena y evalúa un modelo predictivo de churn a partir de dos fuentes de datos:
1. Un archivo CSV
2. Una base de datos SQLite

## 🔹 Parte 1: Lectura desde CSV

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the churn dataset directly from the raw GitHub URL.
csv_url = "https://raw.githubusercontent.com/christanov/telco-churn-project/main/data/telco_dataset_final.csv"
df_csv = pd.read_csv(csv_url)
# Preview the first rows (rendered as the cell output).
df_csv.head()

In [None]:
# Prepare the data and train a churn model from the CSV source.
# `features` is also reused by the SQLite cell further down.
features = ['tenure', 'MonthlyCharges', 'TotalCharges', 'Contract', 'InternetService',
            'OnlineSecurity', 'TechSupport', 'OnlineBackup', 'DeviceProtection']
X = df_csv[features].copy()  # copy so the encoding below does not touch df_csv
y = df_csv['Churn']

# Encoding: replace every object-dtype column with integer codes.
# NOTE(review): each encoder is fitted on the full dataset (before the split)
# and only the last one stays bound to `le`, so the codes cannot be mapped
# back to the original categories later on.
for col in X.select_dtypes(include='object').columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])

# Train/test split (80/20, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize every feature column. The scaler is fitted on the training
# split only and then applied to the test split. The scaled frames are built
# explicitly (same columns, same index) instead of assigning into the frames
# returned by train_test_split, which can raise pandas' SettingWithCopyWarning.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Fit the random forest and predict on the held-out split.
model_csv = RandomForestClassifier(n_estimators=100, random_state=42)
model_csv.fit(X_train, y_train)
y_pred = model_csv.predict(X_test)

print("📘 Reporte desde CSV:")
print(classification_report(y_test, y_pred))

## 🔸 Parte 2: Lectura desde base de datos SQLite

In [None]:
import sqlite3
import urllib.request

# Download the SQLite database file from the raw GitHub URL into the
# current working directory.
db_path = "telco_churn.db"
urllib.request.urlretrieve("https://raw.githubusercontent.com/christanov/telco-churn-project/main/data/telco_churn.db", db_path)

# Connect, load the whole telco_customer table into a DataFrame, and always
# close the connection afterwards so the database file handle is released
# even if the query fails. (Using the connection as a `with` context manager
# would only manage the transaction, not close it.)
conn = sqlite3.connect(db_path)
try:
    df_sql = pd.read_sql_query("SELECT * FROM telco_customer", conn)
finally:
    conn.close()
# Preview the first rows (rendered as the cell output).
df_sql.head()

In [None]:
# Prepare the data and train a churn model from the SQLite source.
# Relies on `features` defined in the CSV training cell and on `df_sql`
# loaded in the previous cell.
X2 = df_sql[features].copy()  # copy so the encoding below does not touch df_sql
y2 = df_sql['Churn']

# Replace every object-dtype column with integer codes.
for col in X2.select_dtypes(include='object').columns:
    le = LabelEncoder()
    X2[col] = le.fit_transform(X2[col])

# Train/test split (80/20, same seed as the CSV pipeline).
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, random_state=42)

# Use a dedicated scaler for this pipeline: re-fitting the shared `scaler`
# from the CSV cell would silently overwrite the statistics it learned there
# and make the result depend on cell execution order. The scaled frames are
# built explicitly (same columns, same index) instead of assigning into the
# frames returned by train_test_split, which can raise pandas'
# SettingWithCopyWarning.
scaler_sql = StandardScaler()
X2_train = pd.DataFrame(scaler_sql.fit_transform(X2_train), columns=X2.columns, index=X2_train.index)
X2_test = pd.DataFrame(scaler_sql.transform(X2_test), columns=X2.columns, index=X2_test.index)

# Fit the random forest and predict on the held-out split.
model_sql = RandomForestClassifier(n_estimators=100, random_state=42)
model_sql.fit(X2_train, y2_train)
y2_pred = model_sql.predict(X2_test)

print("📘 Reporte desde SQLite DB:")
print(classification_report(y2_test, y2_pred))