In [None]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Show up to 200 columns when a frame is rendered.
pd.set_option("display.max_columns", 200)

# Locations where the dataset may live, tried in this order.
candidates = [
    "../data/WA_Fn-UseC_-Telco-Customer-Churn.csv",
    "../data/telco.csv"
]

# Keep the first candidate that exists on disk; stays None when none does.
csv = None
for candidate in candidates:
    if Path(candidate).exists():
        csv = candidate
        break
assert csv, "Place le CSV telco dans ../data/"

# Load the file and preview the first rows.
df = pd.read_csv(csv)
df.head()

In [None]:
# Column dtypes and non-null counts (info() prints its report itself).
df.info()

# Only the last expression of a cell is rendered automatically, so this summary
# table must be displayed explicitly; as a bare mid-cell expression its result
# was computed and then silently discarded.
# `display` is the builtin provided by the IPython/Jupyter kernel.
display(df.describe(include="all").T)

# Share of missing values per column, the 15 highest first.
df.isna().mean().sort_values(ascending=False).head(15)

In [None]:
# Parse TotalCharges as numbers (entries that cannot be parsed become NaN),
# then fill the resulting gaps with the median of the parsed column.
if "TotalCharges" in df.columns:
    total_charges = pd.to_numeric(df["TotalCharges"], errors="coerce")
    df["TotalCharges"] = total_charges.fillna(total_charges.median())

# Missing-value counts per column after the imputation, top ten by count.
df.isna().sum().sort_values(ascending=False).head(10)

In [None]:
# Cast the target column to str.
df["Churn"] = df["Churn"].astype(str)

# Relative frequency of each target value.
churn_share = df["Churn"].value_counts(normalize=True)
print(churn_share)

# Bar chart of the absolute counts per target value.
sns.countplot(x="Churn", data=df)
plt.title("Répartition Churn")
plt.show()

In [None]:
# Split the columns by dtype. "Churn" is removed from the non-numeric list
# because it is used as the hue of the count plots below.
num = df.select_dtypes(include=[np.number]).columns
cat = df.select_dtypes(exclude=[np.number]).columns.drop("Churn", errors="ignore")

# One histogram per numeric column.
df[num].hist(bins=30, figsize=(12, 8))
plt.tight_layout()
plt.show()

# A count plot draws one bar group per distinct value, so a non-numeric column
# with very many distinct values (e.g. an identifier-like column) yields an
# unreadable and very slow figure. Keep only low-cardinality columns before
# taking the first few to plot.
MAX_CATEGORY_LEVELS = 20    # skip columns with more distinct values than this
MAX_CATEGORICAL_PLOTS = 6   # number of categorical columns to draw
plottable_cat = [c for c in cat if df[c].nunique() <= MAX_CATEGORY_LEVELS]

for c in plottable_cat[:MAX_CATEGORICAL_PLOTS]:
    plt.figure(figsize=(5, 3))
    sns.countplot(x=c, data=df, hue="Churn")
    plt.title(c)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# Pairwise correlation between the numeric columns selected earlier in `num`.
corr = df[num].corr(numeric_only=True)

# Draw the matrix on an explicit axes, with the colour scale centred on zero.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, cmap="coolwarm", center=0, ax=ax)
ax.set_title("Corrélation variables numériques")
plt.show()
