In [2]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Paths
# Resolve the project root whether the kernel was started at the root
# (a "data" directory sits next to us) or one level below it (e.g. notebooks/).
_CWD = Path.cwd()
PROJECT_ROOT = _CWD if (_CWD / "data").exists() else _CWD.parent

CLEAN_PATH = PROJECT_ROOT / "data" / "clean" / "telco_clean.parquet"

# NOTE(review): FIG_DIR is relative to the current working directory, not to
# PROJECT_ROOT, so figures land next to wherever the kernel was started.
# Confirm whether that is intended before anchoring it to PROJECT_ROOT.
FIG_DIR    = Path("figures")
FIG_DIR.mkdir(exist_ok=True)

# Load cleaned data (read once; the earlier duplicate read was redundant)
df = pd.read_parquet(CLEAN_PATH)
print("Data shape:", df.shape)


Data shape: (7043, 21)


## 2. Numeric Feature Histograms  

In [4]:
# Names of every numeric-dtype column; these drive the histogram and boxplot loops.
num_cols = [name for name in df.select_dtypes(include="number")]
print("Numeric columns:", num_cols)


Numeric columns: ['tenure', 'MonthlyCharges', 'TotalCharges']


In [7]:
# One histogram (with KDE overlay) per numeric column, written to FIG_DIR as
# both PNG and SVG. Figures are closed after saving so they neither render
# inline nor accumulate in memory.
for col in num_cols:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(df[col], kde=True, ax=ax)
    ax.set_title(f"Histogram of {col}")
    ax.set_xlabel(col)
    ax.set_ylabel("Count")
    for ext in ("png", "svg"):
        target = FIG_DIR / f"dist_{col}_hist.{ext}"
        fig.savefig(target, bbox_inches="tight")
    plt.close(fig)

## 3. Numeric Feature Boxplots  

In [9]:
# One vertical boxplot per numeric column, saved to FIG_DIR as PNG and SVG.
for col in num_cols:
    fig, ax = plt.subplots(figsize=(4, 6))
    sns.boxplot(y=df[col], ax=ax)
    ax.set(title=f"Boxplot of {col}", ylabel=col)
    for ext in ("png", "svg"):
        fig.savefig(FIG_DIR / f"dist_{col}_box.{ext}", bbox_inches="tight")
    # Close explicitly so the loop does not keep every figure alive.
    plt.close(fig)

## 4. Categorical Feature Countplots  

In [10]:
# Names of every non-numeric column (category, object or bool dtype);
# these drive the countplot loop.
cat_cols = list(
    df.select_dtypes(include=["category", "object", "bool"]).columns
)
print("Categorical columns:", cat_cols)

Categorical columns: ['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'Churn']


In [11]:
# Upper bound on distinct values for a column to be plotted as a countplot.
# cat_cols is selected by dtype only, so it also picks up identifier-like
# columns (customerID appears in the list above — presumably close to one
# value per row; verify). A countplot with thousands of bars is unreadable,
# slow to draw, and produces a very large SVG, so such columns are skipped.
MAX_CATEGORIES = 30

for col in cat_cols:
    n_levels = df[col].nunique()
    if n_levels > MAX_CATEGORIES:
        # Report the skip so the omission is visible in the notebook output.
        print(f"Skipping {col}: {n_levels} unique values (limit {MAX_CATEGORIES})")
        continue

    fig, ax = plt.subplots(figsize=(6, 4))
    # Order bars from most to least frequent level.
    sns.countplot(data=df, x=col, order=df[col].value_counts().index, ax=ax)
    # Rotate and right-align tick labels so long category names do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_title(f"Countplot of {col}")
    ax.set_xlabel(col)
    ax.set_ylabel("Count")
    for ext in ("png", "svg"):
        fig.savefig(FIG_DIR / f"dist_{col}_count.{ext}", bbox_inches="tight")
    # Close explicitly so the loop does not keep every figure alive.
    plt.close(fig)
