In [None]:
import pandas as pd
import numpy as np
from typing import Tuple, List
from sklearn.model_selection import train_test_split


def load_processsed_data(filepath: str) -> pd.DataFrame:
    """Read the processed dataset from a CSV file into a dataframe.

    Args:
        filepath (str): Location of the processed CSV file. It is handed
            to ``pd.read_csv`` unchanged, with all default parsing options.

    Returns:
        pd.DataFrame: The parsed contents of the file.
    """
    frame = pd.read_csv(filepath)
    return frame


def handle_missing_values(df: pd.DataFrame) -> pd.DataFrame:
    """Handle missing values in the dataframe.

    Converts the "TotalCharges" column to numeric (values that cannot be
    parsed become NaN) and then fills the missing entries with the column
    median.

    Args:
        df (pd.DataFrame): Input dataframe. It must contain a "TotalCharges"
            column. The dataframe is modified in place.

    Returns:
        pd.DataFrame: The same dataframe object with "TotalCharges" numeric
            and its missing values filled.

    Raises:
        KeyError: If the "TotalCharges" column is absent.
    """
    total_charges = pd.to_numeric(df["TotalCharges"], errors="coerce")
    # Assign the filled column back explicitly. Calling
    # ``df[col].fillna(..., inplace=True)`` operates on an intermediate
    # object, which pandas deprecates and which leaves ``df`` untouched
    # when Copy-on-Write is enabled.
    df["TotalCharges"] = total_charges.fillna(total_charges.median())
    return df


def encode_binary_feature(df: pd.DataFrame, binary_cols: List[str]) -> pd.DataFrame:
    """Encode binary categorical features to 0 or 1.

    Each listed column is mapped with "Yes" -> 1 and "No" -> 0. Any other
    value has no entry in the mapping and therefore becomes NaN.

    Args:
        df (pd.DataFrame): Input dataframe. It is modified in place.
        binary_cols (List[str]): Names of the binary columns to encode.

    Returns:
        pd.DataFrame: The same dataframe object with the listed columns
            encoded.
    """
    yes_no_codes = {"Yes": 1, "No": 0}
    for col in binary_cols:
        df[col] = df[col].map(yes_no_codes)
    return df


def encode_categorical_features(df: pd.DataFrame, categorical_cols: List[str]) -> pd.DataFrame:
    """One-hot encode categorical features with more than two categories.

    Args:
        df (pd.DataFrame): Input dataframe. It is not modified.
        categorical_cols (List[str]): Names of the categorical columns to
            expand into indicator columns.

    Returns:
        pd.DataFrame: A new dataframe in which each listed column is replaced
            by its indicator columns. The first level of every column is
            dropped (``drop_first=True``) so the indicators are not
            perfectly collinear.
    """
    return pd.get_dummies(df, columns=categorical_cols, drop_first=True)


def scale_features(
    df: pd.DataFrame, numberic_cols: List[str]
) -> Tuple[pd.DataFrame, "StandardScaler"]:
    """Scale numeric features using StandardScaler.

    Args:
        df (pd.DataFrame): Input dataframe. It is modified in place.
        numberic_cols (List[str]): Names of the numeric columns to scale.
            The misspelled parameter name is kept so that existing keyword
            callers continue to work.

    Returns:
        Tuple[pd.DataFrame, StandardScaler]: The dataframe with the listed
            columns replaced by their scaled values, and the fitted scaler
            so the same transformation can be applied to other data.
    """
    # Imported here because StandardScaler is not among the imports visible
    # at the top of this file; the return annotation is quoted for the same
    # reason, so the module can still be loaded.
    from sklearn.preprocessing import StandardScaler

    scaler = StandardScaler()
    # NOTE(review): the scaler is fitted on every row passed in. If this runs
    # before split_data, test-set statistics leak into the scaling - confirm
    # the intended call order.
    df[numberic_cols] = scaler.fit_transform(df[numberic_cols])
    return df, scaler


def split_data(
    df: pd.DataFrame,
    target_col: str,
    test_size: float = 0.2,
    random_state: int = 42,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
    """Split dataframe into train and test sets.

    The split is stratified on the target column, so both parts keep
    approximately the same class proportions as the full dataset.

    Args:
        df (pd.DataFrame): Input dataframe containing features and target.
        target_col (str): Target variable name.
        test_size (float, optional): Proportion of data for test set.
            Defaults to 0.2.
        random_state (int, optional): Random seed. Defaults to 42.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]: Training
            features, test features, training labels and test labels, in
            that order.
    """
    X = df.drop(columns=[target_col])
    y = df[target_col]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    return X_train, X_test, y_train, y_test
