# In [7]:  (Jupyter notebook cell prompt left over from export)
import os
import logging
import pandas as pd
import churn_library as cls

# Make sure the log directory exists before the file handler is attached:
# logging.basicConfig opens the log file immediately and fails with
# FileNotFoundError when './logs' is missing (e.g. on a fresh checkout).
os.makedirs('./logs', exist_ok=True)

# Send INFO-and-above records to the log file; filemode='w' truncates the
# file at start-up so each run begins with an empty log.
logging.basicConfig(
    filename='./logs/churn_library.log',
    level=logging.INFO,
    filemode='w',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

def test_import(import_data):
    """
    Test data import - this example is completed for you to assist with the other test functions.
    """
    try:
        df = import_data("./data/bank_data.csv")
        logging.info("Testing import_data: SUCCESS")
    except FileNotFoundError as err:
        logging.error("Testing import_data: The file wasn't found - %s", err)
        raise err

    try:
        assert df.shape[0] > 0
        assert df.shape[1] > 0
    except AssertionError as err:
        logging.error("Testing import_data: The file doesn't appear to have rows and columns - %s", err)
        raise err

def test_eda(perform_eda, df):
    """
    Test perform EDA function.
    """
    try:
        perform_eda(df)
        logging.info("Testing perform_eda: SUCCESS")
    except Exception as err:
        logging.error("Testing perform_eda: Failed to perform EDA - %s", err)
        raise err


def test_encoder_helper(encoder_helper, df):
    """
    Test the encoder helper function by ensuring it successfully adds encoded columns to the dataframe.

    Args:
        encoder_helper (function): The encoder function to test.
        df (DataFrame): The dataframe to test the encoder function on.

    Raises:
        Exception: If the 'Churn' column is missing or if the encoded columns are not added correctly.
    """
    # Check if the 'Churn' column exists in the DataFrame, which is required for the encoder function
    if 'Churn' not in df.columns:
        logging.error("Churn column is not present in the DataFrame for testing.")
        raise Exception("Test data does not include the expected 'Churn' column.")

    category_lst = ['Gender', 'Education_Level', 'Marital_Status', 'Income_Category', 'Card_Category']
    expected_columns = [f"{cat}_Churn" for cat in category_lst]

    try:
        # Call the encoder_helper function with the test dataframe and category list
        df_encoded = encoder_helper(df, category_lst)

        # Check if all expected encoded columns are present in the dataframe
        missing_cols = [col for col in expected_columns if col not in df_encoded.columns]
        assert not missing_cols, f"Missing encoded columns: {missing_cols}"

        logging.info("Testing encoder_helper: SUCCESS")

    except AssertionError as err:
        logging.error(f"Testing encoder_helper: Failed - {err}")
        raise

    except Exception as err:
        logging.error(f"Testing encoder_helper encountered an unexpected error: {err}")
        raise


def test_perform_feature_engineering(perform_feature_engineering, df):
    """
    Test perform feature engineering function.
    """
    try:
        X_train, X_test, y_train, y_test = perform_feature_engineering(df, 'Churn')
        assert X_train.shape[0] > 0
        assert X_test.shape[0] > 0
        assert y_train.shape[0] > 0
        assert y_test.shape[0] > 0
        logging.info("Testing perform_feature_engineering: SUCCESS")
    except AssertionError as err:
        logging.error("Testing perform_feature_engineering: Failed to split data - %s", err)
        raise err


def test_train_models(train_models, X_train, X_test, y_train, y_test):
    """
    Test the train_models function to ensure models are trained and predictions are generated.
    """
    try:
        (
        y_train,
        y_test,
        y_train_preds_lr,
        y_train_preds_rf,
        y_test_preds_lr,
        y_test_preds_rf) = train_models(X_train, X_test, y_train, y_test)

        # Log lengths to help diagnose issues
        logging.info(f"Length of y_train: {len(y_train_func)}")
        logging.info(f"Length of y_test: {len(y_test_func)}")
        logging.info(f"Length of y_train_preds_lr: {len(y_train_preds_lr)}")
        logging.info(f"Length of y_train_preds_rf: {len(y_train_preds_rf)}")
        logging.info(f"Length of y_test_preds_lr: {len(y_test_preds_lr)}")
        logging.info(f"Length of y_test_preds_rf: {len(y_test_preds_rf)}")

        # Check if predictions are not None
        assert y_train_func is not None, "y_train predictions are None"
        assert y_test_func is not None, "y_test predictions are None"
        assert y_train_preds_lr is not None, "LR training predictions are None"
        assert y_train_preds_rf is not None, "RF training predictions are None"
        assert y_test_preds_lr is not None, "LR testing predictions are None"
        assert y_test_preds_rf is not None, "RF testing predictions are None"

        # Check if predictions are the correct form
        assert len(y_train_func) == len(y_train), "LR training predictions length mismatch"
        assert len(y_test_func) == len(y_test), "RF training predictions length mismatch"
        assert len(y_train_preds_lr) == len(y_train), "LR training predictions length mismatch"
        assert len(y_train_preds_rf) == len(y_train), "RF training predictions length mismatch"
        assert len(y_test_preds_lr) == len(y_test), "LR testing predictions length mismatch"
        assert len(y_test_preds_rf) == len(y_test), "RF testing predictions length mismatch"

        logging.info("Testing train_models: SUCCESS")

    except AssertionError as err:
        logging.error("Testing train_models: The function's output is not as expected - %s", err)
        raise err
    except Exception as err:
        logging.error("Testing train_models: An error occurred - %s", err)
        raise RuntimeError("Failed to test train_models.") from err



def run_tests():
    """
    Drive every churn_library test in pipeline order.

    The import, EDA, encoding and feature-engineering stages are each executed
    once directly through ``cls`` (so their output can feed the next stage) and
    once more through the matching ``test_*`` wrapper. Model training is
    exercised only through ``test_train_models``.

    Raises:
        Exception: Any failure is logged as "Error during testing" and re-raised.
    """
    categorical_columns = [
        'Gender',
        'Education_Level',
        'Marital_Status',
        'Income_Category',
        'Card_Category',
    ]

    try:
        # Stage 1: data import
        bank_df = cls.import_data("./data/bank_data.csv")
        test_import(cls.import_data)

        # Stage 2: exploratory data analysis
        cls.perform_eda(bank_df)
        test_eda(cls.perform_eda, bank_df)

        # Stage 3: categorical encoding
        # NOTE(review): the encoder test receives the frame that has already
        # been encoded here — confirm this is intended.
        encoded_df = cls.encoder_helper(bank_df, categorical_columns)
        test_encoder_helper(cls.encoder_helper, encoded_df)

        # Stage 4: feature engineering (train/test split)
        X_train, X_test, y_train, y_test = cls.perform_feature_engineering(encoded_df, 'Churn')
        test_perform_feature_engineering(cls.perform_feature_engineering, encoded_df)

        # Stage 5: model training
        test_train_models(cls.train_models, X_train, X_test, y_train, y_test)

        logging.info("All tests passed successfully!")

    except Exception as exc:
        logging.error("Error during testing: %s", str(exc))
        raise exc

# Run the full test sequence only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_tests()