In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

def clean_rating_data(raw):
    """Return a cleaned copy of the raw rating data with a ``LossRatio`` column.

    Steps, in order:
      1. Strip surrounding whitespace from the column names and from the
         values of every object-dtype column.
      2. Parse ``TransactionMonth`` with ``pd.to_datetime``.
      3. Keep only rows whose ``TotalPremium`` and ``TotalClaims`` are
         both >= 0.
      4. Add ``LossRatio = TotalClaims / TotalPremium`` and drop rows where
         the ratio is undefined (NaN or +/-inf, e.g. a zero premium).

    Args:
        raw: DataFrame holding at least the ``TransactionMonth``,
            ``TotalPremium`` and ``TotalClaims`` columns (their names may
            carry surrounding whitespace).

    Returns:
        A new DataFrame; ``raw`` itself is left untouched.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    frame = raw.copy()
    frame.columns = frame.columns.str.strip()

    for col in frame.select_dtypes(include='object').columns:
        # NOTE(review): astype(str) may turn missing values into the literal
        # text 'nan' depending on the pandas version -- confirm this is wanted
        # before grouping on these columns.
        frame[col] = frame[col].astype(str).str.strip()
    frame['TransactionMonth'] = pd.to_datetime(frame['TransactionMonth'])

    non_negative = (frame['TotalPremium'] >= 0) & (frame['TotalClaims'] >= 0)
    frame = frame[non_negative].copy()

    # Build the finished column first and assign it once. Calling
    # .replace(..., inplace=True) on frame['LossRatio'] mutates a temporary
    # under pandas Copy-on-Write and would leave the +/-inf values in place.
    ratio = frame['TotalClaims'] / frame['TotalPremium']
    frame['LossRatio'] = ratio.replace([np.inf, -np.inf], np.nan)
    return frame.dropna(subset=['LossRatio'])


print("Loading data for A/B Hypothesis Testing...")

file_path = '../data/MachineLearningRating_v3.txt'

# df stays None unless loading AND preprocessing both succeed, so later
# cells can rely on a simple `df is None` check.
df = None

try:
    df = pd.read_csv(file_path, delimiter='|')
    print("Data loaded successfully!")
    print(f"Shape of data: {df.shape}")

    df = clean_rating_data(df)
    print("Filtered out rows with negative TotalPremium or TotalClaims.")
    print("Calculated LossRatio and handled NaN/inf values.")

    print("\nCleaned Data Info:")
    df.info()

except FileNotFoundError:
    print(f"Error: The file was not found at {file_path}. Please check the path.")
    print("Ensure 'MachineLearningRating_v3.txt' is in your 'data' folder.")
except Exception as e:
    # Top-level notebook boundary: report the problem and discard the
    # half-processed frame so it cannot be mistaken for clean data.
    df = None
    print(f"An unexpected error occurred while loading or preprocessing the data: {e}")

if df is None:
    print("\nData loading failed. Please resolve the error before proceeding with analysis.")

Loading data for A/B Hypothesis Testing...
Error: The file was not found at ../data/MachineLearningRating_v3.txt. Please check the path.
Ensure 'MachineLearningRating_v3.txt' is in your 'data' folder.

Data loading failed. Please resolve the error before proceeding with analysis.
