# AB Hyphothesis testing Notebook

In [1]:
# Importing required libraries
import sys
import os
import pandas as pd

In [2]:
# Get the current working directory of the project
current_dir = os.getcwd()
print(current_dir)

# Get the parent directory
parent_dir = os.path.dirname(current_dir)
print(parent_dir)

# Insert the path to the parent directory
sys.path.insert(0, parent_dir)

# # Insert the path to the Scripts directory
# sys.path.insert(0, os.path.join(parent_dir, 'Scripts'))

# print(sys.path)

c:\Users\HP\Desktop\KAIM-Cohort-3\Week 3\AlphaCare-Insurance-Solutions-(ACIS)-Insurance-Claim-Data Analysis\notebooks
c:\Users\HP\Desktop\KAIM-Cohort-3\Week 3\AlphaCare-Insurance-Solutions-(ACIS)-Insurance-Claim-Data Analysis


In [3]:

from scripts.data_prep import load_data, preprocess_data
from scripts.ab_testing import perform_t_test, perform_chi_squared_test

In [4]:
# Load the data
try:
    file_path = "../data/MachineLearningRating_v3.txt"
    data = pd.read_csv(file_path, delimiter="|")
    print("Data loaded successfully.")
except Exception as e:
    print(f"Error loading data: {e}")

  data = pd.read_csv(file_path, delimiter="|")


Data loaded successfully.


In [5]:
# Preprocess the data
try:
    data = preprocess_data(data)
    print(data.columns)
    print("Data preprocessed successfully.")
except Exception as e:
    print(f"Error preprocessing data: {e}")

Index(['UnderwrittenCoverID', 'PolicyID', 'IsVATRegistered', 'PostalCode',
       'mmcode', 'RegistrationYear', 'Cylinders', 'cubiccapacity', 'kilowatts',
       'NumberOfDoors', 'CustomValueEstimate', 'NumberOfVehiclesInFleet',
       'SumInsured', 'CalculatedPremiumPerTerm', 'TotalPremium',
       'TotalClaims'],
      dtype='object')
Data preprocessed successfully.


In [6]:
print(data[['RegistrationYear', 'NumberOfVehiclesInFleet']].dtypes)

RegistrationYear             int64
NumberOfVehiclesInFleet    float64
dtype: object


In [7]:
data['NumberOfVehiclesInFleet'] = data['NumberOfVehiclesInFleet'].round(0).astype('int').astype('category')

In [8]:
data['RegistrationYear'] = data['RegistrationYear'].astype('category')

In [9]:
bins = [0, 1, 3, 5, 10, float('inf')]
labels = ['1', '2-3', '4-5', '6-10', '10+']
data['NumberOfVehiclesInFleet'] = pd.cut(data['NumberOfVehiclesInFleet'], bins=bins, labels=labels)

In [10]:
# Chi-squared test
try:
    # Ensure no missing values in relevant columns
    if data[['RegistrationYear', 'NumberOfVehiclesInFleet']].isnull().any().any():
        raise ValueError("Missing values detected in 'RegistrationYear' or 'NumberOfVehiclesInFleet'.")

    # Convert 'RegistrationYear' to categorical
    data['RegistrationYear'] = data['RegistrationYear'].astype('category')

    # Bin 'NumberOfVehiclesInFleet' into categories
    bins = [0, 1, 3, 5, 10, float('inf')]
    labels = ['1', '2-3', '4-5', '6-10', '10+']
    data['NumberOfVehiclesInFleet'] = pd.cut(data['NumberOfVehiclesInFleet'], bins=bins, labels=labels)

    # Check unique values
    print("Unique RegistrationYear values:", data['RegistrationYear'].unique())
    print("Unique NumberOfVehiclesInFleet values:", data['NumberOfVehiclesInFleet'].unique())

    # Create contingency table
    contingency_table = pd.crosstab(data['RegistrationYear'], data['NumberOfVehiclesInFleet'])
    print("Contingency Table:")
    print(contingency_table)

    # Ensure the table is non-empty
    if contingency_table.empty:
        raise ValueError("Contingency table is empty. Cannot perform Chi-squared test.")

    # Perform Chi-squared test
    chi2, p_value, dof = perform_chi_squared_test(contingency_table)
    print(f"Chi-squared test completed. P-value: {p_value}")
except KeyError as e:
    print(f"KeyError: {e}")
except ValueError as e:
    print(f"ValueError: {e}")
except Exception as e:
    print(f"Error in Chi-squared test: {e}")


Unique RegistrationYear values: [], Categories (0, int64): []
Unique NumberOfVehiclesInFleet values: [], Categories (5, object): ['1' < '2-3' < '4-5' < '6-10' < '10+']
Contingency Table:
Empty DataFrame
Columns: []
Index: []
ValueError: Contingency table is empty. Cannot perform Chi-squared test.
