# 3_Hypothesis_Testing


In [7]:


## 1. Setup and Data Preparation
import pandas as pd
import numpy as np
import sys
import os

# Add src to path to import local module
sys.path.append(os.path.abspath('../src')) 

from hypothesis_testing import prepare_metrics, run_chi_squared_test, run_mean_test, run_anova_test

# Load the pipe-delimited policy/claims extract (adjust the path as necessary).
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which prevents mixed-type columns and the warning
# this call otherwise emits (most likely DtypeWarning).
df = pd.read_csv('../data/MachineLearningRating_v3.txt', sep='|', low_memory=False)

# --- Label cleaning for the three grouping columns used by the tests ---
def _clean_labels(series):
    """Return `series` as stripped text, keeping missing values missing.

    `astype(str)` turns NaN into the literal string 'nan'. Masking with the
    original `notna()` restores those entries (and blank strings) to NaN so
    they cannot later be counted as a real category.
    """
    text = series.astype(str).str.strip()
    return text.where(series.notna() & (text != ''))


# 1. Standardize Gender (Crucial for H0: Gender)
# Missing values are restored to NaN *before* capitalizing: otherwise 'nan'
# becomes 'Nan' and slips through as a bogus gender label.
df['Gender'] = _clean_labels(df['Gender']).str.capitalize().replace({
    'Male': 'Men',
    'Female': 'Women',
    'M': 'Men',
    'F': 'Women',
})

# 2. Standardize Province (Crucial for H0: Province)
df['Province'] = _clean_labels(df['Province'])

# 3. Standardize PostalCode (Crucial for H0: Zip Codes)
# Kept as text so postal codes are treated as labels, not numbers.
df['PostalCode'] = _clean_labels(df['PostalCode'])
# --- END CLEANING ---

# Significance level shared by every hypothesis test below
ALPHA = 0.05

# Collects the result record returned by each test call
all_results = list()

# Derive the core metrics (Frequency, Severity, Margin) from the frame whose
# Gender, Province and PostalCode columns were standardized above.
df_metrics = prepare_metrics(df)

print("--- Starting Task 3 Hypothesis Testing ---")



  df = pd.read_csv('../data/MachineLearningRating_v3.txt', sep='|')


--- Starting Task 3 Hypothesis Testing ---


## 2. Hypothesis Testing Execution

In [8]:
# -----------------------------------------------------------
# H₀: There are no risk differences between Women and Men
# -----------------------------------------------------------
# This is the first test section of the cell: empty the shared result list in
# place so that re-running the cell does not append a duplicate set of rows.
all_results.clear()

GENDER_GROUPS = ['Women', 'Men']  # labels produced by the Gender standardization step

# Filter data for only the two groups being compared
df_gender = df_metrics[df_metrics['Gender'].isin(GENDER_GROUPS)].copy()

# A. Frequency Test (Chi-Squared)
all_results.append(run_chi_squared_test(df_gender, 'Gender', ALPHA))

# B. Severity Test (T-Test) - restricted to rows flagged as having a claim
df_gender_claimed = df_gender[df_gender['ClaimStatus'] == 1].copy()
all_results.append(run_mean_test(df_gender_claimed, 'Gender', 'ClaimSeverity', GENDER_GROUPS, ALPHA))

# -----------------------------------------------------------
# H₀: There are no risk differences across provinces
# -----------------------------------------------------------
# Restrict the comparison to the three provinces with the most rows.
province_volume = df_metrics['Province'].value_counts()
TOP_PROVINCES = province_volume.nlargest(3).index.tolist()

in_top_province = df_metrics['Province'].isin(TOP_PROVINCES)
df_province = df_metrics[in_top_province].copy()

# A. Claim frequency across the selected provinces (Chi-Squared)
province_frequency_result = run_chi_squared_test(df_province, 'Province', ALPHA)
all_results.append(province_frequency_result)

# B. Claim severity across the selected provinces (ANOVA, multi-group mean test),
#    using only rows where ClaimStatus is 1
has_claim = df_province['ClaimStatus'].eq(1)
df_province_claimed = df_province.loc[has_claim].copy()
province_severity_result = run_anova_test(
    df_province_claimed, 'Province', 'ClaimSeverity', TOP_PROVINCES, ALPHA
)
all_results.append(province_severity_result)


# -----------------------------------------------------------
# H₀: Risk/Margin difference across Zip Codes
# -----------------------------------------------------------
# Compare the two postal codes with the most rows (two groups -> T-Test).
zip_volume = df_metrics['PostalCode'].value_counts()
TOP_ZIP_CODES = zip_volume.nlargest(2).index.tolist()

in_top_zip = df_metrics['PostalCode'].isin(TOP_ZIP_CODES)
df_zip = df_metrics[in_top_zip].copy()

# A. Risk (Severity) T-Test, using only rows where ClaimStatus is 1
zip_has_claim = df_zip['ClaimStatus'].eq(1)
df_zip_claimed = df_zip.loc[zip_has_claim].copy()
zip_severity_result = run_mean_test(
    df_zip_claimed, 'PostalCode', 'ClaimSeverity', TOP_ZIP_CODES, ALPHA
)
all_results.append(zip_severity_result)

# B. Margin T-Test over every row of the two postal codes
zip_margin_result = run_mean_test(df_zip, 'PostalCode', 'Margin', TOP_ZIP_CODES, ALPHA)
all_results.append(zip_margin_result)



--- Testing Claim Frequency for: Gender ---
--- Testing Mean ClaimSeverity for: Gender (Groups: ['Women', 'Men']) ---

--- Testing Claim Frequency for: Province ---
--- Testing Mean ClaimSeverity for: Province (Multiple Groups) ---
--- Testing Mean ClaimSeverity for: PostalCode (Groups: ['2000', '122']) ---
--- Testing Mean Margin for: PostalCode (Groups: ['2000', '122']) ---


## 3. Analyze and Report Results

In [9]:
# Collect every test record into one DataFrame, then print a compact
# markdown summary limited to the columns needed for the decision.
SUMMARY_COLUMNS = ['Test', 'Feature', 'p_value', 'Reject_H0']
results_df = pd.DataFrame(all_results)

print("\n\n--- FINAL HYPOTHESIS TESTING RESULTS ---")
summary_table = results_df.loc[:, SUMMARY_COLUMNS].to_markdown(index=False)
print(summary_table)

# --- GENERATE BUSINESS RECOMMENDATIONS ---
# Print one interpretation per result row. Rows where H₀ was not rejected get
# a generic conclusion; rejected rows get a recommendation that depends on the
# kind of test (two-group T-Test, multi-group ANOVA, or Chi-Squared frequency).

print("\n\n--- BUSINESS RECOMMENDATIONS ---")

for _, row in results_df.iterrows():
    test_name = row['Test']

    if not row['Reject_H0']:
        print(f"❌ **FAILED TO REJECT H₀** for {row['Feature']} ({row['Test']}, p={row['p_value']:.4f})")
        print("  **CONCLUSION:** The observed differences are likely due to random chance. This feature may be excluded from the final pricing model to simplify the rating structure.")
        continue

    print(f"\n✅ **REJECTED H₀** for {row['Feature']} ({row['Test']}, p={row['p_value']:.4f})")

    if 'T-Test' in test_name:
        # Two-group comparison: a direction (which group is higher) is only
        # meaningful when both group means are present on the row.
        mean_a, mean_b = row.get('Mean_A'), row.get('Mean_B')
        if pd.isna(mean_a) or pd.isna(mean_b):
            print("  **NOTE:** Group means are unavailable for this test, so no directional recommendation can be made.")
            continue

        if mean_a > mean_b:
            higher_group, lower_group = row.get('Group_A'), row.get('Group_B')
        else:
            higher_group, lower_group = row.get('Group_B'), row.get('Group_A')

        difference = abs(mean_a - mean_b)

        if test_name == 'T-Test (Margin)':
            print(f"  **ACTION:** Margin difference is statistically significant. {higher_group} generates higher average profit per policy (Difference: {difference:.2f} Rand). **RECOMMENDATION:** Target {higher_group} with premium retention campaigns.")

        elif test_name == 'T-Test (ClaimSeverity)':
            print(f"  **ACTION:** Severity difference is statistically significant. {higher_group} claims are costlier. **RECOMMENDATION:** Introduce a claim severity rating factor or mandatory higher deductibles for {higher_group}.")

    elif 'ANOVA' in test_name:
        # Multi-group test: a significant result says at least one group mean
        # differs but not which one, so there is no single higher/lower pair.
        print(f"  **ACTION:** Mean claim severity is not equal across {row['Feature']} groups. **RECOMMENDATION:** Run post-hoc pairwise comparisons (e.g. Tukey HSD) to identify which groups differ before adjusting rates by {row['Feature']}.")

    elif 'Frequency' in test_name:
        # Chi-Squared test of claim frequency
        print(f"  **ACTION:** Claim frequency is not homogeneous across {row['Feature']}. **RECOMMENDATION:** This factor is highly predictive and must be included in the Frequency GLM as a primary rating variable.")



--- FINAL HYPOTHESIS TESTING RESULTS ---
| Test                    | Feature    |     p_value | Reject_H0   |
|:------------------------|:-----------|------------:|:------------|
| Chi-Squared (Frequency) | Gender     | 0.951464    | False       |
| T-Test (ClaimSeverity)  | Gender     | 0.568029    | False       |
| Chi-Squared (Frequency) | Province   | 2.33832e-13 | True        |
| ANOVA (ClaimSeverity)   | Province   | 0.00053695  | True        |
| T-Test (ClaimSeverity)  | PostalCode | 0.700208    | False       |
| T-Test (Margin)         | PostalCode | 0.244462    | False       |


--- BUSINESS RECOMMENDATIONS ---
❌ **FAILED TO REJECT H₀** for Gender (Chi-Squared (Frequency), p=0.9515)
  **CONCLUSION:** The observed differences are likely due to random chance. This feature may be excluded from the final pricing model to simplify the rating structure.
❌ **FAILED TO REJECT H₀** for Gender (T-Test (ClaimSeverity), p=0.5680)
  **CONCLUSION:** The observed differences are likely due