In [12]:
import numpy as np
import pandas as pd
from scipy.stats import norm, poisson, gamma
import matplotlib.pyplot as plt
from scipy.stats import norm

In [None]:

data = pd.read_csv('claim_data_group5_2024.csv')


print(data.head())


       IDpol  ClaimNb  Exposure Area  VehPower  VehAge  DrivAge  BonusMalus   
0  2271893.0      0.0      0.83    E       5.0    17.0     53.0        64.0  \
1  1111864.0      0.0      0.24    E       5.0     2.0     27.0        64.0   
2    72908.0      0.0      0.50    E       7.0    11.0     67.0        50.0   
3  2283027.0      0.0      0.08    B       5.0     8.0     28.0        60.0   
4  1123838.0      0.0      0.03    A      11.0     1.0     38.0        50.0   

  VehBrand   VehGas  Density Region  ClaimAmount  
0       B2   Diesel   3317.0    R93          0.0  
1       B3   Diesel   2740.0    R22          0.0  
2       B3  Regular   4762.0    R93          0.0  
3       B1   Diesel     64.0    R91          0.0  
4       B2  Regular     16.0    R24          0.0  


In [4]:
# Print the column names to verify them
print(data.columns)


Index(['IDpol', 'ClaimNb', 'Exposure', 'Area', 'VehPower', 'VehAge', 'DrivAge',
       'BonusMalus', 'VehBrand', 'VehGas', 'Density', 'Region', 'ClaimAmount',
       'RiskGroup'],
      dtype='object')


In [None]:

data['RiskGroup'] = pd.cut(data['DrivAge'], bins=[18, 25, 35, 50, 65, 100], labels=['Young', 'Young Adult', 'Adult', 'Senior', 'Elder'])

# frequency and severity distributions per group
frequency_params = {}
severity_params = {}

for group, group_data in data.groupby('RiskGroup'):
    
    lambda_freq = group_data['ClaimNb'].mean()
    frequency_params[group] = lambda_freq
    
    
    mean_severity = group_data['ClaimAmount'].mean()
    var_severity = group_data['ClaimAmount'].var()

    
    alpha = mean_severity**2 / var_severity
    beta = var_severity / mean_severity
    severity_params[group] = (alpha, beta)
    
    print(f"Group: {group}, Frequency (Lambda): {lambda_freq}, Severity Params (Alpha, Beta): {alpha}, {beta}")


Group: Young, Frequency (Lambda): 0.06827379904979765, Severity Params (Alpha, Beta): 0.01964747952012325, 6207.896245220362
Group: Young Adult, Frequency (Lambda): 0.033688506007708, Severity Params (Alpha, Beta): 0.004919542888139972, 12511.368333019273
Group: Adult, Frequency (Lambda): 0.03744802635131487, Severity Params (Alpha, Beta): 0.0037408291068754844, 17187.687515738457
Group: Senior, Frequency (Lambda): 0.03854398221657669, Severity Params (Alpha, Beta): 0.003988534291448176, 18055.872228309032
Group: Elder, Frequency (Lambda): 0.04106133979015335, Severity Params (Alpha, Beta): 0.0014624172389984653, 82906.65829033543


In [None]:
simulated_losses = {}

for group in frequency_params.keys():
    lambda_freq = frequency_params[group]
    alpha, beta = severity_params[group]
    
    # number of claims using Poisson
    num_claims = poisson.rvs(mu=lambda_freq, size=10000)  # Adjust the size if needed
    
    # Sclaim severity using Gamma
    claim_severity = gamma.rvs(a=alpha, scale=beta, size=num_claims.sum())
    
    # total losses for each simulation
    total_losses = [sum(claim_severity[i:i + n]) for i, n in enumerate(num_claims) if n > 0]
    simulated_losses[group] = total_losses


In [None]:
# expected losses for each group 
expected_losses = {}

for group, losses in simulated_losses.items():
    expected_loss = np.mean(losses)
    expected_losses[group] = expected_loss

# average expected loss across all groups
average_expected_loss = np.mean(list(expected_losses.values()))
buffered_minimum_premium = average_expected_loss * 1.1  # Setting a 10% buffer

print(f"Average Expected Loss across all groups: ${average_expected_loss:.2f}")
print(f"Buffered Minimum Premium (10% above average expected loss): ${buffered_minimum_premium:.2f}")

# premiums with buffered minimum premium for each group
for group, losses in simulated_losses.items():
    expected_loss = np.mean(losses)
    std_dev = np.std(losses)
    
    # Calculate premium based on confidence level
    z_score = norm.ppf(confidence_level)  # 99.5% confidence level
    premium = expected_loss + z_score * std_dev
    
    
    premium = max(premium, buffered_minimum_premium)
    premiums[group] = premium
    
    print(f"Risk Group: {group}, Expected Loss: {expected_loss}, Premium: {premium}")


Average Expected Loss across all groups: $5.75
Buffered Minimum Premium (10% above average expected loss): $6.33
Risk Group: Young, Expected Loss: 1.9320508347486918, Premium: 108.6900622190589
Risk Group: Young Adult, Expected Loss: 7.025776242479371e-06, Premium: 6.3269253554300935
Risk Group: Adult, Expected Loss: 4.99660653802954e-27, Premium: 6.3269253554300935
Risk Group: Senior, Expected Loss: 26.825504043565612, Premium: 1371.0392818740702
Risk Group: Elder, Expected Loss: 0.0011897115007840772, Premium: 6.3269253554300935


In [11]:
print("\nPremiums by Risk Group:")
for group, premium in premiums.items():
    print(f"{group}: ${premium:.2f}")



Premiums by Risk Group:
Young: $108.69
Young Adult: $6.33
Adult: $6.33
Senior: $1371.04
Elder: $6.33


In [None]:


data_2025 = pd.read_csv('claim_data_group5_2025.csv')


data_2025['RiskGroup'] = pd.cut(data_2025['DrivAge'], bins=[18, 25, 35, 50, 65, 100], labels=['Young', 'Young Adult', 'Adult', 'Senior', 'Elder'])


data_2025['RiskGroup'] = data_2025['RiskGroup'].astype(str)

# Updated parameters from previous outputs
frequency_params = {'Young': 0.0683, 'Young Adult': 0.0337, 'Adult': 0.0374, 'Senior': 0.0385, 'Elder': 0.0411}
severity_params = {
    'Young': (0.0196, 6207.90),
    'Young Adult': (0.0049, 12511.37),
    'Adult': (0.0037, 17187.69),
    'Senior': (0.0039, 18055.87),
    'Elder': (0.0015, 82906.66)
}
buffered_minimum_premium = 6.33  # Buffered minimum premium from previous output
confidence_level = 0.995
z_score = norm.ppf(confidence_level)

# Calculate premiums for each policy in 2025 data using updated parameters
def calculate_premium(risk_group):
    lambda_freq = frequency_params.get(risk_group, 0)
    alpha, beta = severity_params.get(risk_group, (1, 1))
    
    # Expected loss and standard deviation
    expected_loss = lambda_freq * alpha * beta
    std_dev = np.sqrt(lambda_freq) * alpha * beta
    
    
    premium = expected_loss + z_score * std_dev
    
    
    return max(premium, buffered_minimum_premium)


data_2025['PremiumCharged'] = data_2025['RiskGroup'].apply(calculate_premium)


data_2025['PremiumCharged'] = pd.to_numeric(data_2025['PremiumCharged'], errors='coerce')
data_2025['ClaimAmount'] = pd.to_numeric(data_2025['ClaimAmount'], errors='coerce')

# Group by RiskGroup and sum up total premiums and total claims
grouped_data = data_2025.groupby('RiskGroup').agg({
    'PremiumCharged': 'sum',
    'ClaimAmount': 'sum'
}).rename(columns={'PremiumCharged': 'TotalPremiums', 'ClaimAmount': 'TotalClaims'})

# Calculate loss ratio for each group
grouped_data['LossRatio'] = grouped_data['TotalClaims'] / grouped_data['TotalPremiums']


print("Loss Ratios by Risk Group:")
print(grouped_data[['TotalPremiums', 'TotalClaims', 'LossRatio']])


average_loss_ratio = grouped_data['LossRatio'].mean()
loss_ratio_variance = grouped_data['LossRatio'].var()

print(f"\nAverage Loss Ratio: {average_loss_ratio:.2f}")
print(f"Variance in Loss Ratios: {loss_ratio_variance:.2f}")


Loss Ratios by Risk Group:
             TotalPremiums  TotalClaims  LossRatio
RiskGroup                                         
Adult         1.262204e+06   2992640.26   2.370964
Elder         6.854603e+05    955935.16   1.394589
Senior        9.695206e+05   1762008.03   1.817401
Young         5.100968e+05    608750.50   1.193402
Young Adult   6.852904e+05   1894313.39   2.764249
nan           7.596000e+02      6754.78   8.892549

Average Loss Ratio: 3.07
Variance in Loss Ratios: 8.48
