In [1]:
import pandas as pd

In [17]:
# Read the saved per-row model outputs (labels, class probabilities, features) from disk.
results_df = pd.read_csv(filepath_or_buffer='results.csv')
results_df

Unnamed: 0,Actual,Predicted,Prob_Class_0,Prob_Class_1,age,sex,bmi,smoker,charges,region_1,region_2,region_3,children_1,children_2,children_3,children_4,children_5
0,1,1,0.153846,0.846154,60,1,36.955,0,12741.16745,0,0,0,0,0,0,0,0
1,0,0,1.000000,0.000000,53,1,21.400,0,10065.41300,0,0,1,1,0,0,0,0
2,0,0,1.000000,0.000000,46,1,43.890,0,8944.11510,0,1,0,0,0,1,0,0
3,1,1,0.000000,1.000000,20,1,33.000,0,1980.07000,0,0,1,1,0,0,0,0
4,1,1,0.000000,1.000000,63,0,31.800,0,13880.94900,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,0,0,1.000000,0.000000,40,0,23.370,0,8252.28430,0,0,0,0,0,1,0,0
252,0,0,1.000000,0.000000,48,0,27.265,0,9447.25035,0,0,0,1,0,0,0,0
253,0,0,0.923077,0.076923,31,1,31.065,0,5425.02335,1,0,0,0,0,1,0,0
254,0,0,0.875000,0.125000,63,0,25.080,0,14254.60820,1,0,0,0,0,0,0,0


In [19]:
# Tally the rows on which the predicted label agrees with the actual label.
matching_rows = results_df['Actual'].eq(results_df['Predicted']).sum()

print(f"Number of rows with matching values: {matching_rows}")
print(f"Model accuracy is: {(matching_rows/len(results_df))*100}%")

Number of rows with matching values: 231
Model accuracy is: 90.234375%


In [22]:
import numpy as np

# Define a function to calculate premium
def calculate_premium(row):
    """Compute the rule-based premium for a single record.

    A weighted risk score is built from age, BMI, smoker flag and the
    one-hot encoded children count, turned into a loading that shrinks
    as ``charges`` grows, and added to a floor chosen by ``Actual``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing
            ``age``, ``bmi``, ``smoker``, ``charges``, ``Actual`` and
            ``children_1`` .. ``children_5``.

    Returns:
        The premium, clamped to the interval [1, 8000].
    """
    # Recover the number of children from the one-hot columns children_1..children_5.
    dependants = 0
    for n in range(1, 6):
        dependants += row[f'children_{n}'] * n

    # Weighted risk contributions; the smoker flag is scaled by 100 so it
    # is not drowned out by the numerically larger age and BMI terms.
    age_part = 0.4 * row['age']
    bmi_part = 0.3 * row['bmi']
    smoker_part = 0.2 * row['smoker'] * 100
    children_part = 0.1 * dependants
    risk_score = age_part + bmi_part + smoker_part + children_part

    # The loading is inversely related to charges; it is NOT inherently
    # bounded, which is why the result is clamped below.
    loading = 3000 * risk_score / (100 + row['charges'])

    # Records with an actual claim start from a higher floor.
    floor = 5000 if row['Actual'] == 1 else 1000
    premium = floor + loading

    # Clamp to [1, 8000]: cap first, then apply the lower bound.
    capped = premium if premium < 8000 else 8000
    return capped if capped > 1 else 1

# Price every record with the rule-based formula, one call per row.
results_df['Company_A_Premium'] = results_df.apply(calculate_premium, axis='columns')

# Company B starts from a flat two-tier price driven only by the predicted class:
# 5000 when the prediction is 1, otherwise 1000.
results_df['Company_B_Premium'] = results_df['Predicted'].eq(1).map({True: 5000, False: 1000})
results_df

Unnamed: 0,Actual,Predicted,Prob_Class_0,Prob_Class_1,age,sex,bmi,smoker,charges,region_1,region_2,region_3,children_1,children_2,children_3,children_4,children_5,Company_A_Premium,Company_B_Premium
0,1,1,0.153846,0.846154,60,1,36.955,0,12741.16745,0,0,0,0,0,0,0,0,5008.197035,5000
1,0,0,1.000000,0.000000,53,1,21.400,0,10065.41300,0,0,1,1,0,0,0,0,1008.180681,1000
2,0,0,1.000000,0.000000,46,1,43.890,0,8944.11510,0,1,0,0,0,1,0,0,1010.570520,1000
3,1,1,0.000000,1.000000,20,1,33.000,0,1980.07000,0,0,1,1,0,0,0,0,5025.960665,5000
4,1,1,0.000000,1.000000,63,0,31.800,0,13880.94900,0,0,1,0,0,0,0,0,5007.454430,5000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,0,0,1.000000,0.000000,40,0,23.370,0,8252.28430,0,0,0,0,0,1,0,0,1008.372919,1000
252,0,0,1.000000,0.000000,48,0,27.265,0,9447.25035,0,0,0,1,0,0,0,0,1008.634790,1000
253,0,0,0.923077,0.076923,31,1,31.065,0,5425.02335,1,0,0,0,0,1,0,0,1011.956239,1000
254,0,0,0.875000,0.125000,63,0,25.080,0,14254.60820,1,0,0,0,0,0,0,0,1006.839058,1000


In [26]:
import pandas as pd
from scipy.optimize import minimize

# Define the objective function
def objective_function(company_b_premium, prob_class_0, prob_class_1, company_a_premium, gamma):
    """Evaluate the quantity minimised when choosing Company B's premium.

    The value is the sum of three parts:

    * ``(-0.95 * company_b_premium + 500) * prob_class_0``
    * ``(2500 - 0.5 * company_b_premium) * prob_class_1``
    * ``gamma * (company_b_premium - company_a_premium) ** 2``

    Args:
        company_b_premium: Candidate premium (scalar or array-like).
        prob_class_0: Probability assigned to class 0.
        prob_class_1: Probability assigned to class 1.
        company_a_premium: Reference premium the candidate is compared with.
        gamma: Weight of the squared-deviation penalty.

    Returns:
        The objective value, with the same shape as ``company_b_premium``.
    """
    class_0_part = (-0.95 * company_b_premium + 500) * prob_class_0
    class_1_part = (2500 - 0.5 * company_b_premium) * prob_class_1
    # Quadratic penalty for moving away from the reference premium.
    deviation_penalty = gamma * (company_b_premium - company_a_premium) ** 2
    return class_0_part + class_1_part + deviation_penalty

# Define the gradient of the objective function
def gradient(company_b_premium, prob_class_0, prob_class_1, company_a_premium, gamma):
    """Derivative of the objective with respect to ``company_b_premium``.

    Args:
        company_b_premium: Point at which the derivative is evaluated.
        prob_class_0: Probability assigned to class 0.
        prob_class_1: Probability assigned to class 1.
        company_a_premium: Reference premium used by the penalty term.
        gamma: Weight of the squared-deviation penalty.

    Returns:
        ``-0.95 * prob_class_0 - 0.5 * prob_class_1
        + 2 * gamma * (company_b_premium - company_a_premium)``.
    """
    # The two probability-weighted parts are linear in the premium, so
    # their slopes are constants; only the penalty slope depends on it.
    slope_class_0 = -0.95 * prob_class_0
    slope_class_1 = -0.5 * prob_class_1
    slope_penalty = 2 * gamma * (company_b_premium - company_a_premium)
    return slope_class_0 + slope_class_1 + slope_penalty

# Optimization function for a single row
def optimize_premium(row, gamma=1.0):
    """Numerically minimise the objective for one record.

    Runs L-BFGS-B on ``objective_function`` with the analytic ``gradient``,
    starting from the record's current ``Company_B_Premium``.

    Setting the gradient to zero gives, for ``gamma > 0``, a stationary
    point at ``Company_A_Premium + (0.95 * Prob_Class_0 +
    0.5 * Prob_Class_1) / (2 * gamma)``, so the solver should land close
    to that value.

    Args:
        row: Record providing ``Prob_Class_0``, ``Prob_Class_1``,
            ``Company_A_Premium`` and ``Company_B_Premium``.
        gamma: Weight of the squared-deviation penalty.

    Returns:
        The optimised premium (first element of the solver's solution).
    """
    # Fixed per-record parameters, in the positional order expected by
    # objective_function and gradient after the decision variable.
    fixed_params = (
        row["Prob_Class_0"],
        row["Prob_Class_1"],
        row["Company_A_Premium"],
        gamma,
    )
    start = row["Company_B_Premium"]

    # `args` is forwarded by scipy to both the objective and its jacobian.
    solution = minimize(
        objective_function,
        x0=start,
        args=fixed_params,
        jac=gradient,
        method="L-BFGS-B",
    )
    return solution.x[0]

# Work on the scored table under a shorter name. This is an alias, not a copy,
# so the column added below also appears on results_df.
df = results_df

# Weight of the squared-deviation penalty; 1.0 is only an example value.
gamma = 1.0
# Optimise row by row; the extra positional argument is forwarded to optimize_premium.
df["Optimized_Company_B_Premium"] = df.apply(optimize_premium, axis=1, args=(gamma,))

# Show the table with the optimised premium appended.
df


Unnamed: 0,Actual,Predicted,Prob_Class_0,Prob_Class_1,age,sex,bmi,smoker,charges,region_1,region_2,region_3,children_1,children_2,children_3,children_4,children_5,Company_A_Premium,Company_B_Premium,Optimized_Company_B_Premium
0,1,1,0.153846,0.846154,60,1,36.955,0,12741.16745,0,0,0,0,0,0,0,0,5008.197035,5000,5008.481650
1,0,0,1.000000,0.000000,53,1,21.400,0,10065.41300,0,0,1,1,0,0,0,0,1008.180681,1000,1008.655681
2,0,0,1.000000,0.000000,46,1,43.890,0,8944.11510,0,1,0,0,0,1,0,0,1010.570520,1000,1011.045520
3,1,1,0.000000,1.000000,20,1,33.000,0,1980.07000,0,0,1,1,0,0,0,0,5025.960665,5000,5026.210665
4,1,1,0.000000,1.000000,63,0,31.800,0,13880.94900,0,0,1,0,0,0,0,0,5007.454430,5000,5007.704430
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,0,0,1.000000,0.000000,40,0,23.370,0,8252.28430,0,0,0,0,0,1,0,0,1008.372919,1000,1008.847919
252,0,0,1.000000,0.000000,48,0,27.265,0,9447.25035,0,0,0,1,0,0,0,0,1008.634790,1000,1009.109790
253,0,0,0.923077,0.076923,31,1,31.065,0,5425.02335,1,0,0,0,0,1,0,0,1011.956239,1000,1012.413931
254,0,0,0.875000,0.125000,63,0,25.080,0,14254.60820,1,0,0,0,0,0,0,0,1006.839058,1000,1007.285933
