In [3]:
import pandas as pd

In [33]:
df = pd.read_csv("Car_Insurance_Claim.csv")
print(df.shape)
df.isnull().sum()

(10000, 17)


ID                       0
AGE                      0
GENDER                   0
DRIVING_EXPERIENCE       0
EDUCATION                0
INCOME                   0
CREDIT_SCORE           982
VEHICLE_OWNERSHIP        0
VEHICLE_YEAR             0
MARRIED                  0
CHILDREN                 0
ANNUAL_MILEAGE         957
VEHICLE_TYPE             0
SPEEDING_VIOLATIONS      0
DUIS                     0
PAST_ACCIDENTS           0
OUTCOME                  0
dtype: int64

In [34]:
df["ANNUAL_MILEAGE"].fillna(df["ANNUAL_MILEAGE"].mean().round(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["ANNUAL_MILEAGE"].fillna(df["ANNUAL_MILEAGE"].mean().round(), inplace=True)


In [36]:
df.columns

Index(['ID', 'AGE', 'GENDER', 'DRIVING_EXPERIENCE', 'EDUCATION', 'INCOME',
       'CREDIT_SCORE', 'VEHICLE_OWNERSHIP', 'VEHICLE_YEAR', 'MARRIED',
       'CHILDREN', 'ANNUAL_MILEAGE', 'VEHICLE_TYPE', 'SPEEDING_VIOLATIONS',
       'DUIS', 'PAST_ACCIDENTS', 'OUTCOME'],
      dtype='object')

In [37]:
df.dtypes

ID                       int64
AGE                     object
GENDER                  object
DRIVING_EXPERIENCE      object
EDUCATION               object
INCOME                  object
CREDIT_SCORE           float64
VEHICLE_OWNERSHIP        int64
VEHICLE_YEAR            object
MARRIED                  int64
CHILDREN                 int64
ANNUAL_MILEAGE         float64
VEHICLE_TYPE            object
SPEEDING_VIOLATIONS      int64
DUIS                     int64
PAST_ACCIDENTS           int64
OUTCOME                  int64
dtype: object

In [39]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ID,10000.0,500521.9068,290030.768758,101.0,249638.5,501777.0,753974.5,999976.0
CREDIT_SCORE,9018.0,0.515813,0.137688,0.053358,0.417191,0.525033,0.618312,0.960819
VEHICLE_OWNERSHIP,10000.0,0.697,0.459578,0.0,0.0,1.0,1.0,1.0
MARRIED,10000.0,0.4982,0.500022,0.0,0.0,0.0,1.0,1.0
CHILDREN,10000.0,0.6888,0.463008,0.0,0.0,1.0,1.0,1.0
ANNUAL_MILEAGE,10000.0,11697.0029,2680.167384,2000.0,10000.0,11697.0,13000.0,22000.0
SPEEDING_VIOLATIONS,10000.0,1.4829,2.241966,0.0,0.0,0.0,2.0,22.0
DUIS,10000.0,0.2392,0.55499,0.0,0.0,0.0,0.0,6.0
PAST_ACCIDENTS,10000.0,1.0563,1.652454,0.0,0.0,0.0,2.0,15.0
OUTCOME,10000.0,0.3133,0.463858,0.0,0.0,0.0,1.0,1.0


In [48]:
for col in df:
    print(df[col].unique())

[569520 750365 199901 ... 468409 903459 442696]
['65+' '16-25' '26-39' '40-64']
['female' 'male']
['0-9y' '10-19y' '20-29y' '30y+']
['high school' 'none' 'university']
['upper class' 'poverty' 'working class' 'middle class']
[0.62902731 0.35775712 0.49314579 ... 0.47094023 0.36418479 0.43522478]
[1 0]
['after 2015' 'before 2015']
[0 1]
[1 0]
[12000. 16000. 11000. 13000. 14000. 10000.  8000. 11697. 18000. 17000.
  7000. 15000.  9000.  5000.  6000. 19000.  4000.  3000.  2000. 20000.
 21000. 22000.]
['sedan' 'sports car']
[ 0  2  3  7  6  4 10 13  1  5  9  8 12 11 15 17 19 18 16 14 22]
[0 2 1 3 4 5 6]
[ 0  1  3  7  2  5  4  6  8 10 11  9 12 14 15]
[0 1]


In [85]:
#adding prtice column
#maps
age_map = {
    '16-25': 1.6,
    '26-39': 1.2,
    '40-64': 1.0,
    '65+': 1.1
}

experience_map = {
    '0-9y': 1.3,
    '10-19y': 1.1,
    '20-29y': 1.0,
    '30y+': 0.9
}

education_map = {
    'none': 1.2,
    'high school': 1.1,
    'university': 0.9
}

income_map = {
    'poverty': 1.3,
    'working class': 1.1,
    'middle class': 1.0,
    'upper class': 0.9
}

vehicle_year_map = {
    'before 2015': 1.3,
    'after 2015': 1.0
}

vehicle_type_map = {
    'sedan': 1.0,
    'sports car': 1.4
}

In [86]:
def estimate_price(row):
    base = 500 
    if(row['OUTCOME']==1):
        age_factor = age_map.get(row['AGE'], 1)
        exp_factor = experience_map.get(row['DRIVING_EXPERIENCE'], 1)
        edu_factor = education_map.get(row['EDUCATION'], 1)
        income_factor = income_map.get(row['INCOME'], 1)
        credit_factor = 1.5 if row['CREDIT_SCORE'] < 0.5 else 1.0
        vehicle_factor = vehicle_type_map.get(row['VEHICLE_TYPE'], 1)
        year_factor = vehicle_year_map.get(row['VEHICLE_YEAR'], 1)
    
        violations = row['SPEEDING_VIOLATIONS']
        duis = row['DUIS']
        accidents = row['PAST_ACCIDENTS']
        
        risk_factor = 1 + 0.1 * violations + 0.5 * duis + 0.15 * accidents
        marriage_discount = 0.95 if row['MARRIED'] == 1 else 1.0
        children_discount = 0.98-0.01*row['CHILDREN'] if row['CHILDREN'] > 0 else 1.0
    
        final_price = base * age_factor * exp_factor * edu_factor * income_factor
        final_price *= credit_factor * vehicle_factor * year_factor * risk_factor
        final_price *= marriage_discount * children_discount
    else:
        return 0

    return round(final_price)

In [87]:
df['PRICE'] = df.apply(estimate_price, axis=1)

In [88]:
df

Unnamed: 0,ID,AGE,GENDER,DRIVING_EXPERIENCE,EDUCATION,INCOME,CREDIT_SCORE,VEHICLE_OWNERSHIP,VEHICLE_YEAR,MARRIED,CHILDREN,ANNUAL_MILEAGE,VEHICLE_TYPE,SPEEDING_VIOLATIONS,DUIS,PAST_ACCIDENTS,OUTCOME,PRICE
0,569520,65+,female,0-9y,high school,upper class,0.629027,1,after 2015,0,1,12000.0,sedan,0,0,0,0,0
1,750365,16-25,male,0-9y,none,poverty,0.357757,0,before 2015,0,0,16000.0,sedan,0,0,0,1,3164
2,199901,16-25,female,0-9y,high school,working class,0.493146,1,before 2015,0,0,11000.0,sedan,0,0,0,0,0
3,478866,16-25,male,0-9y,university,working class,0.206013,1,before 2015,0,1,11000.0,sedan,0,0,0,0,0
4,731664,26-39,male,10-19y,none,working class,0.388366,1,before 2015,0,0,12000.0,sedan,2,0,1,1,2293
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,323164,26-39,female,10-19y,university,upper class,0.582787,1,before 2015,0,0,16000.0,sedan,0,0,1,0,0
9996,910346,26-39,female,10-19y,none,middle class,0.522231,1,after 2015,0,1,11697.0,sedan,1,0,0,0,0
9997,468409,26-39,male,0-9y,high school,middle class,0.470940,1,before 2015,0,1,14000.0,sedan,0,0,0,0,0
9998,903459,26-39,female,10-19y,high school,poverty,0.364185,0,before 2015,0,1,13000.0,sedan,2,0,1,1,2410
