In [1]:
from pprint import pprint
%load_ext autoreload
%autoreload 2

In [5]:
import gc
import os
import sys
import polars as pl

# Make the project packages importable: os.path.abspath('') resolves to the
# current working directory, and the directory two levels above it is
# appended to sys.path before the project-local imports below run.
_working_dir = os.path.abspath('')
_two_levels_up = os.path.dirname(os.path.dirname(_working_dir))
sys.path.append(_two_levels_up)

from utilities.load_utils import *
from utilities.model_utils import *

import analysis_utilities_polars as apl
import export_analysis as ea
import pricing.pricing_logic as prl
import api.config as ac
import api.main as api

In [6]:
# Project helpers: a PathManager constructed with the 'mubi' identifier and a
# LoadManager built on top of it. The cells below use load_manager to load the
# on-top file, the models and the data sets.
path_manager = PathManager('mubi')
load_manager = LoadManager(path_manager)

In [16]:
# Load the API configuration for 'mubi' from JSON and pick the pricing
# configuration stored under the 'OMINIMO-(OC)-price' key. This object is the
# base from which the simulated config variants are derived further down.
api_config = ac.ApiConfig.load_from_json('mubi')
pricing_config = api_config.pricing_config['OMINIMO-(OC)-price']

In [19]:
# Load the on-top file and the models for this pricing configuration (looked up
# with the API config's train_data_name). Both are passed to
# predict_multiple_models below; `models` is iterated via .keys(), whose keys
# are used as target-variable column names.
on_top = load_manager.load_on_top_file()
models = load_manager.load_pricing_config_models(api_config.train_data_name, pricing_config)

In [20]:
# Load the 'mubi_v24' data set. load_data returns the frame plus three column
# lists (fi, fo, fm), which are concatenated and used as the column selection.
# NOTE(review): fi / fo / fm are rebound when 'mubi_v23' is loaded further down,
# so this cell must be re-run before re-running anything that relies on the v24 lists.
data_hu, fi, fo, fm = load_manager.load_data('mubi_v24') 
data_hu = data_hu[fi + fo + fm]

2025-03-03 11:15:57,227 - INFO - Imported data...
2025-03-03 11:15:57,228 - INFO - Imported feature data...


In [21]:
# Build the synthetic "addition" set: targeted samples from the v24 data that
# over-represent specific segments (inexperienced drivers, young cars, very
# young / old contractors, Warszawa) plus a large sample of rows with
# licence_at_age == 18. The set is scored by the models and appended to the
# crawled data in later cells.
#
# Every draw uses an explicit seed so that Restart Kernel -> Run All yields the
# same synthetic rows (and therefore the same exported analysis) each time.
# As before, .sample(n) raises ValueError if a segment has fewer than n rows.
SAMPLE_SEED = 42

addition_on_exp_drivers = data_hu[data_hu['driver_experience'].between(0, 1)].sample(500, random_state=SAMPLE_SEED)
addition_young_cars = data_hu[data_hu['vehicle_age'] < 2].sample(500, random_state=SAMPLE_SEED)
addition_young_people = data_hu[data_hu['contractor_age'].between(17, 18)].sample(500, random_state=SAMPLE_SEED)
addition_old_people = data_hu[data_hu['contractor_age'].between(70, 90)].sample(500, random_state=SAMPLE_SEED)
addition_warsawa = data_hu[data_hu['county'].astype(str).str.contains('Warszawa')].sample(500, random_state=SAMPLE_SEED)
# NOTE(review): despite its name this is not an unrestricted random sample;
# it is restricted to rows with licence_at_age == 18.
addition_random = data_hu[data_hu['licence_at_age'] == 18].sample(50000, random_state=SAMPLE_SEED)

# The segment samples can overlap, so exact duplicate rows are dropped after stacking.
addition = pd.concat(
    [
        addition_on_exp_drivers,
        addition_young_cars,
        addition_young_people,
        addition_old_people,
        addition_warsawa,
        addition_random,
    ]
).drop_duplicates()

# Fixed crawling date; the date columns below are blanked for the synthetic rows.
addition['crawling_date'] = '2025.01.01'
addition['policy_start_date'] = None
addition['contractor_birth_date'] = None
addition['contractor_driver_licence_date'] = None

In [22]:
# Score the synthetic rows with every loaded model (and the on-top file).
addition = predict_multiple_models(addition, models, on_top, inplace=True)

# For the synthetic rows, each target column is overwritten with the
# corresponding "<target>_model_prediction" column.
for target_variable in models:
    addition[target_variable] = addition[f"{target_variable}_model_prediction"]

# Tag the rows so they can be told apart from the crawled rows after stacking.
addition['data_source'] = 'hungary_quotes'

In [24]:
# Load the 'mubi_v23' data set and keep the fi / fo / fm columns plus every
# target variable named in the pricing configuration.
data_pl, fi, fo, fm = load_manager.load_data('mubi_v23')
target_columns = list(pricing_config.target_variables_and_model_config.keys())
data_pl = data_pl[fi + fo + fm + target_columns]

# Drop one specific profile: FORD Fiesta with no infoexpert version and a
# contractor birth date of 1984.01.01.
# NOTE(review): presumably a placeholder / test profile - confirm the reason.
is_ford_fiesta = (data_pl['vehicle_maker'] == 'FORD') & (data_pl['vehicle_model'] == 'Fiesta')
has_no_version = data_pl['vehicle_infoexpert_version'].isna()
born_1984_01_01 = data_pl['contractor_birth_date'].astype(str) == '1984.01.01'
data_pl = data_pl[~(is_ford_fiesta & has_no_version & born_1984_01_01)]

# Keep only rows that actually carry a UNIQA price.
uniqa_price_missing = data_pl['UNIQA-(OC),(NNW),(Assistance=75 km PL,After breakdown,Replacement vehicle)-price'].isnull()
data_pl = data_pl[~uniqa_price_missing]

2025-03-03 11:17:04,210 - INFO - Imported data...
2025-03-03 11:17:04,211 - INFO - Imported feature data...


In [25]:
# Score the crawled rows with the same models / on-top file as the synthetic
# rows. Unlike the synthetic rows, the observed target columns are left as they
# are here. The rows are tagged 'crawled_data' for later filtering.
data_pl = predict_multiple_models(data_pl, models, on_top, inplace=True)
data_pl['data_source'] = 'crawled_data'

In [26]:
# Stack crawled and synthetic rows into the simulation frame, then cast every
# column that exists in the crawled frame back to the crawled frame's dtype.
# NOTE(review): the row index is not reset, so index labels may repeat across
# the two sources.
data_dtypes = data_pl.dtypes.to_dict()
sim_data = pd.concat([data_pl, addition]).astype(data_dtypes)

In [27]:
# Sanity check on the crawled rows only: show the columns whose name contains
# 'MTU', i.e. the observed MTU24 price next to its model prediction.
sim_data[sim_data['data_source'] == 'crawled_data'].filter(like = 'MTU')

Unnamed: 0,MTU24-(OC)-price,MTU24-(OC)-price_model_prediction
0,928.0,925.534082
2,1349.0,1349.016598
3,1339.0,1358.041608
4,873.0,860.729625
5,1215.0,1255.552385
...,...,...
15401,842.0,870.132627
15402,1325.0,1326.706183
15403,1614.0,1611.147115
15404,1190.0,1188.245796


In [28]:
# Run a garbage-collection pass before building and running the pricing variants.
gc.collect()

150

In [30]:
# Pricing-config variants to simulate, keyed by a human-readable label.
# 'base_config' is the unmodified configuration loaded above.
api_configs = {
    'base_config': pricing_config
}

for uniqa_weight in [0.76]:
    for top_k in [3]:
        for undercut_factor in [0.93, 0.95, 0.99]:
            # Every variant is an independent deep copy; the base config must
            # never be modified, because it is itself one of the simulated entries.
            current_pricing_config = pricing_config.model_copy(deep=True)
            kernel = current_pricing_config.tp_kernel

            # First kernel entry gets the requested weight. This is written to the
            # COPY: writing it to `pricing_config` would corrupt 'base_config' and
            # leave the first variant without the new weight.
            kernel[0]['weight'] = uniqa_weight

            # Rescale the remaining entries proportionally so that they share the
            # remaining (1 - uniqa_weight). Rounding to 3 decimals means the
            # weights may not sum to exactly 1.
            rest_weight_sum = sum(entry['weight'] for entry in kernel[1:])
            for entry in kernel[1:]:
                entry['weight'] = round((entry['weight'] / rest_weight_sum) * (1 - uniqa_weight), 3)

            current_pricing_config.rank1_undercut_factor = undercut_factor
            current_pricing_config.tp_take_top_k = top_k
            api_configs[f'Uniqa weight={uniqa_weight},Undercut Factor={undercut_factor},Take TP top {top_k}'] = current_pricing_config

In [31]:
# Inspect the kernel weights of one generated variant. The key must match the
# label format produced in the variant-building cell exactly.
api_configs[f'Uniqa weight=0.76,Undercut Factor=0.93,Take TP top 3'].tp_kernel

[{'target_variable': 'UNIQA-(OC),(NNW),(Assistance=75 km PL,After breakdown,Replacement vehicle)-price',
  'cost_estimate': 0.93,
  'weight': 0.76},
 {'target_variable': 'MTU24-(OC)-price',
  'cost_estimate': 0.89,
  'weight': 0.16},
 {'target_variable': 'ALLIANZ-(OC)-price',
  'cost_estimate': 0.89,
  'weight': 0.04},
 {'target_variable': 'LINK4-(OC),(Assistance=100 km PL,Replacement vehicle)-price',
  'cost_estimate': 0.87,
  'weight': 0.04}]

In [32]:
# Fresh reload of the 'OMINIMO-(OC)-price' pricing configuration from JSON,
# independent of the `pricing_config` object used above.
# NOTE(review): not referenced by any later cell visible in this notebook.
base_pricing_config = ac.ApiConfig.load_from_json('mubi').pricing_config['OMINIMO-(OC)-price']

In [33]:
# Price columns present in the crawled frame (names ending in '-price') and the
# insurer name taken from each of them (the text before the first '-').
# Both lists are sorted independently.
price_cols_wo_ominimo = sorted(col for col in data_pl.columns if col.endswith('-price'))
insurers_wo_ominimo = sorted(col.split('-')[0] for col in price_cols_wo_ominimo)

In [38]:
# Analyses to export, keyed by sheet label. The first entry is the analysis of
# the simulation data as-is, using the price columns found in the crawled frame.
bas = {
    'Without Ominimo': apl.create_basic_analysis(
        pl.from_pandas(sim_data),
        insurers_wo_ominimo,
        price_cols_wo_ominimo,
        has_conversion_data=False,
        country='pl',
    ).to_pandas()
}

for name, current_api_config in api_configs.items():
    # Price with THIS variant's config. Using `current_pricing_config` here would
    # silently reuse the last config built in the variant-building cell for every
    # entry, making all exported sheets identical.
    calculated_prices = prl.calculate_price(sim_data.copy(), current_api_config)

    price_cols = sorted(col for col in calculated_prices if col.endswith('-price'))
    insurers = sorted(col.split('-')[0] for col in price_cols)

    # Convert to polars once and reuse it for both the full and the crawled-only analysis.
    calculated_prices_pl = pl.from_pandas(calculated_prices)
    ba = apl.create_basic_analysis(
        calculated_prices_pl, insurers, price_cols, has_conversion_data=False, country='pl'
    )
    ba_crawled = apl.create_basic_analysis(
        calculated_prices_pl.filter(pl.col('data_source') == 'crawled_data'),
        insurers,
        price_cols,
        has_conversion_data=False,
        country='pl',
    )

    bas[name] = ba.to_pandas()
    bas[name + ", crawled_data_only"] = ba_crawled.to_pandas()

    # Release the per-variant frames before the next iteration.
    gc.collect()
    

In [39]:

# Segment labels that are flagged True in the mapping handed to the exporter.
# NOTE(review): the exact meaning of "reversed" is defined by
# export_analysis.export_workbook - confirm there.
reversed_segments = [
    "Contractor Age : Under 18",
    "Contractor Age : 18-24",
    "Contractor Age : Over 75",
    "Number of Damages : 1 damage",
    "Number of Damages : 2 damages",
    "Number of Damages : 3 damages",
    "Driver Experience : 0",
    "Driver Experience : 1",
    "Vehicle Power : 201+",
    "Postal Code Population Density : Dense",
]
reversed_dict = dict.fromkeys(reversed_segments, True)

# Write every analysis collected in `bas` to the workbook "sim_data.xlsx".
ea.export_workbook(bas, "sim_data.xlsx", reversed_dict)

