In [31]:
import pandas as pd

# Input tables, both read from CSV files in the current working directory:
# the per-client table and the price table.
client_csv_path = 'clean_data_after_eda.csv'
price_csv_path = 'Price_Data.csv'

client_data = pd.read_csv(client_csv_path)
price_data = pd.read_csv(price_csv_path)

# Feature Engineering on client_data
#
# Adds calendar parts, durations, consumption ratios, price spreads and margin
# features as new columns on client_data (columns are appended in the order
# below, which is also the column order of any CSV written afterwards).

# Parse each date column exactly once and reuse the parsed series.
activation_date = pd.to_datetime(client_data['date_activ'])
end_date = pd.to_datetime(client_data['date_end'])
last_modif_date = pd.to_datetime(client_data['date_modif_prod'])
renewal_date = pd.to_datetime(client_data['date_renewal'])

# Single reference date shared by both "relative to today" features, truncated
# to midnight. Without the truncation the time of day leaks into the
# subtraction and `.dt.days` (which floors) makes time_to_renewal one day too
# low and dependent on the hour at which the cell is run.
# NOTE(review): both features still change with the run date; pin a fixed
# reference date here if reproducible values are required.
reference_date = pd.to_datetime('today').normalize()

# Calendar components of the activation and end dates.
client_data['activation_year'] = activation_date.dt.year
client_data['activation_month'] = activation_date.dt.month
client_data['activation_day'] = activation_date.dt.day

client_data['end_year'] = end_date.dt.year
client_data['end_month'] = end_date.dt.month
client_data['end_day'] = end_date.dt.day

# Durations in whole days.
client_data['contract_duration'] = (end_date - activation_date).dt.days
client_data['time_since_last_modif'] = (reference_date - last_modif_date).dt.days
client_data['time_to_renewal'] = (renewal_date - reference_date).dt.days

# Denominators with zeros masked to NaN, so a zero denominator yields NaN
# (missing) instead of +/-inf. cons_12m is 0 for some clients, and inf values
# are rejected by most downstream estimators.
cons_12m_nonzero = client_data['cons_12m'].where(client_data['cons_12m'] != 0)
years_antig_nonzero = client_data['num_years_antig'].where(client_data['num_years_antig'] != 0)

client_data['cons_ratio'] = client_data['cons_last_month'] / cons_12m_nonzero
client_data['total_cons'] = client_data['cons_12m'] + client_data['cons_gas_12m']
client_data['forecast_accuracy'] = client_data['forecast_cons_12m'] / cons_12m_nonzero

# Forecast price spreads relative to the off-peak energy price.
client_data['price_diff_peak'] = client_data['forecast_price_energy_peak'] - client_data['forecast_price_energy_off_peak']
# NOTE(review): this subtracts an *energy* price from a *power* price; confirm
# that mixing the two columns is intended.
client_data['price_diff_pow'] = client_data['forecast_price_pow_off_peak'] - client_data['forecast_price_energy_off_peak']

client_data['gross_net_margin_diff'] = client_data['margin_gross_pow_ele'] - client_data['margin_net_pow_ele']
client_data['avg_products_per_year'] = client_data['nb_prod_act'] / years_antig_nonzero

# Feature Engineering on price_data
#
# Adds two variable-price ratios (relative to the off-peak variable price) and
# two peak-minus-off-peak differences (variable and fixed) as new columns.

# Mask zero off-peak prices to NaN so the ratios come out as NaN (missing)
# rather than +/-inf when the denominator is 0.
off_peak_var_nonzero = price_data['price_off_peak_var'].where(price_data['price_off_peak_var'] != 0)

price_data['peak_to_off_peak_ratio'] = price_data['price_peak_var'] / off_peak_var_nonzero
price_data['mid_peak_to_off_peak_ratio'] = price_data['price_mid_peak_var'] / off_peak_var_nonzero
price_data['price_diff_var'] = price_data['price_peak_var'] - price_data['price_off_peak_var']
price_data['price_diff_fix'] = price_data['price_peak_fix'] - price_data['price_off_peak_fix']

# Total energy cost: off-peak and peak forecast energy prices, each multiplied
# by the 12-month consumption, then summed.
# NOTE(review): both prices are applied to the *full* cons_12m, so consumption
# is counted once per tariff band; confirm this is the intended definition
# (no peak/off-peak consumption split is available in this cell).
client_data['total_energy_cost'] = (
    client_data['forecast_price_energy_off_peak'] * client_data['cons_12m'] +
    client_data['forecast_price_energy_peak'] * client_data['cons_12m']
)

# Energy price ratio: peak forecast price relative to off-peak forecast price.
# A zero off-peak price is masked to NaN first so the ratio is NaN (missing)
# instead of +/-inf.
forecast_off_peak_nonzero = client_data['forecast_price_energy_off_peak'].where(
    client_data['forecast_price_energy_off_peak'] != 0
)
client_data['energy_price_ratio'] = (
    client_data['forecast_price_energy_peak'] / forecast_off_peak_nonzero
)

# Expand the two categorical columns into one indicator column per category.
categorical_columns = ['origin_up', 'channel_sales']
client_data = pd.get_dummies(data=client_data, columns=categorical_columns)

# Persist the encoded client table (row index not written).
client_data.to_csv(path_or_buf='encoded_client_data.csv', index=False)

# Inner-join client rows with price rows on the shared 'id' key.
# NOTE(review): price_data can hold several rows per id, in which case every
# client row is repeated once per matching price row in combined_data; confirm
# that downstream steps expect this shape.
combined_data = client_data.merge(price_data, on='id', how='inner')

# Persist the joined table (row index not written).
combined_data.to_csv(path_or_buf='transformed_data.csv', index=False)

In [32]:
# Display the first five rows of the joined table as the cell output.
combined_data.head(n=5)

Unnamed: 0,id,cons_12m,cons_gas_12m,cons_last_month,date_activ,date_end,date_modif_prod,date_renewal,forecast_cons_12m,forecast_cons_year,...,price_off_peak_var,price_peak_var,price_mid_peak_var,price_off_peak_fix,price_peak_fix,price_mid_peak_fix,peak_to_off_peak_ratio,mid_peak_to_off_peak_ratio,price_diff_var,price_diff_fix
0,24011ae4ebbe3035111d65fa7c15bc57,0,54946,0,2013-06-15,2016-06-15,2015-11-01,2015-06-23,0.0,0,...,0.125976,0.103395,0.071536,40.565969,24.339581,16.226389,0.820752,0.567854,-0.022581,-16.226389
1,24011ae4ebbe3035111d65fa7c15bc57,0,54946,0,2013-06-15,2016-06-15,2015-11-01,2015-06-23,0.0,0,...,0.125976,0.103395,0.071536,40.565969,24.339581,16.226389,0.820752,0.567854,-0.022581,-16.226389
2,24011ae4ebbe3035111d65fa7c15bc57,0,54946,0,2013-06-15,2016-06-15,2015-11-01,2015-06-23,0.0,0,...,0.125976,0.103395,0.071536,40.565973,24.339578,16.226383,0.820752,0.567854,-0.022581,-16.226395
3,24011ae4ebbe3035111d65fa7c15bc57,0,54946,0,2013-06-15,2016-06-15,2015-11-01,2015-06-23,0.0,0,...,0.125976,0.103395,0.071536,40.565973,24.339578,16.226383,0.820752,0.567854,-0.022581,-16.226395
4,24011ae4ebbe3035111d65fa7c15bc57,0,54946,0,2013-06-15,2016-06-15,2015-11-01,2015-06-23,0.0,0,...,0.125976,0.103395,0.071536,40.565973,24.339578,16.226383,0.820752,0.567854,-0.022581,-16.226395
