In [1]:
import pandas as pd
import plotly.express as px


# Combination results for 2024

In [17]:
# Column names as they appear in the submission.csv files read by this notebook.
# `pred_col` is renamed per run so the different model variants can sit side by side;
# `true_col` is renamed to 'label' and is the column predictions are scored against.
pred_col = 'TARGET_VARIABLE'
true_col = 'Value(true)'

In [18]:
# Submission from the run whose output directory has every use* flag set to False.
no_annual_fp = 'outputs_offline_2024/useCoolerV_False_useActiveSp_False_useCO2_False_useHumidity_False_useCtrlBldg_False/submission.csv'
# Rename while loading: the prediction column gets a run-specific name and the
# true-value column becomes 'label'.
no_annual_df = pd.read_csv(no_annual_fp).rename(
    columns={pred_col: 'no_annual', true_col: 'label'}
)
no_annual_df.head()

Unnamed: 0,ID,no_annual,label
0,2024-06-01_00:00:00,9.635131,10.793529
1,2024-06-01_00:10:00,9.621962,10.806132
2,2024-06-01_00:20:00,9.608439,10.749218
3,2024-06-01_00:30:00,9.595465,10.699082
4,2024-06-01_00:40:00,9.583726,10.877648


In [19]:
# Submission from the run with only useCoolerV set to True.
with_cooler_valves_fp = 'outputs_offline_2024/useCoolerV_True_useActiveSp_False_useCO2_False_useHumidity_False_useCtrlBldg_False/submission.csv'
# Give the prediction column a run-specific name so it can be merged next to the others.
with_cooler_valves_df = pd.read_csv(with_cooler_valves_fp).rename(
    columns={pred_col: 'with_cooler_valves'}
)

In [20]:
# Submission from the run with useCoolerV and useActiveSp set to True.
with_coolerv_activesp_fp = 'outputs_offline_2024/useCoolerV_True_useActiveSp_True_useCO2_False_useHumidity_False_useCtrlBldg_False/submission.csv'
# Give the prediction column a run-specific name so it can be merged next to the others.
with_coolerv_activesp_df = pd.read_csv(with_coolerv_activesp_fp).rename(
    columns={pred_col: 'with_coolerv_activesp'}
)

In [21]:
# Submission from the run with useCoolerV, useActiveSp and useCO2 set to True.
with_coolerv_activesp_co2_fp = 'outputs_offline_2024/useCoolerV_True_useActiveSp_True_useCO2_True_useHumidity_False_useCtrlBldg_False/submission.csv'
# Give the prediction column a run-specific name so it can be merged next to the others.
with_coolerv_activesp_co2_df = pd.read_csv(with_coolerv_activesp_co2_fp).rename(
    columns={pred_col: 'with_coolerv_activesp_co2'}
)

In [22]:
# Line up every run's prediction on the shared 'ID' key. Outer joins keep IDs that
# are absent from some submissions (those cells become NaN). Then add fixed
# 0.8 / 0.2 weighted blends of the no_annual prediction with each feature variant.
prediction_df = (
    no_annual_df
    .merge(with_cooler_valves_df[['ID', 'with_cooler_valves']], on='ID', how='outer')
    .merge(with_coolerv_activesp_df[['ID', 'with_coolerv_activesp']], on='ID', how='outer')
    .merge(with_coolerv_activesp_co2_df[['ID', 'with_coolerv_activesp_co2']], on='ID', how='outer')
    .assign(
        annual_and_cooler_combi=lambda d: 0.8 * d['no_annual'] + 0.2 * d['with_cooler_valves'],
        annual_and_coolerv_activesp_combi=lambda d: 0.8 * d['no_annual'] + 0.2 * d['with_coolerv_activesp'],
        annual_and_coolerv_activesp_co2_combi=lambda d: 0.8 * d['no_annual'] + 0.2 * d['with_coolerv_activesp_co2'],
    )
)
prediction_df.head()

Unnamed: 0,ID,no_annual,label,with_cooler_valves,with_coolerv_activesp,with_coolerv_activesp_co2,annual_and_cooler_combi,annual_and_coolerv_activesp_combi,annual_and_coolerv_activesp_co2_combi
0,2024-06-01_00:00:00,9.635131,10.793529,10.13941,10.444136,10.000911,9.735986,9.796932,9.708287
1,2024-06-01_00:10:00,9.621962,10.806132,10.221801,10.469741,10.006223,9.74193,9.791518,9.698814
2,2024-06-01_00:20:00,9.608439,10.749218,10.230701,10.362116,9.992249,9.732891,9.759174,9.685201
3,2024-06-01_00:30:00,9.595465,10.699082,10.238467,10.290839,9.98661,9.724065,9.73454,9.673694
4,2024-06-01_00:40:00,9.583726,10.877648,10.243085,10.224933,9.991155,9.715598,9.711967,9.665212


In [23]:
# Long format for plotting: one row per (ID, series). With value_vars left unset,
# melt unpivots every column other than the id column — the label included.
melted_df = pd.melt(
    prediction_df,
    id_vars=['ID'],
    var_name='prediction',
    value_name='value',
)
melted_df.head()

Unnamed: 0,ID,prediction,value
0,2024-06-01_00:00:00,no_annual,9.635131
1,2024-06-01_00:10:00,no_annual,9.621962
2,2024-06-01_00:20:00,no_annual,9.608439
3,2024-06-01_00:30:00,no_annual,9.595465
4,2024-06-01_00:40:00,no_annual,9.583726


In 2024:
- The model does not respond to weekends!

In [24]:
# One line per series in melted_df: the label plus every model variant and blend.
# The title covers all of them rather than naming only the no-annual run.
px.line(melted_df, x='ID', y='value', color='prediction', title='Model Predictions vs True Values (2024)')

In [25]:
from datetime import datetime

def get_rmse_for_period(df: pd.DataFrame, start_date: datetime, end_date: datetime, label_col: str = 'label', id_col: str = 'ID') -> dict:
    """Compute the RMSE of every prediction column inside a time window.

    Rows are kept when ``start_date <= df[id_col] < end_date``. Every column
    other than ``id_col`` and ``label_col`` is treated as a prediction and is
    scored against ``label_col``.

    Args:
        df: Frame holding the id column, the label column and one or more
            prediction columns. ``df[id_col]`` must be comparable with the
            given bounds.
        start_date: Inclusive lower bound of the window.
        end_date: Exclusive upper bound of the window.
        label_col: Name of the column with the true values.
        id_col: Name of the column the window is applied to.

    Returns:
        Dict mapping each prediction column name to its RMSE over the window.
        Missing values are skipped by the pandas mean; a window without rows
        yields NaN for every column.
    """
    in_window = (df[id_col] >= start_date) & (df[id_col] < end_date)
    window_df = df.loc[in_window]
    # The comprehension variable is deliberately not called `pred_col`, so it
    # cannot be confused with the notebook-level constant of that name.
    return {
        column: ((window_df[column] - window_df[label_col]) ** 2).mean() ** 0.5
        for column in window_df.columns
        if column not in (id_col, label_col)
    }

# Evaluation windows as (start, end) pairs; get_rmse_for_period treats the start
# as inclusive and the end as exclusive.
periods = [
    (datetime(2024, 6, 1), datetime(2024, 7, 1)),
    (datetime(2024, 7, 1), datetime(2024, 8, 1)),
]

# Parse the ID strings into timestamps so they can be compared with the datetime bounds.
prediction_df['ID'] = pd.to_datetime(prediction_df['ID'], format='%Y-%m-%d_%H:%M:%S')

period_results = {}
for start_date, end_date in periods:
    period_key = f"{start_date:%Y-%m-%d} to {end_date:%Y-%m-%d}"
    rmse_results = get_rmse_for_period(prediction_df, start_date, end_date)
    period_results[period_key] = rmse_results

# Rows are prediction columns, columns are periods; the last column averages
# the per-period RMSE values of each row.
results_df = pd.DataFrame(period_results)
results_df = results_df.assign(mean_rmse=results_df.mean(axis=1))
results_df

Unnamed: 0,2024-06-01 to 2024-07-01,2024-07-01 to 2024-08-01,mean_rmse
no_annual,2.58795,2.409865,2.498908
with_cooler_valves,2.002534,1.501136,1.751835
with_coolerv_activesp,1.632386,1.1715,1.401943
with_coolerv_activesp_co2,1.951887,1.904257,1.928072
annual_and_cooler_combi,2.392297,2.025747,2.209022
annual_and_coolerv_activesp_combi,2.224775,1.932432,2.078603
annual_and_coolerv_activesp_co2_combi,2.358021,2.28358,2.320801


In [26]:
# Rank within each column, ascending: rank 1.0 is the lowest RMSE in that column.
results_df.rank(axis=0, method='average', ascending=True)

Unnamed: 0,2024-06-01 to 2024-07-01,2024-07-01 to 2024-08-01,mean_rmse
no_annual,7.0,7.0,7.0
with_cooler_valves,3.0,2.0,2.0
with_coolerv_activesp,1.0,1.0,1.0
with_coolerv_activesp_co2,2.0,3.0,3.0
annual_and_cooler_combi,6.0,5.0,5.0
annual_and_coolerv_activesp_combi,4.0,4.0,4.0
annual_and_coolerv_activesp_co2_combi,5.0,6.0,6.0
