In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the validation statistics table; the first CSV column becomes the index.
stats_path = "../results/stats/stats_2019_z2_Floor6_m3_months_validation.csv"
df_results = pd.read_csv(stats_path, index_col=[0])
df_results.head()

In [None]:
# Replace every missing entry with 0 so later cell-by-cell lookups never return NaN.
df_results = df_results.fillna(value=0)

In [None]:
# Keep and print the column labels of the statistics table; the reshaping
# cell further down slices these labels by position (columns[4:]).
set_columns = df_results.columns
print(set_columns)

In [None]:
# Reshape the wide statistics row for one (history, future) setting into a
# long table `df_new` with one row per statistics column and the fields
# history, future, month, season, bin, value.

# Season label assigned to each month number that can appear in a column name.
month_classification = {
    3: 'Winter',
    4: 'Spring',
    5: 'Spring',
    6: 'Spring',
    7: 'Summer',
    8: 'Summer',
    9: 'Summer',
    10: 'Fall'
}

# Value matched against both the `history` and `future` columns; also reused
# by the plotting cell for the title and output file names.
f = 20

# Fields shared by every output row.
dict_result = {'history': f, 'future': f}

# Keep only the rows for the chosen setting and drop the first four columns.
# The remaining column labels are parsed below as "<a>-<month>_<b>-<bin>...".
selected_f_h = df_results.loc[
    (df_results.history == f) & (df_results.future == f),
    df_results.columns[4:],
].reset_index(drop=True)

if selected_f_h.shape[0] == 0:
    # Fail with a readable message instead of an opaque KeyError on row 0.
    raise ValueError(f"df_results has no row with history == future == {f}")

# Collect plain dict records and build the frame once; concatenating a
# one-row frame per iteration copies the whole table on every step.
records = []
for col in selected_f_h.columns:
    split_col = col.split("_")
    month = int(split_col[0].split("-")[1])
    bin_index = int(split_col[1].split("-")[1])
    records.append({
        **dict_result,
        'month': month,
        'season': month_classification[month],  # KeyError for unmapped months
        'bin': bin_index,
        'value': selected_f_h.at[0, col],  # only the first matching row is used
    })

df_new = pd.DataFrame(records)

In [None]:
# Grouped bar chart of `value` per season, split by bin, saved as PNG and EPS.
sns.set_theme(style='darkgrid')

fig, ax = plt.subplots(figsize=(20, 7))
bar = sns.barplot(data=df_new, x='season', y='value', hue='bin', ax=ax)

# Hatch patterns are handed out in runs of four consecutive patches
# (patches 0-3 get the first pattern, 4-7 the second, ...); every patch from
# index 20 onwards shares the last pattern.
# NOTE(review): presumably each run of four is one bin across four seasons — confirm.
hatches = ['|', '-', 'x', '\\', '//', '+']
patches_per_hatch = 4
last_hatch = len(hatches) - 1
for i, thisbar in enumerate(bar.patches):
    thisbar.set_hatch(hatches[min(i // patches_per_hatch, last_hatch)])

ax.set_xlabel("Season", fontsize=40)
ax.set_ylabel("MAE", fontsize=40)
plt.tick_params(labelsize=35)
plt.legend(fontsize=25, title="Bins", title_fontsize=30, fancybox=True)
plt.title(f"MAE for Prediction Window  = {f}", fontsize=45)

# Write both a raster and a vector copy of the figure.
plt.savefig(f"../results/figures/bin_season_{f}.png", bbox_inches='tight', pad_inches=0)
plt.savefig(f"../results/figures/bin_season_{f}.eps", bbox_inches='tight', pad_inches=0)

In [None]:
# Feature-importance table from the file tagged month-10; first CSV column is the index.
fall_importance_path = "../results/stats/stats_2019_z2_h-10_Floor6_m3_month-10_feature_imp.csv"
df_wo_fall = pd.read_csv(fall_importance_path, index_col=[0])
df_wo_fall.head()

In [None]:
# Feature-importance tables from the files tagged month-7, month-8 and month-9,
# each read with its first CSV column as the index.
summer_importance_paths = [
    "../results/stats/stats_2019_z2_h-10_Floor6_m3_month-7_feature_imp.csv",
    "../results/stats/stats_2019_z2_h-10_Floor6_m3_month-8_feature_imp.csv",
    "../results/stats/stats_2019_z2_h-10_Floor6_m3_month-9_feature_imp.csv",
]
df_wo1_summer, df_wo2_summer, df_wo3_summer = [
    pd.read_csv(path, index_col=[0]) for path in summer_importance_paths
]

In [None]:
# Put the four importance tables side by side (concat along columns aligns
# them on their index) and label each column after its source frame.
# Assigning exactly four names requires each input to contribute one column.
importance_frames = [df_wo_fall, df_wo1_summer, df_wo2_summer, df_wo3_summer]
combined_df = pd.concat(importance_frames, axis=1)
combined_df.columns = [
    'importance_wo_fall',
    'importance_wo1_summer',
    'importance_wo2_summer',
    'importance_wo3_summer',
]
combined_df.head()

In [None]:
# Relative difference of the fall column against each summer column:
# (fall - summer) / summer, computed on the underlying arrays.
summer_column_for_diff = {
    'perc_diff1': 'importance_wo1_summer',
    'perc_diff2': 'importance_wo2_summer',
    'perc_diff3': 'importance_wo3_summer',
}
fall_importance = combined_df.importance_wo_fall.values
for diff_column, summer_column in summer_column_for_diff.items():
    summer_importance = combined_df[summer_column].values
    combined_df[diff_column] = (fall_importance - summer_importance) / summer_importance

In [None]:
# Convert the fractional differences into percentages.
# Each column is scaled from itself: 'perc_diff2' must not be derived from
# 'perc_diff3', otherwise the second comparison is lost and the later
# row-wise mean counts the third comparison twice.
# NOTE: this cell is not idempotent — re-running it without recomputing the
# fractions first multiplies the columns by 100 again.
for diff_column in ('perc_diff1', 'perc_diff2', 'perc_diff3'):
    combined_df[diff_column] = combined_df[diff_column] * 100

In [None]:
# Preview the combined table, including the perc_diff columns added above.
combined_df.head()

In [None]:
# Order rows by the 'importance_wo_fall' column, largest value first.
sorted_df = combined_df.sort_values('importance_wo_fall', ascending=False)

In [None]:
# Preview the rows with the largest 'importance_wo_fall' values.
sorted_df.head()

In [None]:
# Row-wise mean of the three percentage-difference columns, stored as
# 'perc_change'; then show the first 22 rows of the ranking.
diff_columns = ['perc_diff1', 'perc_diff2', 'perc_diff3']
sorted_df['perc_change'] = sorted_df[diff_columns].mean(axis=1)
sorted_df.head(22)

In [None]:
# Total 'importance_wo_fall' carried by the first 22 index labels of the ranking.
top_labels = sorted_df.index[:22]
sorted_df.loc[top_labels, 'importance_wo_fall'].sum()

In [None]:
# Quick horizontal bar chart of 'perc_change' for the first six rows of the
# ranking, sorted in ascending order.
sns.set_theme(style='whitegrid')

fig, ax = plt.subplots(figsize=(18, 10))

top_changes = sorted_df.loc[sorted_df.index[:6], 'perc_change'].sort_values()
# No `ax` is passed, so pandas draws on the current axes, i.e. the one just created.
bars = top_changes.plot.barh()
ax.tick_params(labelsize=30)
ax.set_xlabel("")

In [None]:
# Labelled horizontal bar chart of 'perc_change' for the first six rows of
# the ranking, saved as PNG and EPS.
sns.set_theme(style='whitegrid')

fig, ax = plt.subplots(figsize=(18, 10))

# Select once, sorted ascending, and reuse for both the bar positions and lengths.
top_changes = sorted_df.loc[sorted_df.index[:6], 'perc_change'].sort_values()
bars = ax.barh(top_changes.index, top_changes)

# Annotate each bar with its value (two decimals), rotated to read vertically.
ax.bar_label(bars, fmt='%.2f', fontsize=25, color='b', padding=5, rotation=-90)
ax.tick_params(labelsize=30)
ax.set_xlabel("Percentage Difference", fontsize=35)
ax.set_ylabel("Features", fontsize=35)
ax.set_title("Percentage differences of top 6 features", fontsize=40)
plt.tight_layout()

# Write both a raster and a vector copy of the figure.
plt.savefig("../results/figures/importance_differences.png", bbox_inches='tight', pad_inches=0)
plt.savefig("../results/figures/importance_differences.eps", bbox_inches='tight', pad_inches=0)