In [11]:
from pathlib import Path
import pandas as pd
import numpy as np
import yaml

class Backup:
    """In-memory, write-once store for objects that are expensive to rebuild.

    Objects are kept in ``data_dict`` under a caller-chosen key. A key that is
    already present is never overwritten.
    """

    def __init__(self):
        # Maps key -> stored object.
        self.data_dict = {}

    def save_with_key(self, data, key, copy=False):
        """Store ``data`` under ``key`` unless that key is already taken.

        Args:
            data: Object to store. Must provide ``.copy()`` when ``copy`` is True.
            key: Dictionary key to store the object under.
            copy: When True, store ``data.copy()`` instead of ``data`` itself so
                later in-place changes to the caller's object do not reach the
                stored one.

        If ``key`` already exists, nothing is stored and a notice is printed.
        """
        if key in self.data_dict:
            # Fixed message: the original wrote "f{key}", which printed a stray
            # literal 'f' in front of the key.
            print(f'Data Key "{key}" already exists, Skip Saving')
        else:
            self.data_dict[key] = data.copy() if copy else data

    def check_key(self, key):
        """Return True if something is stored under ``key``, else False."""
        return key in self.data_dict

    def copy_data_with_key(self, key):
        """Return ``.copy()`` of the object stored under ``key``.

        Raises:
            KeyError: If nothing is stored under ``key``.
        """
        return self.data_dict[key].copy()
    
# Notebook-wide cache instance used by later cells.
backup = Backup()

# Parse the shared base configuration.
with open('../configs/base.yaml', mode='r', encoding='utf-8') as file:
    base_config = yaml.safe_load(file)

base_output_path = '/home/minchan/electric/output/xgboost'

### Ensemble members
# One tuple per run: (name, ver, date_dir, time_dir).
# `name` ends with ':' because it is later used as a key prefix.
ensemble_list = [
    ('ens1:', '0626_re', '24-6-28', '11-56-42'),
    ('ens2:', '0627_re', '24-6-28', '11-38-19'),
]

names = [entry[0] for entry in ensemble_list]

# name -> <base_output_path>_<ver>/<date_dir>/<time_dir>
result_paths = {}
for name, ver, date_dir, time_dir in ensemble_list:
    result_paths[name] = Path(base_output_path + f'_{ver}') / date_dir / time_dir

In [12]:
def npz_path_to_dict(npz_path):
    """Load every array of an ``.npz`` archive into a plain dict.

    Args:
        npz_path: Path of the ``.npz`` file to read.

    Returns:
        dict mapping each name in the archive to its loaded array.
    """
    # The object returned by np.load for an .npz keeps the file open; the
    # context manager closes it once all arrays have been read into memory.
    with np.load(npz_path) as npz:
        return {key: npz[key] for key in npz.files}

# Gather the arrays of every run into one flat mapping.
total_answers = {}
for name, result_path in result_paths.items():
    test_answers_path = result_path / 'test_answers.npz'
    # Prefix each array key with the run name so keys from different runs
    # cannot collide.
    test_answers_dict = {
        name + key: value
        for key, value in npz_path_to_dict(test_answers_path).items()
    }
    total_answers.update(test_answers_dict)

In [13]:
# Blend weights, keyed by '<run name><array key>' -- the same composite keys
# that total_answers uses.
# Six entries at 0.0833333 (about 1/12) plus two at 0.25 sum to 0.9999998,
# i.e. marginally below 1.
# NOTE(review): 'val_year:YYYY' / 'total' presumably identify which model
# variant produced the array -- confirm against the training code.
ensemble_weights = {
    'ens1:val_year:2020': 0.0833333,
    'ens1:val_year:2021': 0.0833333,
    'ens1:val_year:2022': 0.0833333,
    'ens1:total': 0.25,
    'ens2:val_year:2020': 0.0833333,
    'ens2:val_year:2021': 0.0833333,
    'ens2:val_year:2022': 0.0833333,
    'ens2:total': 0.25,
}

# Alternative weighting: only the two 'total' entries, equally weighted.
# Not referenced by the cells visible here.
ensemble_weights2 = {
    'ens1:total': 0.5,
    'ens2:total': 0.5, 
}

In [14]:
## --> full ensemble; submitted as 0627-1
# Fail loudly if a weighted entry has no matching array: otherwise it would be
# skipped silently and the blend would be under-weighted without any warning.
missing_keys = [key for key in ensemble_weights if key not in total_answers]
if missing_keys:
    raise KeyError(f'ensemble_weights keys missing from total_answers: {missing_keys}')

# Accumulator with the shape and dtype of the first loaded array.
ensemble = np.zeros_like(next(iter(total_answers.values())))
# Weighted sum; arrays without an entry in ensemble_weights are left out.
for key, value in total_answers.items():
    if key in ensemble_weights:
        ensemble += value * ensemble_weights[key]

In [15]:
# Display the weighted-sum array as a quick sanity check.
ensemble

array([ 98.594864,  87.22595 ,  79.83209 , ..., 127.43308 , 119.246704,
       105.11684 ], dtype=float32)

In [16]:
# Selected answer array: the full blend by default. A single member can be
# assigned instead, e.g. total_answers['ens1:val_year:2020'].
final_answers = ensemble

___

### Final Submission

In [17]:
# When True and the frame is already cached, restore it from the backup
# instead of keeping whatever `submit_format` currently holds.
reload = True

if not backup.check_key('submit_format'):
    submit_format = pd.read_csv(Path('/media/user/h/minchan/elec_data/assets', 'electric_test_raw.csv'))
    # Store a copy, not the frame itself: a later cell assigns a new column
    # onto `submit_format` in place, which would otherwise also modify the
    # cached object and make a reload return the already-modified frame.
    backup.save_with_key(submit_format, 'submit_format', copy=True)
elif reload:
    submit_format = backup.copy_data_with_key('submit_format')

In [18]:
# Fill the answer column from `final_answers` (the selection made in the
# cell above) rather than from `ensemble` directly, so that changing the
# selection actually changes what is submitted.
submit_format['elect'] = final_answers

In [19]:
# Preview the first rows to confirm the 'elect' column was filled in.
submit_format.head()

Unnamed: 0,NUM,TM,HH24,STN,nph_ta,nph_hm,nph_ws_10m,nph_rn_60m,nph_ta_chi,weekday,week_name,elect
0,4816,2023-01-01 01:00:00,1,752,3.0,68.6,2.9,0.0,-0.1,6,1.0,98.594864
1,4816,2023-01-01 02:00:00,2,752,3.1,69.4,2.7,0.0,0.3,6,1.0,87.225952
2,4816,2023-01-01 03:00:00,3,752,3.6,68.3,2.3,0.0,1.2,6,1.0,79.832092
3,4816,2023-01-01 04:00:00,4,752,4.0,69.2,3.1,0.0,1.1,6,1.0,76.121529
4,4816,2023-01-01 05:00:00,5,752,4.2,69.5,2.5,0.0,2.0,6,1.0,74.160797


In [20]:
# Write the submission CSV into the current working directory, without the
# DataFrame index.
# NOTE(review): the file name '240228' does not match the '24-6-28' run
# directories listed in ensemble_list -- confirm the name is intended.
submit_format.to_csv(Path('./240228.csv'), index=False)