## Combining and preparing final data

- The goal here is to combine the data so that we end up with the following feature sets:
    - **Z**: base features
    - **ZS**: base features + MeSH-based symptoms
    - **T0**: original text
    - **T1**: t5-small
    - **T2**: bart large cnn
    - **T3**: medical summarization

    - **ZST0**: base + MeSH + original text
    - **ZST1**: base + MeSH + t5-small
    - **ZST2**: base + MeSH + bart large cnn
    - **ZST3**: base + MeSH + medical summarization

    


In [9]:
import os
import pickle

import numpy as np

from common import *

# FUNCTIONS
def save_list_as_pickle(L, given_path, file_name):
    """Pickle ``L`` to ``<given_path>/<file_name>.pkl``.

    Args:
        L: The object to serialize (any picklable object is accepted).
        given_path: Directory that will hold the file. It is created, with
            any missing parents, when it does not exist yet. An empty string
            means the current working directory.
        file_name: File name without the ``.pkl`` extension.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened for writing.
    """
    # An empty path refers to the working directory, which always exists;
    # os.makedirs('') would raise, so skip directory creation in that case.
    if given_path and not os.path.exists(given_path):
        # exist_ok=True closes the window between the existence check above
        # and the creation: another process may create the directory first.
        os.makedirs(given_path, exist_ok=True)
        print(f'\tDirectory created: {given_path}')

    # Save the object as a pickle file
    print(f'\tSaving to {given_path}/{file_name}.pkl')
    with open(os.path.join(given_path, f'{file_name}.pkl'), 'wb') as file:
        pickle.dump(L, file)

def classify_los_3_classes(los_list):
    """Bucket every value of ``los_list`` into one of three integer classes.

    * ``0`` -- the value is below 3
    * ``1`` -- the value lies in the closed range [3, 7]
    * ``2`` -- everything else (i.e. values that satisfy neither test above)

    Returns:
        A new list of class labels, in the same order as the input.
    """
    labels = []
    for los in los_list:
        if los < 3:
            labels.append(0)
        elif 3 <= los <= 7:
            labels.append(1)
        else:
            labels.append(2)
    return labels

# ---- 1. Load the individual feature blocks and the text embeddings ----
print('Reading the data ...')
XB, XD, XL, XM, XP, XS = map(
    load_dict_from_pickle,
    (
        '../../Data/XY/XB.pkl',
        '../../Data/XY/XD.pkl',
        '../../Data/XY/XL.pkl',
        '../../Data/XY/XM.pkl',
        '../../Data/XY/XP.pkl',
        '../../Data/XY/XS.pkl',
    ),
)

T0, T1, T2, T3 = map(
    load_dict_from_pickle,
    (
        '../../Data/XY/EMB_TEXT.pkl',
        '../../Data/XY/EMB_1_t5_small2_SUMMARY.pkl',
        '../../Data/XY/EMB_3_bart_large_cnn_SUMMARY.pkl',
        '../../Data/XY/EMB_4_medical_summarization_SUMMARY.pkl',
    ),
)

# ---- 2. Assemble the combined matrices by joining columns (axis=1) ----
# Z   : XB + XD + XL + XM + XP
# ZS  : Z + XS
# ZSTk: ZS + Tk, one matrix per text-embedding variant
Z = np.concatenate([XB, XD, XL, XM, XP], axis=1)
ZS = np.concatenate([Z, XS], axis=1)
ZST0, ZST1, ZST2, ZST3 = (
    np.concatenate([ZS, text_emb], axis=1) for text_emb in (T0, T1, T2, T3)
)

# ---- 3. Persist every matrix under its own name ----
print('Saving ...')

for _file_stem, _feature_matrix in (
    ('Z', Z),
    ('ZS', ZS),
    ('T0', T0),
    ('T1', T1),
    ('T2', T2),
    ('T3', T3),
    ('ZST0', ZST0),
    ('ZST1', ZST1),
    ('ZST2', ZST2),
    ('ZST3', ZST3),
):
    save_list_as_pickle(_feature_matrix, '../../Data/XY', _file_stem)
# Drop the loop helpers so the notebook namespace holds the same names as before.
del _file_stem, _feature_matrix

# Quick sanity output: dimensions of the widest combined matrix.
print(ZST3.shape)

# ---- 4. Build the 3-class target from the values stored in LoS.pkl ----
Y = load_dict_from_pickle('../../Data/XY/LoS.pkl')
newY = classify_los_3_classes(Y)
save_list_as_pickle(newY, '../../Data/XY', 'Y')

Reading the data ...
Saving ...
	Saving to ../../Data/XY/Z.pkl
	Saving to ../../Data/XY/ZS.pkl
	Saving to ../../Data/XY/T0.pkl
	Saving to ../../Data/XY/T1.pkl
	Saving to ../../Data/XY/T2.pkl
	Saving to ../../Data/XY/T3.pkl
	Saving to ../../Data/XY/ZST0.pkl
	Saving to ../../Data/XY/ZST1.pkl
	Saving to ../../Data/XY/ZST2.pkl
	Saving to ../../Data/XY/ZST3.pkl
(42142, 2676)
