In [30]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

import DataLoading

In [2]:
def _read_float_csv(path):
    """Read a CSV (first row as header, first column as index) and cast all columns to float."""
    return pd.read_csv(path, header=[0],index_col=0).astype(float)


def load_data_ext_out(folder,executions,period,sc,il_os = None):
    """Load the input, system-cost output and optional extra output CSVs of each execution.

    For every ``execution`` the following files are read from ``folder`` and
    cast to float:

    * ``input_f_{sc}_{execution}_{period}.csv``
    * ``output_f_{sc}_{execution}_{period}_SystemCosts.csv``
    * ``output_f_{sc}_{execution}_{period}_{il_o}.csv`` for each ``il_o`` in
      ``il_os`` (only when ``il_os`` is not ``None``)

    Parameters
    ----------
    folder : str
        Directory (or path prefix) that contains the CSV files.
    executions : iterable of str
        Execution names; each becomes a key of the returned dictionaries.
    period, sc : str
        Tags that are interpolated into the file names.
    il_os : iterable of str, optional
        Names of additional output files to load per execution. ``None``
        (the default) loads none.

    Returns
    -------
    tuple of dict
        ``(dfs_in, dfs_out, dfs_inter)``. ``dfs_in`` and ``dfs_out`` map
        execution -> DataFrame. ``dfs_inter`` maps execution ->
        ``{il_o: DataFrame}`` and is empty when ``il_os`` is ``None``.

    Raises
    ------
    Whatever ``pandas.read_csv`` / ``DataFrame.astype`` raise for a missing
    file or a non-numeric column.
    """
    dfs_in = dict()
    dfs_out = dict()
    dfs_inter = dict()
    for execution in executions:
        # Every file of one execution shares this stem.
        stem = f"{sc}_{execution}_{period}"

        df_in_e = _read_float_csv(f"{folder}/input_f_{stem}.csv")
        df_out_e = _read_float_csv(f"{folder}/output_f_{stem}_SystemCosts.csv")

        # Progress report: which input file was loaded and how many columns it has.
        print(f"input_f_{sc}_{execution}_{period}.csv")
        print(len(df_in_e.columns))

        # Identity check on purpose: `!= None` is evaluated element-wise by
        # array-like arguments and then fails inside the `if`.
        if il_os is not None:
            dfs_inter[execution] = {
                il_o: _read_float_csv(f"{folder}/output_f_{stem}_{il_o}.csv")
                for il_o in il_os
            }

        dfs_in[execution] = df_in_e
        dfs_out[execution] = df_out_e

    return dfs_in,dfs_out,dfs_inter

In [71]:
def split_tr_val_te_ext_out(dfs_in,dfs_out,dfs_inter_j,executions,te_s,val_s):
    """Split inputs, summed outputs and intermediate outputs into train/val/test tensors.

    For each execution the three frames are converted to torch tensors (the
    output frame is first summed across its columns, giving one value per
    row) and split twice with ``train_test_split(..., shuffle=False)``:
    first into (train+val) / test, then the first part into train / val.

    Parameters
    ----------
    dfs_in, dfs_out, dfs_inter_j : dict
        Map execution -> DataFrame.
    executions : iterable of str
        Executions to process.
    te_s : float or int
        Test size, passed as ``test_size`` to the first split (a fraction of
        the full data set).
    val_s : float or int
        Validation size, passed as ``test_size`` to the second split (a
        fraction of what remains after removing the test set).

    Returns
    -------
    tuple of dict
        ``(ts_in, ts_out, ts_inter)``, each of the form
        ``{"train" | "test" | "val": {execution: tensor}}``. The tensors are
        not normalized here; that is done later over all executions together.
    """
    subsets = ("train", "test", "val")
    ts_in = {subset: dict() for subset in subsets}
    ts_out = {subset: dict() for subset in subsets}
    ts_inter = {subset: dict() for subset in subsets}

    for execution in executions:
        # DataFrame -> numpy -> torch; the target is the row-wise sum of all output columns.
        inputs = torch.from_numpy(dfs_in[execution].to_numpy())
        targets = torch.from_numpy(dfs_out[execution].to_numpy().sum(axis=1))
        inters = torch.from_numpy(dfs_inter_j[execution].to_numpy())

        # First split: separate the test set from everything else.
        (rest_in, test_in,
         rest_out, test_out,
         rest_inter, test_inter) = train_test_split(
            inputs, targets, inters, test_size=te_s, shuffle=False)

        # Second split: divide the remainder into training and validation sets.
        (train_in, val_in,
         train_out, val_out,
         train_inter, val_inter) = train_test_split(
            rest_in, rest_out, rest_inter, test_size=val_s, shuffle=False)

        ts_in["train"][execution] = train_in
        ts_in["val"][execution] = val_in
        ts_in["test"][execution] = test_in

        ts_out["train"][execution] = train_out
        ts_out["val"][execution] = val_out
        ts_out["test"][execution] = test_out

        ts_inter["train"][execution] = train_inter
        ts_inter["val"][execution] = val_inter
        ts_inter["test"][execution] = test_inter

    return ts_in,ts_out,ts_inter


In [83]:
def concat_and_normalize_ext_out(ts_in,ts_out,ts_inter,executions):
    """Concatenate the per-execution splits and max-abs normalize inputs and intermediates.

    For each subset ("train", "val", "test") the tensors of all executions
    are concatenated along dimension 0, in the order given by ``executions``.
    Inputs and intermediate outputs are then divided column-wise by the
    largest absolute value found over train, test and validation data
    together. Columns whose maximum is zero produce NaN on division and are
    mapped to 0 by ``torch.nan_to_num``. The outputs (``ts_out``) are
    concatenated but not normalized.

    Parameters
    ----------
    ts_in, ts_out, ts_inter : dict
        ``{"train" | "val" | "test": {execution: tensor}}``.
    executions : iterable of str
        Executions to include; must not be empty.

    Returns
    -------
    tuple of dict
        ``(d_ft_in, d_ft_out, d_ft_inter)``, each with the keys "train",
        "val" and "test".

    Raises
    ------
    ValueError
        If ``executions`` is empty.
    """
    executions = list(executions)
    if not executions:
        raise ValueError("executions must contain at least one execution")

    def _stack(ts, subset):
        # One torch.cat per subset instead of re-concatenating inside a loop.
        return torch.cat([ts[subset][execution] for execution in executions])

    tr_in, te_in, val_in = (_stack(ts_in, s) for s in ("train", "test", "val"))
    tr_out, te_out, val_out = (_stack(ts_out, s) for s in ("train", "test", "val"))
    tr_inter, te_inter, val_inter = (_stack(ts_inter, s) for s in ("train", "test", "val"))

    # Column-wise scaling factors, computed over all three subsets together.
    maxs = {
        "in": torch.cat((tr_in, te_in, val_in)).abs().max(dim=0).values,
        "inter": torch.cat((tr_inter, te_inter, val_inter)).abs().max(dim=0).values,
    }

    def _scale(t, key):
        # 0 / 0 yields NaN for all-zero columns; nan_to_num turns that into 0.
        return torch.nan_to_num(t / maxs[key])

    d_ft_in = {"train": _scale(tr_in, "in"),"val": _scale(val_in, "in"),"test": _scale(te_in, "in")}
    d_ft_out = {"train": tr_out,"val": val_out,"test": te_out}
    d_ft_inter = {"train": _scale(tr_inter, "inter"),"val": _scale(val_inter, "inter"),"test": _scale(te_inter, "inter")}

    return d_ft_in,d_ft_out,d_ft_inter


In [19]:
def join_frames_inter_layer(dfs_inter):
    """Join the intermediate-output frames of each execution side by side.

    Parameters
    ----------
    dfs_inter : dict
        ``{execution: {name: DataFrame}}``.

    Returns
    -------
    dict
        ``{execution: DataFrame}`` where each frame is the column-wise
        (``axis=1``) concatenation of that execution's frames, in the
        insertion order of the inner dictionary.
    """
    return {
        execution: pd.concat(list(frames.values()), axis=1)
        for execution, frames in dfs_inter.items()
    }

In [13]:
# Inspect what was loaded as intermediate output for this execution.
# NOTE(review): `dfs_inter` is only assigned in a cell further down, so on a
# fresh kernel this cell must run after that one.
dfs_inter["Network_Existing_Generation_Full"]


Unnamed: 0_level_0,101_CT_1,101_CT_2,101_PV_1,101_PV_2,101_PV_3,101_PV_4,101_STEAM_3,101_STEAM_4,102_CT_1,102_CT_2,...,N_116_N_119_eac1,N_117_N_118_eac1,N_117_N_122_eac1,N_118_N_121_eac1,N_118_N_121_eac2,N_119_N_120_eac1,N_119_N_120_eac2,N_120_N_123_eac1,N_120_N_123_eac2,N_121_N_122_eac1
LoadLevel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01-01 00:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.218225,1.048046e-13,-1.048046e-13,-0.126601,0.000000,0.149412,0.000000,-0.313527,0.414275,1.028067e-13
01-01 01:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.362954,-1.261298e-02,-1.942157e-01,0.000000,-0.139305,0.000000,0.294092,0.000000,0.245394,0.000000e+00
01-01 02:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.566124,-5.128005e-01,-1.522577e-01,0.111183,-0.752714,0.000000,0.496153,0.446671,0.000000,1.522577e-01
01-01 03:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.576701,-6.517065e-01,-9.281058e-02,-0.784878,0.000000,0.504317,0.000000,0.000000,0.453128,0.000000e+00
01-01 04:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.958394,-8.900097e-01,0.000000e+00,0.000000,-1.014065,-1.038019,0.000000,-1.094328,0.000000,-1.857127e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12-09 19:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.546701,-6.287569e-01,-4.863786e-01,0.000000,-0.785065,-0.636252,0.000000,-0.699581,0.000000,-3.694580e-01
12-09 20:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.443962,-4.946630e-02,-6.749629e-01,-0.209100,0.000000,0.000000,-0.530729,-0.592090,0.000000,-1.808066e-01
12-09 21:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.364393,0.000000e+00,-4.486570e+00,-0.146719,0.000000,-0.444141,0.000000,-0.500538,0.000000,3.632750e+00
12-09 22:00:00+01:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.845543,1.340774e-01,-1.019884e+00,0.000000,0.000000,0.000000,0.772666,0.000000,0.721129,2.014181e-01


In [86]:
# Executions to load; each name is used as a dictionary key throughout the pipeline.
executions = ["Network_Existing_Generation_Full"]

# Scenario and period tags that are interpolated into the CSV file names,
# and the folder the files are read from.
sc = "sc01"
period = "2030"
folder = "../Data/RTS24_AC_12w_ext_o"
# Test size as a fraction of the full data set; validation size as a fraction
# of what remains after the test set is removed.
te_s = 0.1
val_s = 0.2

# Pipeline: load the CSVs (plus the "PowerOutput" and "PowerFlow" outputs),
# join those extra outputs column-wise, split into train/val/test per
# execution, then concatenate over executions and normalize.
dfs_in,dfs_out,dfs_inter = load_data_ext_out(folder,executions,period,sc,["PowerOutput","PowerFlow"])
dfs_inter_j = join_frames_inter_layer(dfs_inter)
ts_in,ts_out,ts_inter = split_tr_val_te_ext_out(dfs_in,dfs_out,dfs_inter_j,executions,te_s,val_s)
d_ft_in, d_ft_out,d_ft_inter = concat_and_normalize_ext_out(ts_in,ts_out,ts_inter,executions)


input_f_sc01_Network_Existing_Generation_Full_2030.csv
1203


In [None]:
# Pair the normalized inputs with their targets, cast to float32.
train = TensorDataset(d_ft_in['train'].float(), d_ft_out['train'].float())
validation = TensorDataset(d_ft_in['val'].float(), d_ft_out['val'].float())

# Batched loaders. The validation loader must wrap the validation set;
# wrapping `train` here would make validation metrics measure training data.
training_loader = DataLoader(train,batch_size=32)
validation_loader = DataLoader(validation,batch_size=32)


In [85]:
# Split the same frames twice: once with DataLoading.split_tr_val_te (defined
# outside this notebook) and once with split_tr_val_te_ext_out, so that the
# two results can be compared.
# Note: this rebinds the notebook-level ts_in, ts_out and ts_inter.
ts_in,ts_out =  DataLoading.split_tr_val_te(dfs_in,dfs_out,executions,te_s,val_s)
ts_in_2,ts_out_2,ts_inter = split_tr_val_te_ext_out(dfs_in,dfs_out,dfs_inter_j,executions,te_s,val_s)

In [61]:
# Verify that both splitters return identical input and output tensors for
# every subset of this execution.
key2 = "Network_Existing_Generation_Full"
for key in ["train", "test","val"]:
    print(key,"in", torch.equal(ts_in[key][key2],ts_in_2[key][key2]))
    print(key,"out", torch.equal(ts_out[key][key2],ts_out_2[key][key2]))

train in True
train out True
test in True
test out True
val in True
val out True


In [75]:
# Shapes of the training tensors for this execution, in the order: intermediate outputs, outputs, inputs.
ts_inter["train"]["Network_Existing_Generation_Full"].shape,ts_out["train"]["Network_Existing_Generation_Full"].shape,ts_in["train"]["Network_Existing_Generation_Full"].shape

(torch.Size([1572, 89]), torch.Size([1572]), torch.Size([1572, 1203]))

In [45]:
# List the executions present in the training split.
ts_in["train"].keys()

dict_keys(['Network_Existing_Generation_Full'])