# 1 Imports and inputs

### 1.1 Import packages

In [1]:
import io, os, sys, types
import datetime
import glob
import gdxpds as gp
import pandas as pd
import numpy as np

### 1.2 User inputs

In [2]:
# Input file type: 'gdx' or 'csv'.
# NOTE(review): this value is not referenced by the code visible in this
# file, which only reads gdx files -- confirm whether 'csv' is supported.
filetype_input = 'gdx' #Choose input file type: 'gdx' or 'csv'
# GAMS installation directory; handed to gdxpds when a gdx file is read.
gams_dir = 'C:/GAMS/win64/28.2'
# Each entry is the name of a sub-folder of ./input/results that is searched
# for *.gdx files.
markets = ['DayAhead'] #Choose from: 'DayAhead', 'Balancing', 'FullYear', 'Investment'
# Prefix of the files written to ./output.
output_name = 'TrialRunsOctober'
# Output formats to write; a format is produced when its name is in the list.
output_type = ['csv', 'Excel'] #Choose desired output type, from: 'Excel' or 'csv' (or both)

### 1.3 Read which variables and files are relevant

In [3]:
# Select which variables should be displayed.
# Forward slashes are used because they work on every platform and, unlike
# backslashes in a normal string literal, cannot form invalid escape
# sequences.
include_file = pd.read_csv('./input/include.csv', encoding='utf8')

# Keep only the variables flagged with 'YES', drop the flag column and use
# the variable names as index.
include_file = include_file[include_file['include'] == 'YES']
include_file = include_file.drop(columns='include').set_index('variable')

# List of all variable names that are to be processed.
var_list = list(include_file.index)

### 1.4 Create column names dictionary

In [4]:
# Mapping from the original column names ('Old') to the names used in the
# output ('New'). Forward slashes keep the path portable and free of invalid
# escape sequences.
df_colnames = pd.read_csv('./input/Dict_column_names.csv')
dict_colnames = dict(zip(df_colnames['Old'], df_colnames['New']))

# 2 Processing

### 2.1 Function 1: reading gdx-files

In [5]:
def _parse_gdx_path(gdx_file):
    """Derive ``(scenario, year, subset, market)`` from the path of a gdx file.

    The file name is expected to end in ``..._<scenario>_<year>_<subset>.gdx``
    (the subset itself may contain underscores when a prefix is present) and
    the market is the name of the folder that contains the file. When the
    file name holds fewer than three underscore-separated fields, the string
    'nan' is used for scenario, year and subset.
    """
    # glob returns '\\' separators on Windows and '/' elsewhere; normalise so
    # that the parsing below is platform independent.
    normalised = gdx_file.replace('\\', '/')
    directory, _, filename = normalised.rpartition('/')
    market = directory.rpartition('/')[2]

    # Only the file name is parsed, so underscores in folder names cannot
    # shift the fields.
    stem = os.path.splitext(filename)[0]
    fields = stem.split('_', 3)
    if len(fields) >= 3:
        scenario, year, subset = fields[-3:]
    else:
        scenario = year = subset = 'nan'
    return scenario, year, subset, market


def df_creation(gdx_file, varname):
    """Load one symbol of a gdx file into a dataframe with run metadata.

    Parameters
    ----------
    gdx_file : str
        Path of the gdx file.
    varname : str
        Name of the symbol to read from the file.

    Returns
    -------
    pandas.DataFrame
        The symbol data with the added columns 'Scenario', 'Market' and
        'run' ('<scenario>_<year>_<subset>'); 'run' is the first column.
    """
    scenario, year, subset, market = _parse_gdx_path(gdx_file)

    # load the gdx data
    temp = gp.to_dataframe(gdx_file, varname, gams_dir=gams_dir,
                           old_interface=False)

    # add the metadata of the current file
    temp['Scenario'] = scenario
    temp['Market'] = market
    temp['run'] = scenario + '_' + year + '_' + subset

    # move the last column ('run') to the front
    cols = list(temp.columns)
    return temp[[cols[-1]] + cols[:-1]]

### 2.2 Use function 1 to read data

In [6]:
runs = []
gdx_file_list = []

# collect the input gdx file(s) of every selected market
for market in markets:
    gdx_file_list.extend(glob.glob('./input/results/' + market + '/*.gdx'))

# all_df maps run name -> {variable name -> dataframe};
# run_dict maps gdx file path -> run name.
all_df = {}
run_dict = {}

for gdx_file in gdx_file_list:
    frames = {varname: df_creation(gdx_file, varname) for varname in var_list}

    # The run name is taken from the first dataframe that holds data; a file
    # whose selected symbols are all empty falls back to its own path.
    run = next((frame['run'].iloc[0] for frame in frames.values()
                if not frame.empty), gdx_file)

    # Two files can share a run name (e.g. the same scenario in two markets).
    # Use the file path as key in that case so that no data is overwritten
    # and every file keeps exactly one entry in `runs`.
    if run in all_df:
        run = gdx_file

    runs.append(run)
    run_dict[gdx_file] = run
    all_df[run] = frames


### 2.3 Function 2: column aggregation

In [7]:
def _drop_nan(values):
    """Return ``values`` as a list without NaN cells (blank settings cells)."""
    return [x for x in values if str(x) != 'nan']


def _write_values(frame, rows, column_out, values):
    """Store ``values`` in ``column_out`` of ``frame`` for the flagged rows."""
    if column_out not in frame.columns:
        # object dtype so that strings can live next to the NaN filler
        frame[column_out] = np.full(len(frame), np.nan, dtype=object)
    if not rows.any():
        return
    if pd.api.types.is_numeric_dtype(frame[column_out]):
        # strings are about to be written into a numeric column
        frame[column_out] = frame[column_out].astype(object)
    frame.loc[rows, column_out] = values


def _fill_by_concatenation(frame, rows, column_out, out_values):
    """Set ``column_out`` on ``rows`` to the concatenation of ``out_values``.

    A value 'column_<name>' contributes the content of column <name>, a value
    'string_<text>' contributes the literal <text>.
    """
    # Blank the target first, so that a reference to the output column itself
    # contributes an empty string.
    _write_values(frame, rows, column_out, '')
    pieces = pd.Series('', index=frame.index[rows], dtype=object)
    for val in out_values:
        if val[0:7] == 'column_':
            pieces = pieces + frame.loc[rows, val[7:]].to_numpy()
        elif val[0:7] == 'string_':
            pieces = pieces + val[7:]
    _write_values(frame, rows, column_out, pieces.to_numpy())


def _fill_by_dictionary(frame, rows, column_in, column_out, keys, out_values):
    """Translate ``column_in`` into ``column_out`` on ``rows`` via a mapping.

    The i-th key is mapped to the i-th 'dict_<value>' entry of ``out_values``;
    the texts of all 'string_<text>' entries are appended to the mapped value.
    Rows whose ``column_in`` value has no mapping are left untouched.
    """
    dict_values = [v[5:] for v in out_values if v.startswith('dict_')]
    if not dict_values:
        return
    suffix = ''.join(v[7:] for v in out_values if v.startswith('string_'))
    mapping = dict(zip(keys, dict_values))
    rows = rows & frame[column_in].isin(list(mapping)).to_numpy()
    if not rows.any():
        return
    values = [mapping[key] + suffix for key in frame.loc[rows, column_in]]
    _write_values(frame, rows, column_out, values)


def column_aggregator(input_df, settings):
    """Apply one settings sheet to a copy of ``input_df`` and return the copy.

    The header of the first column of ``settings`` selects the condition
    type. The non-NaN cells of that column are the headers of the other
    settings columns, in the order given below. A column holding a *name*
    is read from its first row; a column holding a *list* is read completely
    (NaN cells are ignored).

    ====================  ===================================================
    condition             settings columns (in order)
    ====================  ===================================================
    List_condition        column_in, key list, column_out, output values
    Dict_condition        column_in, key list, column_out, output values
    na_condition          column_out, output values
    List_list_condition   column_in1, keys1, column_in2, keys2, column_out,
                          output values
    Dict_list_condition   column_in1, keys1, column_in2, keys2, column_out,
                          output values
    na_list_condition     column_in, key list, column_out, output values
    na_dict_condition     column_in, key list, column_out, output values
    ====================  ===================================================

    ``List``: rows whose input column is in the key list get the
    concatenation of the output values ('column_<name>' / 'string_<text>').
    ``Dict``: the i-th key is translated to the i-th 'dict_<value>' entry,
    followed by the 'string_<text>' entries. ``na``: only rows whose output
    column is NaN are changed. The affected rows are determined once, before
    anything is written, so every output value is applied to the same rows.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Data to process; it is not modified.
    settings : pandas.DataFrame
        One settings sheet. A sheet without columns, or with an unknown
        condition, leaves the data unchanged.

    Returns
    -------
    pandas.DataFrame
        The processed copy of ``input_df``.
    """
    temp = input_df.copy()
    if len(settings.columns) == 0:
        return temp

    condition = str(settings.columns[0])
    info = _drop_nan(settings.iloc[:, 0])

    def name(position):
        # column name stored in the first row of the referenced column
        return settings[info[position]].iloc[0]

    def values(position):
        # all non-NaN entries of the referenced settings column
        return _drop_nan(settings[info[position]])

    def is_in(column, keys):
        return temp[column].isin(keys).to_numpy()

    def is_missing(column):
        return temp[column].isna().to_numpy()

    # Single condition
    if 'List_condition' in condition:
        _fill_by_concatenation(temp, is_in(name(0), values(1)),
                               name(2), values(3))

    if 'Dict_condition' in condition:
        every_row = np.ones(len(temp), dtype=bool)
        _fill_by_dictionary(temp, every_row, name(0), name(2),
                            values(1), values(3))

    if 'na_condition' in condition:
        _fill_by_concatenation(temp, is_missing(name(0)), name(0), values(1))

    # Two conditions
    if 'List_list_condition' in condition:
        rows = is_in(name(0), values(1)) & is_in(name(2), values(3))
        _fill_by_concatenation(temp, rows, name(4), values(5))

    if 'Dict_list_condition' in condition:
        _fill_by_dictionary(temp, is_in(name(2), values(3)), name(0),
                            name(4), values(1), values(5))

    if 'na_list_condition' in condition:
        rows = is_missing(name(2)) & is_in(name(0), values(1))
        _fill_by_concatenation(temp, rows, name(2), values(3))

    if 'na_dict_condition' in condition:
        _fill_by_dictionary(temp, is_missing(name(2)), name(0), name(2),
                            values(1), values(3))

    return temp

### 2.4 Use function 2 to create output dataframes

In [8]:
# df_dict maps variable name -> dataframe holding the data of all runs
df_dict = {}
for var in var_list:
    # Optional settings workbook of this variable; every sheet holds one
    # aggregation rule. Forward slashes keep the path portable and free of
    # invalid escape sequences.
    settings_path = './input/settings/settings_' + var + '.xlsx'
    if os.path.isfile(settings_path):
        df_test = pd.read_excel(settings_path, sheet_name=None)
    else:
        df_test = {}

    sheets = list(df_test.keys())
    frames = []
    for run in runs:
        temp = all_df[run][var]
        # the sheets are applied one after the other, in workbook order
        for sheet in sheets:
            temp = column_aggregator(input_df=temp, settings=df_test[sheet])
        frames.append(temp)

    # Concatenate once per variable: DataFrame.append no longer exists in
    # pandas 2.0 and copied the accumulated data on every call.
    df_dict[var] = pd.concat(frames, sort=False) if frames else pd.DataFrame()

### 2.5 Change column names

In [9]:
for var in var_list:
    frame = df_dict[var]
    # 'Market' and 'Scenario' become the first two columns, the helper column
    # 'run' is dropped and all other columns keep their order. Columns that
    # are absent (e.g. when no input file was found) are skipped.
    leading = [col for col in ('Market', 'Scenario') if col in frame.columns]
    remaining = [col for col in frame.columns
                 if col not in leading and col != 'run']
    # translate the column names to their output names
    df_dict[var] = frame[leading + remaining].rename(columns=dict_colnames)

# 3 Output to CSV and/or Excel

In [10]:
# Make sure the output folder exists
os.makedirs('output', exist_ok=True)

# One csv file per variable
if 'csv' in output_type:
    for var in var_list:
        csv_path = './output/' + output_name + '_' + var + '.csv'
        df_dict[var].to_csv(csv_path, index=False)

# One Excel workbook with one sheet per variable
if 'Excel' in output_type:
    excel_path = './output/' + output_name + '.xlsx'
    with pd.ExcelWriter(excel_path) as writer:
        for var in var_list:
            df_dict[var].to_excel(writer, sheet_name=var, index=False)
        