In [1]:
import os, glob, csv, bz2, pickle

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib as mpl

from itertools import combinations
import itertools

from time import time
from matplotlib import rc

from sklearn.metrics import confusion_matrix, jaccard_score, f1_score, accuracy_score, balanced_accuracy_score

# # #rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
# rc('font',**{'family':'serif','serif':['Computer Modern']})
# rc('text', usetex = True)

plt.rcParams['axes.facecolor'] = 'silver'

# Shyncronize Vatic reports with clnSim scenarios

In [25]:
# This is the path to data (stract inside Sim* files)
path_to_vatic  = r"/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/"
path_to_data   = r"/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/"
path_to_fdepth = r"/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/FunctionalDepthsTexas/"

## Step 1: processing Vatic reports

In [26]:
# Load vatic summary for each scenario in given date
def _load_vatic_summary(path):
    idx_ = []
    summaries_ = []
    for file_ in glob.glob(path + r'/*'):
        
        idx_.append(int(file_[50 + file_[50:].find('_s') + 2:-11]))
        summaries_.append(pd.read_pickle(file_).to_numpy()[..., np.newaxis])

    return np.concatenate(summaries_, axis = 2)[..., np.argsort(np.array(idx_))]


# Load clnSim compressed scenarios file
def _load_clnSim_scen(date, path):
    with bz2.BZ2File(path + 'scens_{}.p.gz'.format(date), 'r') as f:
        return pickle.load(f)
    
# Format scenarios file per cohort
def _get_assets_in_cohort(scen_, cohorts_):
    return [np.concatenate([scen_[cohort].to_numpy()[:, i*24:(i + 1)*24][..., np.newaxis] 
                     for i in range(int(scen_[cohort].shape[1]/24))], axis = 2) for cohort in cohorts_]
        
# Find All Simulations per Dates/Folders
files_ = glob.glob(path_to_vatic + r'VaticReportTexas/*')
for file_ in files_:
    print(file_)
    
    # Crate directory if it does not exist
    if os.path.exists(path_to_vatic + pd.to_datetime(file_[-10:]).strftime("%b%d")):

        Summary_ = _load_vatic_summary(file_)[:, [1, 2, 8, 5], :]
        print(Summary_.shape)
        df_ = pd.DataFrame(np.sum(Summary_[:, ...], axis = 0).T, columns = ['GenerationCostAll', 
                                                                             'LoadSheddingAll', 
                                                                             'RenewableCurtailmentAll', 
                                                                             'ReserveShortfallAll'])
        df_.to_csv(path_to_vatic + r'ProcessedDataTexas/' + pd.to_datetime(file_[-10:]).strftime("%b%d") + '/VaticOutput.csv', index = False)

/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-07-22
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-10-04
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-08-08
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-07-24
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-03-14
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-02-13
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-02-14
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-01-02
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-11-02
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-12-01
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/VaticReportTexas/2018-05-27

## Step 2: processing clnSim scenarios to aggregated-level

In [27]:
# Load csv avoiding errors in the headers
def __read_csv_file(file_name):
    with open(file_name) as _file:
        rows_ = []
        for row in csv.reader(_file, delimiter = ','):
            row = np.asarray(row)[:, np.newaxis]
            rows_.append(row)
    rows_.pop(0)
    return np.concatenate(rows_, axis = 1).T

L_, S_, W_, G_, N_, Y_ = [], [], [], [], [], []

# Find All Simulations per Dates/Folders
files_ = glob.glob(path_to_data + r'*')
for file in files_:
    print(file)
            
    L_.append( __read_csv_file(file + '/LoadScenariosAggregated.csv').astype(np.float_)[..., np.newaxis] )
    S_.append( __read_csv_file(file + '/SolarScenariosAggregated.csv').astype(np.float_)[..., np.newaxis] )
    W_.append( __read_csv_file(file + '/WindScenariosAggregated.csv').astype(np.float_)[..., np.newaxis] )
    G_.append( S_[-1] + W_[-1] )
    N_.append( L_[-1] - S_[-1] - W_[-1] )
    Y_.append( __read_csv_file(file + '/VaticOutput.csv').astype(np.float_)[..., np.newaxis] )

L_ = np.concatenate(L_, axis = -1)
S_ = np.concatenate(S_, axis = -1)
W_ = np.concatenate(W_, axis = -1)
G_ = np.concatenate(G_, axis = -1)
N_ = np.concatenate(N_, axis = -1)
Y_ = np.concatenate(Y_, axis = -1)
print(L_.shape, S_.shape, W_.shape, G_.shape, N_.shape, Y_.shape)

dates_ = ['2018-02-14', '2018-02-13', '2018-08-08', '2018-06-04', '2018-05-25',
          '2018-11-13', '2018-09-14', '2018-04-24', '2018-05-10', '2018-04-01',
          '2018-12-27', '2018-04-09', '2018-07-22', '2018-07-24', '2018-03-14',
          '2018-01-02', '2018-06-30', '2018-02-26', '2018-12-01', '2018-10-17',
          '2018-11-02', '2018-10-02', '2018-09-04', '2018-08-18', '2018-01-20']

idx_dates_ = np.argsort(dates_)
print(np.array(dates_)[idx_dates_])

with open(path_to_fdepth + 'ProcessedAggregatedData.pkl', 'wb') as handle:
    pickle.dump([L_[..., idx_dates_], S_, W_[..., idx_dates_], G_[..., idx_dates_], N_[..., idx_dates_], Y_[..., idx_dates_]], handle, protocol = pickle.HIGHEST_PROTOCOL)

/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Feb14
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Feb13
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Aug08
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Jun04
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/May27
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Nov13
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Sep14
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Apr24
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/May10
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Apr01
/Users/Guille/Dropbox/ProcessedDataTexas/vatic_output/Texas-7k/ProcessedDataTexas/Dec27
/Users/Guille/Dropbox/ProcessedD

## Step 3: processing clnSim scenarios to zonal-level

In [28]:
# Vatic reports
L_ = []
S_ = []
W_ = []
dates_ = []
# Loop over files in directory
for file_ in glob.glob(path_to_vatic + 'ProcessedScenTexas/' + '*'):
    print(file_[-14:-4])
    dates_.append(file_[-14:-4])
    # Load data
    with open(file_, 'rb') as _f:
        scen_ = pickle.load(_f)
    # Split data per asset
    load_data_, load_zones_, _              = scen_[0]
    solar_data_, solar_zones_, solar_names_ = scen_[1]
    wind_data_, wind_zones_, wind_names_    = scen_[2]
    load_zones_                             = np.array(load_zones_)
    # Processing data
    solar_ = []
    wind_  = []
    for zone in load_zones_:
        idx_ = solar_zones_ == zone

        if idx_.sum() == 0:
            solar_.append(np.zeros((1, 1000, 24)))
        else:
            solar_.append(np.sum(solar_data_[idx_, ...], axis = 0)[np.newaxis, ...])

        idx_ = wind_zones_ == zone
        if idx_.sum() == 0:
            wind_.append(np.zeros((1, 1000, 24)))
        else:
            wind_.append(np.sum(wind_data_[idx_, ...], axis = 0)[np.newaxis, ...])
            
    # Formated data
    L_.append(load_data_)
    S_.append(np.swapaxes(np.concatenate(solar_, axis = 0), 1, 2))
    W_.append(np.swapaxes(np.concatenate(wind_, axis = 0), 1, 2))
    
idx_dates_ = np.argsort(dates_)
print(np.array(dates_)[idx_dates_])

L_ = np.swapaxes(np.swapaxes(np.swapaxes(np.stack(L_), 0, -1), 1, 2), 1, 0)[..., idx_dates_]
S_ = np.swapaxes(np.swapaxes(np.swapaxes(np.stack(S_), 0, -1), 1, 2), 1, 0)[..., idx_dates_]
W_ = np.swapaxes(np.swapaxes(np.swapaxes(np.stack(W_), 0, -1), 1, 2), 1, 0)[..., idx_dates_]
print(L_.shape, S_.shape, W_.shape, Y_.shape)
    

# Save processed data
with open(path_to_fdepth + 'ProcessedZonalTexas.pkl', 'wb') as _f:
    pickle.dump([L_, S_, W_, S_ + W_, L_ - S_ - W_, Y_], _f, protocol = pickle.HIGHEST_PROTOCOL)

2018-05-10
2018-09-04
2018-02-26
2018-12-27
2018-09-14
2018-11-13
2018-01-02
2018-08-18
2018-04-24
2018-06-30
2018-03-14
2018-04-09
2018-08-08
2018-11-02
2018-01-20
2018-04-01
2018-06-04
2018-12-01
2018-10-04
2018-02-13
2018-07-22
2018-05-27
2018-10-17
2018-07-24
2018-02-14
['2018-01-02' '2018-01-20' '2018-02-13' '2018-02-14' '2018-02-26'
 '2018-03-14' '2018-04-01' '2018-04-09' '2018-04-24' '2018-05-10'
 '2018-05-27' '2018-06-04' '2018-06-30' '2018-07-22' '2018-07-24'
 '2018-08-08' '2018-08-18' '2018-09-04' '2018-09-14' '2018-10-04'
 '2018-10-17' '2018-11-02' '2018-11-13' '2018-12-01' '2018-12-27']
(24, 1000, 8, 25) (24, 1000, 8, 25) (24, 1000, 8, 25) (1000, 4, 25)
