In [7]:
import pandas as pd
import os


# Allow pretty-display of multiple variables:
# with ast_node_interactivity set to "all", the notebook is asked to display
# every top-level expression of a cell instead of only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Widen the pandas DataFrame display so that more columns fit on screen:
# restore the default column width, then allow up to 60 columns.
pd.reset_option('display.max_colwidth')
pd.set_option('display.max_columns', 60)
# Optional tweaks, disabled by default:
# pd.set_option('display.max_colwidth', 5)
# pd.set_option('display.max_rows', 1000)


#
# user-defined CONSTANTS
#
YEAR = 2017
VERBOSE = True

#
# global CONSTANTS (all derived from YEAR)
#
START_DATE = str(YEAR) + '-01-01'

# Derived from the calendar instead of being set by hand, so the flag can
# never disagree with YEAR (a mismatch would give the DateTimeIndexes below
# the wrong length and shift the DST row offset).
LEAP_YEAR = bool(pd.Timestamp(START_DATE).is_leap_year)

DAYS_IN_JAN = 31
DAYS_IN_FEB = 29 if LEAP_YEAR else 28
HOURS_IN_ONE_YEAR = 24 * (366 if LEAP_YEAR else 365)

# DateTimeIndex for resampling time series: one entry per 15/30/60 minute
# interval of YEAR, starting at January 1st 00:00
DTI15 = pd.date_range(START_DATE, periods=4*HOURS_IN_ONE_YEAR, freq='15min')
DTI30 = pd.date_range(START_DATE, periods=2*HOURS_IN_ONE_YEAR, freq='30min')
DTI60 = pd.date_range(START_DATE, periods=HOURS_IN_ONE_YEAR, freq='60min')

# Day of March on which DST starts, keyed by year:
# March 26, 2017 / March 25, 2018 / March 31, 2019
DST_MARCH_START_DAY = {
    2017: 26,
    2018: 25,
    2019: 31,
}


#
# Actual Generation per Prod Type / Gen Forecasts:
# sampling interval in minutes, per country.
# Reference list:
#   15MIN: AT, DE, HU, NL
#   30MIN: CY, IE, UK
#   60MIN: BE, BG, HR, CY, CZ, DK, EE, FI, FR, GR, IT, LV, LT, PL, PT, RO, SK, SI, ES, SE
# (CY appears in both the 30MIN and the 60MIN list; the table uses 30.)
#
GEN_FREQ = dict(AT=15, DE=15, CY=30, BE=60)


#
# Total Load: sampling interval in minutes, per country.
# Reference list:
#   15MIN: AT, BE, DE
#   30MIN: CY,
#
LOAD_FREQ = dict(AT=15, BE=15, DE=15, CY=30)


# Landlocked countries have no offshore wind
LANDLOCKED = dict(AT=True, BE=False, DE=False, CY=False)




#
# helper functions: load, clean up and resample the raw CSV data
#

# TODO: iterate through the list of EU28 countries instead of a single one
#country = "DE"


def load_data_from_csv(country = "DE") :
    """Load one country's raw CSV exports and return them as one hourly table.

    Three files are read from ``data/<YEAR>/<country>/``: the wind/solar
    generation forecast, the actual generation per production type and the
    total load (day-ahead forecast and actual).  For each file the empty DST
    rows are removed, offshore wind is set to 0 for landlocked countries and
    the series is brought to an hourly DateTimeIndex, so that the final
    column-wise concatenation lines up row by row.

    Parameters
    ----------
    country : str
        Country code; must be a key of GEN_FREQ, LOAD_FREQ and LANDLOCKED.

    Returns
    -------
    pandas.DataFrame
        Hourly frame with the columns Forecast.Solar, Forecast.Offshore,
        Forecast.Onshore, Actual.Solar, Actual.Offshore, Actual.Onshore,
        Forecast.Load and Actual.Load (in that order).

    Raises
    ------
    KeyError
        If ``country`` is missing from one of the lookup tables.
    """
    gen_freq = GEN_FREQ[country]
    load_freq = LOAD_FREQ[country]
    landlocked = LANDLOCKED[country]

    # directory or folder where raw data files reside grouped by country
    path = "data" + os.sep + str(YEAR) + os.sep + country + os.sep
    tail = str(YEAR) + "01010000-" + str(YEAR+1) + "01010000.csv"

    vre_forecast_csv_filename = path + "Generation Forecasts for Wind and Solar_" + tail
    print("Loading", vre_forecast_csv_filename)
    vre_forecast = pd.read_csv(vre_forecast_csv_filename, usecols=[0,1,4,7])
    vre_forecast.columns = ['Time','Forecast.Solar','Forecast.Offshore','Forecast.Onshore']
    drop_dst_rows(vre_forecast, gen_freq)
    # 'Time' is a text label; it cannot be averaged and is replaced by the
    # DateTimeIndex assigned below, so drop it once the DST rows were shown.
    vre_forecast.drop('Time', axis=1, inplace=True)
    if landlocked:
        vre_forecast['Forecast.Offshore'] = 0
    vre_forecast = _as_hourly(vre_forecast, gen_freq)

    actual_generation_csv_filename = path + "Actual Generation per Production Type_" + tail
    print("Loading", actual_generation_csv_filename)
    actual_generation = pd.read_csv(actual_generation_csv_filename, usecols=[19,21,22])
    actual_generation.columns = ['Actual.Solar', 'Actual.Offshore', 'Actual.Onshore']
    if landlocked:
        actual_generation['Actual.Offshore'] = 0
    drop_dst_rows(actual_generation, gen_freq)
    actual_generation = _as_hourly(actual_generation, gen_freq)

    total_load_csv_filename = path + "Total Load - Day Ahead _ Actual_" + tail
    print("Loading", total_load_csv_filename)
    total_load = pd.read_csv(total_load_csv_filename, usecols=[1,2])
    total_load.columns = ['Forecast.Load', 'Actual.Load']
    drop_dst_rows(total_load, load_freq)
    # The load file has its own sampling interval (LOAD_FREQ), which can
    # differ from the generation files (e.g. BE: load 15 min, generation 60).
    total_load = _as_hourly(total_load, load_freq)

    return pd.concat([vre_forecast, actual_generation, total_load], axis=1)


def _as_hourly(df, freq):
    """Return ``df`` indexed by hour: resample sub-hourly data, re-index hourly data."""
    if freq < 60:
        return convert_to_hourly(df, freq)
    # Already hourly: only replace the gapped row numbers left behind by
    # drop_dst_rows with the hourly DateTimeIndex so concat can align on it.
    df.index = DTI60
    return df


# TODO: modify function to return index of first empty DST row
def drop_empty_rows(df, columns=('Solar', 'Offshore', 'Onshore'), answer=None):
    """Drop, after confirmation, the rows in which all ``columns`` are null.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean; it is modified in place.
    columns : sequence of str
        Columns that must all be null for a row to count as empty.
    answer : str or None
        Pre-supplied reply to the confirmation question.  When None (the
        default) the user is asked interactively via ``input()``.

    Returns
    -------
    None
    """
    empty_rows = df[df[list(columns)].isnull().all(axis=1)]
    if len(empty_rows) > 0:
        print("Found empty rows", empty_rows)
        if answer is None:
            print("Drop emtpy rows? y/n")
            answer = input()
        # Compare the reply explicitly: any non-empty string (including "n")
        # is truthy, so a plain truth test would drop the rows on "n" as well.
        if str(answer).strip().lower() in ('y', 'yes'):
            df.drop(empty_rows.index, inplace=True)
    return


#
# Handle DST by deleting the extra (empty) row(s)
# The DST start date and time is hard-coded to save time
# and because we are only concerned with 2017, 2018, and 2019
#
def drop_dst_rows(df, freq):
    """Remove, in place, the rows of the hour skipped when DST starts.

    The rows are located by position: the number of hours from January 1st
    00:00 up to 02:00 on the DST start day of YEAR (DST_MARCH_START_DAY),
    multiplied by the number of rows per hour.  One hour worth of rows is
    printed (when VERBOSE is set) and then dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one row per interval, starting at January 1st 00:00.
    freq : int
        Sampling interval in minutes: 15, 30 or 60.

    Raises
    ------
    ValueError
        If ``freq`` is not 15, 30 or 60.
    KeyError
        If YEAR has no entry in DST_MARCH_START_DAY.
    """
    # Validate first: an unknown interval would otherwise leave the row
    # range undefined.
    if freq not in (15, 30, 60):
        raise ValueError(
            "Unsupported sampling interval: " + str(freq)
            + " minutes (expected 15, 30 or 60)"
        )
    rows_per_hour = 60 // int(freq)

    # Full days before the DST start day, plus 2 hours (the gap starts at 2am)
    hours_before_gap = 24 * (DAYS_IN_JAN + DAYS_IN_FEB + (DST_MARCH_START_DAY[YEAR] - 1)) + 2
    start_row = hours_before_gap * rows_per_hour
    end_row = start_row + rows_per_hour

    if VERBOSE:
        print(df[start_row:end_row])
    df.drop(df.index[start_row:end_row], inplace=True)

    return

def convert_to_hourly(df, freq):
    """Return the hourly mean of the numeric columns of ``df``.

    The rows of ``df`` are labelled with the module-level DateTimeIndex that
    matches ``freq`` (DTI15, DTI30 or DTI60) and then averaged per hour.
    ``df`` itself is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per interval; its length must match the chosen index.
    freq : int
        Sampling interval in minutes: 15, 30 or 60.

    Returns
    -------
    pandas.DataFrame
        Hourly means, indexed by timestamp.

    Raises
    ------
    ValueError
        If ``freq`` is not 15, 30 or 60.
    """
    if freq == 15:    # 15 minute intervals
        index = DTI15
    elif freq == 30:  # 30 minute intervals
        index = DTI30
    elif freq == 60:  # hourly intervals: keep an existing timestamp index
        index = df.index if isinstance(df.index, pd.DatetimeIndex) else DTI60
    else:
        raise ValueError(
            "Unsupported sampling interval: " + str(freq)
            + " minutes (expected 15, 30 or 60)"
        )

    # Text columns (e.g. a time label) cannot be averaged; work on a
    # numeric-only copy, which also keeps the caller's frame unmodified.
    numeric = df.select_dtypes(include=['number', 'bool'])
    numeric.index = index

    # '60min' is the same hourly bin as 'H' and matches the alias style
    # used for the DTI constants.
    return numeric.resample('60min').mean()
    

# Load the hourly forecast / actual table for Austria
hourly_data = load_data_from_csv('AT')

# Merge offshore and onshore wind into a single wind column per series.
# The new column is inserted by position first, then its two parts are removed.
hourly_data.insert(
    4,
    'Forecast.Wind',
    hourly_data['Forecast.Offshore'] + hourly_data['Forecast.Onshore'],
)
hourly_data.drop(['Forecast.Offshore', 'Forecast.Onshore'], axis='columns', inplace=True)
hourly_data.insert(
    3,
    'Actual.Wind',
    hourly_data['Actual.Offshore'] + hourly_data['Actual.Onshore'],
)
hourly_data.drop(['Actual.Offshore', 'Actual.Onshore'], axis='columns', inplace=True)

# 1. VRE = solar + wind (placed right after the wind columns)
hourly_data.insert(
    4,
    'Forecast.VRE',
    hourly_data['Forecast.Solar'] + hourly_data['Forecast.Wind'],
)
hourly_data.insert(
    5,
    'Actual.VRE',
    hourly_data['Actual.Solar'] + hourly_data['Actual.Wind'],
)

# 2. Residual load = load - VRE
hourly_data['Forecast.ResLoad'] = (
    hourly_data['Forecast.Load'] - hourly_data['Forecast.VRE']
)
hourly_data['Actual.ResLoad'] = (
    hourly_data['Actual.Load'] - hourly_data['Actual.VRE']
)

# 3. Delta residual load = actual - forecast
hourly_data['Delta.ResLoad'] = (
    hourly_data['Actual.ResLoad'] - hourly_data['Forecast.ResLoad']
)

# critical hours: largest delta residual load first
# (left as a bare expression so the notebook displays the sorted table)
hourly_data.sort_values('Delta.ResLoad', ascending=False)



Loading data/2017/AT/Generation Forecasts for Wind and Solar_201701010000-201801010000.csv
                                     Time  Forecast.Solar Forecast.Offshore  \
8072  26.03.2017 02:00 - 26.03.2017 02:15             NaN               NaN   
8073  26.03.2017 02:15 - 26.03.2017 02:30             NaN               NaN   
8074  26.03.2017 02:30 - 26.03.2017 02:45             NaN               NaN   
8075  26.03.2017 02:45 - 26.03.2017 03:00             NaN               NaN   

      Forecast.Onshore  
8072               NaN  
8073               NaN  
8074               NaN  
8075               NaN  
Loading data/2017/AT/Actual Generation per Production Type_201701010000-201801010000.csv
      Actual.Solar  Actual.Offshore  Actual.Onshore
8072           NaN                0             NaN
8073           NaN                0             NaN
8074           NaN                0             NaN
8075           NaN                0             NaN
Loading data/2017/AT/Total Load - Day A

Unnamed: 0,Forecast.Solar,Actual.Solar,Forecast.Wind,Actual.Wind,Forecast.VRE,Actual.VRE,Forecast.Load,Actual.Load,Forecast.ResLoad,Actual.ResLoad,Delta.ResLoad
2017-12-24 07:00:00,8.50,8.50,2076.75,2114.75,2085.25,2123.25,4057.00,6294.75,1971.75,4171.50,2199.75
2017-06-28 13:00:00,525.50,525.50,2159.25,1210.75,2684.75,1736.25,7509.75,8616.00,4825.00,6879.75,2054.75
2017-12-24 08:00:00,26.50,26.50,2017.00,2177.75,2043.50,2204.25,4434.50,6642.00,2391.00,4437.75,2046.75
2017-06-28 12:00:00,572.75,572.75,2178.50,1255.50,2751.25,1828.25,7539.25,8653.00,4788.00,6824.75,2036.75
2017-06-28 16:00:00,266.50,266.50,2111.25,1192.25,2377.75,1458.75,7341.50,8449.75,4963.75,6991.00,2027.25
2017-06-28 15:00:00,349.75,349.75,2132.00,1285.25,2481.75,1635.00,7339.00,8502.25,4857.25,6867.25,2010.00
2017-06-28 14:00:00,442.00,442.00,2144.50,1284.00,2586.50,1726.00,7465.50,8565.50,4879.00,6839.50,1960.50
2017-06-28 11:00:00,585.50,585.50,2082.00,1230.50,2667.50,1816.00,7538.00,8596.00,4870.50,6780.00,1909.50
2017-06-28 17:00:00,187.75,187.75,2013.00,1131.00,2200.75,1318.75,7266.00,8273.00,5065.25,6954.25,1889.00
2017-06-23 15:00:00,352.25,352.25,1534.75,432.25,1887.00,784.50,7126.25,7882.50,5239.25,7098.00,1858.75
