In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
from pandas.core.frame import DataFrame
%matplotlib inline

In [2]:
# Load the master dataset from CSV (path is relative to the notebook's working directory).
# pre_master_dataset() below reads its 'timestamp_eastern', 'season',
# 'adjusted_demand_MW', 'demand_MW' and 'hour_ending_eastern' columns.
master_df = pd.read_csv('ieso_ga_master_dataset_allWeather_updated2020.csv')

In [3]:
# Load the forecast table from CSV; later cells read its 'ts', 'ts_future' and
# 'forecast' columns. NOTE: extract_topN_forecast() refers to this module-level
# name directly, so this cell must run before that function is called.
forecast_df = pd.read_csv('ga_forecasts_top_2.csv')

In [4]:
def pre_master_dataset(master_df):
    """Reduce the master dataset to one peak-demand row per (season, day).

    Parameters
    ----------
    master_df : pandas.DataFrame
        Must contain 'season', 'adjusted_demand_MW', 'demand_MW',
        'hour_ending_eastern' and a timestamp column named either
        'timestamp_eastern' (renamed to 'ts') or 'ts'. The input frame is
        not modified.

    Returns
    -------
    pandas.DataFrame
        Columns: 'season', 'adjusted_demand_MW', 'demand_MW',
        'hour_ending_eastern', 'ts' (datetime64), 'rankings_per_day'
        (always 1.0 after filtering) and 'rankings_per_season' (1 = highest
        daily peak of the season). The index is reset to 0..n-1.

    Notes
    -----
    Ranks are computed with ``method='first'``. With pandas' default
    ``method='average'`` two hours tied for a day's maximum would both get
    rank 1.5, and the ``== 1`` filter would silently drop that day; seasonal
    ties would likewise produce fractional ranks so a "top N" selection could
    contain fewer than N rows.
    """
    cols_to_keep = ['season', 'adjusted_demand_MW', 'demand_MW', 'hour_ending_eastern', 'ts']

    # rename() returns a new frame and the column selection is copied
    # explicitly, so the caller's frame is never touched and later column
    # assignments do not hit a view.
    df = master_df.rename(columns={'timestamp_eastern': 'ts'})[cols_to_keep].copy()
    df['ts'] = pd.to_datetime(df['ts'])

    # Rank the hours of each (season, calendar day) by demand, highest first,
    # and keep only the daily peak hour.
    df['rankings_per_day'] = (
        df.groupby(['season', df['ts'].dt.date])['demand_MW']
        .rank(ascending=False, method='first')
    )
    df = df[df['rankings_per_day'] == 1].reset_index(drop=True)

    # Rank the daily peaks within each season.
    # NOTE(review): the original comment said this ranks by adjusted_demand_MW,
    # but the code has always ranked by demand_MW; confirm which is intended.
    df['rankings_per_season'] = (
        df.groupby('season')['demand_MW'].rank(ascending=False, method='first')
    )
    return df

In [5]:
def pre_forecast_dataset(forecast_df):
    """Select the forecasts issued at 10:00 and key them by their target time.

    Parameters
    ----------
    forecast_df : pandas.DataFrame
        Must contain 'ts' (issue time), 'ts_future' (target time) and
        'forecast'. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Two columns, 'forecast' and 'ts', where 'ts' holds the parsed
        'ts_future' values of the rows whose issue time-of-day is exactly
        10:00. The original row index labels are preserved.
    """
    forecasts = forecast_df[['ts', 'ts_future', 'forecast']].copy()
    forecasts['ts'] = pd.to_datetime(forecasts['ts'])
    forecasts['ts_future'] = pd.to_datetime(forecasts['ts_future'])

    # Keep only rows whose issue timestamp falls on 10:00:00.
    issued_at_ten = forecasts['ts'].dt.time == datetime.time(10, 0)
    ten_am = forecasts.loc[issued_at_ten, ['forecast', 'ts_future']]

    # The target time becomes the join key 'ts' used by the merge step.
    return ten_am.rename(columns={'ts_future': 'ts'})

In [6]:
def merge_forecast_top_priority(master_df,forecast_df):
    """Join the daily demand peaks with the 10:00 forecasts on 'ts'.

    Both inputs are first run through pre_master_dataset() and
    pre_forecast_dataset(). The merge uses pandas' default (inner) join, so
    only timestamps present in both prepared frames are kept.

    Returns the merged frame with an extra integer column 'predict' that is
    1 where 'forecast' is greater than 0 and 0 otherwise.
    """
    daily_peaks = pre_master_dataset(master_df)
    ten_am_forecasts = pre_forecast_dataset(forecast_df)

    merged = daily_peaks.merge(ten_am_forecasts, on='ts')
    merged['predict'] = merged['forecast'].gt(0).astype(int)
    return merged

In [7]:
def top_priority(master_df,forecast_df):
    """Return the top-N seasonal peak rows for every season and every N.

    The merged peaks/forecast frame from merge_forecast_top_priority() is
    split by 'season'; for each season and each N in [1, 3, 5, 10, 20] the
    rows of that season with 'rankings_per_season' <= N are collected.

    Returns
    -------
    list of pandas.DataFrame
        One frame per (season, N) pair, ordered by season (groupby order)
        and then by N ascending.

    Notes
    -----
    The previous implementation re-filtered the whole frame in place, so
    after the first threshold (N=1) every later threshold was a no-op, the
    season group was never used, and the same all-season top-1 frame was
    appended once per season.
    """
    top_ns = [1,3,5,10,20]
    merged = merge_forecast_top_priority(master_df,forecast_df)

    subsets = []
    for _season, season_rows in merged.groupby('season'):
        for n in top_ns:
            # Filter the season's own rows; never rebind the source frame.
            subsets.append(season_rows[season_rows['rankings_per_season'] <= n])
    return subsets

In [None]:
def extract_topN_forecast(ts, n, forecasts=None):
    """Score, on a 0-100 scale, how strongly the 10:00 forecast flags `ts`.

    Steps:
      1. Keep forecast rows whose issue time-of-day ('ts') is exactly 10:00.
      2. Among those targeting `ts` (via 'ts_future'), take the latest issue
         time, then look at every forecast made at that issue time.
      3. Take the `n` largest 'forecast' values of that issue and add the
         same offset ``(1 - sum) / n`` to each (so that, when exactly `n`
         rows are selected, the adjusted values sum to 1).
      4. Read the adjusted value for `ts`, cap it at 0.5 and rescale so that
         0.5 maps to 100.

    Parameters
    ----------
    ts : timestamp-like
        Target time, compared against the parsed 'ts_future' column.
    n : int
        Number of largest forecasts to consider. Must be non-zero (it is
        used as a divisor).
    forecasts : pandas.DataFrame, optional
        Frame with 'ts', 'ts_future' and 'forecast' columns. When omitted
        the notebook-level ``forecast_df`` is used, which keeps existing
        two-argument calls working.

    Returns
    -------
    int or float
        0 if `ts` is not among the top-`n` forecasts of its issue (or has no
        10:00 forecast at all); 100.0 if the adjusted value is >= 0.5;
        otherwise ``int(adjusted / 0.5 * 100)``.
    """
    # Fall back to the global only when the caller did not supply a frame.
    source = forecast_df if forecasts is None else forecasts

    fc = source[['ts', 'ts_future', 'forecast']].copy()
    fc['ts'] = pd.to_datetime(fc['ts'])
    fc['ts_future'] = pd.to_datetime(fc['ts_future'])
    ten_am = fc[fc['ts'].dt.time == datetime.time(10, 0)]

    # Latest 10:00 issue that covers the requested target time. If there is
    # none this is NaT, the selections below are empty and 0 is returned.
    latest_issue = ten_am.loc[ten_am['ts_future'] == ts, 'ts'].max()
    same_issue = ten_am[ten_am['ts'] == latest_issue]

    top_n = same_issue.nlargest(n, 'forecast')
    offset = (1 - top_n['forecast'].sum()) / n
    adjusted = top_n.loc[top_n['ts_future'] == ts, 'forecast'] + offset

    if len(adjusted) == 0:
        return 0

    best = adjusted.max()
    if best >= 0.5:
        # Capped branch; kept as a float to match the historical return value.
        return 100.0
    return int((best / 0.5) * 100)

In [8]:
def Accuracy(master_df,forecast_df):
    """Summarise forecast hits per season and per top-N seasonal peak set.

    For every season in the merged peaks/forecast frame and every N in
    [1, 3, 5, 10, 20], one result row is produced with:

      * 'season', 'top_n'
      * 'success'   - number of top-N rows whose 'forecast' is > 0
      * 'Hit rate'  - success / N
      * one column per distinct 'hour_ending_eastern' value found among the
        rows with seasonal rank <= 20, formatted as
        '<sum of predict>/<row count>,<ave>' where <ave> is the truncated
        mean of extract_topN_forecast(ts, 3) over that hour's rows
      * 'percentage' - sum over hours of int(ave * sum of predict), divided
        by N

    Returns
    -------
    pandas.DataFrame
        One row per (season, N). An empty DataFrame is returned when the
        merged frame has no seasons (previously this raised
        UnboundLocalError because the result was only built inside the loop).

    Notes
    -----
    extract_topN_forecast() is called with two arguments only, so it uses
    its own default forecast source rather than the `forecast_df` argument
    passed here.
    """
    top_ns = [1,3,5,10,20]
    merged = merge_forecast_top_priority(master_df,forecast_df)

    # Loop-invariant work hoisted out of the loops: only rows ranked in the
    # seasonal top 20 are ever inspected, and the set of peak hours (result
    # columns) is shared by every row of the output.
    candidates = merged[merged['rankings_per_season']<=20]
    peak_hours = pd.unique(candidates['hour_ending_eastern'])

    # The top-N sets are nested, so the same timestamp is scored for several
    # N values; cache the score per timestamp to avoid recomputing it.
    score_cache = {}

    def _score(target_ts):
        if target_ts not in score_cache:
            score_cache[target_ts] = extract_topN_forecast(target_ts,3)
        return score_cache[target_ts]

    rows = []
    for season, _group in merged.groupby('season'):
        for n in top_ns:
            top = candidates[(candidates['season']==season) & (candidates['rankings_per_season']<=n)]
            hits = int((top['forecast'] > 0).sum())

            row = {
                'season': season,
                'top_n' : n,
                'success': hits,
                'Hit rate': hits/n,
            }

            performance = 0
            for peak_hr in peak_hours:
                at_hour = top[top['hour_ending_eastern']== peak_hr]
                scores = [_score(z) for z in at_hour['ts']]
                # Truncated mean score for this hour; 0 when the hour has no rows.
                ave = int(sum(scores)/len(scores)) if scores else 0

                predicted = at_hour.predict.sum()
                total = at_hour.predict.count()
                performance += int(ave* predicted)
                row[peak_hr] =f'{predicted}/{total},{ave}'

            row['percentage'] = performance/n
            rows.append(row)

    # Build the result once, after all seasons, so it exists even when the
    # loop body never runs.
    return pd.DataFrame(rows)