In [1]:
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, time
from google.cloud import bigquery
from google.oauth2 import service_account
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas deprecation / chained-assignment warnings.
warnings.simplefilter('ignore')

# Show all columns when a DataFrame is displayed (None removes the limit).
pd.options.display.max_columns = None
# To restore the default limit: pd.reset_option('display.max_columns')

In [2]:
# Service-account key file; the path is relative to the notebook's working directory.
path_to_private_key = './ieso-dashboard-c639f1a39298.json'

# Build credentials from the key file with the cloud-platform scope.
# `credentials` is what the pd.read_gbq calls further down authenticate with.
cloud_platform_scope = 'https://www.googleapis.com/auth/cloud-platform'
credentials = service_account.Credentials.from_service_account_file(
    path_to_private_key, scopes=[cloud_platform_scope]
)

### Independent / Input Variables

In [3]:
# Historical adequacy data, loaded from a local CSV export.
def _to_iso_minutes(stamp):
    """Convert an 'MM/DD/YYYY HH:MM' string to ISO-8601 text with minute precision."""
    parsed = datetime.strptime(stamp, '%m/%d/%Y %H:%M')
    return parsed.isoformat(timespec='minutes')


old_ade = pd.read_csv('adequacy_202209052348.csv')
# Normalise the timestamp text to the same 'YYYY-MM-DDTHH:MM' form that is built
# for the BigQuery rows, so the two can be compared as plain strings later on.
old_ade['mkt_datetime'] = old_ade['mkt_datetime'].map(_to_iso_minutes)

In [4]:
# Current adequacy data: fetch the whole table from BigQuery, ordered by
# market date and then by mkt_he.
# NOTE(review): pandas.read_gbq is deprecated since pandas 2.2 in favour of
# pandas_gbq.read_gbq -- confirm against the installed pandas version.
query_string = (
    "SELECT * "
    "FROM `ieso-dashboard.Adequacy2.Adequacy2_table` "
    "ORDER BY mkt_date, mkt_he;"
)
gbq_ade = pd.read_gbq(query_string, credentials=credentials)

In [5]:
# Build an ISO-8601 'YYYY-MM-DDTHH:MM' timestamp string for every adequacy row.
# `mkt_he` is shifted back by one to form the hour text (e.g. 1 -> "0:00");
# presumably it is an hour-ending value in 1..24 -- TODO confirm against the table schema.
hour_col = gbq_ade['mkt_he'].map(lambda he: str(he - 1) + ":00")
date_col = gbq_ade['mkt_date'].map(str)

date_time = pd.to_datetime(date_col + ' ' + hour_col).map(
    lambda ts: ts.isoformat(timespec='minutes')
)

# DataFrame.insert raises ValueError when the column already exists, which made
# this cell fail when it was re-run without reloading gbq_ade. Drop any stale
# copy first so the cell can be executed repeatedly; the column still ends up
# at position 0.
if 'mkt_datetime' in gbq_ade.columns:
    gbq_ade = gbq_ade.drop(columns='mkt_datetime')
gbq_ade.insert(loc=0, column='mkt_datetime', value=date_time)

In [6]:
# Names of the input (independent) columns, with the timestamp key first.
ind_var = [
    'mkt_datetime',
    'int_nuc_sch', 'int_gas_sch', 'int_hyd_sch', 'int_win_sch',
    'imp_mic_sch', 'imp_new_sch', 'imp_que_sch',
    'ont_avg_dem',
    'exp_mic_sch', 'exp_new_sch', 'exp_que_sch',
]

# The same column subset, taken from the BigQuery frame and from the CSV frame.
X_curr = gbq_ade[ind_var]
X_old = old_ade[ind_var]

In [7]:
# Keep only the BigQuery rows that are newer than the last CSV row, then append
# them to the CSV rows. The comparison is lexicographic on the
# 'YYYY-MM-DDTHH:MM' strings.
last_old_stamp = max(X_old['mkt_datetime'])  # author's note: '2022-09-06T23:00'
is_newer = X_curr['mkt_datetime'] > last_old_stamp
X1 = X_curr[is_newer]
X_combined = pd.concat([X_old, X1], ignore_index=True)

In [8]:
# Discard every row that has a missing value in any column.
# Author's note: the result covers all days from 1/1/20 up to the current date.
# dropna keeps the surviving rows' index labels, which the hard-coded
# X.at[...] corrections later in the notebook rely on.
X = X_combined.dropna(axis=0, how='any')

### Target Variable (ont_ene)

In [9]:
# Current real-time market price data: fetch the whole table from BigQuery,
# ordered by market date and then by mkt_he.
# NOTE(review): pandas.read_gbq is deprecated since pandas 2.2 in favour of
# pandas_gbq.read_gbq -- confirm against the installed pandas version.
query_string = (
    "SELECT * "
    "FROM `ieso-dashboard.RealtimeMktPrice.RealtimeMktPrice_table` "
    "ORDER BY mkt_date, mkt_he;"
)
gbq_rtmp = pd.read_gbq(query_string, credentials=credentials)

In [10]:
# Build an ISO-8601 'YYYY-MM-DDTHH:MM' timestamp string for every price row.
# `mkt_he` is shifted back by one to form the hour text (e.g. 1 -> "0:00");
# presumably it is an hour-ending value in 1..24 -- TODO confirm against the table schema.
hour_col = gbq_rtmp['mkt_he'].map(lambda he: str(he - 1) + ":00")
date_col = gbq_rtmp['mkt_date'].map(str)

date_time = pd.to_datetime(date_col + ' ' + hour_col).map(
    lambda ts: ts.isoformat(timespec='minutes')
)

# DataFrame.insert raises ValueError when the column already exists, which made
# this cell fail when it was re-run without reloading gbq_rtmp. Drop any stale
# copy first so the cell can be executed repeatedly; the column still ends up
# at position 0.
if 'mkt_datetime' in gbq_rtmp.columns:
    gbq_rtmp = gbq_rtmp.drop(columns='mkt_datetime')
gbq_rtmp.insert(loc=0, column='mkt_datetime', value=date_time)

In [11]:
# Timestamp key plus the target column.
dep_var = [
    'mkt_datetime',
    'ont_ene',
]
# Target values as currently held in the BigQuery price frame.
Y_curr = gbq_rtmp[dep_var]

In [12]:
# Historical prices: read the spreadsheet, skipping its first four rows.
old_rtmp = pd.read_excel('historical ont_ene.xlsx', skiprows=4)

# Same timestamp construction as for the BigQuery frames, except that the hour
# column is named 'he' in this file.
hour_col = old_rtmp['he'].map(lambda he: str(he - 1) + ":00")
date_col = old_rtmp['mkt_date'].map(str)
stamps = pd.to_datetime(date_col + ' ' + hour_col)
date_time = stamps.map(lambda ts: ts.isoformat(timespec='minutes'))

# Put the new key in front of the existing columns.
old_rtmp.insert(loc=0, column='mkt_datetime', value=date_time)

In [13]:
# Mean ont_ene over the rows dated 2022-05-20 (Series.mean skips NaN by default).
on_2022_05_20 = old_rtmp['mkt_date'] == '2022-05-20'
val_to_fillna = old_rtmp.loc[on_2022_05_20, 'ont_ene'].mean()

# Overwrite ont_ene at three hard-coded index labels with that daily mean.
# NOTE(review): presumably these are the 2022-05-20 rows whose price is
# missing -- confirm against the spreadsheet before reusing on other data.
for row_label in (20893, 20894, 20895):
    old_rtmp.at[row_label, 'ont_ene'] = val_to_fillna

# Remove whatever rows still contain a missing value.
old_rtmp = old_rtmp.dropna()

In [14]:
# Historical target values from the spreadsheet frame.
Y_old = old_rtmp[dep_var]

# Append only the BigQuery rows that are newer than the last historical row.
# The comparison is lexicographic on the 'YYYY-MM-DDTHH:MM' strings.
newest_old_stamp = max(Y_old['mkt_datetime'])  # author's note: '2022-09-07T10:00'
is_newer_price = Y_curr['mkt_datetime'] > newest_old_stamp
Y1 = Y_curr[is_newer_price]
Y_combined = pd.concat([Y_old, Y1], ignore_index=True)

In [15]:
# Discard every row that has a missing value in any column.
# Author's note: the result covers all days from 1/1/20 up to the current date.
Y = Y_combined.dropna(axis=0, how='any')

### Merging all the columns

In [16]:
# Show the rows whose timestamp repeats an earlier row
# (the first occurrence of each timestamp is not flagged).
X[X['mkt_datetime'].duplicated(keep='first')]

Unnamed: 0,mkt_datetime,int_nuc_sch,int_gas_sch,int_hyd_sch,int_win_sch,imp_mic_sch,imp_new_sch,imp_que_sch,ont_avg_dem,exp_mic_sch,exp_new_sch,exp_que_sch
1611,2020-03-08T03:00,10020,178,3189,2842,0,0,9,12989,-1450,-1407,-432
10515,2021-03-14T03:00,8345,216,2768,2830,0,0,9,12793,-1000,-517,-66
19251,2022-03-13T03:00,8345,827,3461,2274,0,0,1549,14898,-1450,-168,-214


In [17]:
# Re-label the row just before each duplicate listed in the previous cell's
# output, presumably so that every timestamp becomes unique.
# NOTE(review): the dates look like spring daylight-saving changeovers -- confirm.
# The index labels are hard-coded and only valid for this exact dataset.
timestamp_fixes = {
    1610: '2020-03-08T02:00',
    10514: '2021-03-14T02:00',
    19250: '2022-03-13T02:00',
}
for row_label, corrected_stamp in timestamp_fixes.items():
    X.at[row_label, 'mkt_datetime'] = corrected_stamp

In [18]:
# Parse the ISO strings into real datetimes so both frames join on the same dtype.
X['mkt_datetime'] = pd.to_datetime(X['mkt_datetime'])
Y['mkt_datetime'] = pd.to_datetime(Y['mkt_datetime'])

# Left join: every feature row is kept, and ont_ene is NaN where no price row matches.
# NOTE(review): Y is not checked for duplicate timestamps here; a repeated key
# in Y would repeat the matching X row -- confirm Y's keys are unique.
merged_data = (
    X.merge(Y, how='left', on='mkt_datetime')
     .set_index('mkt_datetime')
)

In [19]:
# Persist the merged frame; the mkt_datetime index is written as the first CSV column.
merged_data.to_csv('merged_data.csv', index=True)