In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

from datetime import datetime, time
from google.cloud import bigquery
from google.oauth2 import service_account

# Show every column when a DataFrame is rendered (None removes the column limit).
pd.set_option('display.max_columns', None)
# To restore the default column limit later: pd.reset_option('display.max_columns')

In [2]:
# Service-account key file used to authenticate the BigQuery reads below.
path_to_private_key = './ieso-dashboard-c639f1a39298.json'

# OAuth scope requested for the service-account credentials.
cloud_platform_scopes = ['https://www.googleapis.com/auth/cloud-platform']

credentials = service_account.Credentials.from_service_account_file(
    path_to_private_key, scopes=cloud_platform_scopes
)

### Independent / Input Variables

In [3]:
# OLD DATA: historical adequacy extract stored as a CSV file.
def _to_iso_minutes(raw_stamp):
    """Convert an 'MM/DD/YYYY HH:MM' string to ISO-8601 with minute precision."""
    parsed = datetime.strptime(raw_stamp, '%m/%d/%Y %H:%M')
    return parsed.isoformat(timespec='minutes')


old_ade = pd.read_csv('adequacy_202209052348.csv')
# Normalise the timestamp text so it sorts and compares like the BigQuery-derived values.
old_ade['mkt_datetime'] = old_ade['mkt_datetime'].map(_to_iso_minutes)

In [4]:
# CURRENT DATA: pull the full adequacy table from BigQuery, ordered by date then hour.
query_string = (
    "SELECT * FROM `ieso-dashboard.Adequacy2.Adequacy2_table` "
    "ORDER BY mkt_date, mkt_he;"
)
gbq_ade = pd.read_gbq(query_string, credentials=credentials)

In [5]:
# Build an ISO-8601 'YYYY-MM-DDTHH:MM' timestamp for every adequacy row.
# mkt_he is shifted down by one before being used as the clock hour
# (presumably it is an hour-ending value, so he=1 maps to 00:00 -- confirm).
hour_col = gbq_ade['mkt_he'].map(lambda he: str(he - 1) + ":00")
date_col = gbq_ade['mkt_date'].map(str)

date_time = pd.to_datetime(date_col + ' ' + hour_col).map(
    lambda stamp: stamp.isoformat(timespec='minutes')
)

# DataFrame.insert raises ValueError if the column already exists, which made
# this cell fail when re-run on the same kernel. Overwrite in place on a re-run
# and only insert (as the first column) the first time.
if 'mkt_datetime' in gbq_ade.columns:
    gbq_ade['mkt_datetime'] = date_time
else:
    gbq_ade.insert(loc=0, column='mkt_datetime', value=date_time)

In [6]:
# Timestamp key plus the scheduled-generation, import, demand and export
# columns that are kept as model inputs.
ind_var = [
    'mkt_datetime',
    'int_nuc_sch',
    'int_gas_sch',
    'int_hyd_sch',
    'int_win_sch',
    'imp_mic_sch',
    'imp_new_sch',
    'imp_que_sch',
    'ont_avg_dem',
    'exp_mic_sch',
    'exp_new_sch',
    'exp_que_sch',
]

# Same column subset from both the BigQuery frame and the historical CSV frame.
X_curr = gbq_ade[ind_var]
X_old = old_ade[ind_var]

In [7]:
# Latest timestamp already covered by the historical extract
# (the original author noted it as '2022-09-06T23:00' for their run).
# The ISO-formatted strings compare chronologically as plain text.
x_old_latest = max(X_old['mkt_datetime'])

# Keep only the BigQuery rows that are strictly newer, then append them.
x_is_newer = X_curr['mkt_datetime'] > x_old_latest
X1 = X_curr[x_is_newer]
X_combined = pd.concat([X_old, X1], axis=0, ignore_index=True)

In [8]:
# Drop every row that has at least one missing input value.
# Per the original author's note this spans 2020-01-01 through 2022-09-08.
X = X_combined.dropna(axis=0, how='any')

# Persist the cleaned inputs without the index column.
X.to_csv('x.csv', index=False)

# Show the last rows as a quick sanity check of the most recent data.
X.tail(5)

Unnamed: 0,mkt_datetime,int_nuc_sch,int_gas_sch,int_hyd_sch,int_win_sch,imp_mic_sch,imp_new_sch,imp_que_sch,ont_avg_dem,exp_mic_sch,exp_new_sch,exp_que_sch
23563,2022-09-08T19:00,9415,4183,4491,339,366,650,9,18660,-355,-380,-9
23564,2022-09-08T20:00,9415,3976,4218,406,305,250,9,17742,-418,-380,-9
23565,2022-09-08T21:00,9415,3177,3948,484,280,0,9,16484,-645,-621,-9
23566,2022-09-08T22:00,9415,2943,3399,526,0,0,9,15062,-855,-710,-9
23567,2022-09-08T23:00,9415,1581,3206,564,0,0,9,14132,-695,-100,-9


### Target Variable (ont_ene)

In [9]:
## CURRENT DATA: pull the full real-time market price table from BigQuery,
## ordered by date then hour.
query_string = (
    "SELECT * FROM `ieso-dashboard.RealtimeMktPrice.RealtimeMktPrice_table` "
    "ORDER BY mkt_date, mkt_he;"
)
gbq_rtmp = pd.read_gbq(query_string, credentials=credentials)

In [10]:
# Build an ISO-8601 'YYYY-MM-DDTHH:MM' timestamp for every price row.
# mkt_he is shifted down by one before being used as the clock hour
# (presumably it is an hour-ending value, so he=1 maps to 00:00 -- confirm).
hour_col = gbq_rtmp['mkt_he'].map(lambda he: str(he - 1) + ":00")
date_col = gbq_rtmp['mkt_date'].map(str)

date_time = pd.to_datetime(date_col + ' ' + hour_col).map(
    lambda stamp: stamp.isoformat(timespec='minutes')
)

# DataFrame.insert raises ValueError if the column already exists, which made
# this cell fail when re-run on the same kernel. Overwrite in place on a re-run
# and only insert (as the first column) the first time.
if 'mkt_datetime' in gbq_rtmp.columns:
    gbq_rtmp['mkt_datetime'] = date_time
else:
    gbq_rtmp.insert(loc=0, column='mkt_datetime', value=date_time)

In [11]:
# Timestamp key plus the target column.
dep_var = [
    'mkt_datetime',
    'ont_ene',
]

# Target subset of the BigQuery price frame.
Y_curr = gbq_rtmp[dep_var]

In [12]:
## OLD DATA: historical prices from an Excel workbook; the first four rows
## of the sheet are skipped before the header is read.
def _he_to_clock(he):
    """Turn an hour value into an 'H:00' clock string for the preceding hour."""
    return str(he - 1) + ":00"


old_rtmp = pd.read_excel('historical ont_ene.xlsx', skiprows=4)

hour_col = old_rtmp['he'].map(_he_to_clock)
date_col = old_rtmp['mkt_date'].map(str)

# Parse "<date> <hour>" and re-emit it as ISO-8601 text with minute precision.
parsed_stamps = pd.to_datetime(date_col + ' ' + hour_col)
date_time = parsed_stamps.map(lambda stamp: stamp.isoformat(timespec='minutes'))

# Put the timestamp first. The frame is freshly read above, so the column
# cannot already exist when insert runs.
old_rtmp.insert(loc=0, column='mkt_datetime', value=date_time)

In [13]:
# Replacement value: the mean ont_ene over all rows dated 2022-05-20.
on_gap_day = old_rtmp['mkt_date'] == '2022-05-20'
val_to_fillna = old_rtmp.loc[on_gap_day, 'ont_ene'].mean()

# Overwrite three specific rows with that daily mean.
# NOTE(review): the row labels are hard-coded; they presumably are the hours on
# 2022-05-20 with missing prices (earlier scratch code here targeted 14:00 and
# 15:00 by timestamp) -- confirm against the workbook before reusing.
for row_label in (20893, 20894, 20895):
    old_rtmp.at[row_label, 'ont_ene'] = val_to_fillna

# Discard any rows that still contain missing values.
old_rtmp = old_rtmp.dropna()

In [14]:
# Target subset (timestamp + ont_ene) of the cleaned historical price frame.
Y_old = old_rtmp.loc[:, dep_var]

In [15]:
# Latest timestamp already covered by the historical workbook
# (the original author noted it as '2022-09-07T10:00' for their run).
# The ISO-formatted strings compare chronologically as plain text.
y_old_latest = max(Y_old['mkt_datetime'])

# Keep only the BigQuery rows that are strictly newer, then append them.
y_is_newer = Y_curr['mkt_datetime'] > y_old_latest
Y1 = Y_curr[y_is_newer]
Y_combined = pd.concat([Y_old, Y1], axis=0, ignore_index=True)

In [16]:
# Drop every row that has a missing timestamp or target value.
# Per the original author's note this spans 2020-01-01 through 2022-09-08.
Y = Y_combined.dropna(axis=0, how='any')

# Persist the cleaned target without the index column.
Y.to_csv('y.csv', index=False)

# Show the first rows to confirm where the series starts.
Y.head(5)

Unnamed: 0,mkt_datetime,ont_ene
0,2020-01-01T00:00,0.0
1,2020-01-01T01:00,0.0
2,2020-01-01T02:00,0.0
3,2020-01-01T03:00,0.0
4,2020-01-01T04:00,0.0


In [17]:
# Show the last rows to confirm where the target series ends.
Y.tail(5)

Unnamed: 0,mkt_datetime,ont_ene
23549,2022-09-08T05:00,77.74
23550,2022-09-08T06:00,79.24
23551,2022-09-08T07:00,79.54
23552,2022-09-08T08:00,93.67
23553,2022-09-08T09:00,128.69
