## Importing Libraries

In [6]:
import ast
import os
import math
import json
import logging
import warnings
import operator
import subprocess
import time
import datetime as dt
from datetime import date, timedelta, datetime
import pandas as pd
import numpy as np
from pandas.io import gbq
from google.cloud import storage
from pytz import timezone

import geopy.distance
from configparser import ConfigParser, ExtendedInterpolation

# Setup logs
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)

## Fetching hourly weather data for 3 days

In [7]:
df_weather = "SELECT * FROM (SELECT Job_Update_Time, timestamp,Extract(Date from Timestamp) as Date1,\
              timestampInit, temp, windSpd, snowfallProb, precipProb, Location from aes-analytics-0002.weathersource_mds.forecast_hour\
              where Location like '%IPL%') where Date1 between DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY) AND DATE_SUB(CURRENT_DATE(),\
              INTERVAL -2 DAY) and Job_Update_Time = (Select max(Job_Update_Time) from\
              (SELECT Job_Update_Time, timestamp,Extract(Date from Timestamp) as Date1, timestampInit,\
              temp, windSpd, snowfallProb, precipProb, Location from aes-analytics-0002.weathersource_mds.forecast_hour\
              where Location like '%IPL%')) order by  location , timestamp desc"
df_weather = gbq.read_gbq(df_weather, project_id='aes-analytics-0002')

In [8]:
logging.info(df_weather.shape)
df_weather.head()

2020-11-23 14:00:20,625 (1440, 9)


Unnamed: 0,Job_Update_Time,timestamp,Date1,timestampInit,temp,windSpd,snowfallProb,precipProb,Location
0,2020-11-19 07:46:53+00:00,2020-11-25 23:00:00+00:00,2020-11-25,2020-11-19 00:00:00+00:00,47.1,5.4,0.0,10.0,IPL_Marker1
1,2020-11-19 07:46:53+00:00,2020-11-25 22:00:00+00:00,2020-11-25,2020-11-19 00:00:00+00:00,47.3,7.4,0.0,10.0,IPL_Marker1
2,2020-11-19 07:46:53+00:00,2020-11-25 21:00:00+00:00,2020-11-25,2020-11-19 00:00:00+00:00,47.5,9.6,5.0,29.0,IPL_Marker1
3,2020-11-19 07:46:53+00:00,2020-11-25 20:00:00+00:00,2020-11-25,2020-11-19 00:00:00+00:00,48.6,12.0,5.0,29.0,IPL_Marker1
4,2020-11-19 07:46:53+00:00,2020-11-25 19:00:00+00:00,2020-11-25,2020-11-19 00:00:00+00:00,49.7,14.6,5.0,29.0,IPL_Marker1


In [9]:
df_weather['timestamp'] = pd.to_datetime(df_weather['timestamp'], format = '%Y-%m-%d %H:%M:%S', errors ='coerce')
df_weather['timestamp'] = df_weather['timestamp'].dt.tz_convert('US/Eastern')
df_weather.drop(['Date1'], axis=1, inplace=True)
df_weather['Date'] = df_weather['timestamp'].dt.date

In [10]:
df_weather.sort_values(['timestamp', 'Location'], inplace=True, ascending=False)
df_weather.reset_index(drop=True, inplace=True)

In [11]:
CURRENT_DATE_HOUR = datetime.now(timezone('US/Eastern')).strftime('%Y-%m-%d %H')
# print(CURRENT_DATE_HOUR)
CURRENT_DATE_HOUR = datetime.strptime(CURRENT_DATE_HOUR, '%Y-%m-%d %H')
print(CURRENT_DATE_HOUR)

2020-11-23 09:00:00


In [12]:
six_hours_from_now = CURRENT_DATE_HOUR + timedelta(hours=6)
six_hours_from_now = six_hours_from_now.strftime('%Y-%m-%d %H:%M:%S%z')

twelve_hours_from_now = CURRENT_DATE_HOUR + timedelta(hours=12)
twelve_hours_from_now = twelve_hours_from_now.strftime('%Y-%m-%d %H:%M:%S%z')

twetyfour_hour_from_now = CURRENT_DATE_HOUR + timedelta(hours=24)
twetyfour_hour_from_now = twetyfour_hour_from_now.strftime('%Y-%m-%d %H:%M:%S%z')

logging.info(six_hours_from_now)
logging.info(twelve_hours_from_now)
logging.info(twetyfour_hour_from_now)

2020-11-23 14:00:30,864 2020-11-23 15:00:00
2020-11-23 14:00:30,865 2020-11-23 21:00:00
2020-11-23 14:00:30,865 2020-11-24 09:00:00


In [13]:
CURRENT_DATE_HOUR = CURRENT_DATE_HOUR.strftime('%Y-%m-%d %H:%M:%S')
print(CURRENT_DATE_HOUR)
print(six_hours_from_now)

2020-11-23 09:00:00
2020-11-23 15:00:00


In [14]:
timefilter_list = ['Next 6 Hours', 'Next 12 Hours', 'Next 24 Hours']

next6hours = df_weather[(df_weather['timestamp'] > CURRENT_DATE_HOUR) & (df_weather['timestamp'] <= six_hours_from_now)]
next6hours['timestamp'] = next6hours['timestamp'].astype(str)

next12hours = df_weather[(df_weather['timestamp'] > CURRENT_DATE_HOUR) & (df_weather['timestamp'] <= twelve_hours_from_now)]
next12hours['timestamp'] = next12hours['timestamp'].astype(str)

next24hours = df_weather[(df_weather['timestamp'] > CURRENT_DATE_HOUR) & (df_weather['timestamp'] <= twetyfour_hour_from_now)]
next24hours['timestamp'] = next24hours['timestamp'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [15]:
next6hours = list(next6hours['timestamp'].unique())
next6hours

['2020-11-23 15:00:00-05:00',
 '2020-11-23 14:00:00-05:00',
 '2020-11-23 13:00:00-05:00',
 '2020-11-23 12:00:00-05:00',
 '2020-11-23 11:00:00-05:00',
 '2020-11-23 10:00:00-05:00']

In [16]:
next12hours = list(next12hours['timestamp'].unique())
next12hours

['2020-11-23 21:00:00-05:00',
 '2020-11-23 20:00:00-05:00',
 '2020-11-23 19:00:00-05:00',
 '2020-11-23 18:00:00-05:00',
 '2020-11-23 17:00:00-05:00',
 '2020-11-23 16:00:00-05:00',
 '2020-11-23 15:00:00-05:00',
 '2020-11-23 14:00:00-05:00',
 '2020-11-23 13:00:00-05:00',
 '2020-11-23 12:00:00-05:00',
 '2020-11-23 11:00:00-05:00',
 '2020-11-23 10:00:00-05:00']

In [17]:
next24hours = list(next24hours['timestamp'].unique())
next24hours

['2020-11-24 09:00:00-05:00',
 '2020-11-24 08:00:00-05:00',
 '2020-11-24 07:00:00-05:00',
 '2020-11-24 06:00:00-05:00',
 '2020-11-24 05:00:00-05:00',
 '2020-11-24 04:00:00-05:00',
 '2020-11-24 03:00:00-05:00',
 '2020-11-24 02:00:00-05:00',
 '2020-11-24 01:00:00-05:00',
 '2020-11-24 00:00:00-05:00',
 '2020-11-23 23:00:00-05:00',
 '2020-11-23 22:00:00-05:00',
 '2020-11-23 21:00:00-05:00',
 '2020-11-23 20:00:00-05:00',
 '2020-11-23 19:00:00-05:00',
 '2020-11-23 18:00:00-05:00',
 '2020-11-23 17:00:00-05:00',
 '2020-11-23 16:00:00-05:00',
 '2020-11-23 15:00:00-05:00',
 '2020-11-23 14:00:00-05:00',
 '2020-11-23 13:00:00-05:00',
 '2020-11-23 12:00:00-05:00',
 '2020-11-23 11:00:00-05:00',
 '2020-11-23 10:00:00-05:00']

In [18]:
filter_df = pd.DataFrame({'Filter_ID': timefilter_list[0], 'timestamp': next6hours})
filter_df_1 = pd.DataFrame({'Filter_ID': timefilter_list[1], 'timestamp': next12hours})
filter_df_2 = pd.DataFrame({'Filter_ID': timefilter_list[2], 'timestamp': next24hours})

In [19]:
final_df = pd.DataFrame()
final_df = final_df.append([filter_df, filter_df_1, filter_df_2])
final_df.reset_index(drop=True, inplace=True)

In [20]:
final_df['timestamp']=pd.to_datetime(final_df['timestamp']).dt.strftime('%Y-%m-%d %H:%M:%S')
df_weather['timestamp']=pd.to_datetime(df_weather['timestamp']).dt.strftime('%Y-%m-%d %H:%M:%S')

## Write to big query

In [21]:
df_weather.to_gbq('mds_outage_restoration.IPL_Weather_Dahsboard', project_id='aes-analytics-0002',
                  chunksize=None, reauth=False, if_exists='replace', auth_local_webserver=False,
                    table_schema=None, location=None, progress_bar=True, credentials=None)

final_df.to_gbq('mds_outage_restoration.IPL_Weather_Timefilter', project_id='aes-analytics-0002',
                    chunksize=None, reauth=False, if_exists='replace', auth_local_webserver=False,
                    table_schema=None, location=None, progress_bar=True, credentials=None)

1440 out of 1440 rows loaded.14:00:53,669 
1it [00:03,  3.30s/it]
42 out of 42 rows loaded.-23 14:00:57,805 
1it [00:02,  2.38s/it]


In [22]:
final_df

Unnamed: 0,Filter_ID,timestamp
0,Next 6 Hours,2020-11-23 15:00:00
1,Next 6 Hours,2020-11-23 14:00:00
2,Next 6 Hours,2020-11-23 13:00:00
3,Next 6 Hours,2020-11-23 12:00:00
4,Next 6 Hours,2020-11-23 11:00:00
5,Next 6 Hours,2020-11-23 10:00:00
6,Next 12 Hours,2020-11-23 21:00:00
7,Next 12 Hours,2020-11-23 20:00:00
8,Next 12 Hours,2020-11-23 19:00:00
9,Next 12 Hours,2020-11-23 18:00:00
