In [1]:
"""
Script for getting the hour level weather source data for Indianapolis Power & Light
Marker Locations Mapping File Path - 'gs://aes-analytics-0001-curated'\
                                     '/Outage_Restoration/Live_Data_Curation\'
                                     '/Mapping_Tables/IPL_OMS_MARKER_MAPPING.csv'
Config File Path - 
"""

"\nScript for getting the hour level weather source data for Indianapolis Power & Light\nMarker Locations Mapping File Path - 'gs://aes-analytics-0001-curated'                                     '/Outage_Restoration/Live_Data_Curation'\n                                     '/Mapping_Tables/IPL_OMS_MARKER_MAPPING.csv'\nConfig File Path - \n"

In [2]:
# importing libraries
from datetime import datetime, timedelta
import warnings
import json
import requests # to get info from server
from requests.exceptions import ConnectionError
import logging
logging.basicConfig(level=logging.INFO)
import pandas as pd
from pandas.io.json import json_normalize
from pytz import timezone #date-time conversion
from pyspark.context import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession

sc = SparkContext.getOrCreate()
spark = SparkSession(sc)

warnings.filterwarnings('ignore')

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 2000)
pd.options.display.float_format = '{:.2f}'.format

# proxy settings for api calls
HEADERS = {'User-Agent': 'Chrome/78and.0.3865.90'}
logging.info("Headers %s", HEADERS)
HTTP_PROXY = "http://proxy.ouraes.com:8080"
logging.info("HTTP Proxy: %s", HTTP_PROXY)
HTTPS_PROXY = "https://proxy.ouraes.com:8080"
logging.info("HTTPS Proxy: %s", HTTPS_PROXY)
FTP_PROXY = "ftp://proxy.ouraes.com:8080"
logging.info("FTP Proxy %s", HTTPS_PROXY)

logging.info("Proxy Dict", )
PROXY_DICT = {"http"  : HTTP_PROXY, "https" : HTTPS_PROXY, "ftp"   : FTP_PROXY}

INFO:root:Headers {'User-Agent': 'Chrome/78and.0.3865.90'}
INFO:root:HTTP Proxy: http://proxy.ouraes.com:8080
INFO:root:HTTPS Proxy: https://proxy.ouraes.com:8080
INFO:root:FTP Proxy https://proxy.ouraes.com:8080
INFO:root:Proxy Dict


In [3]:
# weather source historical api call function defination
def ws_historical_data(start, end, lat, long, period='hour', fields='all'):
    '''
    If duration is more than 1 year separate calls should be used
    Timestamp should be converted to ISO 8601 format
    Docstring with examples and function return values:

    Input :
    start - (%Y-%m-%d) format
    end - (%Y-%m-%d) format
    lat - latitude
    long - longitude
    period - hour, day (default=hour)

    Output : return a collection of weather historical data for a latitude/longitude point
    '''

    key = 'e721181f854ac2268ee8'
    start = pd.to_datetime(start, format='%Y-%m-%d')
    end = pd.to_datetime(end, format='%Y-%m-%d')

    start = start.strftime('%Y-%m-%d')
    end = end.strftime('%Y-%m-%d')

    weather_ = pd.DataFrame()
    link = 'https://api.weathersource.com/v1/'+key+'/points/'+lat+','+long\
	       +'/history.json?period='+period+'&timestamp_between='+start+','+end+'&fields='+fields
    logging.info("Historical API pull Link: %s", link)
    
    try :
        response = requests.get(link, headers=HEADERS,proxies=PROXY_DICT)
        json_obj = json.loads(response.content.decode('utf-8'))
        weather_ = json_normalize(json_obj)
    except ConnectionError:
        time.sleep(10)
        response = requests.get(link, headers=HEADERS,proxies=PROXY_DICT)
        json_obj = json.loads(response.content.decode('utf-8'))
        weather_ = json_normalize(json_obj)
    
    return weather_

In [4]:
# weather source forecast api call function defination
def ws_forecast_data(start, end, lat, long, period='hour', fields='all'):
    '''
    Timestamp should be converted to ISO 8601 format
    Docstring with examples and function return values:

    Input :
    start - (%Y-%m-%d) format
    end - (%Y-%m-%d) format
    lat - latitude
    long - longitude
    period - hour, day (default=hour)

    Output : returns forecast data upto 15 days ahead of forecast data and 240 hours of hourly
    weather data for a latitude/longitude point
    '''

    key = 'e721181f854ac2268ee8'
    start = pd.to_datetime(start, format='%Y-%m-%d')
    end = pd.to_datetime(end, format='%Y-%m-%d')

    start = start.strftime('%Y-%m-%d')
    end = end.strftime('%Y-%m-%d')

    weather_ = pd.DataFrame()
    link = 'https://api.weathersource.com/v1/'+key+'/points/'+lat+','+long+'/forecast.json?period='+period+'&timestamp_between='+start+','+end+'&fields='+fields
    logging.info("Forecast API pull Link: %s", link)
    logging.info("\n")
    try :
        response = requests.get(link, headers=HEADERS,proxies=PROXY_DICT)
        json_obj = json.loads(response.content.decode('utf-8'))
        weather_ = json_normalize(json_obj)
    except ConnectionError:
        time.sleep(10)
        response = requests.get(link, headers=HEADERS,proxies=PROXY_DICT)
        json_obj = json.loads(response.content.decode('utf-8'))
        weather_ = json_normalize(json_obj)
    
    return weather_

In [5]:
# weather source nowcast request api call function defination
def ws_nowcast_data(lat, long, fields='all'):
    '''
    Docstring with examples and function return values:

    Input :
    lat - latitude
    long - longitude

    Output : Returns current weather data for a latitude/longitude point.
    Our patented OnPoint™ system uniquely derives contiguous data for every\
    possible latitude/longitude location. This resource is globally available,\
    everywhere there is landmass (except Antartica).
    '''

    key = 'e721181f854ac2268ee8'
    start = pd.to_datetime(start, format='%Y-%m-%d')
    end = pd.to_datetime(end, format='%Y-%m-%d')

    start = start.strftime('%Y-%m-%dT%H:%M:%S')
    end = end.strftime('%Y-%m-%dT%H:%M:%S')

    weather_ = pd.DataFrame()
    link = 'https://api.weathersource.com/v1/'+key+'/points/'+lat+','+long+'/nowcast.json?fields='+fields
    logging.info("Nowcast API pull Link: %s", link)
    logging.info("\n")
    try :
        response = requests.get(link, headers=HEADERS,proxies=PROXY_DICT)
        json_obj = json.loads(response.content.decode('utf-8'))
        weather_ = json_normalize(json_obj)
    except ConnectionError:
        time.sleep(10)
        response = requests.get(link, headers=HEADERS,proxies=PROXY_DICT)
        json_obj = json.loads(response.content.decode('utf-8'))
        weather_ = json_normalize(json_obj)
    
    return weather_

In [6]:
logging.info("Libraries and functions loaded")
logging.info('\n')

logging.info("Loading Marker Lat Long Mapping File")
logging.info('\n')

bucket_name = 'gs://aes-analytics-0002-curated/Outage_Restoration/'
logging.info('Bucket Name: %s', bucket_name)
logging.info('\n')
marker_df = spark.read.format('CSV').option("header", "true").option("inferSchema", "true").option("delimiter", ",").load(
    bucket_name+'Live_Data_Curation/Mapping_Tables/IPL_OMS_MARKER_MAPPING.csv').toPandas()


marker_df = marker_df.loc[:, ~marker_df.columns.str.contains('^Unnamed')]
marker_df = marker_df.loc[:, ~marker_df.columns.str.contains('^_c0')]
logging.info("Shape of Marker File %s", marker_df.shape)
logging.info('\n')
logging.info("No of NA's if any : %s", marker_df.isnull().values.any())
logging.info('\n')

logging.info('Name of columns in dataframe %s', list(marker_df.columns))
logging.info('\n')

SITES = marker_df.set_index('Marker').T.to_dict('list')
logging.info('Sites Dict %s', SITES)

logging.info('\n')
MARKER_LOCATION = list(marker_df.Marker)
logging.info('Marker Locations %s', MARKER_LOCATION)

INFO:root:Libraries and functions loaded
INFO:root:

INFO:root:Loading Marker Lat Long Mapping File
INFO:root:

INFO:root:Bucket Name: gs://aes-analytics-0002-curated/Outage_Restoration/
INFO:root:

INFO:root:Shape of Marker File (20, 3)
INFO:root:

INFO:root:No of NA's if any : False
INFO:root:

INFO:root:Name of columns in dataframe ['Marker', 'Latitude', 'Longitude']
INFO:root:

INFO:root:Sites Dict {'Marker1': [39.9613, -86.4034], 'Marker2': [39.8971, -86.3045], 'Marker3': [39.906, -86.2001], 'Marker4': [39.9024, -86.0738], 'Marker5': [39.896, -85.9783], 'Marker6': [39.8339, -86.3155], 'Marker7': [39.8412, -86.2056], 'Marker8': [39.8381, -86.0985], 'Marker9': [39.8386, -85.9811], 'Marker10': [39.7579, -86.3155], 'Marker11': [39.7621, -86.2042], 'Marker12': [39.7621, -86.0923], 'Marker13': [39.7695, -85.9708], 'Marker14': [39.6617, -86.2935], 'Marker15': [39.6639, -86.1823], 'Marker16': [39.6702, -86.0669], 'Marker17': [39.6744, -85.9557], 'Marker18': [39.5909, -86.4212], 'Marker19'

In [7]:
TODAY_DATE = datetime.now()
logging.info("Today's Date %s", TODAY_DATE)
logging.info('\n')

FORECAST_NEXT_DATE = (TODAY_DATE + timedelta(days=1)).strftime('%Y-%m-%d')
logging.info("Today + 1 day Forecast Pull Date %s", FORECAST_NEXT_DATE)
logging.info('\n')

FORECAST_END_DATE = (TODAY_DATE + timedelta(days=2)).strftime('%Y-%m-%d')
logging.info("Today + 2 day Forecast Pull Date %s", FORECAST_END_DATE)
logging.info('\n')

PAST_START_DATE = (TODAY_DATE - timedelta(days=2)).strftime('%Y-%m-%d')
logging.info("Today - 2 day Historical Pull Date %s", PAST_START_DATE)
logging.info('\n')

PAST_END_DATE = (TODAY_DATE - timedelta(days=1)).strftime('%Y-%m-%d')
logging.info("Today - 1 day Historical Pull Date %s", PAST_END_DATE)
logging.info('\n')

TODAY_DATE = TODAY_DATE.strftime('%Y-%m-%d')
logging.info("Today Date formatted YYYY-MM-DD %s", TODAY_DATE)
logging.info('\n')


logging.info("Extracting from API")
logging.info('\n')

WAETHERSOURCEFILES_FORECAST = []
WAETHERSOURCEFILES_HISTORICAL = []
WEATHERSOURCE = []
VALUE1 = 0.0
VALUE2 = 0.0

for i in MARKER_LOCATION:
    logging.info('Marker Name : %s', i)
    VALUE1 = str(SITES.get(i)[0])
    logging.info('Marker Latitude %s', VALUE1)
    VALUE2 = str(SITES.get(i)[1])
    logging.info('Marker Longitude %s', VALUE2)
    logging.info('\n')
    waethersource_data_forecast = ws_forecast_data(start=TODAY_DATE, end=FORECAST_END_DATE,
                                                   lat=VALUE1, long=VALUE2)
    waethersource_data_historical = ws_historical_data(start=PAST_START_DATE, end=PAST_END_DATE,
                                                       lat=VALUE1, long=VALUE2)
    waethersource_data_historical['Location'] = i
    waethersource_data_forecast['Location'] = i
    WAETHERSOURCEFILES_FORECAST.append(waethersource_data_forecast)
    WAETHERSOURCEFILES_HISTORICAL.append(waethersource_data_historical)

WAETHERSOURCE_DF_HIS = pd.concat(WAETHERSOURCEFILES_HISTORICAL)
WAETHERSOURCE_DF_FOR = pd.concat(WAETHERSOURCEFILES_FORECAST)

WAETHERSOURCE_DF_HIS.reset_index(drop=True, inplace=True)
logging.info("Shape of Hourly Historical Data Pulled %s", WAETHERSOURCE_DF_HIS.shape)
WAETHERSOURCE_DF_FOR.reset_index(drop=True, inplace=True)
logging.info("Shape of Hourly Forecast Data Pulled %s", WAETHERSOURCE_DF_HIS.shape)
logging.info("Successful Extraction of Hourly Weather Data 100%")
logging.info("/n")

logging.info("No of unique timestamps in historical data: %s", list(WAETHERSOURCE_DF_HIS['timestamp'].unique()))
logging.info("No of unique timestamps in forecasted data: %s", list(WAETHERSOURCE_DF_FOR['timestamp'].unique()))

INFO:root:Today's Date 2020-11-06 13:32:52.267796
INFO:root:

INFO:root:Today + 1 day Forecast Pull Date 2020-11-07
INFO:root:

INFO:root:Today + 2 day Forecast Pull Date 2020-11-08
INFO:root:

INFO:root:Today - 2 day Historical Pull Date 2020-11-04
INFO:root:

INFO:root:Today - 1 day Historical Pull Date 2020-11-05
INFO:root:

INFO:root:Today Date formatted YYYY-MM-DD 2020-11-06
INFO:root:

INFO:root:Extracting from API
INFO:root:

INFO:root:Marker Name : Marker1
INFO:root:Marker Latitude 39.9613
INFO:root:Marker Longitude -86.4034
INFO:root:

INFO:root:Forecast API pull Link: https://api.weathersource.com/v1/e721181f854ac2268ee8/points/39.9613,-86.4034/forecast.json?period=hour&timestamp_between=2020-11-06,2020-11-08&fields=all
INFO:root:

INFO:root:Historical API pull Link: https://api.weathersource.com/v1/e721181f854ac2268ee8/points/39.9613,-86.4034/history.json?period=hour&timestamp_between=2020-11-04,2020-11-05&fields=all
INFO:root:Marker Name : Marker2
INFO:root:Marker Latitude 

In [9]:
WAETHERSOURCE_DF_HIS['timestamp'] = pd.to_datetime(WAETHERSOURCE_DF_HIS['timestamp'])
WAETHERSOURCE_DF_FOR['timestamp'] = pd.to_datetime(WAETHERSOURCE_DF_FOR['timestamp'])

WAETHERSOURCE_DF_HIS['timestamp'] = (WAETHERSOURCE_DF_HIS['timestamp']).apply(
    lambda row: row.strftime("%Y-%m-%d %H:%M:%S"))

WAETHERSOURCE_DF_FOR['timestamp'] = (WAETHERSOURCE_DF_FOR['timestamp']).apply(
    lambda row: row.strftime("%Y-%m-%d %H:%M:%S"))

WAETHERSOURCE_DF_HIS['timestamp'] = pd.to_datetime(WAETHERSOURCE_DF_HIS['timestamp']).dt.date
WAETHERSOURCE_DF_FOR['timestamp'] = pd.to_datetime(WAETHERSOURCE_DF_FOR['timestamp']).dt.date

DATE_LIST = [PAST_START_DATE, PAST_END_DATE, TODAY_DATE, FORECAST_NEXT_DATE, FORECAST_END_DATE]




for i in range(0, 2):
    temp_df = WAETHERSOURCE_DF_HIS[WAETHERSOURCE_DF_HIS['timestamp'].astype(str) == DATE_LIST[i]]
    loc = "gs://aes-datahub-0001-raw/Weather/weather_source/USA/Indianapolis/"
    loc = loc + datetime.strptime(DATE_LIST[i], '%Y-%m-%d').strftime('%Y%m%d')[:4] + "-" +\
    datetime.strptime(DATE_LIST[i], '%Y-%m-%d').strftime('%Y%m%d')[4:6]
    loc = loc + "/actual_data/weathersource_daily_"
    loc = loc + datetime.strptime(DATE_LIST[i], '%Y-%m-%d').strftime('%Y%m%d') +'.csv'
#     temp_df.to_csv(loc)


for i in range(2, 5):
    temp_df = WAETHERSOURCE_DF_FOR[WAETHERSOURCE_DF_FOR['timestamp'].astype(str) == DATE_LIST[i]]
    loc = "gs://aes-datahub-0001-raw/Weather/weather_source/USA/Indianapolis/"
    loc = loc + datetime.strptime(DATE_LIST[i], '%Y-%m-%d').strftime('%Y%m%d')[:4] + "-"\
    + datetime.strptime(DATE_LIST[i], '%Y-%m-%d').strftime('%Y%m%d')[4:6]
    loc = loc + "/forecast_data/" + TODAY_DATE + "/weathersource_daily_"
    loc = loc + datetime.strptime(DATE_LIST[i], '%Y-%m-%d').strftime('%Y%m%d') + '.csv'
#     temp_df.to_csv(loc)


logging.info("Saved location at aes-datahub-0001-raw/Weather/weather_source/usa/Indianapolis/")

INFO:root:Saved location at aes-datahub-0001-raw/Weather/weather_source/usa/Indianapolis/


In [10]:
temp_df.head()

Unnamed: 0,latitude,longitude,timestamp,timestampInit,cldCvr,dewPt,feelsLike,heatIndex,mslPres,precip,precipProb,relHum,radSolar,sfcPres,snowfall,snowfallProb,spcHum,temp,wetBulb,windChill,windDir,windDir80m,windDir100m,windSpd,windSpd80m,windSpd100m,Location
40,39.96,-86.4,2020-11-08,2020-11-06T01:00:00-05:00,0,47.3,51.6,53.3,1024.3,0,0,79.8,0.0,990.7,0,0,7.0,53.3,50.1,51.6,165.2,168.2,169.2,5.9,12.2,13.4,Marker1
81,39.9,-86.3,2020-11-08,2020-11-06T01:00:00-05:00,0,47.0,54.5,55.2,1024.3,0,0,74.2,0.0,992.6,0,0,6.9,55.2,50.8,54.5,163.4,165.8,167.6,4.6,11.1,12.3,Marker2
122,39.91,-86.2,2020-11-08,2020-11-06T01:00:00-05:00,0,46.8,56.4,56.4,1024.4,0,0,70.1,0.0,993.8,0,0,6.9,56.4,51.3,56.2,161.0,166.1,168.0,4.1,10.8,12.0,Marker3
163,39.9,-86.07,2020-11-08,2020-11-06T01:00:00-05:00,0,46.9,55.8,56.0,1024.5,0,0,71.6,0.0,994.5,0,0,6.9,56.0,51.1,55.5,161.3,166.3,168.1,4.5,11.0,12.1,Marker4
204,39.9,-85.98,2020-11-08,2020-11-06T01:00:00-05:00,0,47.1,53.5,54.6,1024.5,0,0,75.9,0.0,994.3,0,0,6.9,54.6,50.6,53.5,162.0,166.1,168.0,5.3,11.2,12.3,Marker5
