## **Weather Data Hour Level Pull**

In [1]:
import csv
import math
import time
import warnings
import operator
import statistics
import requests
import json
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from dateutil.parser import parse
from datetime import datetime
from datetime import date, timedelta

# Apply matplotlib's 'fivethirtyeight' style sheet to every figure drawn below.
plt.style.use('fivethirtyeight')
# Silence all warnings for the rest of the session (this also hides
# deprecation notices from pandas and other libraries).
warnings.filterwarnings('ignore')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

# Raise pandas' display limits so wide/long frames are shown with less truncation.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Display floats with six digits after the decimal point.
pd.options.display.float_format = '{:.6f}'.format

In [2]:
# One row per location marker: (marker name, latitude, longitude).
# Coordinates are stored as strings, exactly as they are looked up later.
_marker_coordinates = [
    ('Marker1', '39.9613', '-86.4034'),
    ('Marker2', '39.8971', '-86.3045'),
    ('Marker3', '39.9060', '-86.2001'),
    ('Marker4', '39.9024', '-86.0738'),
    ('Marker5', '39.8960', '-85.9783'),
    ('Marker6', '39.8339', '-86.3155'),
    ('Marker7', '39.8412', '-86.2056'),
    ('Marker8', '39.8381', '-86.0985'),
    ('Marker9', '39.8386', '-85.9811'),
    ('Marker10', '39.7579', '-86.3155'),
    ('Marker11', '39.7621', '-86.2042'),
    ('Marker12', '39.7621', '-86.0923'),
    ('Marker13', '39.7695', '-85.9708'),
    ('Marker14', '39.6617', '-86.2935'),
    ('Marker15', '39.6639', '-86.1823'),
    ('Marker16', '39.6702', '-86.0669'),
    ('Marker17', '39.6744', '-85.9557'),
    ('Marker18', '39.5909', '-86.4212'),
    ('Marker19', '39.5295', '-86.5874'),
    ('Marker20', '39.5475', '-86.2743'),
]

# latitude of the location markers, keyed by marker name
sites_latitude = {name: latitude for name, latitude, _ in _marker_coordinates}

# longitude of the location markers, keyed by marker name
sites_longitude = {name: longitude for name, _, longitude in _marker_coordinates}

In [3]:
from pandas.io.json import json_normalize
from requests.exceptions import ConnectionError

def ws_historical_data(start, lat, long, period='hour', fields='all', api_key=None):
    '''
    Fetch WeatherSource history for one latitude/longitude point over the
    24 hours that begin at `start`.

    The request window is sent as `timestamp_between=<start>,<start + 1 day>`
    with both timestamps in ISO 8601 form (%Y-%m-%dT%H:%M:%S). Only one day
    is requested per call, so a longer range needs one call per day.

    Input :
    start - first day of the window: a (%Y-%m-%d) string, or a
            date / datetime / Timestamp object
    lat - latitude (string or number)
    long - longitude (string or number)
    period - value of the `period` query parameter, e.g. hour or day (default=hour)
    fields - value of the `fields` query parameter (default=all)
    api_key - WeatherSource API key. When omitted, the WEATHERSOURCE_API_KEY
              environment variable is used, then the legacy built-in key.

    Output : DataFrame built by json_normalize from the decoded JSON response

    Raises :
    requests HTTPError when the service answers with an error status;
    a connection error or timeout when the single retry also fails;
    ValueError when `start` cannot be parsed or the body is not valid JSON.
    '''
    import os  # local import: `os` is not among the notebook's top-level imports

    headers = {'User-Agent': 'Chrome/78and.0.3865.90'}
    proxy_dict = {
        "http": "http://10.245.5.249:8080",
        "https": "https://10.245.5.249:8080",
        "ftp": "ftp://10.245.5.249:8080",
    }
    request_timeout = 60  # seconds; without a timeout a stalled proxy hangs the pull forever
    retry_delay = 10      # seconds to wait before the single retry

    # NOTE(review): the literal below is a credential committed to source. It is
    # kept only so existing runs keep working; rotate it and supply the key via
    # `api_key` or the WEATHERSOURCE_API_KEY environment variable instead.
    key = api_key or os.environ.get('WEATHERSOURCE_API_KEY') or 'e721181f854ac2268ee8'

    window_start = pd.to_datetime(start, format='%Y-%m-%d')
    window_end = window_start + timedelta(days=1)
    iso_format = '%Y-%m-%dT%H:%M:%S'

    # str.format() accepts numeric coordinates as well as the string form.
    base_url = 'https://api.weathersource.com/v1/'
    resource = '/points/{},{}/history.json?period={}&timestamp_between={},{}&fields={}'.format(
        lat, long, period,
        window_start.strftime(iso_format), window_end.strftime(iso_format),
        fields,
    )
    link = base_url + key + resource
    # Log the request without the API key so it does not end up in notebook output.
    print(base_url + '<api-key>' + resource)

    def _request_frame():
        # One GET + decode + normalise; shared by the first attempt and the retry.
        response = requests.get(link, headers=headers, proxies=proxy_dict,
                                timeout=request_timeout)
        # Fail loudly on 4xx/5xx instead of normalising an error body into "data".
        response.raise_for_status()
        json_obj = json.loads(response.content.decode('utf-8'))
        return json_normalize(json_obj)

    try:
        return _request_frame()
    except (ConnectionError, requests.exceptions.Timeout):
        # Transient network failure: wait, then retry once. A second failure propagates.
        time.sleep(retry_delay)
        return _request_frame()

In [4]:
# Load the outage master dataset from its GCS location and report (rows, columns).
outage_master_csv = 'gs://aes-datahub-0002-curated/Outage_Restoration/Historical_Data/Master_Dataset/OMS_IPL_OUTAGE_BASECREW_11022020.csv'
df_ads = pd.read_csv(outage_master_csv)
print(df_ads.shape)

(111545, 164)


In [11]:
# Show every column name of the outage dataset as a plain Python list.
print(df_ads.columns.tolist())

['Unnamed: 0', 'OUTAGE_ID', 'INCIDENT_ID', 'STRCTUR_NO', 'EVENT', 'CREATION_DATETIME', 'ENERGIZED_DATETIME', 'CIRCT_ID', 'DNI_EQUIP_TYPE', 'SUBST_ID', 'CALL_QTY', 'DOWNSTREAM_CUST_QTY', 'KEY_CUST_QTY', 'ETR_DATETIME', 'CUST_QTY', 'DAY_FLAG', 'TTR', 'POLE_CLUE_FLG', 'PART_LIGHT_CLUE_FLG', 'EMERGENCY_CLUE_FLG', 'POWER_OUT_CLUE_FLG', 'OPEN_DEVICE_CLUE_FLG', 'TREE_CLUE_FLG', 'WIRE_DOWN_CLUE_FLG', 'IVR_CLUE_FLG', 'EQUIPMENT_CLUE_FLG', 'TRANSFORMER_CLUE_FLG', 'OH_CAUSE_FLG', 'UG_CAUSE_FLG', 'ANIMAL_CAUSE_FLG', 'WEATHER_CAUSE_FLG', 'WEATHER_COLD_CAUSE_FLG', 'PUBLIC_CAUSE_FLG', 'WEATHER_LIGHTNING_CAUSE_FLG', 'WEATHER__SNOW_CAUSE_FLG', 'WEATHER__WIND_CAUSE_FLG', 'WEATHER__HEAT_CAUSE_FLG', 'CUST_REQUEST_CAUSE_FLG', 'WEATHER__FLOOD_CAUSE_FLG', 'STREET_CAUSE_FLG', 'SUBSTATION_CAUSE_FLG', 'TREE_CAUSE_FLG', 'MISCELLANEOUS_CAUSE_FLG', 'NO_CAUSE_FLG', 'PLANNED_CAUSE_FLG', 'NO_OUTAGE_CAUSE_FLG', 'FUSE_OCCURN_FLG', 'CUST_EQUIP_OCCURN_FLG', 'POLE_OCCURN_FLG', 'TRANSFORMER_OCCURN_FLG', 'METER_OCCURN_FLG',

In [5]:
# Parse the outage creation timestamp once; unparseable values become NaT.
creation_ts = pd.to_datetime(df_ads['CREATION_DATETIME'], errors='coerce')
df_ads['CREATION_DATETIME'] = creation_ts

# Calendar date and hour-of-day of each outage's creation time.
df_ads['Date'] = creation_ts.dt.date
df_ads['Hour'] = creation_ts.dt.hour

In [6]:
# Keep only the rows whose EVENT is 'STORM', then count the distinct dates among them.
is_storm_event = df_ads['EVENT'] == 'STORM'
df_storm = df_ads[is_storm_event]
print(len(df_storm['Date'].unique()))

445


In [7]:
# Reduce the storm rows to one row per distinct Date (first occurrence kept),
# renumbered 0..n-1.
df_storm = (
    df_storm[['Date']]
    .drop_duplicates(subset=['Date'], keep='first')
    .reset_index(drop=True)
)
print(df_storm.shape)

(445, 1)


In [8]:
# latitude of the location markers
sites_latitude = {
    'Marker1' : '39.9613','Marker2' : '39.8971','Marker3' : '39.9060','Marker4' : '39.9024','Marker5' : '39.8960','Marker6' : '39.8339',
    'Marker7' : '39.8412','Marker8' : '39.8381','Marker9' : '39.8386','Marker10' : '39.7579','Marker11' : '39.7621','Marker12' : '39.7621',
    'Marker13' : '39.7695','Marker14' : '39.6617','Marker15' : '39.6639','Marker16' : '39.6702','Marker17' : '39.6744','Marker18' : '39.5909',
    'Marker19' : '39.5295','Marker20' : '39.5475'
    }

# longitude of the location markers
sites_longitude = {
    'Marker1' : '-86.4034','Marker2' : '-86.3045','Marker3' : '-86.2001','Marker4' : '-86.0738','Marker5' : '-85.9783','Marker6' : '-86.3155',
    'Marker7' : '-86.2056','Marker8' : '-86.0985','Marker9' : '-85.9811','Marker10' : '-86.3155','Marker11' : '-86.2042','Marker12' : '-86.0923',
    'Marker13' : '-85.9708','Marker14' : '-86.2935','Marker15' : '-86.1823','Marker16' : '-86.0669','Marker17' : '-85.9557','Marker18' : '-86.4212',
    'Marker19' : '-86.5874','Marker20' : '-86.2743'
    }

# Fail fast if a marker was added to one dict but not the other; otherwise the
# mismatch only surfaces later as a failed coordinate lookup in the middle of
# the download.
_unpaired_markers = set(sites_latitude) ^ set(sites_longitude)
if _unpaired_markers:
    raise ValueError(
        'Markers missing a latitude or longitude: {}'.format(sorted(_unpaired_markers))
    )

# Marker names in declaration order, derived from the coordinate table so the
# list cannot drift out of sync with it.
Marker_Location = list(sites_latitude)

In [None]:
# One frame per (storm date, marker) request, in request order.
waethersourcefiles_historical = []
ws_master = pd.DataFrame()  # not used by the visible cells; kept in case later code expects it

# Iterate over the date values themselves rather than positional labels, so the
# loop does not depend on df_storm having a fresh 0..n-1 index. Missing dates
# (NaT, possible after errors='coerce' parsing) are skipped because they cannot
# be formatted into a request window.
for storm_date in df_storm['Date'].dropna():
    time.sleep(2)  # pause once per date before its batch of requests (presumably throttling)
    for marker in Marker_Location:
        # Direct indexing: an unknown marker raises KeyError naming the marker
        # instead of passing None coordinates into the request.
        waethersource_data_historical = ws_historical_data(
            start=storm_date,
            lat=sites_latitude[marker],
            long=sites_longitude[marker],
        )
        waethersourcefiles_historical.append(waethersource_data_historical)

# pd.concat raises on an empty list, so fall back to an empty frame when
# nothing was fetched (e.g. no storm dates).
if waethersourcefiles_historical:
    waethersource_df_his = pd.concat(waethersourcefiles_historical, ignore_index=True)
else:
    waethersource_df_his = pd.DataFrame()

https://api.weathersource.com/v1/e721181f854ac2268ee8/points/39.9613,-86.4034/history.json?period=hour&timestamp_between=2007-02-24T00:00:00,2007-02-25T00:00:00&fields=all
https://api.weathersource.com/v1/e721181f854ac2268ee8/points/39.8971,-86.3045/history.json?period=hour&timestamp_between=2007-02-24T00:00:00,2007-02-25T00:00:00&fields=all
https://api.weathersource.com/v1/e721181f854ac2268ee8/points/39.9060,-86.2001/history.json?period=hour&timestamp_between=2007-02-24T00:00:00,2007-02-25T00:00:00&fields=all
https://api.weathersource.com/v1/e721181f854ac2268ee8/points/39.9024,-86.0738/history.json?period=hour&timestamp_between=2007-02-24T00:00:00,2007-02-25T00:00:00&fields=all
https://api.weathersource.com/v1/e721181f854ac2268ee8/points/39.8960,-85.9783/history.json?period=hour&timestamp_between=2007-02-24T00:00:00,2007-02-25T00:00:00&fields=all
https://api.weathersource.com/v1/e721181f854ac2268ee8/points/39.8339,-86.3155/history.json?period=hour&timestamp_between=2007-02-24T00:00:00

In [10]:
# Write the combined hourly weather pull to GCS as CSV, without the index column.
weather_history_csv = 'gs://aes-analytics-0002-curated/Outage_Restoration/Historical_Data/Weather_Data_2020_Hourly/IPL_Weather_Marker_ALL_11052020.csv'
waethersource_df_his.to_csv(weather_history_csv, index=False)