## **Import necessary packages**

In [10]:
"""
Load hypertuned Random forest model to predict total time
for restoration and provide ETR's to stakeholders
"""

import pickle
import logging
import subprocess
from pytz import timezone
import datetime as dt
from datetime import datetime, date, timedelta
import pandas as pd
from pandas.io import gbq
import numpy as np
from configparser import ConfigParser, ExtendedInterpolation
import gcsfs
import re

# Setup logs
logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [11]:
# read config file
CONFIGPARSER = ConfigParser(interpolation=ExtendedInterpolation())
CONFIGPARSER.read('/root/confignew0002.ini')
logging.info('Config File Loaded')
logging.info('Config File Sections %s', CONFIGPARSER.sections())

2020-11-27 16:51:33 INFO     Config File Loaded
2020-11-27 16:51:33 INFO     Config File Sections ['SETTINGS', 'LIVE_OMS', 'DATA_COLLATION', 'CURATED_DATA', 'LOAD_AND_PREDICT']


In [12]:
def QC_CHECK_SHAPE_AND_COLUMNS(df):
    '''
    Input - Dataframe with operations/addtion of features/columns or joins performed
    Output - Log Info using shape of dataframe and columns present
    '''
    logging.info('****QC Check****')
    logging.info('\n')
    logging.info('Shape of the DataFrame %s', df.shape)
    logging.info('\n')
    logging.info('Columns present in the DataFrame: %s', list(df.columns))
    logging.info('\n')
    return

## **Read OMS Live Curated Dataset**

In [13]:
BUCKET_NAME = CONFIGPARSER['LOAD_AND_PREDICT']['STAGING_BUCKET']
logging.info('Staging Bucket %s', BUCKET_NAME)

DF_ADS_FINAL = pd.read_csv(BUCKET_NAME)

DF_ADS_FINAL = DF_ADS_FINAL.loc[:, ~DF_ADS_FINAL.columns.str.contains('^Unnamed')]
DF_ADS_FINAL = DF_ADS_FINAL.loc[:, ~DF_ADS_FINAL.columns.str.contains('^c0')]

logging.info('OMS LIVE CURATED DATASET LOADED')
logging.info('\n')
logging.info('No of NAs if any: %s', DF_ADS_FINAL.isnull().values.any())
logging.info('\n')
QC_CHECK_SHAPE_AND_COLUMNS(DF_ADS_FINAL)

# gs://aes-analytics-0002-curated/Outage_Restoration/Staging/IPL_Live_Master_Dataset.csv

2020-11-27 16:51:40 INFO     Staging Bucket gs://aes-analytics-0002-curated/Outage_Restoration/Staging/IPL_Live_Master_Dataset.csv
2020-11-27 16:51:40 INFO     OMS LIVE CURATED DATASET LOADED
2020-11-27 16:51:40 INFO     

2020-11-27 16:51:40 INFO     No of NAs if any: True
2020-11-27 16:51:40 INFO     

2020-11-27 16:51:40 INFO     ****QC Check****
2020-11-27 16:51:40 INFO     

2020-11-27 16:51:40 INFO     Shape of the DataFrame (2, 158)
2020-11-27 16:51:40 INFO     

2020-11-27 16:51:40 INFO     Columns present in the DataFrame: ['INCIDENT_ID', 'STRCTUR_NO', 'CIRCT_ID', 'DNI_EQUIP_TYPE', 'CALL_QTY', 'CUST_QTY', 'KVA_VAL', 'DOWNSTREAM_KVA_VAL', 'INCIDENT_DEVICE_ID', 'CREATION_DATETIME', 'SUBST_ID', 'LOCATION_ID', 'ENERGIZED_DATETIME', 'OUTAGE_ID', 'DAY_FLAG', 'POLE_CLUE_FLG', 'PART_LIGHT_CLUE_FLG', 'EMERGENCY_CLUE_FLG', 'POWER_OUT_CLUE_FLG', 'TREE_CLUE_FLG', 'WIRE_DOWN_CLUE_FLG', 'IVR_CLUE_FLG', 'EQUIPMENT_CLUE_FLG', 'TRANSFORMER_CLUE_FLG', 'OPEN_DEVICE_CLUE_FLG', 'OH_CAUSE_FLG', 'UG

In [14]:
DF_ADS_FINAL.head()

Unnamed: 0,INCIDENT_ID,STRCTUR_NO,CIRCT_ID,DNI_EQUIP_TYPE,CALL_QTY,CUST_QTY,KVA_VAL,DOWNSTREAM_KVA_VAL,INCIDENT_DEVICE_ID,CREATION_DATETIME,...,Outages_in_last_2hr,Outages_in_last_3hr,Outages_in_last_4hr,Outages_in_last_5hr,Outages_in_last_6hr,Outages_in_last_7hr,Outages_in_last_8hr,Outages_in_last_9hr,Outages_in_last_10hr,DOWNSTREAM_CUST_QTY
0,2001562970,397-B/144,1308,1TBOH,1,1,0,0,2002774699,,...,0,0,0,0,0,0,0,0,0,1
1,2001562972,221BA/143,3156,1TPUG,1,1,0,0,2002774701,,...,0,0,0,0,0,0,0,0,0,1


## **Read Storm Profiles Data**

In [7]:
DF_ADS_FINAL.h

Unnamed: 0,INCIDENT_ID,STRCTUR_NO,CIRCT_ID,DNI_EQUIP_TYPE,CALL_QTY,CUST_QTY,KVA_VAL,DOWNSTREAM_KVA_VAL,INCIDENT_DEVICE_ID,CREATION_DATETIME,...,Outages_in_last_2hr,Outages_in_last_3hr,Outages_in_last_4hr,Outages_in_last_5hr,Outages_in_last_6hr,Outages_in_last_7hr,Outages_in_last_8hr,Outages_in_last_9hr,Outages_in_last_10hr,DOWNSTREAM_CUST_QTY
0,2001562970,397-B/144,1308,1TBOH,1,1,0,0,2002774699,NaT,...,0,0,0,0,0,0,0,0,0,1
1,2001562972,221BA/143,3156,1TPUG,1,1,0,0,2002774701,NaT,...,0,0,0,0,0,0,0,0,0,1


In [6]:
BUCKET_NAME = CONFIGPARSER['LOAD_AND_PREDICT']['STORM_PROFILE_BUCKET']
BUCKET_NAME = 'gs://aes-analytics-0001-curated/Outage_Restoration/Live_Data_Curation'

DF_ADS_FINAL['CREATION_DATETIME'] = pd.to_datetime(
    DF_ADS_FINAL['CREATION_DATETIME'], errors='coerce')
DF_ADS_FINAL['Date'] = DF_ADS_FINAL['CREATION_DATETIME'].dt.date

UNIQUE_DATES = DF_ADS_FINAL[['Date']]
UNIQUE_DATES.drop_duplicates(subset=['Date'], keep='first', inplace=True)
UNIQUE_DATES['Date'] = UNIQUE_DATES['Date'].apply(lambda x: x.strftime('%Y%m%d'))
UNIQUE = UNIQUE_DATES['Date'].to_list()

logging.info('Dates for which strom profiles will be read: %s', UNIQUE)
logging.info('\n')


STORM_PROFILES_LOCATION = BUCKET_NAME + '/Storm_Profiles/'
logging.info('Location of Storm Profiles %s', STORM_PROFILES_LOCATION)
logging.info('\n')
STORM_PROFILES_FILES = []

for i in UNIQUE:
    FILENAME = STORM_PROFILES_LOCATION + 'storm_profiles_{}.csv'.format(i)
    STORM_PROFILES_FILES.append(pd.read_csv(FILENAME))

STORMPROFILES_DF = pd.concat(STORM_PROFILES_FILES)
STORMPROFILES_DF.reset_index(drop=True, inplace=True)

STORMPROFILES_DF = STORMPROFILES_DF.loc[:, ~STORMPROFILES_DF.columns.str.contains('^Unnamed')]
STORMPROFILES_DF = STORMPROFILES_DF.loc[:, ~STORMPROFILES_DF.columns.str.contains('_c0')]
STORMPROFILES_DF = STORMPROFILES_DF[['timestamp', 'Location', 'clusters']]

STORMPROFILES_DF.rename({'timestamp' : 'Date', 'Location' : 'Marker_Location',
                         'clusters' : 'Cluster_ID'}, axis=1, inplace=True)

logging.info('Pre-processing Storm Info Done')
logging.info('\n')
QC_CHECK_SHAPE_AND_COLUMNS(STORMPROFILES_DF)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


ValueError: NaTType does not support strftime

In [None]:
STORMPROFILES_DF

In [None]:
DF_ADS_FINAL['Date']

In [None]:
def rename_storm_info(row):
    """
    Input - Cluster Number
    Output - Full description and name of the cluster after profling
    """
    cluster_profile = ''
    if row == 'Cluster1':
        cluster_profile = 'Hot Days with Sudden Rain'
    if row == 'Cluster2':
        cluster_profile = 'Strong Breeze with Sudden Rain'
    if row == 'Cluster3':
        cluster_profile = 'Thunderstorms'
    if row == 'Cluster4':
        cluster_profile = 'Chilly Day with Chances of Snow'
    if row == 'Cluster5':
        cluster_profile = 'Strong Chilled Breeze with Chances of Snow'
    if row == 'Cluster6':
        cluster_profile = 'Hot Days with Chance of Rain'
    
    return cluster_profile

STORMPROFILES_DF['Cluster_ID'] = STORMPROFILES_DF['Cluster_ID'].apply(rename_storm_info)

In [None]:
STORMPROFILES_DF

In [None]:
DF_ADS_FINAL['Marker_Location']

In [None]:
DF_ADS_FINAL['Date']

In [None]:
#Making the Marker Location string matching 
for i in range(len(DF_ADS_FINAL)):
    if len(DF_ADS_FINAL['Marker_Location'][i].split())==1:
        DF_ADS_FINAL['Marker_Location'][i] = re.sub('(\d+(\.\d+)?)', r' \1 ', DF_ADS_FINAL['Marker_Location'][i])
        
for i in range(len(STORMPROFILES_DF)):
    if len(STORMPROFILES_DF['Marker_Location'][i].split())==1:
        STORMPROFILES_DF['Marker_Location'][i] = re.sub('(\d+(\.\d+)?)', r' \1 ', STORMPROFILES_DF['Marker_Location'][i])

In [None]:
# merge storm profiles with final dataframe
DF_ADS_FINAL['Date'] = pd.to_datetime(DF_ADS_FINAL['Date'])
STORMPROFILES_DF['Date'] = pd.to_datetime(STORMPROFILES_DF['Date'])

In [None]:
DF_ADS_FINAL.isna().sum()

In [None]:
DF_ADS_FINAL.shape

In [None]:
STORMPROFILES_DF[['Date', 'Marker_Location']].dtypes

In [None]:
STORMPROFILES_DF['Marker_Location'][0]

In [None]:
DF_ADS_FINAL['Marker_Location'][0]

In [None]:
STORMPROFILES_DF['Marker_Location']  = STORMPROFILES_DF['Marker_Location'].replace(" ", "")
DF_ADS_FINAL['Marker_Location']  = DF_ADS_FINAL['Marker_Location'].replace(' ', "")

In [None]:
DF_ADS_FINAL[['Date', 'Marker_Location']][0]

In [None]:

DF_ADS_FINAL = DF_ADS_FINAL.merge(STORMPROFILES_DF, how='left',
                                  left_on=['Date', 'Marker_Location'],
                                  right_on=['Date', 'Marker_Location'])


logging.info('Cluster Profiles Added')
logging.info('\n')
QC_CHECK_SHAPE_AND_COLUMNS(DF_ADS_FINAL)

In [None]:
DF_ADS_FINAL

## **Load Hyper Tuned RF model**

In [None]:
RF_MODEL = pd.read_pickle(CONFIGPARSER['LOAD_AND_PREDICT']['MODEL_LOCATION'])
logging.info("Model Loaded")
logging.info('\n')

In [None]:
MODEL_FEATURES = CONFIGPARSER['LOAD_AND_PREDICT']['MODEL_FEATURES']
FEATURES_DF = pd.read_csv(MODEL_FEATURES)

FEATURE_LIST = list(FEATURES_DF.Features_List)
logging.info('Features Loaded')
logging.info('\n')
logging.info('Name of the features present %s', FEATURE_LIST)
logging.info('\n')

## **Feature Pre-Processing before it is sent to the Model**

In [None]:
DF_ADS_FINAL['POWER_OUT_CLUE_FLG_False'] = DF_ADS_FINAL['POWER_OUT_CLUE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['ST_OCCURN_FLG_False'] = DF_ADS_FINAL['ST_OCCURN_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['WIRE_OCCURN_FLG_False'] = DF_ADS_FINAL['WIRE_OCCURN_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['FUSE_OCCURN_FLG_False'] = DF_ADS_FINAL['FUSE_OCCURN_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['ST_OCCURN_FLG_True'] = DF_ADS_FINAL['ST_OCCURN_FLG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['PUBLIC_SAFETY_OCCURN_FLG_True'] = \
DF_ADS_FINAL['PUBLIC_SAFETY_OCCURN_FLG'].apply(lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['NO_CAUSE_FLG_False'] = DF_ADS_FINAL['NO_CAUSE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['ANIMAL_CAUSE_FLG_True'] = DF_ADS_FINAL['ANIMAL_CAUSE_FLG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['DAY_FLAG_True'] = DF_ADS_FINAL['DAY_FLAG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['UG_CAUSE_FLG_False'] = DF_ADS_FINAL['UG_CAUSE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['POLE_CLUE_FLG_False'] = DF_ADS_FINAL['POLE_CLUE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['TREE_CAUSE_FLG_True'] = DF_ADS_FINAL['TREE_CAUSE_FLG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['ANIMAL_CAUSE_FLG_False'] = DF_ADS_FINAL['ANIMAL_CAUSE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['TREE_CAUSE_FLG_False'] = DF_ADS_FINAL['TREE_CAUSE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['PUBLIC_SAFETY_OCCURN_FLG_False'] = \
DF_ADS_FINAL['PUBLIC_SAFETY_OCCURN_FLG'].apply(lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['POWER_OUT_CLUE_FLG_True'] = DF_ADS_FINAL['POWER_OUT_CLUE_FLG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['CITY_NAM_NO_CITY'] = DF_ADS_FINAL['CITY_NAM'].apply(
    lambda row: 1 if (row is 'NO_CITY') else 0)

logging.info("Preprocessing Done")

DF_ADS_FINAL_TEMP = DF_ADS_FINAL.copy(deep=True)

DF_ADS_FINAL = DF_ADS_FINAL[FEATURE_LIST]

Y_TEST_PRED = RF_MODEL.predict(DF_ADS_FINAL)
Y_TEST_PRED = np.exp(Y_TEST_PRED)
Y_TEST_PRED = list(Y_TEST_PRED)

logging.info('Predicted Values Are %s', Y_TEST_PRED)

In [None]:
def business_layer_add_addtional_time(predicted_values):
    '''
    Input - Prediction of Outages in minutes
    Output - If predicted  values are less than 1440 min 
    Add 45 mins to predictions, Else Add 360 mins to predictions
    '''
    new_pred_values = []
    for i in range(len(predicted_values)):
        if predicted_values[i] <= 1440:
            new_pred_values.append(predicted_values[i] + 45)
        elif predicted_values[i] > 1440:
            new_pred_values.append(predicted_values[i] + 360)
        else :
            new_pred_values.append(predicted_values[i])
        
    return new_pred_values

Y_TEST_PRED = business_layer_add_addtional_time(Y_TEST_PRED)
DF_ADS_FINAL['Predicted_TTR'] = Y_TEST_PRED

logging.info('Business Logic Added')
logging.info('\n')
logging.info('Predicted ETRs after business logic %s', Y_TEST_PRED)
logging.info('\n')

logging.info('Predicted ETRs added to final dataframe')
QC_CHECK_SHAPE_AND_COLUMNS(DF_ADS_FINAL)

In [None]:
#DF_ADS_FINAL_TEMP = DF_ADS_FINAL.copy(deep=True)

#DF_ADS_FINAL['CREATION_DATETIME'] = DF_ADS_FINAL_TEMP['CREATION_DATETIME'].copy(deep=True) 
DF_ADS_FINAL = pd.concat([DF_ADS_FINAL, DF_ADS_FINAL_TEMP[['CREATION_DATETIME', 'STRCTUR_NO', 'Cluster_ID',
                                                          'CIRCT_ID', 'DNI_EQUIP_TYPE', 'OUTAGE_ID',
                                                          'INCIDENT_ID' ]]], axis=1)

In [None]:
def created_predicted_etr(creation_datetime, time_in_minutes):
    """
    This function calculates the ETR timestamp using creation datetime
    and time for restoration in minutes

    Function returns ETR timestamp

    Args:
        creation_datetime - Outage Creation Datetime
		time_in_minutes - TTR in minutes
    """
    newtime = creation_datetime + timedelta(minutes=time_in_minutes)
    newtime = newtime.strftime("%Y-%m-%d %H:%M:%S %z")
    return newtime


DF_ADS_FINAL['CREATION_DATETIME'] = pd.to_datetime(DF_ADS_FINAL['CREATION_DATETIME'])
DF_ADS_FINAL['Restoration_Period'] = round(DF_ADS_FINAL['Predicted_TTR'], 0)
DF_ADS_FINAL['Predicted_ETR'] = DF_ADS_FINAL.apply(
    lambda row: created_predicted_etr(row['CREATION_DATETIME'], row['Predicted_TTR']), axis=1)

DF_ADS_FINAL['Predicted_ETR'] = pd.to_datetime(DF_ADS_FINAL['Predicted_ETR'])
DF_ADS_FINAL['Predicted_ETR'] = DF_ADS_FINAL['Predicted_ETR'].dt.round('10min')

DF_ADS_FINAL['CREATION_DATETIME'] = DF_ADS_FINAL['CREATION_DATETIME'].apply(
    lambda row: row.strftime("%Y/%m/%d %H:%M:%S"))
DF_ADS_FINAL['Predicted_ETR'] = DF_ADS_FINAL['Predicted_ETR'].apply(
    lambda row: row.strftime("%Y/%m/%d %H:%M:%S"))

logging.info('Final ETRs Created')
QC_CHECK_SHAPE_AND_COLUMNS(DF_ADS_FINAL)

## **Final Pre-processing to Write Outputs in correct Format**

In [None]:
DF_ADS_FINAL = DF_ADS_FINAL[['OUTAGE_ID', 'INCIDENT_ID', 'STRCTUR_NO', 'CIRCT_ID',
                             'DNI_EQUIP_TYPE', 'CREATION_DATETIME', 'Predicted_ETR',
                             'Restoration_Period', 'Cluster_ID']]

DF_ADS_FINAL.rename({'CREATION_DATETIME' : 'Creation_Time',
                     'Predicted_ETR' : 'Estimated_Restoration_Time',
                     'Restoration_Period' : 'ETR','Cluster_ID' : 'Weather_Profile'}, axis=1, inplace=True)

In [None]:
# DF_ADS_FINAL['Last_Updated'] = datetime.now().strftime(format='%Y-%m-%d %H:%M')

In [None]:
# DF_ADS_FINAL.head()

In [None]:
DF_ADS_FINAL.head()

In [None]:
DF_ADS_FINAL['Creation_Time'] = DF_ADS_FINAL['Creation_Time'].astype(str)
DF_ADS_FINAL['Estimated_Restoration_Time'] = DF_ADS_FINAL['Estimated_Restoration_Time'].astype(str)

In [None]:
DF_ADS_FINAL.dtypes

## **Read and Add Insertion Time to Outages**

In [None]:
DF_PRED = DF_ADS_FINAL.copy(deep=True)
DF_PRED['Last_Updated'] = datetime.now().strftime("%Y-%m-%d %H:%M")

DF_PRED.to_gbq(CONFIGPARSER['SETTINGS']['BQ_IPL_PREDICTIONS'], project_id=CONFIGPARSER['SETTINGS']['PROJECT_ID'],
                    chunksize=None, reauth=False, if_exists='append', auth_local_webserver=False,
                    table_schema=None, location=None, progress_bar=True, credentials=None)

DF_PRED.to_gbq(CONFIGPARSER['SETTINGS']['BQ_IPL_LIVE_PREDICTIONS'], project_id=CONFIGPARSER['SETTINGS']['PROJECT_ID'],
                    chunksize=None, reauth=False, if_exists='replace', auth_local_webserver=False,
                    table_schema=None, location=None, progress_bar=True, credentials=None)

logging.info('Prediction Live path %s', CONFIGPARSER['LOAD_AND_PREDICT']['PREDICTION_LIVE'])
DF_ADS_FINAL.to_csv(CONFIGPARSER['LOAD_AND_PREDICT']['PREDICTION_LIVE'], index=False)

YEAR_MONTH = datetime.now(timezone('US/Eastern')).strftime('%Y-%m')
CURRENT_DATE = datetime.now(timezone('US/Eastern')).strftime('%Y-%m-%d')
CURRENT_DATE_HOUR = datetime.now(timezone('US/Eastern')).strftime('%Y%m%d%H%M')
logging.info('Year Month in Eastern Time Zone %s', YEAR_MONTH)
logging.info('Current Month in Eastern Time Zone %s', CURRENT_DATE)
logging.info('Current Date & Hour in Eastern Time Zone %s', CURRENT_DATE_HOUR)
logging.info('\n')

FILENAME = CONFIGPARSER['LOAD_AND_PREDICT']['PREDICTION_BACKUP'] + '{}/{}/TTR_predictions_{}.csv'.format(YEAR_MONTH, CURRENT_DATE, CURRENT_DATE_HOUR)
logging.info('Backup Storage Predictions Storage Path: %s', FILENAME)

DF_ADS_FINAL.to_csv(FILENAME, index=False)

In [None]:
# '''
# Author: Mu Sigma
# Updated: 26 Nov 2020
# Version: 1.5
# Tasks : Load hypertuned Random forest model to predict total time for restoration
# and provide ETR's dataset and provided 0002 anbalytics locations
# '''

# # standard library imports
# import pickle
# import logging
# from pytz import timezone
# import datetime as dt
# from datetime import datetime, date, timedelta
# import pandas as pd
# from pandas.io import gbq
# import numpy as np
# from configparser import ConfigParser, ExtendedInterpolation

# # third party import
# import gcsfs

# Setup logs
logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


# read config file
CONFIGPARSER = ConfigParser(interpolation=ExtendedInterpolation())
CONFIGPARSER.read('/root/confignew0002.ini')
logging.info('Config File Loaded')
logging.info('Config File Sections %s', CONFIGPARSER.sections())



def QC_CHECK_SHAPE_AND_COLUMNS(df):
    '''
    Input - Dataframe with operations/addtion of features/columns or joins performed
    Output - Log Info using shape of dataframe and columns present
    '''
    logging.info('****QC Check****')
    logging.info('\n')
    logging.info('Shape of the DataFrame %s', df.shape)
    logging.info('\n')
    logging.info('Columns present in the DataFrame: %s', list(df.columns))
    logging.info('\n')
    return


# ## **Read OMS Live Curated Dataset**


BUCKET_NAME = CONFIGPARSER['LOAD_AND_PREDICT']['STAGING_BUCKET']
logging.info('Staging Bucket %s', BUCKET_NAME)

DF_ADS_FINAL = pd.read_csv(BUCKET_NAME)

DF_ADS_FINAL = DF_ADS_FINAL.loc[:, ~DF_ADS_FINAL.columns.str.contains('^Unnamed')]
DF_ADS_FINAL = DF_ADS_FINAL.loc[:, ~DF_ADS_FINAL.columns.str.contains('^c0')]

logging.info('OMS LIVE CURATED DATASET LOADED')
logging.info('\n')
logging.info('No of NAs if any: %s', DF_ADS_FINAL.isnull().values.any())
logging.info('\n')
QC_CHECK_SHAPE_AND_COLUMNS(DF_ADS_FINAL)

# gs://aes-analytics-0002-curated/Outage_Restoration/Staging/IPL_Live_Master_Dataset.csv

DF_ADS_FINAL.head()


# ## **Read Storm Profiles Data**


BUCKET_NAME = CONFIGPARSER['LOAD_AND_PREDICT']['STORM_PROFILE_BUCKET']
BUCKET_NAME = 'gs://aes-analytics-0001-curated/Outage_Restoration/Live_Data_Curation'

DF_ADS_FINAL['CREATION_DATETIME'] = pd.to_datetime(
    DF_ADS_FINAL['CREATION_DATETIME'], errors='coerce')
DF_ADS_FINAL['Date'] = DF_ADS_FINAL['CREATION_DATETIME'].dt.date

UNIQUE_DATES = DF_ADS_FINAL[['Date']]
UNIQUE_DATES.drop_duplicates(subset=['Date'], keep='first', inplace=True)
UNIQUE_DATES['Date'] = UNIQUE_DATES['Date'].apply(lambda x: x.strftime('%Y%m%d'))
UNIQUE = UNIQUE_DATES['Date'].to_list()

logging.info('Dates for which strom profiles will be read: %s', UNIQUE)
logging.info('\n')


STORM_PROFILES_LOCATION = BUCKET_NAME + '/Storm_Profiles/'
logging.info('Location of Storm Profiles %s', STORM_PROFILES_LOCATION)
logging.info('\n')
STORM_PROFILES_FILES = []

for i in UNIQUE:
    FILENAME = STORM_PROFILES_LOCATION + 'storm_profiles_{}.csv'.format(i)
    STORM_PROFILES_FILES.append(pd.read_csv(FILENAME))

STORMPROFILES_DF = pd.concat(STORM_PROFILES_FILES)
STORMPROFILES_DF.reset_index(drop=True, inplace=True)

STORMPROFILES_DF = STORMPROFILES_DF.loc[:, ~STORMPROFILES_DF.columns.str.contains('^Unnamed')]
STORMPROFILES_DF = STORMPROFILES_DF.loc[:, ~STORMPROFILES_DF.columns.str.contains('_c0')]
STORMPROFILES_DF = STORMPROFILES_DF[['timestamp', 'Location', 'clusters']]

STORMPROFILES_DF.rename({'timestamp' : 'Date', 'Location' : 'Marker_Location',
                         'clusters' : 'Cluster_ID'}, axis=1, inplace=True)

logging.info('Pre-processing Storm Info Done')
logging.info('\n')
QC_CHECK_SHAPE_AND_COLUMNS(STORMPROFILES_DF)



def rename_storm_info(row):
    """
    Input - Cluster Number
    Output - Full description and name of the clsuter after profling
    """
    cluster_profile = ''
    if row == 'Cluster1':
        cluster_profile = 'Hot Days with Sudden Rain'
    if row == 'Cluster2':
        cluster_profile = 'Strong Breeze with Sudden Rain'
    if row == 'Cluster3':
        cluster_profile = 'Thunderstorms'
    if row == 'Cluster4':
        cluster_profile = 'Chilly Day with Chances of Snow'
    if row == 'Cluster5':
        cluster_profile = 'Strong Chilled Breeze with Chances of Snow'
    if row == 'Cluster6':
        cluster_profile = 'Hot Days with Chance of Rain'
    
    return cluster_profile

STORMPROFILES_DF['Cluster_ID'] = STORMPROFILES_DF['Cluster_ID'].apply(rename_storm_info)

# merge storm profiles with final dataframe
DF_ADS_FINAL['Date'] = pd.to_datetime(DF_ADS_FINAL['Date'])
STORMPROFILES_DF['Date'] = pd.to_datetime(STORMPROFILES_DF['Date'])
DF_ADS_FINAL = DF_ADS_FINAL.merge(STORMPROFILES_DF, how='left',
                                  left_on=['Date', 'Marker_Location'],
                                  right_on=['Date', 'Marker_Location'])


logging.info('Cluster Profiles Added')
logging.info('\n')
QC_CHECK_SHAPE_AND_COLUMNS(DF_ADS_FINAL)


# ## **Load Hyper Tuned RF model**


RF_MODEL = pd.read_pickle(CONFIGPARSER['LOAD_AND_PREDICT']['MODEL_LOCATION'])
logging.info("Model Loaded")
logging.info('\n')


MODEL_FEATURES = CONFIGPARSER['LOAD_AND_PREDICT']['MODEL_FEATURES']
FEATURES_DF = pd.read_csv(MODEL_FEATURES)

FEATURE_LIST = list(FEATURES_DF.Features_List)
logging.info('Features Loaded')
logging.info('\n')
logging.info('Name of the features present %s', FEATURE_LIST)
logging.info('\n')


# ## **Feature Pre-Processing before it is sent to the Model**


DF_ADS_FINAL['POWER_OUT_CLUE_FLG_False'] = DF_ADS_FINAL['POWER_OUT_CLUE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['ST_OCCURN_FLG_False'] = DF_ADS_FINAL['ST_OCCURN_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['WIRE_OCCURN_FLG_False'] = DF_ADS_FINAL['WIRE_OCCURN_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['FUSE_OCCURN_FLG_False'] = DF_ADS_FINAL['FUSE_OCCURN_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['ST_OCCURN_FLG_True'] = DF_ADS_FINAL['ST_OCCURN_FLG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['PUBLIC_SAFETY_OCCURN_FLG_True'] = DF_ADS_FINAL['PUBLIC_SAFETY_OCCURN_FLG'].apply(lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['NO_CAUSE_FLG_False'] = DF_ADS_FINAL['NO_CAUSE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['ANIMAL_CAUSE_FLG_True'] = DF_ADS_FINAL['ANIMAL_CAUSE_FLG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['DAY_FLAG_True'] = DF_ADS_FINAL['DAY_FLAG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['UG_CAUSE_FLG_False'] = DF_ADS_FINAL['UG_CAUSE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['POLE_CLUE_FLG_False'] = DF_ADS_FINAL['POLE_CLUE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['TREE_CAUSE_FLG_True'] = DF_ADS_FINAL['TREE_CAUSE_FLG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['ANIMAL_CAUSE_FLG_False'] = DF_ADS_FINAL['ANIMAL_CAUSE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['TREE_CAUSE_FLG_False'] = DF_ADS_FINAL['TREE_CAUSE_FLG'].apply(
    lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['PUBLIC_SAFETY_OCCURN_FLG_False'] = DF_ADS_FINAL['PUBLIC_SAFETY_OCCURN_FLG'].apply(lambda row: 1 if (row is False) else 0)
DF_ADS_FINAL['POWER_OUT_CLUE_FLG_True'] = DF_ADS_FINAL['POWER_OUT_CLUE_FLG'].apply(
    lambda row: 1 if (row is True) else 0)
DF_ADS_FINAL['CITY_NAM_NO_CITY'] = DF_ADS_FINAL['CITY_NAM'].apply(
    lambda row: 1 if (row is 'NO_CITY') else 0)

logging.info("Preprocessing Done")

DF_ADS_FINAL_TEMP = DF_ADS_FINAL.copy(deep=True)

DF_ADS_FINAL = DF_ADS_FINAL[FEATURE_LIST]

Y_TEST_PRED = RF_MODEL.predict(DF_ADS_FINAL)
Y_TEST_PRED = np.exp(Y_TEST_PRED)
Y_TEST_PRED = list(Y_TEST_PRED)

logging.info('Predicted Values Are %s', Y_TEST_PRED)


def business_layer_add_addtional_time(predicted_values):
    '''
    Input - Prediction of Outages in minutes
    Output - If predicted  values are less than 1440 min 
    Add 45 mins to predictions, Else Add 360 mins to predictions
    '''
    new_pred_values = []
    for i in range(len(predicted_values)):
        if predicted_values[i] <= 1440:
            new_pred_values.append(predicted_values[i] + 45)
        elif predicted_values[i] > 1440:
            new_pred_values.append(predicted_values[i] + 360)
        else :
            new_pred_values.append(predicted_values[i])
        
    return new_pred_values

Y_TEST_PRED = business_layer_add_addtional_time(Y_TEST_PRED)
DF_ADS_FINAL['Predicted_TTR'] = Y_TEST_PRED

logging.info('Business Logic Added')
logging.info('\n')
logging.info('Predicted ETRs after business logic %s', Y_TEST_PRED)
logging.info('\n')

logging.info('Predicted ETRs added to final dataframe')
QC_CHECK_SHAPE_AND_COLUMNS(DF_ADS_FINAL)


#DF_ADS_FINAL_TEMP = DF_ADS_FINAL.copy(deep=True)

#DF_ADS_FINAL['CREATION_DATETIME'] = DF_ADS_FINAL_TEMP['CREATION_DATETIME'].copy(deep=True) 
DF_ADS_FINAL = pd.concat([DF_ADS_FINAL, DF_ADS_FINAL_TEMP[['CREATION_DATETIME', 'STRCTUR_NO', 'Cluster_ID',
                                                          'CIRCT_ID', 'DNI_EQUIP_TYPE', 'OUTAGE_ID',
                                                          'INCIDENT_ID' ]]], axis=1)



def created_predicted_etr(creation_datetime, time_in_minutes):
    """
    This function calculates the ETR timestamp using creation datetime
    and time for restoration in minutes

    Function returns ETR timestamp

    Args:
        creation_datetime - Outage Creation Datetime
		time_in_minutes - TTR in minutes
    """
    newtime = creation_datetime + timedelta(minutes=time_in_minutes)
    newtime = newtime.strftime("%Y-%m-%d %H:%M:%S %z")
    return newtime


DF_ADS_FINAL['CREATION_DATETIME'] = pd.to_datetime(DF_ADS_FINAL['CREATION_DATETIME'])
DF_ADS_FINAL['Restoration_Period'] = round(DF_ADS_FINAL['Predicted_TTR'], 0)
DF_ADS_FINAL['Predicted_ETR'] = DF_ADS_FINAL.apply(
    lambda row: created_predicted_etr(row['CREATION_DATETIME'], row['Predicted_TTR']), axis=1)

DF_ADS_FINAL['Predicted_ETR'] = pd.to_datetime(DF_ADS_FINAL['Predicted_ETR'])
DF_ADS_FINAL['Predicted_ETR'] = DF_ADS_FINAL['Predicted_ETR'].dt.round('10min')

DF_ADS_FINAL['CREATION_DATETIME'] = DF_ADS_FINAL['CREATION_DATETIME'].apply(
    lambda row: row.strftime("%Y/%m/%d %H:%M:%S"))
DF_ADS_FINAL['Predicted_ETR'] = DF_ADS_FINAL['Predicted_ETR'].apply(
    lambda row: row.strftime("%Y/%m/%d %H:%M:%S"))

logging.info('Final ETRs Created')
QC_CHECK_SHAPE_AND_COLUMNS(DF_ADS_FINAL)


# ## **Final Pre-processing to Write Outputs in correct Format**


DF_ADS_FINAL = DF_ADS_FINAL[['OUTAGE_ID', 'INCIDENT_ID', 'STRCTUR_NO', 'CIRCT_ID',
                             'DNI_EQUIP_TYPE', 'CREATION_DATETIME', 'Predicted_ETR',
                             'Restoration_Period', 'Cluster_ID']]

DF_ADS_FINAL.rename({'CREATION_DATETIME' : 'Creation_Time',
                     'Predicted_ETR' : 'Estimated_Restoration_Time',
                     'Restoration_Period' : 'ETR','Cluster_ID' : 'Weather_Profile'}, axis=1, inplace=True)

DF_ADS_FINAL.head()
# ## **Read and Add Insertion Time to Outages**


# DF_PRED = DF_ADS_FINAL.copy(deep=True)
# DF_PRED['Last_Updated'] = datetime.now().strftime("%Y-%m-%d %H:%M")

# DF_PRED.to_gbq(CONFIGPARSER['SETTINGS']['BQ_IPL_PREDICTIONS'], project_id=CONFIGPARSER['SETTINGS']['PROJECT_ID'],
#                     chunksize=None, reauth=False, if_exists='append', auth_local_webserver=False,
#                     table_schema=None, location=None, progress_bar=True, credentials=None)

# DF_PRED.to_gbq(CONFIGPARSER['SETTINGS']['BQ_IPL_LIVE_PREDICTIONS'], project_id=CONFIGPARSER['SETTINGS']['PROJECT_ID'],
#                     chunksize=None, reauth=False, if_exists='replace', auth_local_webserver=False,
#                     table_schema=None, location=None, progress_bar=True, credentials=None)

# logging.info('Prediction Live path %s', CONFIGPARSER['LOAD_AND_PREDICT']['PREDICTION_LIVE'])
# DF_ADS_FINAL.to_csv(CONFIGPARSER['LOAD_AND_PREDICT']['PREDICTION_LIVE'], index=False)

# YEAR_MONTH = datetime.now(timezone('US/Eastern')).strftime('%Y-%m')
# CURRENT_DATE = datetime.now(timezone('US/Eastern')).strftime('%Y-%m-%d')
# CURRENT_DATE_HOUR = datetime.now(timezone('US/Eastern')).strftime('%Y%m%d%H%M')
# logging.info('Year Month in Eastern Time Zone %s', YEAR_MONTH)
# logging.info('Current Month in Eastern Time Zone %s', CURRENT_DATE)
# logging.info('Current Date & Hour in Eastern Time Zone %s', CURRENT_DATE_HOUR)
# logging.info('\n')

# FILENAME = CONFIGPARSER['LOAD_AND_PREDICT']['PREDICTION_BACKUP'] + '{}/{}/TTR_predictions_{}.csv'.format(YEAR_MONTH, CURRENT_DATE, CURRENT_DATE_HOUR)
# logging.info('Backup Storage Predictions Storage Path: %s', FILENAME)

# DF_ADS_FINAL.to_csv(FILENAME, index=False)
