In [1]:
import logging
import pandas as pd
import numpy as np
import datetime as dt
from datetime import date, timedelta
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from pyspark.context import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession
# Send INFO-level (and higher) records to the notebook output.
# To silence logging entirely, call logging.disable(logging.CRITICAL) instead.
logging.basicConfig(level=logging.INFO)

# Spark entry points: reuse the running context if there is one.
sc = SparkContext.getOrCreate()
spark = SparkSession(sc)

# pandas behaviour: switch off the chained-assignment check (default is 'warn').
pd.set_option('mode.chained_assignment', None)

# pandas display: allow wide/long frames and print floats without decimals.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 500
pd.options.display.width = 2000
pd.set_option('display.float_format', '{:.0f}'.format)

In [2]:
## Dummification of new data
### Importing dataset

# Reference dates. `today` is used as a folder name, while `tomorrow` and
# `dayafter` are the two forecast target dates embedded in the file names.
current_date = date.today()
logging.info(current_date)
year_month = current_date.strftime('%Y-%m')
logging.info(year_month)
today = current_date.strftime('%Y-%m-%d')
logging.info(today)
tomorrow1 = current_date + timedelta(1)
logging.info(tomorrow1)
tomorrow = tomorrow1.strftime('%Y%m%d')
logging.info(tomorrow)
dayafter1 = current_date + timedelta(2)
logging.info(dayafter1)
dayafter = dayafter1.strftime('%Y%m%d')
logging.info(dayafter)

# The month folder in each path is taken from the forecast target date, so it
# can differ from today's month at a month boundary.
year_month_1 = tomorrow1.strftime('%Y-%m')
logging.info(year_month_1)
year_month_2 = dayafter1.strftime('%Y-%m')
logging.info(year_month_2)

path1 = "gs://aes-datahub-0001-raw/Weather/weather_source/USA/Indianapolis/" + year_month_1 + "/forecast_data/" + today + "/weathersource_daily_" + tomorrow + ".csv"
logging.info(path1)
path2 = "gs://aes-datahub-0001-raw/Weather/weather_source/USA/Indianapolis/" + year_month_2 + "/forecast_data/" + today + "/weathersource_daily_" + dayafter + ".csv"
logging.info(path2)


def _read_csv_as_pandas(path):
    """Read a comma-delimited CSV with a header row through Spark and return a pandas DataFrame.

    The schema is inferred by Spark. `index_col` is deliberately not passed:
    it is a pandas.read_csv keyword, and DataFrameReader.load() would merely
    forward it as an unrecognised reader option. No reset_index() is needed
    either, because toPandas() already returns a frame with a default index.
    """
    return (
        spark.read.format('CSV')
        .option("header", "true")
        .option("inferSchema", "true")
        .option("delimiter", ",")
        .load(path)
        .toPandas()
    )


# Reading the forecast files (one per target date)
new_data = _read_csv_as_pandas(path1)
logging.info(new_data.shape)
new_data2 = _read_csv_as_pandas(path2)
logging.info(new_data2.shape)

# Reading the storm data
storm_data = _read_csv_as_pandas(
    'gs://aes-analytics-0001-curated/Outage_Restoration/Live_Data_Curation/Outage_Prediction/Storm_ID_level_data.csv')
logging.info(storm_data.shape)

INFO:root:2020-10-28
INFO:root:2020-10
INFO:root:2020-10-28
INFO:root:2020-10-29
INFO:root:20201029
INFO:root:2020-10-30
INFO:root:20201030
INFO:root:2020-10
INFO:root:2020-10
INFO:root:gs://aes-datahub-0001-raw/Weather/weather_source/USA/Indianapolis/2020-10/forecast_data/2020-10-28/weathersource_daily_20201029.csv
INFO:root:gs://aes-datahub-0001-raw/Weather/weather_source/USA/Indianapolis/2020-10/forecast_data/2020-10-28/weathersource_daily_20201030.csv
INFO:root:(20, 59)
INFO:root:(20, 59)
INFO:root:(249, 1027)


In [3]:
def preprocess_data(new_data):
    """Reduce a raw forecast frame to the weather columns used downstream.

    Steps:
      * rename ``timestamp`` to ``Date``;
      * keep only the weather columns, ``Date`` and ``Location`` (in the fixed
        order listed below) -- every other column, e.g. ``latitude``,
        ``longitude`` or ``timestampInit``, is discarded by this selection and
        therefore does not have to be present in the input;
      * strip spaces from ``Location`` (``'Marker 1'`` -> ``'Marker1'``);
      * format ``Date`` as a ``'YYYY-MM-DD'`` string (day level).

    Parameters
    ----------
    new_data : pandas.DataFrame
        Raw forecast data containing ``timestamp``, ``Location`` and the
        weather columns listed in ``keep_cols``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with the original row index.

    Raises
    ------
    KeyError
        If a required column is missing from ``new_data``.
    """
    # Selecting required variables (order matters for downstream consumers)
    keep_cols = ['cldCvrAvg', 'cldCvrMax', 'cldCvrMin', 'dewPtAvg', 'dewPtMax', 'dewPtMin',
                 'feelsLikeAvg', 'feelsLikeMax', 'feelsLikeMin',
                 'heatIndexAvg', 'heatIndexMax', 'heatIndexMin',
                 'mslPresAvg', 'mslPresMax', 'mslPresMin', 'precip',
                 'radSolarAvg', 'radSolarMax', 'radSolarTot',
                 'relHumAvg', 'relHumMax', 'relHumMin',
                 'sfcPresAvg', 'sfcPresMax', 'sfcPresMin', 'snowfall',
                 'spcHumAvg', 'spcHumMax', 'spcHumMin',
                 'tempAvg', 'tempMax', 'tempMin', 'Date',
                 'wetBulbAvg', 'wetBulbMax', 'wetBulbMin',
                 'windChillAvg', 'windChillMax', 'windChillMin',
                 'windDir100mAvg', 'windDir80mAvg', 'windDirAvg',
                 'windSpd100mAvg', 'windSpd100mMax', 'windSpd100mMin',
                 'windSpd80mAvg', 'windSpd80mMax', 'windSpd80mMin',
                 'windSpdAvg', 'windSpdMax', 'windSpdMin', 'Location']

    # Work on an explicit copy so the column assignments below never write to
    # a slice of the caller's frame (no SettingWithCopyWarning, no reliance on
    # the warning being disabled globally).
    prepared = new_data.rename(columns={"timestamp": "Date"})[keep_cols].copy()

    # Literal (non-regex) removal of spaces from the location label
    prepared['Location'] = prepared['Location'].str.replace(' ', '', regex=False)

    # Converting to day level date
    prepared['Date'] = pd.to_datetime(prepared['Date']).dt.strftime('%Y-%m-%d')

    return prepared

def separate_different_markers(new_data):
    """Split a pre-processed forecast frame into one frame per marker location.

    Parameters
    ----------
    new_data : pandas.DataFrame
        Frame with a ``Location`` column holding labels ``'Marker1'`` ...
        ``'Marker20'``, plus ``Date`` and the weather columns in ``req_cols``.

    Returns
    -------
    tuple of pandas.DataFrame
        Exactly twenty frames, ordered Marker1 .. Marker20. Each contains only
        ``req_cols`` (``Date`` first) and keeps the row index of the input.
        A marker that has no rows in ``new_data`` yields an empty frame.

    Raises
    ------
    KeyError
        If any column in ``req_cols`` is missing from ``new_data``.
    """
    req_cols = ['Date', 'cldCvrAvg', 'cldCvrMax', 'cldCvrMin', 'dewPtAvg', 'dewPtMax', 'dewPtMin',
                'feelsLikeAvg', 'feelsLikeMax', 'feelsLikeMin',
                'heatIndexAvg', 'heatIndexMax', 'heatIndexMin',
                'mslPresAvg', 'mslPresMax', 'mslPresMin', 'precip',
                'radSolarAvg', 'radSolarMax', 'radSolarTot',
                'relHumAvg', 'relHumMax', 'relHumMin',
                'sfcPresAvg', 'sfcPresMax', 'sfcPresMin', 'snowfall',
                'spcHumAvg', 'spcHumMax', 'spcHumMin',
                'tempAvg', 'tempMax', 'tempMin',
                'wetBulbAvg', 'wetBulbMax', 'wetBulbMin',
                'windChillAvg', 'windChillMax', 'windChillMin',
                'windDir100mAvg', 'windDir80mAvg', 'windDirAvg',
                'windSpd100mAvg', 'windSpd100mMax', 'windSpd100mMin',
                'windSpd80mAvg', 'windSpd80mMax', 'windSpd80mMin',
                'windSpdAvg', 'windSpdMax', 'windSpdMin']

    marker_count = 20  # callers unpack exactly twenty frames
    locations = new_data.Location

    # One row/column selection per marker instead of twenty copy-pasted
    # chained-indexing statements.
    return tuple(
        new_data.loc[locations == 'Marker' + str(number), req_cols]
        for number in range(1, marker_count + 1)
    )

def rename_markers(marker1, marker2, marker3, marker4, marker5, marker6, marker7, marker8, marker9,
                   marker10, marker11, marker12, marker13, marker14, marker15, marker16, marker17,
                   marker18, marker19, marker20):
    """Prefix the weather columns of each marker frame with ``MARKER<n>_``.

    The n-th argument gets the prefix ``MARKER<n>_`` (``marker1`` ->
    ``MARKER1_tempAvg``, ..., ``marker20`` -> ``MARKER20_tempAvg``). Only the
    weather columns listed in ``weather_cols`` are renamed; ``Date`` and any
    other column keep their names, and listed columns that are absent from a
    frame are simply skipped.

    The frames are renamed IN PLACE, so the caller's objects are modified.

    Parameters
    ----------
    marker1 .. marker20 : pandas.DataFrame
        Per-marker frames, in marker order.

    Returns
    -------
    tuple of pandas.DataFrame
        The same twenty (now renamed) frame objects, in the order given.
    """
    weather_cols = ['cldCvrAvg', 'cldCvrMax', 'cldCvrMin',
                    'dewPtAvg', 'dewPtMax', 'dewPtMin',
                    'feelsLikeAvg', 'feelsLikeMax', 'feelsLikeMin',
                    'heatIndexAvg', 'heatIndexMax', 'heatIndexMin',
                    'mslPresAvg', 'mslPresMax', 'mslPresMin',
                    'precip',
                    'radSolarAvg', 'radSolarMax', 'radSolarTot',
                    'relHumAvg', 'relHumMax', 'relHumMin',
                    'sfcPresAvg', 'sfcPresMax', 'sfcPresMin',
                    'snowfall',
                    'spcHumAvg', 'spcHumMax', 'spcHumMin',
                    'tempAvg', 'tempMax', 'tempMin',
                    'wetBulbAvg', 'wetBulbMax', 'wetBulbMin',
                    'windChillAvg', 'windChillMax', 'windChillMin',
                    'windDir100mAvg', 'windDir80mAvg', 'windDirAvg',
                    'windSpd100mAvg', 'windSpd100mMax', 'windSpd100mMin',
                    'windSpd80mAvg', 'windSpd80mMax', 'windSpd80mMin',
                    'windSpdAvg', 'windSpdMax', 'windSpdMin']

    marker_frames = (marker1, marker2, marker3, marker4, marker5, marker6, marker7, marker8, marker9,
                     marker10, marker11, marker12, marker13, marker14, marker15, marker16, marker17,
                     marker18, marker19, marker20)

    for number, frame in enumerate(marker_frames, start=1):
        prefix = 'MARKER' + str(number) + '_'
        # The mapping is generated from the column list rather than written
        # out by hand, so the two can never drift apart.
        frame.rename(columns={col: prefix + col for col in weather_cols}, inplace=True)

    return marker_frames

def merge_markers_dataframe(marker1, marker2, marker3, marker4, marker5, marker6, marker7, marker8, marker9,
                            marker10, marker11, marker12, marker13, marker14, marker15, marker16, marker17,
                            marker18, marker19, marker20):
    """Join the twenty per-marker frames into one wide frame keyed on ``Date``.

    ``marker1`` is the left-most frame; ``marker2`` .. ``marker20`` are
    left-merged onto it one after another on the ``Date`` column. Dates that
    are missing from a later frame therefore produce NaN in that frame's
    columns, and dates that exist only in a later frame are dropped.

    Parameters
    ----------
    marker1 .. marker20 : pandas.DataFrame
        Frames that each contain a ``Date`` column. Their other column names
        are expected to be distinct (e.g. already prefixed per marker);
        otherwise pandas applies its default ``_x`` / ``_y`` suffixes.

    Returns
    -------
    pandas.DataFrame
        A new merged frame; the inputs are not modified.
    """
    final_ads_1 = marker1
    remaining = (marker2, marker3, marker4, marker5, marker6, marker7, marker8, marker9,
                 marker10, marker11, marker12, marker13, marker14, marker15, marker16, marker17,
                 marker18, marker19, marker20)

    # Fold the remaining frames onto the running result, one left merge each.
    for marker in remaining:
        final_ads_1 = pd.merge(final_ads_1, marker, how='left', on=['Date'])

    return final_ads_1

In [4]:
# ### Dummified data
# Each forecast file goes through the same pipeline: clean -> split per marker
# -> prefix the columns per marker -> merge back into one wide frame on Date.

# First forecast file (new_data)
df1 = preprocess_data(new_data)
(mr1, mr2, mr3, mr4, mr5, mr6, mr7, mr8, mr9, mr10,
 mr11, mr12, mr13, mr14, mr15, mr16, mr17, mr18, mr19, mr20) = rename_markers(
    *separate_different_markers(df1))
final_df1 = merge_markers_dataframe(
    mr1, mr2, mr3, mr4, mr5, mr6, mr7, mr8, mr9, mr10,
    mr11, mr12, mr13, mr14, mr15, mr16, mr17, mr18, mr19, mr20)

# Second forecast file (new_data2); mr1..mr20 are re-bound to its frames
df2 = preprocess_data(new_data2)
(mr1, mr2, mr3, mr4, mr5, mr6, mr7, mr8, mr9, mr10,
 mr11, mr12, mr13, mr14, mr15, mr16, mr17, mr18, mr19, mr20) = rename_markers(
    *separate_different_markers(df2))
final_df2 = merge_markers_dataframe(
    mr1, mr2, mr3, mr4, mr5, mr6, mr7, mr8, mr9, mr10,
    mr11, mr12, mr13, mr14, mr15, mr16, mr17, mr18, mr19, mr20)

In [5]:
# Preview the first rows of the wide frame built from the first forecast file
# (`new_data`): a `Date` column plus MARKER<n>_<variable> columns.
final_df1.head()

Unnamed: 0,Date,MARKER1_cldCvrAvg,MARKER1_cldCvrMax,MARKER1_cldCvrMin,MARKER1_dewPtAvg,MARKER1_dewPtMax,MARKER1_dewPtMin,MARKER1_feelsLikeAvg,MARKER1_feelsLikeMax,MARKER1_feelsLikeMin,MARKER1_heatIndexAvg,MARKER1_heatIndexMax,MARKER1_heatIndexMin,MARKER1_mslPresAvg,MARKER1_mslPresMax,MARKER1_mslPresMin,MARKER1_precip,MARKER1_radSolarAvg,MARKER1_radSolarMax,MARKER1_radSolarTot,MARKER1_relHumAvg,MARKER1_relHumMax,MARKER1_relHumMin,MARKER1_sfcPresAvg,MARKER1_sfcPresMax,MARKER1_sfcPresMin,MARKER1_snowfall,MARKER1_spcHumAvg,MARKER1_spcHumMax,MARKER1_spcHumMin,MARKER1_tempAvg,MARKER1_tempMax,MARKER1_tempMin,MARKER1_wetBulbAvg,MARKER1_wetBulbMax,MARKER1_wetBulbMin,MARKER1_windChillAvg,MARKER1_windChillMax,MARKER1_windChillMin,MARKER1_windDir100mAvg,MARKER1_windDir80mAvg,MARKER1_windDirAvg,MARKER1_windSpd100mAvg,MARKER1_windSpd100mMax,MARKER1_windSpd100mMin,MARKER1_windSpd80mAvg,MARKER1_windSpd80mMax,MARKER1_windSpd80mMin,MARKER1_windSpdAvg,MARKER1_windSpdMax,MARKER1_windSpdMin,MARKER2_cldCvrAvg,MARKER2_cldCvrMax,MARKER2_cldCvrMin,MARKER2_dewPtAvg,MARKER2_dewPtMax,MARKER2_dewPtMin,MARKER2_feelsLikeAvg,MARKER2_feelsLikeMax,MARKER2_feelsLikeMin,MARKER2_heatIndexAvg,MARKER2_heatIndexMax,MARKER2_heatIndexMin,MARKER2_mslPresAvg,MARKER2_mslPresMax,MARKER2_mslPresMin,MARKER2_precip,MARKER2_radSolarAvg,MARKER2_radSolarMax,MARKER2_radSolarTot,MARKER2_relHumAvg,MARKER2_relHumMax,MARKER2_relHumMin,MARKER2_sfcPresAvg,MARKER2_sfcPresMax,MARKER2_sfcPresMin,MARKER2_snowfall,MARKER2_spcHumAvg,MARKER2_spcHumMax,MARKER2_spcHumMin,MARKER2_tempAvg,MARKER2_tempMax,MARKER2_tempMin,MARKER2_wetBulbAvg,MARKER2_wetBulbMax,MARKER2_wetBulbMin,MARKER2_windChillAvg,MARKER2_windChillMax,MARKER2_windChillMin,MARKER2_windDir100mAvg,MARKER2_windDir80mAvg,MARKER2_windDirAvg,MARKER2_windSpd100mAvg,MARKER2_windSpd100mMax,MARKER2_windSpd100mMin,MARKER2_windSpd80mAvg,MARKER2_windSpd80mMax,MARKER2_windSpd80mMin,MARKER2_windSpdAvg,MARKER2_windSpdMax,MARKER2_windSpdMin,MARKER3_cldCvrAvg,MARKER3_cldCvrMax,MARKER3_cl
dCvrMin,MARKER3_dewPtAvg,MARKER3_dewPtMax,MARKER3_dewPtMin,MARKER3_feelsLikeAvg,MARKER3_feelsLikeMax,MARKER3_feelsLikeMin,MARKER3_heatIndexAvg,MARKER3_heatIndexMax,MARKER3_heatIndexMin,MARKER3_mslPresAvg,MARKER3_mslPresMax,MARKER3_mslPresMin,MARKER3_precip,MARKER3_radSolarAvg,MARKER3_radSolarMax,MARKER3_radSolarTot,MARKER3_relHumAvg,MARKER3_relHumMax,MARKER3_relHumMin,MARKER3_sfcPresAvg,MARKER3_sfcPresMax,MARKER3_sfcPresMin,MARKER3_snowfall,MARKER3_spcHumAvg,MARKER3_spcHumMax,MARKER3_spcHumMin,MARKER3_tempAvg,MARKER3_tempMax,MARKER3_tempMin,MARKER3_wetBulbAvg,MARKER3_wetBulbMax,MARKER3_wetBulbMin,MARKER3_windChillAvg,MARKER3_windChillMax,MARKER3_windChillMin,MARKER3_windDir100mAvg,MARKER3_windDir80mAvg,MARKER3_windDirAvg,MARKER3_windSpd100mAvg,MARKER3_windSpd100mMax,MARKER3_windSpd100mMin,MARKER3_windSpd80mAvg,MARKER3_windSpd80mMax,MARKER3_windSpd80mMin,MARKER3_windSpdAvg,MARKER3_windSpdMax,MARKER3_windSpdMin,MARKER4_cldCvrAvg,MARKER4_cldCvrMax,MARKER4_cldCvrMin,MARKER4_dewPtAvg,MARKER4_dewPtMax,MARKER4_dewPtMin,MARKER4_feelsLikeAvg,MARKER4_feelsLikeMax,MARKER4_feelsLikeMin,MARKER4_heatIndexAvg,MARKER4_heatIndexMax,MARKER4_heatIndexMin,MARKER4_mslPresAvg,MARKER4_mslPresMax,MARKER4_mslPresMin,MARKER4_precip,MARKER4_radSolarAvg,MARKER4_radSolarMax,MARKER4_radSolarTot,MARKER4_relHumAvg,MARKER4_relHumMax,MARKER4_relHumMin,MARKER4_sfcPresAvg,MARKER4_sfcPresMax,MARKER4_sfcPresMin,MARKER4_snowfall,MARKER4_spcHumAvg,MARKER4_spcHumMax,MARKER4_spcHumMin,MARKER4_tempAvg,MARKER4_tempMax,MARKER4_tempMin,MARKER4_wetBulbAvg,MARKER4_wetBulbMax,MARKER4_wetBulbMin,MARKER4_windChillAvg,MARKER4_windChillMax,MARKER4_windChillMin,MARKER4_windDir100mAvg,MARKER4_windDir80mAvg,MARKER4_windDirAvg,MARKER4_windSpd100mAvg,MARKER4_windSpd100mMax,MARKER4_windSpd100mMin,MARKER4_windSpd80mAvg,MARKER4_windSpd80mMax,MARKER4_windSpd80mMin,MARKER4_windSpdAvg,MARKER4_windSpdMax,MARKER4_windSpdMin,MARKER5_cldCvrAvg,MARKER5_cldCvrMax,MARKER5_cldCvrMin,MARKER5_dewPtAvg,MARKER5_dewPtMax,MARKER5_dewPtMin,MAR
KER5_feelsLikeAvg,MARKER5_feelsLikeMax,MARKER5_feelsLikeMin,MARKER5_heatIndexAvg,MARKER5_heatIndexMax,MARKER5_heatIndexMin,MARKER5_mslPresAvg,MARKER5_mslPresMax,MARKER5_mslPresMin,MARKER5_precip,MARKER5_radSolarAvg,MARKER5_radSolarMax,MARKER5_radSolarTot,MARKER5_relHumAvg,MARKER5_relHumMax,MARKER5_relHumMin,MARKER5_sfcPresAvg,MARKER5_sfcPresMax,MARKER5_sfcPresMin,MARKER5_snowfall,MARKER5_spcHumAvg,MARKER5_spcHumMax,MARKER5_spcHumMin,MARKER5_tempAvg,MARKER5_tempMax,MARKER5_tempMin,MARKER5_wetBulbAvg,MARKER5_wetBulbMax,MARKER5_wetBulbMin,MARKER5_windChillAvg,MARKER5_windChillMax,MARKER5_windChillMin,MARKER5_windDir100mAvg,MARKER5_windDir80mAvg,MARKER5_windDirAvg,MARKER5_windSpd100mAvg,MARKER5_windSpd100mMax,MARKER5_windSpd100mMin,MARKER5_windSpd80mAvg,MARKER5_windSpd80mMax,MARKER5_windSpd80mMin,MARKER5_windSpdAvg,MARKER5_windSpdMax,...,MARKER16_cldCvrAvg,MARKER16_cldCvrMax,MARKER16_cldCvrMin,MARKER16_dewPtAvg,MARKER16_dewPtMax,MARKER16_dewPtMin,MARKER16_feelsLikeAvg,MARKER16_feelsLikeMax,MARKER16_feelsLikeMin,MARKER16_heatIndexAvg,MARKER16_heatIndexMax,MARKER16_heatIndexMin,MARKER16_mslPresAvg,MARKER16_mslPresMax,MARKER16_mslPresMin,MARKER16_precip,MARKER16_radSolarAvg,MARKER16_radSolarMax,MARKER16_radSolarTot,MARKER16_relHumAvg,MARKER16_relHumMax,MARKER16_relHumMin,MARKER16_sfcPresAvg,MARKER16_sfcPresMax,MARKER16_sfcPresMin,MARKER16_snowfall,MARKER16_spcHumAvg,MARKER16_spcHumMax,MARKER16_spcHumMin,MARKER16_tempAvg,MARKER16_tempMax,MARKER16_tempMin,MARKER16_wetBulbAvg,MARKER16_wetBulbMax,MARKER16_wetBulbMin,MARKER16_windChillAvg,MARKER16_windChillMax,MARKER16_windChillMin,MARKER16_windDir100mAvg,MARKER16_windDir80mAvg,MARKER16_windDirAvg,MARKER16_windSpd100mAvg,MARKER16_windSpd100mMax,MARKER16_windSpd100mMin,MARKER16_windSpd80mAvg,MARKER16_windSpd80mMax,MARKER16_windSpd80mMin,MARKER16_windSpdAvg,MARKER16_windSpdMax,MARKER16_windSpdMin,MARKER17_cldCvrAvg,MARKER17_cldCvrMax,MARKER17_cldCvrMin,MARKER17_dewPtAvg,MARKER17_dewPtMax,MARKER17_dewPtMin,MARKER17_feelsLikeAvg,MA
RKER17_feelsLikeMax,MARKER17_feelsLikeMin,MARKER17_heatIndexAvg,MARKER17_heatIndexMax,MARKER17_heatIndexMin,MARKER17_mslPresAvg,MARKER17_mslPresMax,MARKER17_mslPresMin,MARKER17_precip,MARKER17_radSolarAvg,MARKER17_radSolarMax,MARKER17_radSolarTot,MARKER17_relHumAvg,MARKER17_relHumMax,MARKER17_relHumMin,MARKER17_sfcPresAvg,MARKER17_sfcPresMax,MARKER17_sfcPresMin,MARKER17_snowfall,MARKER17_spcHumAvg,MARKER17_spcHumMax,MARKER17_spcHumMin,MARKER17_tempAvg,MARKER17_tempMax,MARKER17_tempMin,MARKER17_wetBulbAvg,MARKER17_wetBulbMax,MARKER17_wetBulbMin,MARKER17_windChillAvg,MARKER17_windChillMax,MARKER17_windChillMin,MARKER17_windDir100mAvg,MARKER17_windDir80mAvg,MARKER17_windDirAvg,MARKER17_windSpd100mAvg,MARKER17_windSpd100mMax,MARKER17_windSpd100mMin,MARKER17_windSpd80mAvg,MARKER17_windSpd80mMax,MARKER17_windSpd80mMin,MARKER17_windSpdAvg,MARKER17_windSpdMax,MARKER17_windSpdMin,MARKER18_cldCvrAvg,MARKER18_cldCvrMax,MARKER18_cldCvrMin,MARKER18_dewPtAvg,MARKER18_dewPtMax,MARKER18_dewPtMin,MARKER18_feelsLikeAvg,MARKER18_feelsLikeMax,MARKER18_feelsLikeMin,MARKER18_heatIndexAvg,MARKER18_heatIndexMax,MARKER18_heatIndexMin,MARKER18_mslPresAvg,MARKER18_mslPresMax,MARKER18_mslPresMin,MARKER18_precip,MARKER18_radSolarAvg,MARKER18_radSolarMax,MARKER18_radSolarTot,MARKER18_relHumAvg,MARKER18_relHumMax,MARKER18_relHumMin,MARKER18_sfcPresAvg,MARKER18_sfcPresMax,MARKER18_sfcPresMin,MARKER18_snowfall,MARKER18_spcHumAvg,MARKER18_spcHumMax,MARKER18_spcHumMin,MARKER18_tempAvg,MARKER18_tempMax,MARKER18_tempMin,MARKER18_wetBulbAvg,MARKER18_wetBulbMax,MARKER18_wetBulbMin,MARKER18_windChillAvg,MARKER18_windChillMax,MARKER18_windChillMin,MARKER18_windDir100mAvg,MARKER18_windDir80mAvg,MARKER18_windDirAvg,MARKER18_windSpd100mAvg,MARKER18_windSpd100mMax,MARKER18_windSpd100mMin,MARKER18_windSpd80mAvg,MARKER18_windSpd80mMax,MARKER18_windSpd80mMin,MARKER18_windSpdAvg,MARKER18_windSpdMax,MARKER18_windSpdMin,MARKER19_cldCvrAvg,MARKER19_cldCvrMax,MARKER19_cldCvrMin,MARKER19_dewPtAvg,MARKER19_dewPtMax,MARK
ER19_dewPtMin,MARKER19_feelsLikeAvg,MARKER19_feelsLikeMax,MARKER19_feelsLikeMin,MARKER19_heatIndexAvg,MARKER19_heatIndexMax,MARKER19_heatIndexMin,MARKER19_mslPresAvg,MARKER19_mslPresMax,MARKER19_mslPresMin,MARKER19_precip,MARKER19_radSolarAvg,MARKER19_radSolarMax,MARKER19_radSolarTot,MARKER19_relHumAvg,MARKER19_relHumMax,MARKER19_relHumMin,MARKER19_sfcPresAvg,MARKER19_sfcPresMax,MARKER19_sfcPresMin,MARKER19_snowfall,MARKER19_spcHumAvg,MARKER19_spcHumMax,MARKER19_spcHumMin,MARKER19_tempAvg,MARKER19_tempMax,MARKER19_tempMin,MARKER19_wetBulbAvg,MARKER19_wetBulbMax,MARKER19_wetBulbMin,MARKER19_windChillAvg,MARKER19_windChillMax,MARKER19_windChillMin,MARKER19_windDir100mAvg,MARKER19_windDir80mAvg,MARKER19_windDirAvg,MARKER19_windSpd100mAvg,MARKER19_windSpd100mMax,MARKER19_windSpd100mMin,MARKER19_windSpd80mAvg,MARKER19_windSpd80mMax,MARKER19_windSpd80mMin,MARKER19_windSpdAvg,MARKER19_windSpdMax,MARKER19_windSpdMin,MARKER20_cldCvrAvg,MARKER20_cldCvrMax,MARKER20_cldCvrMin,MARKER20_dewPtAvg,MARKER20_dewPtMax,MARKER20_dewPtMin,MARKER20_feelsLikeAvg,MARKER20_feelsLikeMax,MARKER20_feelsLikeMin,MARKER20_heatIndexAvg,MARKER20_heatIndexMax,MARKER20_heatIndexMin,MARKER20_mslPresAvg,MARKER20_mslPresMax,MARKER20_mslPresMin,MARKER20_precip,MARKER20_radSolarAvg,MARKER20_radSolarMax,MARKER20_radSolarTot,MARKER20_relHumAvg,MARKER20_relHumMax,MARKER20_relHumMin,MARKER20_sfcPresAvg,MARKER20_sfcPresMax,MARKER20_sfcPresMin,MARKER20_snowfall,MARKER20_spcHumAvg,MARKER20_spcHumMax,MARKER20_spcHumMin,MARKER20_tempAvg,MARKER20_tempMax,MARKER20_tempMin,MARKER20_wetBulbAvg,MARKER20_wetBulbMax,MARKER20_wetBulbMin,MARKER20_windChillAvg,MARKER20_windChillMax,MARKER20_windChillMin,MARKER20_windDir100mAvg,MARKER20_windDir80mAvg,MARKER20_windDirAvg,MARKER20_windSpd100mAvg,MARKER20_windSpd100mMax,MARKER20_windSpd100mMin,MARKER20_windSpd80mAvg,MARKER20_windSpd80mMax,MARKER20_windSpd80mMin,MARKER20_windSpdAvg,MARKER20_windSpdMax,MARKER20_windSpdMin
0,2020-10-29,99,100,66,39,42,36,38,47,34,44,50,42,1010,1016,1007,1,11,50,270,84,90,66,976,982,974,0,5,6,5,44,52,42,42,45,40,38,47,34,23,23,22,19,24,12,19,23,12,12,16,5,99,100,74,39,42,36,39,49,36,45,51,43,1010,1016,1007,1,7,36,172,80,86,63,978,983,975,0,5,6,5,45,54,43,42,46,40,39,49,36,23,23,22,19,24,12,18,23,11,11,16,5,100,100,89,39,41,36,40,50,36,45,52,44,1010,1016,1007,1,9,60,220,79,85,62,979,984,976,0,5,6,5,45,55,43,42,46,40,40,50,36,23,23,22,19,24,12,18,23,11,11,15,6,98,100,53,39,41,36,40,49,36,45,52,43,1010,1015,1007,1,9,56,218,80,86,64,980,985,977,0,5,6,5,45,55,43,42,46,40,40,49,36,22,22,21,19,25,12,18,24,12,11,15,6,97,100,36,40,42,37,39,48,36,45,51,42,1010,1015,1007,1,7,40,160,83,88,67,979,985,977,0,5,6,5,45,54,42,42,46,40,39,48,36,22,21,19,20,26,12,19,25,12,12,17,...,97,100,60,41,44,37,39,49,36,45,52,43,1009,1015,1006,1,9,52,216,85,90,71,981,986,977,0,6,6,5,45,54,43,43,47,40,39,49,36,21,20,20,20,28,13,20,27,12,13,20,8,96,100,37,41,45,37,38,48,35,45,51,42,1009,1015,1006,1,9,50,220,88,92,72,980,986,976,0,6,6,5,45,54,42,43,47,40,38,48,35,20,20,18,21,28,13,20,27,12,14,20,8,98,100,37,41,45,37,39,48,35,45,51,42,1009,1016,1006,1,6,35,146,87,91,70,982,987,978,0,6,6,5,45,53,42,43,47,40,39,48,35,22,22,20,21,28,14,20,27,13,13,19,7,96,101,4,42,45,37,38,48,35,45,51,41,1009,1016,1006,1,6,30,140,88,92,73,981,988,978,0,6,6,5,45,52,41,43,47,39,38,48,35,22,22,20,21,28,14,20,27,14,13,20,8,97,100,29,42,45,38,39,48,35,45,51,42,1009,1015,1006,1,9,60,208,87,92,74,982,988,978,0,6,6,5,45,53,42,44,47,40,39,48,35,22,22,21,21,29,14,20,28,14,14,20,8


In [6]:
# Preview the first rows of the wide frame built from the second forecast file
# (`new_data2`): a `Date` column plus MARKER<n>_<variable> columns.
final_df2.head()

Unnamed: 0,Date,MARKER1_cldCvrAvg,MARKER1_cldCvrMax,MARKER1_cldCvrMin,MARKER1_dewPtAvg,MARKER1_dewPtMax,MARKER1_dewPtMin,MARKER1_feelsLikeAvg,MARKER1_feelsLikeMax,MARKER1_feelsLikeMin,MARKER1_heatIndexAvg,MARKER1_heatIndexMax,MARKER1_heatIndexMin,MARKER1_mslPresAvg,MARKER1_mslPresMax,MARKER1_mslPresMin,MARKER1_precip,MARKER1_radSolarAvg,MARKER1_radSolarMax,MARKER1_radSolarTot,MARKER1_relHumAvg,MARKER1_relHumMax,MARKER1_relHumMin,MARKER1_sfcPresAvg,MARKER1_sfcPresMax,MARKER1_sfcPresMin,MARKER1_snowfall,MARKER1_spcHumAvg,MARKER1_spcHumMax,MARKER1_spcHumMin,MARKER1_tempAvg,MARKER1_tempMax,MARKER1_tempMin,MARKER1_wetBulbAvg,MARKER1_wetBulbMax,MARKER1_wetBulbMin,MARKER1_windChillAvg,MARKER1_windChillMax,MARKER1_windChillMin,MARKER1_windDir100mAvg,MARKER1_windDir80mAvg,MARKER1_windDirAvg,MARKER1_windSpd100mAvg,MARKER1_windSpd100mMax,MARKER1_windSpd100mMin,MARKER1_windSpd80mAvg,MARKER1_windSpd80mMax,MARKER1_windSpd80mMin,MARKER1_windSpdAvg,MARKER1_windSpdMax,MARKER1_windSpdMin,MARKER2_cldCvrAvg,MARKER2_cldCvrMax,MARKER2_cldCvrMin,MARKER2_dewPtAvg,MARKER2_dewPtMax,MARKER2_dewPtMin,MARKER2_feelsLikeAvg,MARKER2_feelsLikeMax,MARKER2_feelsLikeMin,MARKER2_heatIndexAvg,MARKER2_heatIndexMax,MARKER2_heatIndexMin,MARKER2_mslPresAvg,MARKER2_mslPresMax,MARKER2_mslPresMin,MARKER2_precip,MARKER2_radSolarAvg,MARKER2_radSolarMax,MARKER2_radSolarTot,MARKER2_relHumAvg,MARKER2_relHumMax,MARKER2_relHumMin,MARKER2_sfcPresAvg,MARKER2_sfcPresMax,MARKER2_sfcPresMin,MARKER2_snowfall,MARKER2_spcHumAvg,MARKER2_spcHumMax,MARKER2_spcHumMin,MARKER2_tempAvg,MARKER2_tempMax,MARKER2_tempMin,MARKER2_wetBulbAvg,MARKER2_wetBulbMax,MARKER2_wetBulbMin,MARKER2_windChillAvg,MARKER2_windChillMax,MARKER2_windChillMin,MARKER2_windDir100mAvg,MARKER2_windDir80mAvg,MARKER2_windDirAvg,MARKER2_windSpd100mAvg,MARKER2_windSpd100mMax,MARKER2_windSpd100mMin,MARKER2_windSpd80mAvg,MARKER2_windSpd80mMax,MARKER2_windSpd80mMin,MARKER2_windSpdAvg,MARKER2_windSpdMax,MARKER2_windSpdMin,MARKER3_cldCvrAvg,MARKER3_cldCvrMax,MARKER3_cl
dCvrMin,MARKER3_dewPtAvg,MARKER3_dewPtMax,MARKER3_dewPtMin,MARKER3_feelsLikeAvg,MARKER3_feelsLikeMax,MARKER3_feelsLikeMin,MARKER3_heatIndexAvg,MARKER3_heatIndexMax,MARKER3_heatIndexMin,MARKER3_mslPresAvg,MARKER3_mslPresMax,MARKER3_mslPresMin,MARKER3_precip,MARKER3_radSolarAvg,MARKER3_radSolarMax,MARKER3_radSolarTot,MARKER3_relHumAvg,MARKER3_relHumMax,MARKER3_relHumMin,MARKER3_sfcPresAvg,MARKER3_sfcPresMax,MARKER3_sfcPresMin,MARKER3_snowfall,MARKER3_spcHumAvg,MARKER3_spcHumMax,MARKER3_spcHumMin,MARKER3_tempAvg,MARKER3_tempMax,MARKER3_tempMin,MARKER3_wetBulbAvg,MARKER3_wetBulbMax,MARKER3_wetBulbMin,MARKER3_windChillAvg,MARKER3_windChillMax,MARKER3_windChillMin,MARKER3_windDir100mAvg,MARKER3_windDir80mAvg,MARKER3_windDirAvg,MARKER3_windSpd100mAvg,MARKER3_windSpd100mMax,MARKER3_windSpd100mMin,MARKER3_windSpd80mAvg,MARKER3_windSpd80mMax,MARKER3_windSpd80mMin,MARKER3_windSpdAvg,MARKER3_windSpdMax,MARKER3_windSpdMin,MARKER4_cldCvrAvg,MARKER4_cldCvrMax,MARKER4_cldCvrMin,MARKER4_dewPtAvg,MARKER4_dewPtMax,MARKER4_dewPtMin,MARKER4_feelsLikeAvg,MARKER4_feelsLikeMax,MARKER4_feelsLikeMin,MARKER4_heatIndexAvg,MARKER4_heatIndexMax,MARKER4_heatIndexMin,MARKER4_mslPresAvg,MARKER4_mslPresMax,MARKER4_mslPresMin,MARKER4_precip,MARKER4_radSolarAvg,MARKER4_radSolarMax,MARKER4_radSolarTot,MARKER4_relHumAvg,MARKER4_relHumMax,MARKER4_relHumMin,MARKER4_sfcPresAvg,MARKER4_sfcPresMax,MARKER4_sfcPresMin,MARKER4_snowfall,MARKER4_spcHumAvg,MARKER4_spcHumMax,MARKER4_spcHumMin,MARKER4_tempAvg,MARKER4_tempMax,MARKER4_tempMin,MARKER4_wetBulbAvg,MARKER4_wetBulbMax,MARKER4_wetBulbMin,MARKER4_windChillAvg,MARKER4_windChillMax,MARKER4_windChillMin,MARKER4_windDir100mAvg,MARKER4_windDir80mAvg,MARKER4_windDirAvg,MARKER4_windSpd100mAvg,MARKER4_windSpd100mMax,MARKER4_windSpd100mMin,MARKER4_windSpd80mAvg,MARKER4_windSpd80mMax,MARKER4_windSpd80mMin,MARKER4_windSpdAvg,MARKER4_windSpdMax,MARKER4_windSpdMin,MARKER5_cldCvrAvg,MARKER5_cldCvrMax,MARKER5_cldCvrMin,MARKER5_dewPtAvg,MARKER5_dewPtMax,MARKER5_dewPtMin,MAR
KER5_feelsLikeAvg,MARKER5_feelsLikeMax,MARKER5_feelsLikeMin,MARKER5_heatIndexAvg,MARKER5_heatIndexMax,MARKER5_heatIndexMin,MARKER5_mslPresAvg,MARKER5_mslPresMax,MARKER5_mslPresMin,MARKER5_precip,MARKER5_radSolarAvg,MARKER5_radSolarMax,MARKER5_radSolarTot,MARKER5_relHumAvg,MARKER5_relHumMax,MARKER5_relHumMin,MARKER5_sfcPresAvg,MARKER5_sfcPresMax,MARKER5_sfcPresMin,MARKER5_snowfall,MARKER5_spcHumAvg,MARKER5_spcHumMax,MARKER5_spcHumMin,MARKER5_tempAvg,MARKER5_tempMax,MARKER5_tempMin,MARKER5_wetBulbAvg,MARKER5_wetBulbMax,MARKER5_wetBulbMin,MARKER5_windChillAvg,MARKER5_windChillMax,MARKER5_windChillMin,MARKER5_windDir100mAvg,MARKER5_windDir80mAvg,MARKER5_windDirAvg,MARKER5_windSpd100mAvg,MARKER5_windSpd100mMax,MARKER5_windSpd100mMin,MARKER5_windSpd80mAvg,MARKER5_windSpd80mMax,MARKER5_windSpd80mMin,MARKER5_windSpdAvg,MARKER5_windSpdMax,...,MARKER16_cldCvrAvg,MARKER16_cldCvrMax,MARKER16_cldCvrMin,MARKER16_dewPtAvg,MARKER16_dewPtMax,MARKER16_dewPtMin,MARKER16_feelsLikeAvg,MARKER16_feelsLikeMax,MARKER16_feelsLikeMin,MARKER16_heatIndexAvg,MARKER16_heatIndexMax,MARKER16_heatIndexMin,MARKER16_mslPresAvg,MARKER16_mslPresMax,MARKER16_mslPresMin,MARKER16_precip,MARKER16_radSolarAvg,MARKER16_radSolarMax,MARKER16_radSolarTot,MARKER16_relHumAvg,MARKER16_relHumMax,MARKER16_relHumMin,MARKER16_sfcPresAvg,MARKER16_sfcPresMax,MARKER16_sfcPresMin,MARKER16_snowfall,MARKER16_spcHumAvg,MARKER16_spcHumMax,MARKER16_spcHumMin,MARKER16_tempAvg,MARKER16_tempMax,MARKER16_tempMin,MARKER16_wetBulbAvg,MARKER16_wetBulbMax,MARKER16_wetBulbMin,MARKER16_windChillAvg,MARKER16_windChillMax,MARKER16_windChillMin,MARKER16_windDir100mAvg,MARKER16_windDir80mAvg,MARKER16_windDirAvg,MARKER16_windSpd100mAvg,MARKER16_windSpd100mMax,MARKER16_windSpd100mMin,MARKER16_windSpd80mAvg,MARKER16_windSpd80mMax,MARKER16_windSpd80mMin,MARKER16_windSpdAvg,MARKER16_windSpdMax,MARKER16_windSpdMin,MARKER17_cldCvrAvg,MARKER17_cldCvrMax,MARKER17_cldCvrMin,MARKER17_dewPtAvg,MARKER17_dewPtMax,MARKER17_dewPtMin,MARKER17_feelsLikeAvg,MA
RKER17_feelsLikeMax,MARKER17_feelsLikeMin,MARKER17_heatIndexAvg,MARKER17_heatIndexMax,MARKER17_heatIndexMin,MARKER17_mslPresAvg,MARKER17_mslPresMax,MARKER17_mslPresMin,MARKER17_precip,MARKER17_radSolarAvg,MARKER17_radSolarMax,MARKER17_radSolarTot,MARKER17_relHumAvg,MARKER17_relHumMax,MARKER17_relHumMin,MARKER17_sfcPresAvg,MARKER17_sfcPresMax,MARKER17_sfcPresMin,MARKER17_snowfall,MARKER17_spcHumAvg,MARKER17_spcHumMax,MARKER17_spcHumMin,MARKER17_tempAvg,MARKER17_tempMax,MARKER17_tempMin,MARKER17_wetBulbAvg,MARKER17_wetBulbMax,MARKER17_wetBulbMin,MARKER17_windChillAvg,MARKER17_windChillMax,MARKER17_windChillMin,MARKER17_windDir100mAvg,MARKER17_windDir80mAvg,MARKER17_windDirAvg,MARKER17_windSpd100mAvg,MARKER17_windSpd100mMax,MARKER17_windSpd100mMin,MARKER17_windSpd80mAvg,MARKER17_windSpd80mMax,MARKER17_windSpd80mMin,MARKER17_windSpdAvg,MARKER17_windSpdMax,MARKER17_windSpdMin,MARKER18_cldCvrAvg,MARKER18_cldCvrMax,MARKER18_cldCvrMin,MARKER18_dewPtAvg,MARKER18_dewPtMax,MARKER18_dewPtMin,MARKER18_feelsLikeAvg,MARKER18_feelsLikeMax,MARKER18_feelsLikeMin,MARKER18_heatIndexAvg,MARKER18_heatIndexMax,MARKER18_heatIndexMin,MARKER18_mslPresAvg,MARKER18_mslPresMax,MARKER18_mslPresMin,MARKER18_precip,MARKER18_radSolarAvg,MARKER18_radSolarMax,MARKER18_radSolarTot,MARKER18_relHumAvg,MARKER18_relHumMax,MARKER18_relHumMin,MARKER18_sfcPresAvg,MARKER18_sfcPresMax,MARKER18_sfcPresMin,MARKER18_snowfall,MARKER18_spcHumAvg,MARKER18_spcHumMax,MARKER18_spcHumMin,MARKER18_tempAvg,MARKER18_tempMax,MARKER18_tempMin,MARKER18_wetBulbAvg,MARKER18_wetBulbMax,MARKER18_wetBulbMin,MARKER18_windChillAvg,MARKER18_windChillMax,MARKER18_windChillMin,MARKER18_windDir100mAvg,MARKER18_windDir80mAvg,MARKER18_windDirAvg,MARKER18_windSpd100mAvg,MARKER18_windSpd100mMax,MARKER18_windSpd100mMin,MARKER18_windSpd80mAvg,MARKER18_windSpd80mMax,MARKER18_windSpd80mMin,MARKER18_windSpdAvg,MARKER18_windSpdMax,MARKER18_windSpdMin,MARKER19_cldCvrAvg,MARKER19_cldCvrMax,MARKER19_cldCvrMin,MARKER19_dewPtAvg,MARKER19_dewPtMax,MARK
ER19_dewPtMin,MARKER19_feelsLikeAvg,MARKER19_feelsLikeMax,MARKER19_feelsLikeMin,MARKER19_heatIndexAvg,MARKER19_heatIndexMax,MARKER19_heatIndexMin,MARKER19_mslPresAvg,MARKER19_mslPresMax,MARKER19_mslPresMin,MARKER19_precip,MARKER19_radSolarAvg,MARKER19_radSolarMax,MARKER19_radSolarTot,MARKER19_relHumAvg,MARKER19_relHumMax,MARKER19_relHumMin,MARKER19_sfcPresAvg,MARKER19_sfcPresMax,MARKER19_sfcPresMin,MARKER19_snowfall,MARKER19_spcHumAvg,MARKER19_spcHumMax,MARKER19_spcHumMin,MARKER19_tempAvg,MARKER19_tempMax,MARKER19_tempMin,MARKER19_wetBulbAvg,MARKER19_wetBulbMax,MARKER19_wetBulbMin,MARKER19_windChillAvg,MARKER19_windChillMax,MARKER19_windChillMin,MARKER19_windDir100mAvg,MARKER19_windDir80mAvg,MARKER19_windDirAvg,MARKER19_windSpd100mAvg,MARKER19_windSpd100mMax,MARKER19_windSpd100mMin,MARKER19_windSpd80mAvg,MARKER19_windSpd80mMax,MARKER19_windSpd80mMin,MARKER19_windSpdAvg,MARKER19_windSpdMax,MARKER19_windSpdMin,MARKER20_cldCvrAvg,MARKER20_cldCvrMax,MARKER20_cldCvrMin,MARKER20_dewPtAvg,MARKER20_dewPtMax,MARKER20_dewPtMin,MARKER20_feelsLikeAvg,MARKER20_feelsLikeMax,MARKER20_feelsLikeMin,MARKER20_heatIndexAvg,MARKER20_heatIndexMax,MARKER20_heatIndexMin,MARKER20_mslPresAvg,MARKER20_mslPresMax,MARKER20_mslPresMin,MARKER20_precip,MARKER20_radSolarAvg,MARKER20_radSolarMax,MARKER20_radSolarTot,MARKER20_relHumAvg,MARKER20_relHumMax,MARKER20_relHumMin,MARKER20_sfcPresAvg,MARKER20_sfcPresMax,MARKER20_sfcPresMin,MARKER20_snowfall,MARKER20_spcHumAvg,MARKER20_spcHumMax,MARKER20_spcHumMin,MARKER20_tempAvg,MARKER20_tempMax,MARKER20_tempMin,MARKER20_wetBulbAvg,MARKER20_wetBulbMax,MARKER20_wetBulbMin,MARKER20_windChillAvg,MARKER20_windChillMax,MARKER20_windChillMin,MARKER20_windDir100mAvg,MARKER20_windDir80mAvg,MARKER20_windDirAvg,MARKER20_windSpd100mAvg,MARKER20_windSpd100mMax,MARKER20_windSpd100mMin,MARKER20_windSpd80mAvg,MARKER20_windSpd80mMax,MARKER20_windSpd80mMin,MARKER20_windSpdAvg,MARKER20_windSpdMax,MARKER20_windSpdMin
0,2020-10-30,12,87,0,32,36,30,38,46,30,42,48,36,1024,1027,1016,0,155,610,3719,69,90,50,989,992,982,0,4,5,4,42,48,35,38,41,35,38,46,30,14,14,11,10,18,4,10,17,4,6,10,3,9,74,0,31,36,29,40,47,32,43,49,38,1023,1027,1016,0,155,600,3715,64,83,46,991,994,984,0,4,4,3,43,49,37,38,41,36,40,47,32,13,12,10,10,18,4,10,17,4,6,10,3,6,50,0,31,36,28,40,48,33,44,49,39,1023,1027,1016,0,155,600,3714,61,80,44,992,996,985,0,4,4,3,44,49,38,39,41,36,40,48,33,11,11,10,10,18,5,10,17,5,6,10,3,11,77,0,31,36,29,40,47,33,43,49,39,1023,1027,1016,0,155,600,3710,63,82,46,993,996,985,0,4,4,3,43,49,39,38,41,36,40,47,33,9,8,7,10,19,5,10,18,5,6,10,3,16,104,0,32,36,30,38,46,32,42,48,38,1023,1028,1016,0,155,600,3710,67,86,49,992,996,985,0,4,5,4,42,48,38,38,41,36,38,46,32,6,6,3,11,19,5,10,18,5,7,10,...,7,43,0,32,36,30,38,47,32,42,49,38,1023,1028,1016,0,158,620,3791,68,86,47,994,998,986,0,4,5,4,42,49,38,38,42,35,38,47,32,11,11,13,11,20,6,10,19,6,7,10,4,11,86,0,32,37,30,37,46,31,42,48,37,1023,1028,1015,0,156,605,3738,71,90,50,993,997,986,0,4,5,4,42,48,37,38,41,35,37,46,31,9,9,10,11,21,6,11,20,6,7,11,4,8,101,0,33,37,30,38,47,30,41,49,36,1024,1027,1016,0,147,577,3529,73,93,50,995,998,987,0,4,5,4,41,49,36,38,42,35,38,47,30,10,9,1,10,20,4,10,18,4,6,10,2,13,100,0,33,37,30,37,48,31,41,49,36,1024,1027,1016,0,139,570,3340,74,94,50,995,998,988,0,4,5,4,41,49,35,38,42,35,37,48,31,9,8,6,10,21,4,10,20,4,6,11,2,3,34,0,33,37,30,38,48,31,42,49,36,1023,1027,1016,0,156,612,3754,71,92,48,995,999,988,0,4,5,4,42,49,36,38,42,35,38,48,31,10,9,2,10,19,4,10,18,4,6,10,2


In [None]:
# PCA on Storm Data
# Remove index-artefact columns (names starting with 'Unnamed' or '_c0').
for artefact_pattern in ('^Unnamed', '^_c0'):
    storm_data = storm_data.loc[:, ~storm_data.columns.str.contains(artefact_pattern)]

# Columns that are not PCA inputs: identifiers / outcome fields plus the
# radSolarMin reading of each of the 20 markers.
non_feature_columns = ['MAJ_OTG_ID', 'Date', 'OUTAGES', 'CUST_QTY', 'STORM_DURATION', 'OUTAGED_RECOVERY']
non_feature_columns += ['MARKER' + str(marker_no) + '_radSolarMin' for marker_no in range(1, 21)]

# drop() returns a new frame, so storm_data itself is left untouched.
storm_data_dims = storm_data.drop(non_feature_columns, axis=1)

# Per-column mean and standard deviation, used later to scale new forecast data.
meandata = storm_data_dims.mean(axis=0, skipna=True)
stddata = storm_data_dims.std(axis=0, skipna=True)

# Names of the features the scaler and the PCA are fitted on.
features = list(storm_data_dims.columns)

# Fit the standard scaler on the feature frame, then scale it.
logging.info(storm_data_dims.shape)
# logging.info(list(storm_data_dims.columns))
scaler = StandardScaler().fit(storm_data_dims)
scaled_data = scaler.transform(storm_data_dims)

# Scaled data as a labelled frame; the summary below is computed but not
# displayed because it is not the last expression of the cell.
storm_data_dims_scaled = pd.DataFrame(scaled_data, columns=features)
storm_data_dims_scaled.describe().round(2)

# Fit the PCA (all components kept) on the scaled data.
covar_matrix = PCA().fit(scaled_data)

# Explained-variance ratios and their cumulative percentage, for cross-checking
# whether the retained variance is 88.9 or not.
variance = covar_matrix.explained_variance_ratio_
var = np.cumsum(np.round(variance, decimals=3) * 100)

def pcafunc(dff):
    """Scale a forecast frame with the training statistics and project it onto the first 7 PCs.

    Parameters
    ----------
    dff : pandas.DataFrame
        Frame holding a 'Date' column plus every column named in the
        module-level ``features`` list. It must have a row labelled 0, whose
        'Date' value is attached to the result. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns 'PC1'..'PC7' (the first seven components returned by
        ``covar_matrix.transform``) followed by 'Date'; one row per input row.

    Raises
    ------
    KeyError
        If 'Date', the row label 0, or any of ``features`` is missing from ``dff``.

    Notes
    -----
    Relies on the module-level ``features``, ``meandata``, ``stddata`` and
    ``covar_matrix`` created when the PCA was fitted.
    """
    # Date of the first row; it is broadcast to every output row below.
    timestamp = dff.at[0, 'Date']

    # Pick the inputs by name, in the exact order the PCA was fitted on. This
    # covers *every* feature (the previous positional range(1, 1000) loop
    # stopped one column short of the 1000 fitted features) and builds a new
    # frame, so the caller's data is never overwritten and the cell can be
    # re-run safely.
    feature_values = dff[features].astype(float)

    center = meandata[features]
    # A constant training column has std 0; divide by 1 instead so the result
    # stays finite (StandardScaler treats zero-variance features the same way).
    spread = stddata[features].replace(0, 1)
    # NOTE(review): stddata comes from pandas .std() (sample std, ddof=1) while
    # the PCA was fitted on StandardScaler output (population std, ddof=0), so
    # the two scalings differ by a small constant factor - confirm whether
    # scaler.transform should be used here instead.
    scaled_values = (feature_values - center) / spread
    logging.info(scaled_values.shape)

    ## PCA on new data; a plain array is passed because the PCA was fitted on one
    projected = covar_matrix.transform(scaled_values.to_numpy())

    # Keep only the first seven components and label them.
    newdata_transformed = pd.DataFrame(projected).iloc[:, 0:7]
    newdata_transformed.columns = ['PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'PC7']

    ## Adding date column in data
    newdata_transformed['Date'] = timestamp
    return newdata_transformed

logging.info("PCA FUNCTION IS CREATED")

# Project both forecast frames onto the principal components.
pca1, pca2 = pcafunc(final_df1), pcafunc(final_df2)

# Destination files for the two projections, inside today's dated folder.
_output_dir = 'gs://aes-analytics-0001-curated/Outage_Restoration/OMS/Deliverables/Outage_Duration/' + year_month + '/' + today + '/'
OP_Path1 = _output_dir + 'PCA1.csv'
OP_Path2 = _output_dir + 'PCA2.csv'

# Writing the results is currently disabled; nothing is persisted by this cell.
# pca1.to_csv(OP_Path1, index = False)
# pca2.to_csv(OP_Path2, index = False)
logging.info("Task Completed")

INFO:root:(249, 1000)
