## Cleaning the steel data and combining with weather data to produce augmented dataset

### Imports and read data from csv

In [1]:
import pandas as pd
import numpy as np

In [2]:
raw_steel = pd.read_csv("SteelData.csv")
display(raw_steel)

Unnamed: 0,date,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week,Load_Type
0,01/01/2018 00:15,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Monday,Light_Load
1,01/01/2018 00:30,4.00,4.46,0.00,0.0,66.77,100.00,1800,Weekday,Monday,Light_Load
2,01/01/2018 00:45,3.24,3.28,0.00,0.0,70.28,100.00,2700,Weekday,Monday,Light_Load
3,01/01/2018 01:00,3.31,3.56,0.00,0.0,68.09,100.00,3600,Weekday,Monday,Light_Load
4,01/01/2018 01:15,3.82,4.50,0.00,0.0,64.72,100.00,4500,Weekday,Monday,Light_Load
...,...,...,...,...,...,...,...,...,...,...,...
35035,31/12/2018 23:00,3.85,4.86,0.00,0.0,62.10,100.00,82800,Weekday,Monday,Light_Load
35036,31/12/2018 23:15,3.74,3.74,0.00,0.0,70.71,100.00,83700,Weekday,Monday,Light_Load
35037,31/12/2018 23:30,3.78,3.17,0.07,0.0,76.62,99.98,84600,Weekday,Monday,Light_Load
35038,31/12/2018 23:45,3.78,3.06,0.11,0.0,77.72,99.96,85500,Weekday,Monday,Light_Load


In [3]:
raw_steel['Load_Type'].value_counts()

Light_Load      18072
Medium_Load      9696
Maximum_Load     7272
Name: Load_Type, dtype: int64

### Creating derieved fields

In [4]:
def GetYearFromDate(dtVal):
    slash_split = dtVal.split("/")
    result = slash_split[2].split(" ")
    return int(result[0])

def GetMonthFromDate(dtVal):
    slash_split = dtVal.split("/")
    return int(slash_split[1])

def GetDayFromDate(dtVal):
    slash_split = dtVal.split("/")
    return int(slash_split[0])

def GetHoursFromDate(dtVal):
    slash_split = dtVal.split("/")
    result = slash_split[2].split(" ")
    return int(result[1].split(":")[0])

def GetMinutesFromDate(dtVal):
    slash_split = dtVal.split("/")
    result = slash_split[2].split(" ")
    return int(result[1].split(":")[1])

def GetSeason(month):
    if(month == 4 or month == 5 or month == 6):
        return "Spring"
    elif (month == 7 or month == 8):
        return "Summer"
    elif (month == 9 or month == 10 or month == 11):
        return "Autumn"
    else:
        return "Winter"

In [5]:
string = "01/12/2018 00:15"
print(GetYearFromDate(string))
print(GetMonthFromDate(string))
print(GetDayFromDate(string))
print(GetHoursFromDate(string))
print(GetMinutesFromDate(string))
print(GetSeason(4))

2018
12
1
0
15
Spring


In [6]:
derieved_steel = raw_steel.copy()
display(derieved_steel)

Unnamed: 0,date,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week,Load_Type
0,01/01/2018 00:15,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Monday,Light_Load
1,01/01/2018 00:30,4.00,4.46,0.00,0.0,66.77,100.00,1800,Weekday,Monday,Light_Load
2,01/01/2018 00:45,3.24,3.28,0.00,0.0,70.28,100.00,2700,Weekday,Monday,Light_Load
3,01/01/2018 01:00,3.31,3.56,0.00,0.0,68.09,100.00,3600,Weekday,Monday,Light_Load
4,01/01/2018 01:15,3.82,4.50,0.00,0.0,64.72,100.00,4500,Weekday,Monday,Light_Load
...,...,...,...,...,...,...,...,...,...,...,...
35035,31/12/2018 23:00,3.85,4.86,0.00,0.0,62.10,100.00,82800,Weekday,Monday,Light_Load
35036,31/12/2018 23:15,3.74,3.74,0.00,0.0,70.71,100.00,83700,Weekday,Monday,Light_Load
35037,31/12/2018 23:30,3.78,3.17,0.07,0.0,76.62,99.98,84600,Weekday,Monday,Light_Load
35038,31/12/2018 23:45,3.78,3.06,0.11,0.0,77.72,99.96,85500,Weekday,Monday,Light_Load


In [7]:
derieved_steel["Year"] = derieved_steel["date"].apply(GetYearFromDate)
derieved_steel["Month"] = derieved_steel["date"].apply(GetMonthFromDate)
derieved_steel["Day"] = derieved_steel["date"].apply(GetDayFromDate)

derieved_steel["Hours"] = derieved_steel["date"].apply(GetHoursFromDate)
derieved_steel["Minutes"] = derieved_steel["date"].apply(GetMinutesFromDate)

derieved_steel["Season"] = derieved_steel["Month"].apply(GetSeason)

display(derieved_steel)

Unnamed: 0,date,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week,Load_Type,Year,Month,Day,Hours,Minutes,Season
0,01/01/2018 00:15,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Monday,Light_Load,2018,1,1,0,15,Winter
1,01/01/2018 00:30,4.00,4.46,0.00,0.0,66.77,100.00,1800,Weekday,Monday,Light_Load,2018,1,1,0,30,Winter
2,01/01/2018 00:45,3.24,3.28,0.00,0.0,70.28,100.00,2700,Weekday,Monday,Light_Load,2018,1,1,0,45,Winter
3,01/01/2018 01:00,3.31,3.56,0.00,0.0,68.09,100.00,3600,Weekday,Monday,Light_Load,2018,1,1,1,0,Winter
4,01/01/2018 01:15,3.82,4.50,0.00,0.0,64.72,100.00,4500,Weekday,Monday,Light_Load,2018,1,1,1,15,Winter
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35035,31/12/2018 23:00,3.85,4.86,0.00,0.0,62.10,100.00,82800,Weekday,Monday,Light_Load,2018,12,31,23,0,Winter
35036,31/12/2018 23:15,3.74,3.74,0.00,0.0,70.71,100.00,83700,Weekday,Monday,Light_Load,2018,12,31,23,15,Winter
35037,31/12/2018 23:30,3.78,3.17,0.07,0.0,76.62,99.98,84600,Weekday,Monday,Light_Load,2018,12,31,23,30,Winter
35038,31/12/2018 23:45,3.78,3.06,0.11,0.0,77.72,99.96,85500,Weekday,Monday,Light_Load,2018,12,31,23,45,Winter


In [8]:
holiday_data = pd.read_csv("HolidayData.csv")
display(holiday_data)

Unnamed: 0,Year,Month,Day,HolidayName
0,2018,1,1,New Year's Day
1,2018,2,15,Seotdal Geumeum
2,2018,2,16,Korean New Year
3,2018,2,17,Korean New Year
4,2018,3,1,Independence Movement Day
5,2018,5,5,Children's Day
6,2018,5,22,Buddha's birthday
7,2018,6,6,Memorial Day
8,2018,8,15,National Liberation Day of Korea
9,2018,9,23,Chuseok


In [9]:
month = 1
day = 1
query = "Month=="+str(month)+" & Day=="+str(day)
print(query)
print(type(holiday_data.query(query)))
print(holiday_data.query(query).empty)

print(int("01"))

Month==1 & Day==1
<class 'pandas.core.frame.DataFrame'>
False
1


In [10]:
def SetHolidayOrNot(weekstatus, month, day):
    query = "Month=="+str(int(month))+" & Day=="+str(int(day))
    if weekstatus == "Weekend":
        return 1
    elif holiday_data.query(query).empty == False:
        return 1
    else:
        return 0

In [11]:
derieved_steel['isholiday'] = derieved_steel.apply(lambda x: SetHolidayOrNot(x.WeekStatus, x.Month, x.Day), axis=1)
display(derieved_steel)

Unnamed: 0,date,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week,Load_Type,Year,Month,Day,Hours,Minutes,Season,isholiday
0,01/01/2018 00:15,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Monday,Light_Load,2018,1,1,0,15,Winter,1
1,01/01/2018 00:30,4.00,4.46,0.00,0.0,66.77,100.00,1800,Weekday,Monday,Light_Load,2018,1,1,0,30,Winter,1
2,01/01/2018 00:45,3.24,3.28,0.00,0.0,70.28,100.00,2700,Weekday,Monday,Light_Load,2018,1,1,0,45,Winter,1
3,01/01/2018 01:00,3.31,3.56,0.00,0.0,68.09,100.00,3600,Weekday,Monday,Light_Load,2018,1,1,1,0,Winter,1
4,01/01/2018 01:15,3.82,4.50,0.00,0.0,64.72,100.00,4500,Weekday,Monday,Light_Load,2018,1,1,1,15,Winter,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35035,31/12/2018 23:00,3.85,4.86,0.00,0.0,62.10,100.00,82800,Weekday,Monday,Light_Load,2018,12,31,23,0,Winter,0
35036,31/12/2018 23:15,3.74,3.74,0.00,0.0,70.71,100.00,83700,Weekday,Monday,Light_Load,2018,12,31,23,15,Winter,0
35037,31/12/2018 23:30,3.78,3.17,0.07,0.0,76.62,99.98,84600,Weekday,Monday,Light_Load,2018,12,31,23,30,Winter,0
35038,31/12/2018 23:45,3.78,3.06,0.11,0.0,77.72,99.96,85500,Weekday,Monday,Light_Load,2018,12,31,23,45,Winter,0


In [12]:
weather_cleaned = pd.read_csv("CleanedWeather.csv")
display(weather_cleaned)

Unnamed: 0,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,precip,precipprob,...,visibility,solarradiation,solarenergy,uvindex,conditions,Year,Month,Day,SunriseHour,SunsetHour
0,44.0,28.9,37.8,38.1,24.7,31.5,24.9,60.3,0.000,0,...,9.0,139.9,12.0,6,Clear,2018,1,1,7,17
1,46.7,22.8,38.6,41.1,22.8,33.8,28.3,67.0,0.000,0,...,4.8,125.2,10.8,5,Clear,2018,1,2,7,17
2,41.8,28.4,35.3,33.6,20.0,26.5,22.7,61.3,0.000,0,...,9.6,142.9,12.4,6,Partially cloudy,2018,1,3,7,17
3,39.5,26.6,34.4,39.5,18.8,28.6,21.0,58.6,0.000,0,...,9.9,71.3,6.1,3,Overcast,2018,1,4,7,17
4,42.9,30.4,37.0,39.5,23.3,30.3,25.4,63.4,0.012,100,...,9.1,135.4,11.8,6,"Rain, Partially cloudy",2018,1,5,7,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,36.0,22.3,30.8,27.0,11.9,18.8,17.0,59.9,0.000,0,...,12.3,143.0,12.4,6,Partially cloudy,2018,12,27,7,17
361,33.5,17.8,27.8,21.0,8.1,16.1,20.1,73.6,0.000,0,...,11.9,129.8,11.2,5,Clear,2018,12,28,7,17
362,35.3,19.6,30.2,25.7,9.1,19.6,21.4,70.9,0.000,0,...,11.5,136.3,11.6,5,Clear,2018,12,29,7,17
363,37.5,22.7,32.0,31.4,14.5,24.0,20.7,64.6,0.000,0,...,11.3,142.4,12.3,6,Partially cloudy,2018,12,30,7,17


### Sampling the steel data

In [13]:
sampled_steel = derieved_steel.iloc[::5, :]
#sampled_steel = derieved_steel.groupby(['Month', 'Day'], group_keys=False).apply(lambda x: x.sample(frac=0.2, random_state=20))
display(sampled_steel)

Unnamed: 0,date,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week,Load_Type,Year,Month,Day,Hours,Minutes,Season,isholiday
0,01/01/2018 00:15,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Monday,Light_Load,2018,1,1,0,15,Winter,1
5,01/01/2018 01:30,3.28,3.56,0.00,0.0,67.76,100.00,5400,Weekday,Monday,Light_Load,2018,1,1,1,30,Winter,1
10,01/01/2018 02:45,3.46,4.03,0.00,0.0,65.14,100.00,9900,Weekday,Monday,Light_Load,2018,1,1,2,45,Winter,1
15,01/01/2018 04:00,3.89,5.00,0.00,0.0,61.40,100.00,14400,Weekday,Monday,Light_Load,2018,1,1,4,0,Winter,1
20,01/01/2018 05:15,3.56,4.07,0.00,0.0,65.84,100.00,18900,Weekday,Monday,Light_Load,2018,1,1,5,15,Winter,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35015,31/12/2018 18:00,3.42,0.00,9.79,0.0,100.00,32.98,64800,Weekday,Monday,Light_Load,2018,12,31,18,0,Winter,0
35020,31/12/2018 19:15,3.96,0.00,18.29,0.0,100.00,21.16,69300,Weekday,Monday,Light_Load,2018,12,31,19,15,Winter,0
35025,31/12/2018 20:30,3.38,0.00,13.43,0.0,100.00,24.41,73800,Weekday,Monday,Light_Load,2018,12,31,20,30,Winter,0
35030,31/12/2018 21:45,3.42,0.00,13.36,0.0,100.00,24.80,78300,Weekday,Monday,Light_Load,2018,12,31,21,45,Winter,0


In [14]:
steel_selected = sampled_steel.loc[:, ['Usage_kWh', 'Lagging_Current_Reactive.Power_kVarh', 
                                       'Leading_Current_Reactive_Power_kVarh', 'CO2(tCO2)', 'Lagging_Current_Power_Factor',
                                      'Leading_Current_Power_Factor', 'NSM', 'WeekStatus',
                                      'Load_Type', 'Year', 'Month', 'Day', 'Hours', 'Minutes', 'isholiday', 'Season']]
display(steel_selected)

Unnamed: 0,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Load_Type,Year,Month,Day,Hours,Minutes,isholiday,Season
0,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Light_Load,2018,1,1,0,15,1,Winter
5,3.28,3.56,0.00,0.0,67.76,100.00,5400,Weekday,Light_Load,2018,1,1,1,30,1,Winter
10,3.46,4.03,0.00,0.0,65.14,100.00,9900,Weekday,Light_Load,2018,1,1,2,45,1,Winter
15,3.89,5.00,0.00,0.0,61.40,100.00,14400,Weekday,Light_Load,2018,1,1,4,0,1,Winter
20,3.56,4.07,0.00,0.0,65.84,100.00,18900,Weekday,Light_Load,2018,1,1,5,15,1,Winter
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35015,3.42,0.00,9.79,0.0,100.00,32.98,64800,Weekday,Light_Load,2018,12,31,18,0,0,Winter
35020,3.96,0.00,18.29,0.0,100.00,21.16,69300,Weekday,Light_Load,2018,12,31,19,15,0,Winter
35025,3.38,0.00,13.43,0.0,100.00,24.41,73800,Weekday,Light_Load,2018,12,31,20,30,0,Winter
35030,3.42,0.00,13.36,0.0,100.00,24.80,78300,Weekday,Light_Load,2018,12,31,21,45,0,Winter


### Merging steel and weather data

In [15]:
merged_data = pd.merge(steel_selected, weather_cleaned, how='left', 
                       left_on=['Year','Month','Day'], right_on=['Year','Month','Day'])
display(merged_data)

Unnamed: 0,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Load_Type,Year,...,winddir,sealevelpressure,cloudcover,visibility,solarradiation,solarenergy,uvindex,conditions,SunriseHour,SunsetHour
0,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
1,3.28,3.56,0.00,0.0,67.76,100.00,5400,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
2,3.46,4.03,0.00,0.0,65.14,100.00,9900,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
3,3.89,5.00,0.00,0.0,61.40,100.00,14400,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
4,3.56,4.07,0.00,0.0,65.84,100.00,18900,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7003,3.42,0.00,9.79,0.0,100.00,32.98,64800,Weekday,Light_Load,2018,...,335.6,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,17
7004,3.96,0.00,18.29,0.0,100.00,21.16,69300,Weekday,Light_Load,2018,...,335.6,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,17
7005,3.38,0.00,13.43,0.0,100.00,24.41,73800,Weekday,Light_Load,2018,...,335.6,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,17
7006,3.42,0.00,13.36,0.0,100.00,24.80,78300,Weekday,Light_Load,2018,...,335.6,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,17


In [16]:
print(merged_data.columns)

Index(['Usage_kWh', 'Lagging_Current_Reactive.Power_kVarh',
       'Leading_Current_Reactive_Power_kVarh', 'CO2(tCO2)',
       'Lagging_Current_Power_Factor', 'Leading_Current_Power_Factor', 'NSM',
       'WeekStatus', 'Load_Type', 'Year', 'Month', 'Day', 'Hours', 'Minutes',
       'isholiday', 'Season', 'tempmax', 'tempmin', 'temp', 'feelslikemax',
       'feelslikemin', 'feelslike', 'dew', 'humidity', 'precip', 'precipprob',
       'precipcover', 'preciptype', 'snowdepth', 'windgust', 'windspeed',
       'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
       'solarradiation', 'solarenergy', 'uvindex', 'conditions', 'SunriseHour',
       'SunsetHour'],
      dtype='object')


### Refactor column names

In [17]:
merged_data.rename(columns={'Usage_kWh': 'Usage', 
                            'Lagging_Current_Reactive.Power_kVarh': 'LaggingCurrentReactivePower',
                           'Leading_Current_Reactive_Power_kVarh':'LeadingCurrentReactivePower',
                           'CO2(tCO2)':'CO2', 'Lagging_Current_Power_Factor': 'LaggingCurrentPowerFactor',
                           'Leading_Current_Power_Factor':'LeadingCurrentPowerFactor',
                           'Load_Type': 'LoadType', 'isholiday':'IsHoliday','Season': 'Season',
                           'tempmax': 'TempMax', 'tempmin': 'TempMin', 'temp': 'Temp', 'feelslikemax': 'FeelsLikeMax',
                           'feelslikemin':'FeelsLikeMin', 'feelslike':'FeelsLike', 'dew':'Dew', 'humidity':'Humidity',
                           'precip':'Precip', 'precipprob':'PrecipProb', 'precipcover':'PrecipCover',
                           'preciptype':'PrecipType', 'snowdepth':'SnowDepth', 'windgust':'WindGust',
                           'windspeed':'WindSpeed', 'winddir':'WindDir', 'sealevelpressure':'SeaLevelPressure',
                           'cloudcover':'CloudCover', 'visibility':'Visibility', 'solarradiation':'SolarRadiation',
                           'solarenergy':'SolarEnergy', 'uvindex':'UvIndex', 'conditions': 'Conditions'}, 
                   inplace=True)
display(merged_data)

Unnamed: 0,Usage,LaggingCurrentReactivePower,LeadingCurrentReactivePower,CO2,LaggingCurrentPowerFactor,LeadingCurrentPowerFactor,NSM,WeekStatus,LoadType,Year,...,WindDir,SeaLevelPressure,CloudCover,Visibility,SolarRadiation,SolarEnergy,UvIndex,Conditions,SunriseHour,SunsetHour
0,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
1,3.28,3.56,0.00,0.0,67.76,100.00,5400,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
2,3.46,4.03,0.00,0.0,65.14,100.00,9900,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
3,3.89,5.00,0.00,0.0,61.40,100.00,14400,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
4,3.56,4.07,0.00,0.0,65.84,100.00,18900,Weekday,Light_Load,2018,...,335.8,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7003,3.42,0.00,9.79,0.0,100.00,32.98,64800,Weekday,Light_Load,2018,...,335.6,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,17
7004,3.96,0.00,18.29,0.0,100.00,21.16,69300,Weekday,Light_Load,2018,...,335.6,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,17
7005,3.38,0.00,13.43,0.0,100.00,24.41,73800,Weekday,Light_Load,2018,...,335.6,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,17
7006,3.42,0.00,13.36,0.0,100.00,24.80,78300,Weekday,Light_Load,2018,...,335.6,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,17


### Put load column last because it is the target

In [18]:
column_to_move = merged_data.pop("LoadType")
merged_data.insert(39, "LoadType", column_to_move)
display(merged_data)

Unnamed: 0,Usage,LaggingCurrentReactivePower,LeadingCurrentReactivePower,CO2,LaggingCurrentPowerFactor,LeadingCurrentPowerFactor,NSM,WeekStatus,Year,Month,...,SeaLevelPressure,CloudCover,Visibility,SolarRadiation,SolarEnergy,UvIndex,Conditions,SunriseHour,LoadType,SunsetHour
0,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
1,3.28,3.56,0.00,0.0,67.76,100.00,5400,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
2,3.46,4.03,0.00,0.0,65.14,100.00,9900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
3,3.89,5.00,0.00,0.0,61.40,100.00,14400,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
4,3.56,4.07,0.00,0.0,65.84,100.00,18900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7003,3.42,0.00,9.79,0.0,100.00,32.98,64800,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7004,3.96,0.00,18.29,0.0,100.00,21.16,69300,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7005,3.38,0.00,13.43,0.0,100.00,24.41,73800,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7006,3.42,0.00,13.36,0.0,100.00,24.80,78300,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17


### Export dataset to be used to csv

In [19]:
merged_data.to_csv('SeqCombinedData.csv', index=False)