## More charts for data analysis

Some of the more computationally intensive charts are kept in a separate file. These are mainly the correlation charts.

### Imports and read data from csv

In [24]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import datetime

In [25]:
import plotly.io as pio
# Make 'iframe' the default Plotly renderer, so every fig.show() call in this notebook uses it.
# NOTE(review): presumably chosen so the large scatter figures are not embedded inline in the
# notebook file — confirm against the Plotly renderers documentation.
pio.renderers.default = 'iframe'

In [26]:
# Load the combined steel-plant / weather dataset; the path is relative to the
# notebook's working directory.
combined_data = pd.read_csv(filepath_or_buffer="SeqCombinedData.csv")

# Show the frame as a rich table for a quick visual sanity check.
display(combined_data)

Unnamed: 0,Usage,LaggingCurrentReactivePower,LeadingCurrentReactivePower,CO2,LaggingCurrentPowerFactor,LeadingCurrentPowerFactor,NSM,WeekStatus,Year,Month,...,SeaLevelPressure,CloudCover,Visibility,SolarRadiation,SolarEnergy,UvIndex,Conditions,SunriseHour,LoadType,SunsetHour
0,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
1,3.28,3.56,0.00,0.0,67.76,100.00,5400,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
2,3.46,4.03,0.00,0.0,65.14,100.00,9900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
3,3.89,5.00,0.00,0.0,61.40,100.00,14400,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
4,3.56,4.07,0.00,0.0,65.84,100.00,18900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7003,3.42,0.00,9.79,0.0,100.00,32.98,64800,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7004,3.96,0.00,18.29,0.0,100.00,21.16,69300,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7005,3.38,0.00,13.43,0.0,100.00,24.41,73800,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7006,3.42,0.00,13.36,0.0,100.00,24.80,78300,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17


In [27]:
# List every column name so the fields used in the correlation plots below can be looked up.
print(combined_data.columns)

Index(['Usage', 'LaggingCurrentReactivePower', 'LeadingCurrentReactivePower',
       'CO2', 'LaggingCurrentPowerFactor', 'LeadingCurrentPowerFactor', 'NSM',
       'WeekStatus', 'Year', 'Month', 'Day', 'Hours', 'Minutes', 'IsHoliday',
       'Season', 'TempMax', 'TempMin', 'Temp', 'FeelsLikeMax', 'FeelsLikeMin',
       'FeelsLike', 'Dew', 'Humidity', 'Precip', 'PrecipProb', 'PrecipCover',
       'PrecipType', 'SnowDepth', 'WindGust', 'WindSpeed', 'WindDir',
       'SeaLevelPressure', 'CloudCover', 'Visibility', 'SolarRadiation',
       'SolarEnergy', 'UvIndex', 'Conditions', 'SunriseHour', 'LoadType',
       'SunsetHour'],
      dtype='object')


### Weather data correlations

In [28]:
# Upper-triangular scatter matrix of selected weather columns.
# The subplot at (row i, col j) with j >= i plots column i on the x axis against
# column j on the y axis; cells below the diagonal are intentionally left empty
# because they would only mirror the ones above it.
#
# Keys are the DataFrame column names, values are the short labels used in the
# legend. 'WindSpeed' is labelled 'Windspeed' everywhere so that all legend
# entries are spelled consistently.
weather_labels = {
    'Temp': 'Temp',
    'Humidity': 'Humidity',
    'Precip': 'Precip',
    'WindSpeed': 'Windspeed',
    'Dew': 'Dew',
}
weather_cols = list(weather_labels)

fig_1 = make_subplots(rows=len(weather_cols), cols=len(weather_cols))

for row_idx, x_col in enumerate(weather_cols, start=1):
    # Start at the diagonal so each unordered pair of columns is drawn exactly once.
    for col_idx, y_col in enumerate(weather_cols[row_idx - 1:], start=row_idx):
        fig_1.add_trace(
            go.Scatter(
                x=combined_data.loc[:, x_col].values,
                y=combined_data.loc[:, y_col].values,
                name=f"{weather_labels[x_col]}-{weather_labels[y_col]}",
                mode="markers",
                showlegend=True,
            ),
            row=row_idx, col=col_idx,
        )

fig_1.update_layout(title_text="Correlation scatter plots between some weather columns")
fig_1.show()

### Steel data correlations

In [29]:
# Upper-triangular scatter matrix of selected steel-plant columns.
# The subplot at (row i, col j) with j >= i plots column i on the x axis against
# column j on the y axis; cells below the diagonal are intentionally left empty.
#
# Keys are the DataFrame column names, values are the abbreviations used to
# build the legend entries (e.g. 'LaCRP-LeCPF').
steel_abbrev = {
    'LaggingCurrentReactivePower': 'LaCRP',
    'LeadingCurrentReactivePower': 'LeCRP',
    'LaggingCurrentPowerFactor': 'LaCPF',
    'LeadingCurrentPowerFactor': 'LeCPF',
    'NSM': 'NSM',
}
steel_cols = list(steel_abbrev)

fig_2 = make_subplots(rows=len(steel_cols), cols=len(steel_cols))

for row_idx, x_col in enumerate(steel_cols, start=1):
    # Start at the diagonal so each unordered pair of columns is drawn exactly once.
    for col_idx, y_col in enumerate(steel_cols[row_idx - 1:], start=row_idx):
        fig_2.add_trace(
            go.Scatter(
                x=combined_data.loc[:, x_col].values,
                y=combined_data.loc[:, y_col].values,
                name=f"{steel_abbrev[x_col]}-{steel_abbrev[y_col]}",
                mode="markers",
                showlegend=True,
            ),
            row=row_idx, col=col_idx,
        )

fig_2.update_layout(title_text="Correlation scatter plots between some steel plant columns")
fig_2.show()

### Correlations for important fields of weather and steel data

In [30]:
# Cross scatter grid: one row per steel-plant column (x axis) and one column per
# weather column (y axis), so every steel/weather pairing gets its own subplot.
#
# Each mapping goes from DataFrame column name to the abbreviation used in the
# legend entry, which is built as '<steel>-<weather>' (e.g. 'LaCRP-Hum').
fig3_steel_abbrev = {
    'LaggingCurrentReactivePower': 'LaCRP',
    'LeadingCurrentReactivePower': 'LeCRP',
    'LaggingCurrentPowerFactor': 'LaCPF',
    'LeadingCurrentPowerFactor': 'LeCPF',
}
fig3_weather_abbrev = {
    'Temp': 'Temp',
    'Humidity': 'Hum',
    'Precip': 'Precip',
}

fig_3 = make_subplots(rows=len(fig3_steel_abbrev), cols=len(fig3_weather_abbrev))

# Row-major order, so traces (and legend entries) are grouped by steel column.
for row_idx, (steel_col, steel_tag) in enumerate(fig3_steel_abbrev.items(), start=1):
    for col_idx, (weather_col, weather_tag) in enumerate(fig3_weather_abbrev.items(), start=1):
        fig_3.add_trace(
            go.Scatter(
                x=combined_data.loc[:, steel_col].values,
                y=combined_data.loc[:, weather_col].values,
                name=f"{steel_tag}-{weather_tag}",
                mode="markers",
                showlegend=True,
            ),
            row=row_idx, col=col_idx,
        )

fig_3.update_layout(title_text="Correlation scatter plots between some steel plant columns and some weather columns")
fig_3.show()