In [1]:
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's "paper" plotting context for every figure in this notebook.
sns.set_theme(context="paper")

In [2]:
# Path of the merged dataset, resolved relative to the parent of the current
# working directory. Despite the _DIR suffix, this points at the parquet file
# itself (it is what gets passed to pd.read_parquet).
MERGED_LOAD_DIR = (
    Path.cwd().parent / "data.nosync" / "transformed_data" / "merged.parquet"
)

In [5]:
# Read the merged parquet file into a DataFrame, then preview its first five rows.
df = pd.read_parquet(path=MERGED_LOAD_DIR)
df.head(n=5)

Unnamed: 0,Time Stamp,Name,PTID,Load,Year,Month,Day,Minute,Hour,Max Temp,Min Temp,Max Wet Bulb,Min Wet Bulb
0,2016-06-22 00:00:00,CAPITL,61757.0,1252.099976,2016,6,22,0,0,27.777778,15.0,18.444444,11.777778
1,2016-06-22 00:05:00,CAPITL,61757.0,1239.199951,2016,6,22,5,0,27.777778,15.0,18.444444,11.777778
2,2016-06-22 00:10:00,CAPITL,61757.0,1241.900024,2016,6,22,10,0,27.777778,15.0,18.444444,11.777778
3,2016-06-22 00:15:00,CAPITL,61757.0,1240.900024,2016,6,22,15,0,27.777778,15.0,18.444444,11.777778
4,2016-06-22 00:20:00,CAPITL,61757.0,1226.900024,2016,6,22,20,0,27.777778,15.0,18.444444,11.777778


# Dropping unused columns

We don't need the Time Stamp column, as all of its information has already been broken out into the Year, Month, Day, Hour and Minute columns. Leaving it in would actually be detrimental to the modelling, as it would introduce multicollinearity.

In [6]:
# Remove the raw timestamp column; rebinding `df` means re-running this cell
# on an already-trimmed frame raises KeyError.
df = df.drop(columns=["Time Stamp"])

# Rounding

We don't need six decimal places for the load and temperature columns, as the sensors are unlikely to be this accurate anyway. We round these to two decimal places to help speed up our modelling.

In [10]:
# Round every measurement column to two decimal places, one column at a time.
# NOTE(review): the preview below still shows Load as e.g. 1252.099976 after
# this cell -- presumably Load is stored as float32, so the rounded value is
# displayed with its float32 representation error; confirm the dtype.
measurement_cols = ["Load", "Max Temp", "Min Temp", "Max Wet Bulb", "Min Wet Bulb"]
for col in measurement_cols:
    df[col] = df[col].round(2)

In [11]:
# Preview the first five rows after dropping the timestamp and rounding.
df.head(n=5)

Unnamed: 0,Name,PTID,Load,Year,Month,Day,Minute,Hour,Max Temp,Min Temp,Max Wet Bulb,Min Wet Bulb
0,CAPITL,61757.0,1252.099976,2016,6,22,0,0,27.78,15.0,18.44,11.78
1,CAPITL,61757.0,1239.199951,2016,6,22,5,0,27.78,15.0,18.44,11.78
2,CAPITL,61757.0,1241.900024,2016,6,22,10,0,27.78,15.0,18.44,11.78
3,CAPITL,61757.0,1240.900024,2016,6,22,15,0,27.78,15.0,18.44,11.78
4,CAPITL,61757.0,1226.900024,2016,6,22,20,0,27.78,15.0,18.44,11.78


# Cyclical transformations
