In [1]:
import pandas as pd

# Load the 2021 commercial readings. index_col=0 turns the CSV's first (unnamed)
# column into the row index, so it never shows up as a data column.
df = pd.read_csv('data/commercial_df_2021.csv', index_col=0)

# Columns that are not needed for training
columns_to_drop = [
    'One_Day_Power',
    'One_Day_Power_NaN',
    'Air Temperature 1 Min_P19',
    'Air Temperature 2 Max_P27',
    'Air Temperature 2 Min_P26',
    'Precipitation_P7',
]

# Short names for the remaining measurement columns (extend the mapping as needed).
# NOTE(review): 'Temperature' is sourced from the dew point column - confirm that
# this is the intended feature.
column_renames = {
    'Dew Point Temperature_P39': 'Temperature',
    'Relative Humidity_P6': 'Humidity',
    'Wind Speed_P4': 'WindSpeed',
    'Power_Consumption': 'PowerConsumption',
}

df = df.drop(columns=columns_to_drop).rename(columns=column_renames)

# Sequential 1-based ID, placed as the first column
df.insert(0, 'ID', range(1, len(df) + 1))


In [2]:
# Inspect the cleaned frame; the notebook renders a truncated head/tail preview.
df

Unnamed: 0,ID,DateTime,CUSTOMER,AREA,ISPRIVATEPERSON,PowerConsumption,Temperature,Humidity,WindSpeed,Price
6944448,1,2021-01-01 00:00:00,1060753805,Kvarnholmen,Nej,0.001000,4.8,88.0,3.0,24.35
6944449,2,2021-01-01 00:00:00,1060766019,Malmen,Nej,0.003267,4.8,88.0,3.0,24.35
6944451,3,2021-01-01 00:00:00,1060616621,Malmen,Nej,0.007463,4.8,88.0,3.0,24.35
6944452,4,2021-01-01 00:00:00,1060621516,Malmen,Nej,0.000147,4.8,88.0,3.0,24.35
6944453,5,2021-01-01 00:00:00,1060601163,Kvarnholmen,Nej,0.004140,4.8,88.0,3.0,24.35
...,...,...,...,...,...,...,...,...,...,...
14619619,5863076,2021-12-31 23:00:00,1060598838,Kvarnholmen,Nej,0.000558,5.3,86.0,5.0,46.60
14619620,5863077,2021-12-31 23:00:00,1060598788,Kvarnholmen,Nej,0.000424,5.3,86.0,5.0,46.60
14619621,5863078,2021-12-31 23:00:00,1060604204,Kvarnholmen,Nej,0.000432,5.3,86.0,5.0,46.60
14619622,5863079,2021-12-31 23:00:00,1060599041,Kvarnholmen,Nej,0.005087,5.3,86.0,5.0,46.60


In [3]:
# Number of rows in the cleaned frame
len(df)

5863080

In [4]:
# Index the readings by timestamp so they can be resampled by time.
# The guard makes this cell safe to re-run: once 'DateTime' has been moved into
# the index, a second df['DateTime'] lookup would raise KeyError. If the column is
# genuinely missing (and is not the index), the KeyError is still raised.
if df.index.name != 'DateTime':
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    df = df.set_index('DateTime')

In [5]:
# Collapse the per-customer readings into one row per hour:
# consumption is summed across rows, price and temperature are averaged.
# pd.offsets.Hour() is used instead of the 'H' string alias, which is deprecated
# in pandas 2.2+ (replaced by 'h'); the offset object works on old and new versions.
# NOTE(review): an hour with no rows still gets a bin (sum -> 0, mean -> NaN);
# check for gaps before using this table for training.
commercial_df_2021_resampled = df.resample(pd.offsets.Hour()).agg(
    {
        'PowerConsumption': 'sum',
        'Price': 'mean',
        'Temperature': 'mean',
    }
)

  commercial_df_2021_resampled = df.resample('H').agg({'PowerConsumption': 'sum', 'Price': 'mean', 'Temperature':'mean'})


In [6]:
# Inspect the hourly aggregate (DateTime index; PowerConsumption, Price, Temperature columns).
commercial_df_2021_resampled

Unnamed: 0_level_0,PowerConsumption,Price,Temperature
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-01 00:00:00,4.434627,24.35,4.8
2021-01-01 01:00:00,4.375336,23.98,4.8
2021-01-01 02:00:00,4.413750,23.72,4.8
2021-01-01 03:00:00,4.368521,23.73,4.8
2021-01-01 04:00:00,4.421918,24.06,4.8
...,...,...,...
2021-12-31 19:00:00,5.475141,33.96,6.6
2021-12-31 20:00:00,5.303456,33.08,6.0
2021-12-31 21:00:00,5.109125,32.34,5.8
2021-12-31 22:00:00,4.861598,29.76,5.5


In [7]:
# Reset index to make DateTime a column again
# Turn the DateTime index back into an ordinary column, then append a
# sequential 1-based ID column for the hourly rows.
commercial_df_2021_resampled = (
    commercial_df_2021_resampled
    .reset_index()
    .assign(ID=lambda frame: range(1, len(frame) + 1))
)



In [8]:
commercial_df_2021_resampled

Unnamed: 0,DateTime,PowerConsumption,Price,Temperature,ID
0,2021-01-01 00:00:00,4.434627,24.35,4.8,1
1,2021-01-01 01:00:00,4.375336,23.98,4.8,2
2,2021-01-01 02:00:00,4.413750,23.72,4.8,3
3,2021-01-01 03:00:00,4.368521,23.73,4.8,4
4,2021-01-01 04:00:00,4.421918,24.06,4.8,5
...,...,...,...,...,...
8755,2021-12-31 19:00:00,5.475141,33.96,6.6,8756
8756,2021-12-31 20:00:00,5.303456,33.08,6.0,8757
8757,2021-12-31 21:00:00,5.109125,32.34,5.8,8758
8758,2021-12-31 22:00:00,4.861598,29.76,5.5,8759


In [None]:
# Write the hourly table to CSV with ID as the first column and without the
# pandas row index.
export_columns = ['ID', 'DateTime', 'PowerConsumption', 'Price', 'Temperature']
commercial_df_2021_resampled[export_columns].to_csv(
    'data/commercial_df_2021_resampled_vertexai.csv',
    index=False,
)