In [40]:
import pandas as pd
import os

def read_and_concatenate(folder_path):
    """Load every 'gen*.csv' and 'load*.csv' file of a folder into one frame.

    Files are read in sorted filename order so that the row order of the
    result is reproducible (``os.listdir`` gives no ordering guarantee).
    All rows from 'gen' files come first, followed by all rows from 'load'
    files. Columns present in only some of the files are filled with NaN
    for the rows that lack them.

    Parameters
    ----------
    folder_path : str or path-like
        Directory whose entries are scanned for matching CSV files.

    Returns
    -------
    pandas.DataFrame
        The stacked rows with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the folder contains no '.csv' file whose name starts with
        'gen' or 'load'.
    """
    # Insertion order matters: 'gen' frames are stacked before 'load' frames
    frames_by_kind = {'gen': [], 'load': []}

    for file in sorted(os.listdir(folder_path)):
        if not file.endswith('.csv'):
            continue
        for kind, frames in frames_by_kind.items():
            if file.startswith(kind):
                frames.append(pd.read_csv(os.path.join(folder_path, file)))
                break

    # pd.concat raises on an empty list, so only stack the kinds that
    # actually have files
    stacked_per_kind = [
        pd.concat(frames, axis=0, ignore_index=True)
        for frames in frames_by_kind.values()
        if frames
    ]

    if not stacked_per_kind:
        raise ValueError(
            f"No 'gen*.csv' or 'load*.csv' files found in {folder_path!r}"
        )

    # Combine 'gen' and 'load' DataFrames
    return pd.concat(stacked_per_kind, axis=0, ignore_index=True)

# Relative path of the folder that holds the raw 'gen*.csv' / 'load*.csv' files
folder_path = '../data/raw_data/'
data = read_and_concatenate(folder_path)

# Optionally, save the combined DataFrame to a new CSV file
# (the combined frame is bound to `data`; this cell defines no `combined_df`)
#data.to_csv('combined_dataset.csv', index=False)


In [41]:
# Convert the StartTime / EndTime strings into timezone-naive datetimes.
# The trailing '+00:00Z' marker is stripped before parsing, and the result
# is floored to whole seconds (the resolution the columns are kept at).
# Columns that are already datetimes are left alone so the cell can be
# re-run without failing on the `.str` accessor.
for column in ('StartTime', 'EndTime'):
    if pd.api.types.is_datetime64_any_dtype(data[column]):
        continue
    data[column] = pd.to_datetime(
        data[column].str.replace(r'\+00:00Z', '', regex=True)
    ).dt.floor('s')

In [42]:
# Display the frame to inspect the parsed StartTime / EndTime columns
data

Unnamed: 0,StartTime,EndTime,AreaID,UnitName,PsrType,quantity,Load
0,2021-12-31 23:45:00,2022-01-01 00:00:00,10Y1001A1001A83F,MAW,B01,4333.0,
1,2022-01-01 00:00:00,2022-01-01 00:15:00,10Y1001A1001A83F,MAW,B01,4325.0,
2,2022-01-01 00:15:00,2022-01-01 00:30:00,10Y1001A1001A83F,MAW,B01,4319.0,
3,2022-01-01 00:30:00,2022-01-01 00:45:00,10Y1001A1001A83F,MAW,B01,4323.0,
4,2022-01-01 00:45:00,2022-01-01 01:00:00,10Y1001A1001A83F,MAW,B01,4328.0,
...,...,...,...,...,...,...,...
1654420,2022-07-18 05:00:00,2022-07-18 05:30:00,10Y1001A1001A92E,MAW,,,631.0
1654421,2022-07-18 05:30:00,2022-07-18 06:00:00,10Y1001A1001A92E,MAW,,,696.0
1654422,2022-07-18 06:00:00,2022-07-18 06:30:00,10Y1001A1001A92E,MAW,,,770.0
1654423,2022-07-18 06:30:00,2022-07-18 07:00:00,10Y1001A1001A92E,MAW,,,840.0


In [43]:
# Tag each row as generation (1) or load (0) and merge both measurements
# into a single 'power' column.
#
# The flag is derived from where 'Load' is missing *before* the NaNs are
# replaced by 0, so a genuine load reading of 0 is not mistaken for a
# generation row. The guard keeps the flag intact when the cell is re-run
# after the NaNs have already been filled.
if 'gen/load' not in data.columns:
    data['gen/load'] = data['Load'].isna().astype('int64')

data['quantity'] = data['quantity'].fillna(0)
data['Load'] = data['Load'].fillna(0)

# In the rows displayed, only one of the two columns carries a value and the
# other is the 0 filled in above, so the sum is that single measurement.
data['power'] = data['quantity'] + data['Load']
data

Unnamed: 0,StartTime,EndTime,AreaID,UnitName,PsrType,quantity,Load,gen/load,power
0,2021-12-31 23:45:00,2022-01-01 00:00:00,10Y1001A1001A83F,MAW,B01,4333.0,0.0,1,4333.0
1,2022-01-01 00:00:00,2022-01-01 00:15:00,10Y1001A1001A83F,MAW,B01,4325.0,0.0,1,4325.0
2,2022-01-01 00:15:00,2022-01-01 00:30:00,10Y1001A1001A83F,MAW,B01,4319.0,0.0,1,4319.0
3,2022-01-01 00:30:00,2022-01-01 00:45:00,10Y1001A1001A83F,MAW,B01,4323.0,0.0,1,4323.0
4,2022-01-01 00:45:00,2022-01-01 01:00:00,10Y1001A1001A83F,MAW,B01,4328.0,0.0,1,4328.0
...,...,...,...,...,...,...,...,...,...
1654420,2022-07-18 05:00:00,2022-07-18 05:30:00,10Y1001A1001A92E,MAW,,0.0,631.0,0,631.0
1654421,2022-07-18 05:30:00,2022-07-18 06:00:00,10Y1001A1001A92E,MAW,,0.0,696.0,0,696.0
1654422,2022-07-18 06:00:00,2022-07-18 06:30:00,10Y1001A1001A92E,MAW,,0.0,770.0,0,770.0
1654423,2022-07-18 06:30:00,2022-07-18 07:00:00,10Y1001A1001A92E,MAW,,0.0,840.0,0,840.0


In [44]:
# Split each interval's start timestamp into a calendar date and an hour of day
start_times = data['StartTime']
data['Date'], data['Hour'] = start_times.dt.date, start_times.dt.hour
data

Unnamed: 0,StartTime,EndTime,AreaID,UnitName,PsrType,quantity,Load,gen/load,power,Date,Hour
0,2021-12-31 23:45:00,2022-01-01 00:00:00,10Y1001A1001A83F,MAW,B01,4333.0,0.0,1,4333.0,2021-12-31,23
1,2022-01-01 00:00:00,2022-01-01 00:15:00,10Y1001A1001A83F,MAW,B01,4325.0,0.0,1,4325.0,2022-01-01,0
2,2022-01-01 00:15:00,2022-01-01 00:30:00,10Y1001A1001A83F,MAW,B01,4319.0,0.0,1,4319.0,2022-01-01,0
3,2022-01-01 00:30:00,2022-01-01 00:45:00,10Y1001A1001A83F,MAW,B01,4323.0,0.0,1,4323.0,2022-01-01,0
4,2022-01-01 00:45:00,2022-01-01 01:00:00,10Y1001A1001A83F,MAW,B01,4328.0,0.0,1,4328.0,2022-01-01,0
...,...,...,...,...,...,...,...,...,...,...,...
1654420,2022-07-18 05:00:00,2022-07-18 05:30:00,10Y1001A1001A92E,MAW,,0.0,631.0,0,631.0,2022-07-18,5
1654421,2022-07-18 05:30:00,2022-07-18 06:00:00,10Y1001A1001A92E,MAW,,0.0,696.0,0,696.0,2022-07-18,5
1654422,2022-07-18 06:00:00,2022-07-18 06:30:00,10Y1001A1001A92E,MAW,,0.0,770.0,0,770.0,2022-07-18,6
1654423,2022-07-18 06:30:00,2022-07-18 07:00:00,10Y1001A1001A92E,MAW,,0.0,840.0,0,840.0,2022-07-18,6


In [45]:
# Total power per area, per gen/load flag, per calendar date and hour.
# NOTE(review): the frame contains 15-minute and 30-minute rows, so an hour
# can sum several samples - confirm that a sum (not a mean) is intended.
aggregated_data = (
    data
    .groupby(['AreaID', 'gen/load', 'Date', 'Hour'])['power']
    .sum()
    .reset_index()
)
aggregated_data

Unnamed: 0,AreaID,gen/load,Date,Hour,power
0,10Y1001A1001A65H,0,2021-12-31,23,3314.0
1,10Y1001A1001A65H,0,2022-01-01,0,3218.0
2,10Y1001A1001A65H,0,2022-01-01,1,3126.0
3,10Y1001A1001A65H,0,2022-01-01,2,3080.0
4,10Y1001A1001A65H,0,2022-01-01,3,3044.0
...,...,...,...,...,...
148546,10YSE-1--------K,1,2023-01-01,18,12715.0
148547,10YSE-1--------K,1,2023-01-01,19,12347.0
148548,10YSE-1--------K,1,2023-01-01,20,11986.0
148549,10YSE-1--------K,1,2023-01-01,21,11227.0


In [47]:
# Wide view: one row per (Date, Hour), one column per (AreaID, gen/load) pair,
# each cell holding the summed power of the matching rows.
pivot = pd.pivot_table(
    data,
    values='power',
    index=['Date', 'Hour'],
    columns=['AreaID', 'gen/load'],
    aggfunc='sum',
)
pivot

Unnamed: 0_level_0,AreaID,10Y1001A1001A65H,10Y1001A1001A65H,10Y1001A1001A83F,10Y1001A1001A83F,10Y1001A1001A92E,10Y1001A1001A92E,10YES-REE------0,10YES-REE------0,10YHU-MAVIR----U,10YHU-MAVIR----U,10YIT-GRTN-----B,10YIT-GRTN-----B,10YNL----------L,10YNL----------L,10YPL-AREA-----S,10YPL-AREA-----S,10YSE-1--------K,10YSE-1--------K
Unnamed: 0_level_1,gen/load,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1
Date,Hour,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
2021-12-31,23,3314.0,3652.0,42196.0,37054.0,677.0,,20827.0,9533.0,4254.0,285.0,21121.0,5698.0,10326.0,3535.0,14438.0,4641.0,15331.0,11060.0
2022-01-01,0,3218.0,3605.0,165125.0,145157.0,1244.0,,19530.0,8943.0,16457.0,1376.0,19756.0,5614.0,40706.0,12708.0,13935.0,4491.0,15331.0,11107.0
2022-01-01,1,3126.0,3309.0,160415.0,138401.0,1131.0,,18383.0,8625.0,15426.0,1526.0,18685.0,5528.0,39465.0,11097.0,13579.0,4436.0,15270.0,11036.0
2022-01-01,2,3080.0,3043.0,158035.0,132617.0,1091.0,,17680.0,8448.0,14781.0,1560.0,18124.0,5672.0,38923.0,11282.0,13397.0,4568.0,15150.0,10509.0
2022-01-01,3,3044.0,2852.0,157016.0,127011.0,969.0,,17396.0,8290.0,14630.0,1287.0,18400.0,5426.0,38211.0,11245.0,13364.0,4559.0,15387.0,10770.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-01,19,3507.0,4168.0,177225.0,131391.0,,55.0,98224.0,70192.0,17873.0,667.0,24617.0,3872.0,47635.0,8713.0,15468.0,4968.0,16145.0,12347.0
2023-01-01,20,3354.0,3975.0,172484.0,129744.0,,45.0,97620.0,71032.0,17141.0,648.0,22944.0,3667.0,45689.0,7626.0,14698.0,4220.0,15869.0,11986.0
2023-01-01,21,3229.0,3498.0,166627.0,121198.0,,37.0,91896.0,67136.0,16259.0,658.0,20791.0,3436.0,43276.0,6533.0,13749.0,3525.0,15143.0,11227.0
2023-01-01,22,3055.0,2879.0,153785.0,114446.0,,41.0,84324.0,60364.0,15273.0,623.0,19198.0,3264.0,41269.0,7731.0,13061.0,3236.0,14697.0,10515.0


In [48]:
# Show only the (Date, Hour) rows with a value in every column.
# The result is displayed, not assigned, so `pivot` itself is unchanged.
pivot.dropna(axis=0, how='any')

Unnamed: 0_level_0,AreaID,10Y1001A1001A65H,10Y1001A1001A65H,10Y1001A1001A83F,10Y1001A1001A83F,10Y1001A1001A92E,10Y1001A1001A92E,10YES-REE------0,10YES-REE------0,10YHU-MAVIR----U,10YHU-MAVIR----U,10YIT-GRTN-----B,10YIT-GRTN-----B,10YNL----------L,10YNL----------L,10YPL-AREA-----S,10YPL-AREA-----S,10YSE-1--------K,10YSE-1--------K
Unnamed: 0_level_1,gen/load,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1
Date,Hour,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
2022-01-26,23,3912.0,4695.0,229000.0,146019.0,1649.0,433.0,25797.0,7517.0,21808.0,898.0,27465.0,4712.0,46721.0,15730.0,18504.0,5670.0,16174.0,11696.0
2022-01-27,0,3825.0,4637.0,225922.0,149743.0,1424.0,1021.0,24596.0,7110.0,20986.0,913.0,26588.0,4478.0,45289.0,16029.0,18105.0,5599.0,15759.0,11209.0
2022-01-27,1,3786.0,4789.0,224274.0,153174.0,1304.0,927.0,24061.0,6449.0,20356.0,840.0,26343.0,4447.0,44319.0,16375.0,18228.0,5659.0,15546.0,11231.0
2022-01-27,2,3838.0,4752.0,224262.0,156917.0,1305.0,673.0,24068.0,6469.0,20250.0,855.0,26740.0,4315.0,43847.0,16587.0,18518.0,5648.0,15856.0,11636.0
2022-01-27,3,4065.0,4832.0,231010.0,159611.0,1282.0,426.0,24733.0,6424.0,20953.0,895.0,28908.0,4772.0,44446.0,16761.0,19274.0,5659.0,16391.0,12357.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-03,18,4071.0,4239.0,222381.0,109587.0,1862.0,1121.0,29804.0,19753.0,22140.0,867.0,30605.0,5789.0,50416.0,3831.0,19404.0,3679.0,16916.0,13077.0
2022-04-03,19,3808.0,4431.0,219818.0,110195.0,1835.0,1169.0,28210.0,19212.0,21097.0,877.0,28246.0,4946.0,49185.0,4717.0,18527.0,3913.0,16092.0,12471.0
2022-04-03,20,3671.0,4340.0,215400.0,114248.0,1635.0,1307.0,25811.0,17936.0,20082.0,835.0,26001.0,4362.0,46835.0,7227.0,17458.0,4429.0,15467.0,12083.0
2022-04-03,21,3484.0,4465.0,202810.0,116266.0,1427.0,1333.0,23783.0,17162.0,19097.0,794.0,23445.0,3408.0,44071.0,9500.0,16595.0,4572.0,15081.0,11771.0
