In [1]:
import pandas as pd

#### Preprocess

In [2]:
# Load the simulated PV output (tab-separated file).
pv_sim_df = pd.read_csv('2021_2022_PV_sim_PST_preprocessed.csv', sep='\t')

# convert time (string) to timestamp
pv_sim_df['Time'] = pv_sim_df['Time'].apply(pd.Timestamp)

# remove timezone so 'Time' is timezone-naive
pv_sim_df['Time'] = pv_sim_df['Time'].dt.tz_localize(None)

# sort by time.
# sort_values()/reset_index() return a new frame rather than sorting in place,
# so the result must be assigned back or the sort is silently lost.
# kind='stable' keeps rows with equal timestamps in their original order.
pv_sim_df = pv_sim_df.sort_values(by=['Time'], kind='stable').reset_index(drop=True)

# remove un-used columns
pv_sim_df = pv_sim_df.drop(columns=['Unnamed: 0', 'Time_local'])

In [3]:
# Bare last expression: render the cleaned PV simulation frame as the cell output.
pv_sim_df

Unnamed: 0,Time,PV_kwAC_out,PV_kwAC_out_Norm
0,2021-01-01 00:00:00,0.0,0.0
1,2021-01-01 01:00:00,0.0,0.0
2,2021-01-01 02:00:00,0.0,0.0
3,2021-01-01 03:00:00,0.0,0.0
4,2021-01-01 04:00:00,0.0,0.0
...,...,...,...
17515,2022-12-31 19:00:00,0.0,0.0
17516,2022-12-31 20:00:00,0.0,0.0
17517,2022-12-31 21:00:00,0.0,0.0
17518,2022-12-31 22:00:00,0.0,0.0


In [4]:
# Read the hourly CAISO load file and rewrite 'Time' in one pipeline:
# each string becomes a US/Pacific Timestamp, then the timezone is dropped so
# the column is timezone-naive.
# NOTE(review): US/Pacific observes DST, so once the timezone is dropped the
# wall-clock hours can repeat or be skipped at the transitions -- confirm this
# matches the time base of the frames this one is merged with.
load_df = (
    pd.read_csv('caiso_load_hourly_preprocessed.csv', sep=',')
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw, tz='US/Pacific'))
        .dt.tz_localize(None)
    )
)

In [5]:
# Bare last expression: render the cleaned load frame as the cell output.
load_df

Unnamed: 0,Time,Load
0,2021-01-01 00:00:00,258190.0
1,2021-01-01 01:00:00,247591.0
2,2021-01-01 02:00:00,239411.0
3,2021-01-01 03:00:00,235442.0
4,2021-01-01 04:00:00,234975.0
...,...,...
17491,2022-12-30 19:00:00,308407.0
17492,2022-12-30 20:00:00,301158.0
17493,2022-12-30 21:00:00,293008.0
17494,2022-12-30 22:00:00,278617.0


In [6]:
# Read the tab-separated ancillary-services file, convert 'Time' to
# timezone-naive US/Pacific wall-clock timestamps, and discard the
# 'Unnamed: 0' column.
get_as_prices_df = (
    pd.read_csv('df_get_as_prices_preprocessed.csv', sep='\t')
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw, tz='US/Pacific'))
        .dt.tz_localize(None)
    )
    .drop(columns=['Unnamed: 0'])
)

In [7]:
# Bare last expression: render the cleaned ancillary-services frame as the cell output.
get_as_prices_df

Unnamed: 0,Time,Non-Spinning Reserves Total (MW),Regulation Down Total (MW),Regulation Mileage Down Total (MW),Regulation Mileage Up Total (MW),Regulation Up Total (MW),Total Volume(MW)
0,2021-01-01 00:00:00,1108.50,459.89,0.0,0.0,230.76,1799.15
1,2021-01-01 01:00:00,1086.00,429.89,0.0,0.0,226.26,1742.15
2,2021-01-01 02:00:00,1096.50,409.89,0.0,0.0,223.74,1730.13
3,2021-01-01 03:00:00,1100.50,389.89,0.0,0.0,226.74,1717.13
4,2021-01-01 04:00:00,1062.50,399.89,0.0,0.0,240.14,1702.53
...,...,...,...,...,...,...,...
17491,2022-12-30 19:00:00,846.72,570.00,0.0,0.0,482.92,1899.64
17492,2022-12-30 20:00:00,820.73,600.00,0.0,0.0,548.80,1969.53
17493,2022-12-30 21:00:00,792.98,590.00,0.0,0.0,391.06,1774.04
17494,2022-12-30 22:00:00,752.10,378.00,0.0,0.0,372.36,1502.46


In [8]:
# Columns of the load-forecast file that are dropped after loading.
_forecast_unused_columns = ['Unnamed: 0', 'Forecast Time', 'Interval Start', 'Interval End']

# Read the tab-separated load forecast, convert 'Time' to timezone-naive
# US/Pacific wall-clock timestamps, then drop the unused columns.
get_load_forecast_df = (
    pd.read_csv('df_get_load_forecast_preprocessed.csv', sep='\t')
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw, tz='US/Pacific'))
        .dt.tz_localize(None)
    )
    .drop(columns=_forecast_unused_columns)
)

In [9]:
# Bare last expression: render the cleaned load-forecast frame as the cell output.
get_load_forecast_df

Unnamed: 0,Time,Load Forecast,Load_Forecast_Norm
0,2021-01-01 00:00:00,21433.07,0.185736
1,2021-01-01 01:00:00,20637.49,0.164058
2,2021-01-01 02:00:00,20077.64,0.148804
3,2021-01-01 03:00:00,19592.67,0.135589
4,2021-01-01 04:00:00,19527.81,0.133822
...,...,...,...
17515,2022-12-31 19:00:00,25297.51,0.291031
17516,2022-12-31 20:00:00,24463.21,0.268299
17517,2022-12-31 21:00:00,23822.35,0.250837
17518,2022-12-31 22:00:00,23038.79,0.229487


In [10]:
# Read the comma-separated fuel-mix file and parse the 'Time' strings into
# Timestamps. No timezone is attached or removed here; the strings are parsed
# as they are.
fuel_mix_df = (
    pd.read_csv('fuel_mix_preprocessed.csv', sep=',')
    .assign(Time=lambda frame: frame['Time'].apply(pd.Timestamp))
)

In [11]:
# Bare last expression: render the fuel-mix frame as the cell output.
fuel_mix_df

Unnamed: 0,Time,Solar,Wind,Geothermal,Biomass,Biogas,Small Hydro,Coal,Nuclear,Natural Gas,Large Hydro,Batteries,Imports,Other
0,2021-01-01 00:00:00,-41,1449,957,308,202,142,12,1143,7846,690,62,9222,0
1,2021-01-01 01:00:00,-41,1624,956,308,203,141,12,1142,7290,626,83,8810,0
2,2021-01-01 02:00:00,-41,1750,956,310,202,142,12,1142,6747,585,60,8603,0
3,2021-01-01 03:00:00,-42,1554,953,307,202,148,13,1143,6499,583,1,8856,0
4,2021-01-01 04:00:00,-43,1569,957,306,202,176,12,1143,6703,633,17,8429,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17515,2022-12-31 19:00:00,-27,3714,901,240,214,218,2,2246,8995,2092,1081,6746,0
17516,2022-12-31 20:00:00,-28,4078,900,253,214,213,2,2246,8614,2064,777,6373,0
17517,2022-12-31 21:00:00,-36,3580,900,286,214,207,3,2245,8828,1765,259,6838,0
17518,2022-12-31 22:00:00,-37,3161,900,287,211,201,2,2243,9275,1287,450,6477,0


In [12]:
# Read the comma-separated gas-price file, parse 'Time' into Timestamps, and
# rename 'Price' to 'Gas Price' so the column name is specific to this table.
gas_price_df = (
    pd.read_csv('gas_price_preprocessed.csv', sep=',')
    .assign(Time=lambda frame: frame['Time'].apply(pd.Timestamp))
    .rename(columns={'Price': 'Gas Price'})
)

In [13]:
# Bare last expression: render the gas-price frame as the cell output.
gas_price_df

Unnamed: 0,Time,Fuel Region Id,Gas Price
0,2021-01-01 00:00:00,FRBANCSMUD,3.63
1,2021-01-01 01:00:00,FRBANCSMUD,3.63
2,2021-01-01 02:00:00,FRBANCSMUD,3.63
3,2021-01-01 03:00:00,FRBANCSMUD,3.63
4,2021-01-01 04:00:00,FRBANCSMUD,3.63
...,...,...,...
17515,2022-12-31 19:00:00,FRBANCSMUD,15.79
17516,2022-12-31 20:00:00,FRBANCSMUD,15.79
17517,2022-12-31 21:00:00,FRBANCSMUD,15.79
17518,2022-12-31 22:00:00,FRBANCSMUD,15.79


In [14]:
# Read the comma-separated GHG file. Its time column is called 'index' in the
# file, so it is renamed to 'Time' before parsing; afterwards the
# 'Unnamed: 0' column is dropped.
ghg_df = (
    pd.read_csv('GHG_hourly_preprocessed.csv', sep=',')
    .rename(columns={'index': 'Time'})
    .assign(Time=lambda frame: frame['Time'].apply(pd.Timestamp))
    .drop(columns=['Unnamed: 0'])
)

In [15]:
# Bare last expression: render the GHG frame as the cell output.
ghg_df

Unnamed: 0,Time,GHG Allowance Price
0,2021-01-01 00:00:00,17.45
1,2021-01-01 01:00:00,17.45
2,2021-01-01 02:00:00,17.45
3,2021-01-01 03:00:00,17.45
4,2021-01-01 04:00:00,17.45
...,...,...
17492,2022-12-30 20:00:00,28.54
17493,2022-12-30 21:00:00,28.54
17494,2022-12-30 22:00:00,28.54
17495,2022-12-30 23:00:00,28.54


In [16]:
# Read the tab-separated LMP file, convert 'Time' to timezone-naive US/Pacific
# wall-clock timestamps, and drop the 'Unnamed: 0' and 'Interval Start' columns.
lmp_df = (
    pd.read_csv('lmp_df_preprocessed.csv', sep='\t')
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw, tz='US/Pacific'))
        .dt.tz_localize(None)
    )
    .drop(columns=['Unnamed: 0', 'Interval Start'])
)

In [17]:
# Bare last expression: render the cleaned LMP frame as the cell output.
lmp_df

Unnamed: 0,Time,Market,Location,Location Type,LMP,Energy,Congestion,Loss,LMP_Norm,Energy_Norm,Congestion_Norm,Loss_Norm
0,2021-01-01 00:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,35.19269,34.68627,0.0,0.50642,0.102169,0.029449,0.301568,0.378454
1,2021-01-01 01:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,33.24291,32.70009,0.0,0.54282,0.100805,0.027973,0.301568,0.378942
2,2021-01-01 02:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,34.50767,33.83769,0.0,0.66999,0.101690,0.028818,0.301568,0.380648
3,2021-01-01 03:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,33.77173,33.12253,0.0,0.64920,0.101175,0.028287,0.301568,0.380369
4,2021-01-01 04:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,35.15909,34.40224,0.0,0.75685,0.102145,0.029238,0.301568,0.381814
...,...,...,...,...,...,...,...,...,...,...,...,...
17515,2022-12-31 19:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,144.77129,140.78702,0.0,3.98427,0.178786,0.108320,0.301568,0.425120
17516,2022-12-31 20:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,139.04941,135.76392,0.0,3.28549,0.174785,0.104586,0.301568,0.415743
17517,2022-12-31 21:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,131.07526,128.26624,0.0,2.80903,0.169210,0.099013,0.301568,0.409350
17518,2022-12-31 22:00:00,DAY_AHEAD_HOURLY,CATALYST_7_N002,,123.50517,121.29756,0.0,2.20762,0.163917,0.093832,0.301568,0.401280


In [18]:
# Read the tab-separated local solar curtailment file and normalise it:
#   1. 'Time' -> timezone-naive US/Pacific wall-clock timestamps
#   2. drop the 'Unnamed: 0' column
#   3. prefix every column except 'Time' with 'solar_local_'. 'Time' is moved
#      into the index while the prefix is applied, then restored as a column.
solar_local_df = (
    pd.read_csv('solar_local_preprocessed.csv', sep='\t')
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw, tz='US/Pacific'))
        .dt.tz_localize(None)
    )
    .drop(columns=['Unnamed: 0'])
    .set_index(['Time'])
    .add_prefix('solar_local_')
    .reset_index()
)

In [19]:
# Bare last expression: render the prefixed local solar curtailment frame as the cell output.
solar_local_df

Unnamed: 0,Time,solar_local_Curtailment Type,solar_local_Curtailment Reason,solar_local_Fuel Type,solar_local_Curtailment (MWh),solar_local_Curtailment (MW)
0,2021-01-01 07:00:00,Economic,Local,Solar,8.0,41.0
1,2021-01-01 08:00:00,Economic,Local,Solar,15.0,0.0
2,2021-01-01 09:00:00,Economic,Local,Solar,196.0,226.0
3,2021-01-01 10:00:00,Economic,Local,Solar,251.0,322.0
4,2021-01-01 11:00:00,Economic,Local,Solar,263.0,392.0
...,...,...,...,...,...,...
9772,2022-12-30 10:00:00,Economic,Local,Solar,14.0,0.0
9773,2022-12-30 11:00:00,Economic,Local,Solar,10.0,0.0
9774,2022-12-30 12:00:00,Economic,Local,Solar,32.0,163.0
9775,2022-12-30 13:00:00,Economic,Local,Solar,11.0,0.0


In [20]:
# Read the tab-separated system solar curtailment file, convert 'Time' to
# timezone-naive US/Pacific wall-clock timestamps, and prefix every column
# except 'Time' with 'solar_system_'. 'Time' is moved into the index while the
# prefix is applied, then restored as a column.
solar_system_df = (
    pd.read_csv('solar_system_preprocessed.csv', sep='\t')
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw, tz='US/Pacific'))
        .dt.tz_localize(None)
    )
    .set_index(['Time'])
    .add_prefix('solar_system_')
    .reset_index()
)

In [21]:
# Bare last expression: render the prefixed system solar curtailment frame as the cell output.
solar_system_df

Unnamed: 0,Time,solar_system_Curtailment Type,solar_system_Curtailment Reason,solar_system_Fuel Type,solar_system_Curtailment (MWh),solar_system_Curtailment (MW)
0,2021-01-01 07:00:00,Economic,System,Solar,12.0,0.0
1,2021-01-01 08:00:00,Economic,System,Solar,69.0,274.0
2,2021-01-01 09:00:00,Economic,System,Solar,219.0,658.0
3,2021-01-01 10:00:00,Economic,System,Solar,1.0,0.0
4,2021-01-01 13:00:00,Economic,System,Solar,2.0,0.0
...,...,...,...,...,...,...
4099,2022-12-29 14:00:00,Economic,System,Solar,2.0,22.0
4100,2022-12-30 09:00:00,Economic,System,Solar,29.0,104.0
4101,2022-12-30 10:00:00,Economic,System,Solar,21.0,62.0
4102,2022-12-30 11:00:00,Economic,System,Solar,6.0,55.0


In [22]:
# Read the tab-separated Solcast file and normalise its time column:
#   1. rename 'PeriodStart' to 'Time'
#   2. append '+00:00' to each string (marking it as a UTC offset), build a
#      Timestamp in US/Pacific, then drop the timezone
#   3. drop the 'Unnamed: 0', 'Period' and 'PeriodEnd' columns
solcast_df = (
    pd.read_csv('Solcast_2021_2022_PST_preprocessed.csv', sep='\t')
    .rename(columns={'PeriodStart': 'Time'})
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw + '+00:00', tz='US/Pacific'))
        .dt.tz_localize(None)
    )
    .drop(columns=['Unnamed: 0', 'Period', 'PeriodEnd'])
)

In [23]:
# Bare last expression: render the cleaned Solcast frame as the cell output.
solcast_df

Unnamed: 0,Time,AirTemp,Azimuth,CloudOpacity,DewpointTemp,Dhi,Dni,Ebh,Ghi,GtiFixedTilt,...,Ghi_norm,GtiFixedTilt_norm,GtiTracking_norm,PrecipitableWater_norm,RelativeHumidity_norm,SurfacePressure_norm,WindDirection10m_norm,WindSpeed10m_norm,Zenith_norm,AlbedoDaily_norm
0,2021-01-01 00:00:00,6.1,-17,0.0,3.6,0,0,0,0,0,...,0.0,0.0,0.0,0.222488,0.829348,0.624161,0.011111,0.185185,1.000000,0.75
1,2021-01-01 01:00:00,5.6,-55,0.0,3.2,0,0,0,0,0,...,0.0,0.0,0.0,0.229665,0.831522,0.624161,0.066667,0.148148,0.953333,0.50
2,2021-01-01 02:00:00,5.6,-74,0.0,3.1,0,0,0,0,0,...,0.0,0.0,0.0,0.241627,0.825000,0.627517,0.166667,0.148148,0.880000,0.50
3,2021-01-01 03:00:00,4.9,-86,0.0,2.4,0,0,0,0,0,...,0.0,0.0,0.0,0.255981,0.825000,0.624161,0.208333,0.172840,0.800000,0.50
4,2021-01-01 04:00:00,4.0,-95,0.0,1.6,0,0,0,0,0,...,0.0,0.0,0.0,0.265550,0.827174,0.617450,0.227778,0.160494,0.720000,0.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17515,2022-12-31 19:00:00,9.2,98,71.8,7.4,0,0,0,0,0,...,0.0,0.0,0.0,0.392344,0.875000,0.127517,0.863889,0.543210,0.693333,0.00
17516,2022-12-31 20:00:00,9.4,89,56.6,7.6,0,0,0,0,0,...,0.0,0.0,0.0,0.349282,0.878261,0.144295,0.850000,0.530864,0.773333,0.00
17517,2022-12-31 21:00:00,9.5,79,24.8,7.8,0,0,0,0,0,...,0.0,0.0,0.0,0.299043,0.888043,0.161074,0.844444,0.555556,0.853333,0.00
17518,2022-12-31 22:00:00,9.5,63,14.6,7.9,0,0,0,0,0,...,0.0,0.0,0.0,0.260766,0.884783,0.174497,0.836111,0.567901,0.926667,0.00


In [24]:
# Read the tab-separated local wind curtailment file and normalise it:
#   1. 'Time' -> timezone-naive US/Pacific wall-clock timestamps
#   2. drop the 'Unnamed: 0' column
#   3. prefix every column except 'Time' with 'wind_local_'. 'Time' is moved
#      into the index while the prefix is applied, then restored as a column.
wind_local_df = (
    pd.read_csv('wind_local_preprocessed.csv', sep='\t')
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw, tz='US/Pacific'))
        .dt.tz_localize(None)
    )
    .drop(columns=['Unnamed: 0'])
    .set_index(['Time'])
    .add_prefix('wind_local_')
    .reset_index()
)

In [25]:
# Bare last expression: render the prefixed local wind curtailment frame as the cell output.
wind_local_df

Unnamed: 0,Time,wind_local_Curtailment Type,wind_local_Curtailment Reason,wind_local_Fuel Type,wind_local_Curtailment (MWh),wind_local_Curtailment (MW)
0,2021-01-01 08:00:00,Economic,Local,Wind,2.0,0.0
1,2021-01-01 09:00:00,Economic,Local,Wind,6.0,0.0
2,2021-01-01 10:00:00,Economic,Local,Wind,3.0,7.0
3,2021-01-01 11:00:00,Economic,Local,Wind,0.0,0.0
4,2021-01-01 12:00:00,Economic,Local,Wind,1.0,0.0
...,...,...,...,...,...,...
4300,2022-12-30 13:00:00,Economic,Local,Wind,10.0,74.0
4301,2022-12-30 14:00:00,Economic,Local,Wind,4.0,4.0
4302,2022-12-30 15:00:00,Economic,Local,Wind,0.0,0.0
4303,2022-12-30 22:00:00,Economic,Local,Wind,1.0,6.0


In [26]:
# Read the tab-separated system wind curtailment file and normalise it:
#   1. 'Time' -> timezone-naive US/Pacific wall-clock timestamps
#   2. drop the 'Unnamed: 0' column
#   3. prefix every column except 'Time' with 'wind_system_'. 'Time' is moved
#      into the index while the prefix is applied, then restored as a column.
wind_system_df = (
    pd.read_csv('wind_system_preprocessed.csv', sep='\t')
    .assign(
        Time=lambda frame: frame['Time']
        .apply(lambda raw: pd.Timestamp(raw, tz='US/Pacific'))
        .dt.tz_localize(None)
    )
    .drop(columns=['Unnamed: 0'])
    .set_index(['Time'])
    .add_prefix('wind_system_')
    .reset_index()
)

In [27]:
# Bare last expression: render the prefixed system wind curtailment frame as the cell output.
wind_system_df

Unnamed: 0,Time,wind_system_Curtailment Type,wind_system_Curtailment Reason,wind_system_Fuel Type,wind_system_Curtailment (MWh),wind_system_Curtailment (MW)
0,2021-01-01 08:00:00,Economic,System,Wind,5.0,15.0
1,2021-01-01 09:00:00,Economic,System,Wind,9.0,2.0
2,2021-01-01 10:00:00,Economic,System,Wind,2.0,0.0
3,2021-01-01 11:00:00,Economic,System,Wind,0.0,0.0
4,2021-01-02 08:00:00,Economic,System,Wind,11.0,0.0
...,...,...,...,...,...,...
1747,2022-12-30 11:00:00,Economic,System,Wind,1.0,2.0
1748,2022-12-30 14:00:00,Economic,System,Wind,1.0,0.0
1749,2022-12-30 15:00:00,Economic,System,Wind,7.0,76.0
1750,2022-12-30 22:00:00,Economic,System,Wind,0.0,0.0


#### Merge

In [28]:
# Left-join every feature table onto the PV simulation timeline, keyed on 'Time'.
# The list order is the merge order and therefore the column order of `df`.
# NOTE(review): solcast_df is prepared earlier in the notebook but is not part
# of this merge -- confirm that is intentional.
feature_frames = [
    load_df,
    get_as_prices_df,
    get_load_forecast_df,
    fuel_mix_df,
    gas_price_df,
    ghg_df,
    lmp_df,
    solar_local_df,
    solar_system_df,
    wind_local_df,
    wind_system_df,
]

# A table with several rows for one 'Time' turns a left merge into a
# many-to-many join: the rows multiply at every step and are only collapsed by
# a de-duplication at the very end. Keeping the last row per 'Time' in each
# table *before* merging selects exactly the rows that a final
# drop_duplicates(keep='last') would keep (a left merge preserves left row
# order and, within a key, right row order), without the intermediate blow-up.
# validate='many_to_one' then fails loudly if a right-hand key is not unique.
# NOTE(review): "last row wins" silently discards the other rows sharing a
# timestamp. If those are separate records (e.g. several curtailment entries
# in one hour) they presumably need aggregating instead -- confirm.
df = pv_sim_df.drop_duplicates(subset=['Time'], keep='last')
for feature_df in feature_frames:
    df = pd.merge(
        df,
        feature_df.drop_duplicates(subset=['Time'], keep='last'),
        on='Time',
        how='left',
        validate='many_to_one',
    )

# One row per timestamp, with a clean 0..n-1 index.
df = df.reset_index(drop=True)

In [29]:
# Print the merged frame's columns with their non-null counts and dtypes;
# the non-null counts show how many timestamps each source table covered.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17520 entries, 0 to 17519
Data columns (total 59 columns):
 #   Column                              Non-Null Count  Dtype         
---  ------                              --------------  -----         
 0   Time                                17520 non-null  datetime64[ns]
 1   PV_kwAC_out                         17520 non-null  float64       
 2   PV_kwAC_out_Norm                    17520 non-null  float64       
 3   Load                                17494 non-null  float64       
 4   Non-Spinning Reserves Total (MW)    17494 non-null  float64       
 5   Regulation Down Total (MW)          17494 non-null  float64       
 6   Regulation Mileage Down Total (MW)  17494 non-null  float64       
 7   Regulation Mileage Up Total (MW)    17494 non-null  float64       
 8   Regulation Up Total (MW)            17494 non-null  float64       
 9   Total Volume(MW)                    17494 non-null  float64       
 10  Load Forecast         

In [30]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of the merged frame.
df.describe()

Unnamed: 0,PV_kwAC_out,PV_kwAC_out_Norm,Load,Non-Spinning Reserves Total (MW),Regulation Down Total (MW),Regulation Mileage Down Total (MW),Regulation Mileage Up Total (MW),Regulation Up Total (MW),Total Volume(MW),Load Forecast,...,Congestion_Norm,Loss_Norm,solar_local_Curtailment (MWh),solar_local_Curtailment (MW),solar_system_Curtailment (MWh),solar_system_Curtailment (MW),wind_local_Curtailment (MWh),wind_local_Curtailment (MW),wind_system_Curtailment (MWh),wind_system_Curtailment (MW)
count,17520.0,17520.0,17494.0,17494.0,17494.0,17494.0,17494.0,17494.0,17494.0,17518.0,...,17518.0,17518.0,7727.0,7727.0,4104.0,4104.0,4195.0,4195.0,1752.0,1752.0
mean,0.574945,0.228126,299650.828741,906.853552,606.315599,0.0,0.0,373.473852,1886.643003,24981.43761,...,0.303939,0.392843,227.189077,319.335991,172.977851,260.891618,35.716591,51.770536,29.337329,47.381279
std,0.778018,0.308701,60179.231696,179.469883,264.772515,0.0,0.0,138.188237,385.898862,5089.179126,...,0.016796,0.032775,548.149108,678.401104,602.007385,757.928876,64.904869,85.06937,72.796391,97.309269
min,0.0,0.0,166604.0,409.57,110.83,0.0,0.0,42.84,909.74,14616.43,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,256967.25,761.3025,410.89,0.0,0.0,289.89,1612.4575,21365.1,...,0.301568,0.379088,3.0,6.0,1.0,0.0,2.0,2.0,1.0,0.0
50%,0.0029,0.001151,287534.0,885.075,563.02,0.0,0.0,330.0,1865.135,23920.665,...,0.301568,0.389708,28.0,53.0,5.0,6.0,8.0,14.0,6.0,12.0
75%,1.159175,0.459935,323496.0,1054.6675,770.0,0.0,0.0,445.1325,2129.265,27022.955,...,0.302067,0.401471,154.0,258.0,29.0,70.25,38.0,62.0,24.0,42.0
max,2.5203,1.0,613160.0,1625.72,1800.0,0.0,0.0,1412.56,3719.09,51317.22,...,1.0,1.0,5268.0,5556.0,6980.0,7198.0,615.0,728.0,735.0,797.0


In [31]:
# Bare last expression: render the merged frame as the cell output.
df

Unnamed: 0,Time,PV_kwAC_out,PV_kwAC_out_Norm,Load,Non-Spinning Reserves Total (MW),Regulation Down Total (MW),Regulation Mileage Down Total (MW),Regulation Mileage Up Total (MW),Regulation Up Total (MW),Total Volume(MW),...,wind_local_Curtailment Type,wind_local_Curtailment Reason,wind_local_Fuel Type,wind_local_Curtailment (MWh),wind_local_Curtailment (MW),wind_system_Curtailment Type,wind_system_Curtailment Reason,wind_system_Fuel Type,wind_system_Curtailment (MWh),wind_system_Curtailment (MW)
0,2021-01-01 00:00:00,0.0,0.0,258190.0,1108.5,459.89,0.0,0.0,230.76,1799.15,...,,,,,,,,,,
1,2021-01-01 01:00:00,0.0,0.0,247591.0,1086.0,429.89,0.0,0.0,226.26,1742.15,...,,,,,,,,,,
2,2021-01-01 02:00:00,0.0,0.0,239411.0,1096.5,409.89,0.0,0.0,223.74,1730.13,...,,,,,,,,,,
3,2021-01-01 03:00:00,0.0,0.0,235442.0,1100.5,389.89,0.0,0.0,226.74,1717.13,...,,,,,,,,,,
4,2021-01-01 04:00:00,0.0,0.0,234975.0,1062.5,399.89,0.0,0.0,240.14,1702.53,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17515,2022-12-31 19:00:00,0.0,0.0,,,,,,,,...,,,,,,,,,,
17516,2022-12-31 20:00:00,0.0,0.0,,,,,,,,...,,,,,,,,,,
17517,2022-12-31 21:00:00,0.0,0.0,,,,,,,,...,,,,,,,,,,
17518,2022-12-31 22:00:00,0.0,0.0,,,,,,,,...,,,,,,,,,,


In [32]:
# Write the merged frame to 'data_processed.csv'; index=False leaves the row index out of the file.
df.to_csv('data_processed.csv', index=False)