In [29]:
import pandas as pd

In [30]:
# Load the original NO2 dataset; the file is semicolon-separated
data_NO2 = pd.read_csv(filepath_or_buffer='data_NO2.csv', sep=';')
data_NO2.head(n=5)


Unnamed: 0,datetime_utc,volume_demand,volume_production,spot_price
0,2015-12-31 23:00:00+00:00,4043.0,7818.0,16.39
1,2016-01-01 00:00:00+00:00,4010.0,7694.0,16.04
2,2016-01-01 01:00:00+00:00,3930.0,7437.0,15.74
3,2016-01-01 02:00:00+00:00,3839.0,7120.0,15.57
4,2016-01-01 03:00:00+00:00,3833.0,7041.0,15.47


In [31]:
# Load the hourly weather observations for Kristiansand
kristiansand_df = pd.read_csv(filepath_or_buffer='kristiansand.csv')
kristiansand_df.head(n=5)

Unnamed: 0,time,temperature_2m (°C),apparent_temperature (°C)
0,2015-12-31T23:00,6.3,3.3
1,2016-01-01T00:00,6.1,3.4
2,2016-01-01T01:00,6.3,3.3
3,2016-01-01T02:00,6.5,3.0
4,2016-01-01T03:00,6.7,2.8


In [32]:
# Load the hourly weather observations for Stavanger
stavanger_df = pd.read_csv(filepath_or_buffer='stavanger.csv')
stavanger_df.head(n=5)

Unnamed: 0,time,temperature_2m (°C),apparent_temperature (°C)
0,2015-12-31T23:00,7.3,1.4
1,2016-01-01T00:00,6.9,0.9
2,2016-01-01T01:00,7.0,0.9
3,2016-01-01T02:00,7.4,1.1
4,2016-01-01T03:00,8.0,1.8


In [33]:
# Convert the time columns to datetime.
# The data_NO2 timestamps carry an explicit offset ("+00:00"). Parsing with utc=True
# pins them to UTC even if a row used a different offset; the timezone is then dropped
# so the values compare equal to the timezone-naive weather timestamps when merging.
data_NO2['datetime_utc'] = pd.to_datetime(data_NO2['datetime_utc'], utc=True).dt.tz_localize(None)

# NOTE(review): the weather timestamps have no offset; the later merge on these
# columns presumably relies on them being UTC as well — confirm against the data source.
kristiansand_df['time'] = pd.to_datetime(kristiansand_df['time'])
stavanger_df['time'] = pd.to_datetime(stavanger_df['time'])

In [34]:
# Give every frame the same 'datetime' key and prefix the weather columns
# with a station code (krs = Kristiansand, stv = Stavanger) so they stay distinct after merging
data_NO2 = data_NO2.rename({'datetime_utc': 'datetime'}, axis='columns')
kristiansand_df = kristiansand_df.rename(
    {
        'time': 'datetime',
        'temperature_2m (°C)': 'krs_temp_2m',
        'apparent_temperature (°C)': 'krs_app_temp',
    },
    axis='columns',
)
stavanger_df = stavanger_df.rename(
    {
        'time': 'datetime',
        'temperature_2m (°C)': 'stv_temp_2m',
        'apparent_temperature (°C)': 'stv_app_temp',
    },
    axis='columns',
)

In [35]:
# Inner-join the NO2 data with both weather stations on the shared timestamp;
# only timestamps present in all three frames are kept
merged_df = (
    data_NO2
    .merge(kristiansand_df, on='datetime', how='inner')
    .merge(stavanger_df, on='datetime', how='inner')
)

merged_df.head()

Unnamed: 0,datetime,volume_demand,volume_production,spot_price,krs_temp_2m,krs_app_temp,stv_temp_2m,stv_app_temp
0,2015-12-31 23:00:00,4043.0,7818.0,16.39,6.3,3.3,7.3,1.4
1,2016-01-01 00:00:00,4010.0,7694.0,16.04,6.1,3.4,6.9,0.9
2,2016-01-01 01:00:00,3930.0,7437.0,15.74,6.3,3.3,7.0,0.9
3,2016-01-01 02:00:00,3839.0,7120.0,15.57,6.5,3.0,7.4,1.1
4,2016-01-01 03:00:00,3833.0,7041.0,15.47,6.7,2.8,8.0,1.8


In [36]:
# Show (rows, columns) of the merged frame to check how many rows the inner merges kept
merged_df.shape

(23666, 8)

In [37]:
# Load the daily natural gas prices
gas_price_df = pd.read_csv(filepath_or_buffer='gas_price.csv')
gas_price_df.head(n=5)

Unnamed: 0,Day,Natural Gas Price
0,09/13/2018,2.94
1,09/12/2018,2.98
2,09/11/2018,2.94
3,09/10/2018,2.9
4,09/7/2018,2.88


In [38]:
# Parse the gas price 'Day' strings and reduce them to plain calendar dates
gas_price_df['Day'] = pd.to_datetime(gas_price_df['Day']).dt.date

# Derive the calendar date (time of day dropped) of every hourly row as the join key
merged_df['date_only'] = pd.to_datetime(merged_df['datetime']).dt.date

# Left-join so every hourly row is kept; dates without a quoted gas price get NaN
merged_df = merged_df.merge(gas_price_df, how='left', left_on='date_only', right_on='Day')

merged_df.head()

Unnamed: 0,datetime,volume_demand,volume_production,spot_price,krs_temp_2m,krs_app_temp,stv_temp_2m,stv_app_temp,date_only,Day,Natural Gas Price
0,2015-12-31 23:00:00,4043.0,7818.0,16.39,6.3,3.3,7.3,1.4,2015-12-31,2015-12-31,2.28
1,2016-01-01 00:00:00,4010.0,7694.0,16.04,6.1,3.4,6.9,0.9,2016-01-01,2016-01-01,2.28
2,2016-01-01 01:00:00,3930.0,7437.0,15.74,6.3,3.3,7.0,0.9,2016-01-01,2016-01-01,2.28
3,2016-01-01 02:00:00,3839.0,7120.0,15.57,6.5,3.0,7.4,1.1,2016-01-01,2016-01-01,2.28
4,2016-01-01 03:00:00,3833.0,7041.0,15.47,6.7,2.8,8.0,1.8,2016-01-01,2016-01-01,2.28


In [39]:
# Discard the helper join keys, then rename the gas price column
# to the snake_case style used by the other columns
merged_df = (
    merged_df
    .drop(['date_only', 'Day'], axis='columns')
    .rename(columns={'Natural Gas Price': 'gas_price'})
)

In [40]:
# Lag the gas price by 24 rows so each row carries the price from 24 rows earlier.
# The shift is positional, so it corresponds to 24 hours only while the rows are
# consecutive hourly observations.
# The 24 leading rows vacated by the shift are seeded with the first price in the
# column (the same value the shift moves to position 24).
first_gas_price = merged_df['gas_price'].iloc[0]
merged_df['gas_price'] = merged_df['gas_price'].shift(24, fill_value=first_gas_price)

# Fill gas prices forwards so weekends and other non-trading days reuse the last quoted price.
# Series.ffill() replaces fillna(method='ffill'), which is deprecated and raises a FutureWarning.
merged_df['gas_price'] = merged_df['gas_price'].ffill()

  merged_df['gas_price'] = merged_df['gas_price'].fillna(method='ffill')


In [41]:
# Write the enriched dataset to disk without the index column, then report the target file
output_path = 'data_NO2_boosted.csv'
merged_df.to_csv(path_or_buf=output_path, index=False)

print(f"Data saved to {output_path}")


Data saved to data_NO2_boosted.csv
