In [2]:
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Load the six yearly workbooks, one DataFrame per year, in chronological order.
(df_2018, df_2019, df_2020,
 df_2021, df_2022, df_2023) = map(
    pd.read_excel,
    (
        "System2018DKK.xlsx",
        "System2019DKK.xlsx",
        "System2020DKK.xlsx",
        "System2021DKK.xlsx",
        "System2022DKK.xlsx",
        "System2023DKK.xlsx",
    ),
)

In [4]:

# Function to transform the DataFrame
def transform_df(df):
    """Reshape one wide table of hourly prices into a long, timestamped table.

    The first column of ``df`` is treated as the calendar date and every other
    column as an hour-of-day label in the 1-24 convention (ints or numeric
    strings). Label ``h`` is mapped to clock hour ``h - 1``, so column 1
    becomes 00:00 and column 24 becomes 23:00.

    The caller's DataFrame is not modified: the rename is done on a new frame
    rather than in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table: first column holds the date, remaining columns hold the
        price for each hour label.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame with exactly two columns, ``DateTime`` and
        ``SystemPrice``, in the row order produced by ``melt`` (all dates for
        the first hour column, then all dates for the next, and so on).
        Missing prices are passed through unchanged.

    Raises
    ------
    ValueError
        If an hour column label cannot be converted to an integer.
    """
    # Rename on a new frame so the caller's header is left as it was.
    wide = df.rename(columns={df.columns[0]: 'Date'})

    # Wide -> long: one row per (date, hour label) pair.
    long_df = wide.melt(id_vars=['Date'], var_name='Hour', value_name='SystemPrice')

    # Hour labels are 1-24; shift to 0-23 so they can be used as an offset
    # from midnight.
    hour_offset = long_df['Hour'].astype(int) - 1

    # Vectorised date + hour offset; avoids formatting and re-parsing a string
    # for every row.
    long_df['DateTime'] = (
        pd.to_datetime(long_df['Date']) + pd.to_timedelta(hour_offset, unit='h')
    )

    # Explicit copy: callers assign to columns of the result later, which
    # would otherwise be flagged as writing to a slice of ``long_df``.
    return long_df[['DateTime', 'SystemPrice']].copy()

# The raw yearly workbooks (df_2018 ... df_2023) were already loaded in the
# previous cell, so they are reused here instead of being read from disk a
# second time.

# Apply the transformation to each DataFrame, keyed by year
dfs_transformed = {
    year: transform_df(raw_frame)
    for year, raw_frame in zip(
        range(2018, 2024),
        (df_2018, df_2019, df_2020, df_2021, df_2022, df_2023),
    )
}

# Sanity check: print every row of the 2018 frame that falls on its earliest
# calendar day, to confirm the hour labels were converted as intended.
frame_2018 = dfs_transformed[2018]
calendar_days_2018 = frame_2018['DateTime'].dt.date
first_day_2018 = calendar_days_2018.min()
df_first_day_2018 = frame_2018.loc[calendar_days_2018 == first_day_2018]
print(df_first_day_2018)


                DateTime  SystemPrice
0    2018-01-01 00:00:00       195.85
365  2018-01-01 01:00:00       196.74
730  2018-01-01 02:00:00       194.21
1095 2018-01-01 03:00:00       183.34
1460 2018-01-01 04:00:00       183.64
1825 2018-01-01 05:00:00       180.44
2190 2018-01-01 06:00:00       179.99
2555 2018-01-01 07:00:00       180.81
2920 2018-01-01 08:00:00       179.77
3285 2018-01-01 09:00:00       180.59
3650 2018-01-01 10:00:00       192.35
4015 2018-01-01 11:00:00       193.99
4380 2018-01-01 12:00:00       192.50
4745 2018-01-01 13:00:00       193.24
5110 2018-01-01 14:00:00       193.76
5475 2018-01-01 15:00:00       196.74
5840 2018-01-01 16:00:00       201.13
6205 2018-01-01 17:00:00       204.19
6570 2018-01-01 18:00:00       203.22
6935 2018-01-01 19:00:00       200.76
7300 2018-01-01 20:00:00       198.68
7665 2018-01-01 21:00:00       196.89
8030 2018-01-01 22:00:00       194.06
8395 2018-01-01 23:00:00       186.25


In [5]:
# Bind one name per year to the corresponding entry of dfs_transformed.
# These are the same objects as the dict values, not copies.
(df_2018_transformed, df_2019_transformed, df_2020_transformed,
 df_2021_transformed, df_2022_transformed, df_2023_transformed) = (
    dfs_transformed[year] for year in range(2018, 2024)
)


In [6]:
# Preview the long-format 2018 frame; the bare last expression is what the
# notebook renders as the cell output.
df_2018_transformed

Unnamed: 0,DateTime,SystemPrice
0,2018-01-01 00:00:00,195.85
1,2018-01-02 00:00:00,191.61
2,2018-01-03 00:00:00,201.35
3,2018-01-04 00:00:00,201.36
4,2018-01-05 00:00:00,207.57
...,...,...
8755,2018-12-27 23:00:00,371.68
8756,2018-12-28 23:00:00,374.40
8757,2018-12-29 23:00:00,352.75
8758,2018-12-30 23:00:00,390.32


In [7]:
# Index the 2018 frame by timestamp and put it in chronological order.
# After the first run "DateTime" is the index rather than a column, so it is
# only promoted while it is still a column; this makes the cell safe to re-run
# instead of raising a KeyError.
if "DateTime" in df_2018_transformed.columns:
    df_2018_transformed.set_index("DateTime", inplace=True)
df_2018_transformed.sort_index(inplace=True)

In [8]:
# Show the first 24 rows of the 2018 frame.
df_2018_transformed.head(24)

Unnamed: 0_level_0,SystemPrice
DateTime,Unnamed: 1_level_1
2018-01-01 00:00:00,195.85
2018-01-01 01:00:00,196.74
2018-01-01 02:00:00,194.21
2018-01-01 03:00:00,183.34
2018-01-01 04:00:00,183.64
2018-01-01 05:00:00,180.44
2018-01-01 06:00:00,179.99
2018-01-01 07:00:00,180.81
2018-01-01 08:00:00,179.77
2018-01-01 09:00:00,180.59


In [9]:
# Give the 2019-2023 frames a DateTime index and sort them chronologically.
# Assumes df_2019_transformed to df_2023_transformed are already defined.

df_list = [df_2019_transformed, df_2020_transformed, df_2021_transformed, df_2022_transformed, df_2023_transformed]

for df in df_list:
    # After the first run "DateTime" is the index rather than a column, so it
    # is only promoted while it is still a column; this keeps the cell
    # re-runnable instead of raising a KeyError.
    if "DateTime" in df.columns:
        df.set_index("DateTime", inplace=True)
    df.sort_index(inplace=True)


In [10]:
# Show the last 24 rows of the 2023 frame.
df_2023_transformed.tail(24)

Unnamed: 0_level_0,SystemPrice
DateTime,Unnamed: 1_level_1
2023-12-31 00:00:00,374.68
2023-12-31 01:00:00,350.67
2023-12-31 02:00:00,340.76
2023-12-31 03:00:00,325.7
2023-12-31 04:00:00,303.41
2023-12-31 05:00:00,305.79
2023-12-31 06:00:00,301.55
2023-12-31 07:00:00,332.41
2023-12-31 08:00:00,358.72
2023-12-31 09:00:00,382.35


In [11]:
# Per-year multipliers applied to SystemPrice (DKK -> EUR).
average_exchange_rate_2018 = 0.1341
average_exchange_rate_2019 = 0.1339
average_exchange_rate_2020 = 0.1341
average_exchange_rate_2021 = 0.1345
average_exchange_rate_2022 = 0.1344
average_exchange_rate_2023 = 0.1341

# Overwrite SystemPrice in each yearly frame with its value times that year's
# rate. Note: the column is replaced in place, so running this cell again
# multiplies by the rate a second time.
for yearly_frame, dkk_to_eur in (
    (df_2018_transformed, average_exchange_rate_2018),
    (df_2019_transformed, average_exchange_rate_2019),
    (df_2020_transformed, average_exchange_rate_2020),
    (df_2021_transformed, average_exchange_rate_2021),
    (df_2022_transformed, average_exchange_rate_2022),
    (df_2023_transformed, average_exchange_rate_2023),
):
    yearly_frame["SystemPrice"] = yearly_frame["SystemPrice"].mul(dkk_to_eur)


In [12]:
# Yearly frames, listed in chronological order
dfs = [
    df_2018_transformed,
    df_2019_transformed,
    df_2020_transformed,
    df_2021_transformed,
    df_2022_transformed,
    df_2023_transformed,
]

# Stack the yearly frames row-wise into a single frame
df_combined = pd.concat(objs=dfs, axis=0)


In [13]:
# Preview the combined frame; the bare last expression is what the notebook
# renders as the cell output.
df_combined

Unnamed: 0_level_0,SystemPrice
DateTime,Unnamed: 1_level_1
2018-01-01 00:00:00,26.263485
2018-01-01 01:00:00,26.382834
2018-01-01 02:00:00,26.043561
2018-01-01 03:00:00,24.585894
2018-01-01 04:00:00,24.626124
...,...
2023-12-31 19:00:00,49.594203
2023-12-31 20:00:00,44.985186
2023-12-31 21:00:00,44.755875
2023-12-31 22:00:00,43.986141


In [15]:
# Count missing values per column in the combined frame.
# NOTE(review): the recorded output shows 6 missing SystemPrice values across
# six years of data; presumably one empty hour per year (e.g. the skipped
# hour at the spring clock change) -- confirm against the source workbooks.
df_combined.isna().sum()

SystemPrice    6
dtype: int64