In [1]:
import numpy as np
import pandas as pd
import pytz

from dateutil.relativedelta import relativedelta

from pathlib import Path

In [2]:
DATA_DIR = Path('../../../data/assignment1')

In [3]:
# TODO: find out whether the datetime object of the power production should be associated to StartTimeUTC or EndTimeUTC: 
### both for the actual power and for the day ahead prices


# Read csv-file
actual_wind_power = pd.read_csv(DATA_DIR / 'raw/Actual wind power.csv', sep=';')
# Parse datetime by combining date and hour information
actual_wind_power['StartTimeUTC'] = pd.to_datetime(actual_wind_power['Date'] + ' ' + actual_wind_power['Time'], format='mixed')
# Add timezone to datetime element
actual_wind_power['StartTimeUTC'] = actual_wind_power.StartTimeUTC.dt.tz_localize(pytz.UTC) 

# Assume that the timestamp is from DK (since it starts at hour 0 of 2021 which the data from the other files does in DK time.
# For this reason we adjust the timeseries and express everything in terms of UTC timestamps 
actual_wind_power.StartTimeUTC - pd.to_timedelta('2 hours')

# Get rid of redundant columns
actual_wind_power = actual_wind_power.drop(columns=['Date', 'Time'])

actual_wind_power.head()

Unnamed: 0,Actual,StartTimeUTC
0,0.290185,2021-01-01 00:00:00+00:00
1,0.25889,2021-01-01 01:00:00+00:00
2,0.234708,2021-01-01 02:00:00+00:00
3,0.223329,2021-01-01 03:00:00+00:00
4,0.172119,2021-01-01 04:00:00+00:00


In [4]:
# Load information about up- and down-regulation prices for both 2021 and 2022
for year_idx, year in enumerate([2021, 2022]):
    for i, filename in enumerate([f'Down-regulation price_{year}.csv', f'Up-regulation price_{year}.csv']):
        # Determine filetype
        price_type = filename.split('-')[0]
        price_column_name = 'Up-regulating' if price_type == 'Up' else 'Down-regulation' 
        price_column_name = f'"{price_column_name} price in the Balancing energy market"""'
        
        # Read csv-file as temporary dataframe
        df_price_ = pd.read_csv(DATA_DIR / f'raw/{filename}', sep=',"', engine='python')
    
        # Handle encoding with quotation marks
        df_price_['StartTimeUTC'] = pd.to_datetime(df_price_['"Start time UTC'].str.strip('"')).dt.tz_localize(pytz.UTC)
        df_price_['EndTimeUTC'] = pd.to_datetime(df_price_['"End time UTC""'].str.strip('"')).dt.tz_localize(pytz.UTC)
        df_price_[f'BalancingMarketPrice_{price_type}Reg'] = df_price_[price_column_name].str.strip('"')
    
        # Restrict data to relevant information - Danish timezone is implicitly contained in UTC timestamp
        df_price_ = df_price_[['StartTimeUTC', 'EndTimeUTC', f'BalancingMarketPrice_{price_type}Reg']]

        prices_ = df_price_ if i == 0 else prices_.merge(df_price_, on=['StartTimeUTC', 'EndTimeUTC'], how='outer')
        
    # Merge prices from year with currently stored price information into combined dataframe
    balancing_prices = prices_ if year_idx == 0 else pd.concat([balancing_prices, prices_], axis=0).reset_index(drop=True)

# Remove variables used for temporarily storing information
del df_price_
del prices_ 

balancing_prices.head()

Unnamed: 0,StartTimeUTC,EndTimeUTC,BalancingMarketPrice_DownReg,BalancingMarketPrice_UpReg
0,2020-12-31 22:00:00+00:00,2020-12-31 23:00:00+00:00,18.5,24.07
1,2020-12-31 23:00:00+00:00,2021-01-01 00:00:00+00:00,18.5,24.95
2,2021-01-01 00:00:00+00:00,2021-01-01 01:00:00+00:00,24.35,24.35
3,2021-01-01 01:00:00+00:00,2021-01-01 02:00:00+00:00,23.98,23.98
4,2021-01-01 02:00:00+00:00,2021-01-01 03:00:00+00:00,23.72,23.72


In [None]:
# TODO: find out whether the datetime object of the power production should be associated to StartTimeUTC or EndTimeUTC: 
### both for the actual power and for the day ahead prices


# Read day ahead prices from excel sheet
day_ahead_prices = pd.read_excel(DATA_DIR / 'raw/Day-ahead price.xlsx')

# Represent time as datetime object
day_ahead_prices['StartTimeUTC'] = pd.to_datetime(day_ahead_prices['HourUTC']).dt.tz_localize(pytz.UTC)

# Get rid of redundant information
day_ahead_prices = day_ahead_prices[['StartTimeUTC', 'PriceArea', 'SpotPriceDKK', 'SpotPriceEUR']]

day_ahead_prices.head()