In [1]:
from eptr2 import EPTR2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

In [2]:
def convert_to_dataframe(data):
    """Build a DataFrame from the ``'items'`` entry of ``data`` and parse its dates.

    Parameters:
    data (dict): Mapping whose ``'items'`` value holds the records; the records
        must provide a ``'date'`` field.

    Returns:
    pd.DataFrame: One row per item, with ``'date'`` converted by ``pd.to_datetime``.
    """
    frame = pd.DataFrame(data['items'])
    frame['date'] = pd.to_datetime(frame['date'])
    return frame

In [3]:
# Load the stored day-ahead price predictions and discard the final 24 rows.
day_ahead_prices = pd.read_csv("MCP_PREDICTIONS.csv").iloc[:-24]
day_ahead_prices

Unnamed: 0,DateTime,renewable_ratio,priceEur,Difference_24,Difference_168,Workday,Workday_168,Workday_24,forecast,BV Forecast,price_lag_24,price_lag_168,forecasted_price_eur
0,2021-01-14T00:00:00Z,0.040737,35.26,1.16,2.17,1,1,1,27.40,4187.97,34.10,33.09,46.060030
1,2021-01-14T01:00:00Z,0.045864,35.13,4.79,0.94,1,1,1,30.10,3975.82,30.34,34.19,44.125388
2,2021-01-14T02:00:00Z,0.051034,34.87,2.32,5.17,1,1,1,37.30,3885.02,32.55,29.70,42.345044
3,2021-01-14T03:00:00Z,0.059953,34.58,7.66,5.35,1,1,1,41.00,3589.79,26.92,29.23,38.267237
4,2021-01-14T04:00:00Z,0.062077,34.57,6.45,7.32,1,1,1,52.60,3137.07,28.12,27.25,37.610058
...,...,...,...,...,...,...,...,...,...,...,...,...,...
28723,2024-04-24T19:00:00Z,0.073455,77.99,0.01,-0.39,1,1,0,98.48,3706.28,77.98,78.38,89.920105
28724,2024-04-24T20:00:00Z,0.073878,77.99,0.01,-0.39,1,1,0,78.06,3386.47,77.98,78.38,89.864853
28725,2024-04-24T21:00:00Z,0.075954,77.99,0.01,1.09,1,1,0,72.15,3207.80,77.98,76.90,88.881698
28726,2024-04-24T22:00:00Z,0.076030,77.99,9.63,20.95,1,1,0,63.76,3123.62,68.36,57.04,74.717507


In [4]:
# Parse the timestamps and expose them under the column name 'date'.
day_ahead_prices['DateTime'] = pd.to_datetime(day_ahead_prices['DateTime'])
day_ahead_prices = day_ahead_prices.rename(columns={'DateTime': 'date'})

# Rows before the split date form the training window; rows on/after it form the test window.
split_date = '2023-04-25'
in_train_window = day_ahead_prices['date'] < split_date
in_test_window = day_ahead_prices['date'] >= split_date

# .assign() returns a new frame, so the tz-naive 'date' column is written to an independent
# object instead of a slice of day_ahead_prices (this is what raised SettingWithCopyWarning).
train_day_ahead = (
    day_ahead_prices[in_train_window]
    .assign(date=lambda frame: frame['date'].dt.tz_localize(None))
    .reset_index(drop=True)
)
test_day_ahead = (
    day_ahead_prices[in_test_window]
    .assign(date=lambda frame: frame['date'].dt.tz_localize(None))
    .reset_index(drop=True)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_day_ahead['date'] = train_day_ahead['date'].dt.tz_localize(None)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_day_ahead['date'] = test_day_ahead['date'].dt.tz_localize(None)


In [5]:
# First and last calendar day covered by each split, formatted as 'YYYY-MM-DD' strings.
train_start_date, train_end_date = (
    stamp.strftime('%Y-%m-%d')
    for stamp in (train_day_ahead['date'].min(), train_day_ahead['date'].max())
)
test_start_date, test_end_date = (
    stamp.strftime('%Y-%m-%d')
    for stamp in (test_day_ahead['date'].min(), test_day_ahead['date'].max())
)

print(
    f"Train start Date: {train_start_date}",
    f"Train end Date: {train_end_date}",
    sep="\n",
)
print(
    f"Test start Date: {test_start_date}",
    f"Test end Date: {test_end_date}",
    sep="\n",
)

Train start Date: 2021-01-14
Train end Date: 2023-04-24
Test start Date: 2023-04-25
Test end Date: 2024-04-24


In [6]:
# Turn the 'YYYY-MM-DD' boundary strings back into pandas Timestamps (midnight of each day).
train_start_date, train_end_date, test_start_date, test_end_date = map(
    pd.to_datetime,
    (train_start_date, train_end_date, test_start_date, test_end_date),
)

### Intraday

In [7]:
def calculate_hourly_quantiles_and_cumulative_lots(data, day, price_column, quantity_column):
    """
    Calculate price quantiles and cumulative quantities for each hour of the day.

    For every hour label "00:00" .. "23:00" that has rows in ``data``, the 0.25, 0.50 and
    0.75 quantiles of ``price_column`` are computed; for each quantile the quantities of
    all rows priced at or above that quantile price are summed.

    Parameters:
    data (pd.DataFrame): DataFrame containing the market data. It must have an 'hour'
        column; rows whose hour is not one of the 24 "HH:00" labels are ignored.
        The frame is not modified.
    day: Value written unchanged into the 'Date' column of every output row.
    price_column (str): Column name in the DataFrame for the price data.
    quantity_column (str): Column name in the DataFrame for the quantity data.

    Returns:
    pd.DataFrame: Columns 'Date', 'Hour', 'Quantile', 'Price' and 'Cumulative Lots',
        one row per (hour, quantile). Empty (with those columns) when nothing matches.
    """
    hour_labels = [f"{hour:02d}:00" for hour in range(24)]
    output_columns = ['Date', 'Hour', 'Quantile', 'Price', 'Cumulative Lots']

    # Group on a separate ordered categorical rather than overwriting data['hour'],
    # so the caller's frame keeps its original column.
    hour_key = pd.Categorical(data['hour'], categories=hour_labels, ordered=True)

    results = []
    # observed=True: iterate only over hours that actually occur in the data.
    for hour, group in data.groupby(hour_key, observed=True):
        # Quantiles of an all-NaN price column are NaN; dropping them leaves nothing to report.
        price_quantiles = group[price_column].quantile([0.25, 0.50, 0.75]).dropna()
        if price_quantiles.empty:
            continue

        # Quantity offered at or above each quantile price.
        cumulative_lots = [
            group.loc[group[price_column] >= threshold_price, quantity_column].sum()
            for threshold_price in price_quantiles
        ]

        results.append(pd.DataFrame({
            'Date': day,
            'Hour': hour,
            'Quantile': price_quantiles.index,
            'Price': price_quantiles.values,
            'Cumulative Lots': cumulative_lots,
        }))

    if not results:
        return pd.DataFrame(columns=output_columns)
    return pd.concat(results, ignore_index=True)


In [8]:
def calculate_quantiles(df):
    """Find volume-weighted quantile prices for every (date, hour) group.

    Within each group the rows are ordered by 'price' and 'quantity' is accumulated.
    For each level 0.25, 0.5 and 0.75 the first row whose running quantity reaches that
    share of the group's total is selected.

    Parameters:
    df (pd.DataFrame): Must contain the columns 'date', 'hour', 'price' and 'quantity'.

    Returns:
    pd.DataFrame: One row per (date, hour, quantile) with the columns 'date', 'hour',
        'price' (price of the selected row), 'quantile', and 'cumulative_lots'
        (group total minus the running quantity at the selected row).
    """
    quantile_levels = (0.25, 0.5, 0.75)
    records = []

    for (day, hr), trades in df.groupby(['date', 'hour']):
        ordered = trades.sort_values(by='price')
        ordered = ordered.assign(cumulative_quantity=ordered['quantity'].cumsum())

        # The last running value is the total quantity of the group.
        traded_total = ordered['cumulative_quantity'].iloc[-1]

        for level in quantile_levels:
            reached = ordered['cumulative_quantity'] >= traded_total * level
            first_hit = ordered[reached].iloc[0]
            records.append({
                'date': day,
                'hour': hr,
                'price': first_hit['price'],
                'quantile': level,
                'cumulative_lots': traded_total - first_hit['cumulative_quantity'],
            })

    return pd.DataFrame(records)

In [9]:
# Client object from the `eptr2` package; later cells fetch data through `eptr.call("mcp", ...)`.
# NOTE(review): the meaning of these three flags is defined by eptr2 — confirm against its docs.
eptr = EPTR2(
    ssl_verify=True,
    postprocess=False, 
    get_raw_response=False
)

In [10]:
# Intraday records exported earlier to CSV; preview the first five rows.
all_intra_day_data = pd.read_csv("all_intraday_data.csv")
all_intra_day_data.head(5)

Unnamed: 0,contract_date,contract_hour,contractName,price,quantity,id,prefix,date,hour
0,2021-01-14,00:00,PH21011402,289.98,2,172045281,PH,2021-01-14,02:00
1,2021-01-14,00:00,PH21011402,289.99,20,172045519,PH,2021-01-14,02:00
2,2021-01-14,00:00,PH21011407,269.0,7,172046058,PH,2021-01-14,07:00
3,2021-01-14,00:00,PH21011404,275.0,100,172046087,PH,2021-01-14,04:00
4,2021-01-14,00:00,PH21011408,279.99,20,172045984,PH,2021-01-14,08:00


In [11]:
# Parse the 'date' strings into datetimes.
all_intra_day_data = all_intra_day_data.assign(date=pd.to_datetime(all_intra_day_data['date']))

In [12]:
# Remove the columns that the quantile calculation does not read.
all_intra_day_data = all_intra_day_data.drop(columns=['contract_date', 'contract_hour', 'id'])

In [13]:
# Quantile prices per (date, hour); print the row count and preview the first ten rows.
all_quantiles_data = calculate_quantiles(all_intra_day_data)
print(len(all_quantiles_data))
all_quantiles_data.head(10)

86247


Unnamed: 0,date,hour,price,quantile,cumulative_lots
0,2021-01-14,02:00,288.99,0.25,1417
1,2021-01-14,02:00,290.0,0.5,0
2,2021-01-14,02:00,290.0,0.75,0
3,2021-01-14,03:00,270.0,0.25,911
4,2021-01-14,03:00,274.98,0.5,618
5,2021-01-14,03:00,278.48,0.75,293
6,2021-01-14,04:00,286.98,0.25,3315
7,2021-01-14,04:00,287.99,0.5,2207
8,2021-01-14,04:00,288.85,0.75,1064
9,2021-01-14,05:00,277.3,0.25,3328


### Production + Day Ahead + Exchange

In [14]:
# Hourly production forecast for one plant (selected by its 'eic' code) in the test window.
# NOTE(review): the window filter compares the raw 'time' strings against str(Timestamp)
# bounds, i.e. lexicographically, and the upper bound is midnight of the end date; the
# displayed result stops on the previous day — confirm that excluding the end date is intended.
production_test = pd.read_csv('AlgopolyPerformance-PerformanceReport_2024-04-25_06-16-54.csv')
start_date, end_date = str(test_start_date), str(test_end_date)

in_window = production_test['time'].between(start_date, end_date)
is_plant = production_test['eic'] == '40W000000010903W'

production_test = (
    production_test.loc[in_window & is_plant, ['time', 'forecast']]
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    # Largest forecast first within each timestamp, so keep='first' retains the maximum.
    .sort_values(by=['time', 'forecast'], ascending=[True, False])
    .drop_duplicates(subset=['time'], keep='first')
    .rename(columns={'time': 'date', 'forecast': 'predicted_production'})
    .assign(date=lambda frame: frame['date'].dt.tz_localize(None))
)

production_test

Unnamed: 0,date,predicted_production
454702,2023-04-25 00:00:00,5.460000
749147,2023-04-25 01:00:00,7.060000
64952,2023-04-25 02:00:00,7.720000
584674,2023-04-25 03:00:00,9.060000
108499,2023-04-25 04:00:00,14.025820
...,...,...
260510,2024-04-23 19:00:00,51.530000
357861,2024-04-23 20:00:00,53.850000
228946,2024-04-23 21:00:00,58.062974
554018,2024-04-23 22:00:00,53.740000


In [15]:
test_day_ahead

Unnamed: 0,date,renewable_ratio,priceEur,Difference_24,Difference_168,Workday,Workday_168,Workday_24,forecast,BV Forecast,price_lag_24,price_lag_168,forecasted_price_eur
0,2023-04-25 00:00:00,0.055586,122.57,53.03,10.35,1,1,1,94.57,1821.30,69.54,112.22,99.139640
1,2023-04-25 01:00:00,0.066580,122.57,75.43,15.29,1,1,1,102.25,1506.94,47.14,107.28,84.631126
2,2023-04-25 02:00:00,0.081633,105.39,59.45,12.02,1,1,1,111.28,1324.29,45.94,93.37,75.398830
3,2023-04-25 03:00:00,0.089394,80.14,68.35,-13.23,1,1,1,121.90,1221.89,11.79,93.37,58.079089
4,2023-04-25 04:00:00,0.085833,80.14,44.78,-4.02,1,1,1,134.15,1140.74,35.36,84.16,65.365217
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8779,2024-04-24 19:00:00,0.073455,77.99,0.01,-0.39,1,1,0,98.48,3706.28,77.98,78.38,89.920105
8780,2024-04-24 20:00:00,0.073878,77.99,0.01,-0.39,1,1,0,78.06,3386.47,77.98,78.38,89.864853
8781,2024-04-24 21:00:00,0.075954,77.99,0.01,1.09,1,1,0,72.15,3207.80,77.98,76.90,88.881698
8782,2024-04-24 22:00:00,0.076030,77.99,9.63,20.95,1,1,0,63.76,3123.62,68.36,57.04,74.717507


In [16]:
# Attach the day-ahead columns to every production row; production rows without a match keep NaN.
test_df = production_test.merge(
    test_day_ahead[['date', 'renewable_ratio', 'Workday', 'priceEur', 'forecasted_price_eur']],
    how='left',
    on='date',
)

In [17]:
test_df

Unnamed: 0,date,predicted_production,renewable_ratio,Workday,priceEur,forecasted_price_eur
0,2023-04-25 00:00:00,5.460000,0.055586,1,122.57,99.139640
1,2023-04-25 01:00:00,7.060000,0.066580,1,122.57,84.631126
2,2023-04-25 02:00:00,7.720000,0.081633,1,105.39,75.398830
3,2023-04-25 03:00:00,9.060000,0.089394,1,80.14,58.079089
4,2023-04-25 04:00:00,14.025820,0.085833,1,80.14,65.365217
...,...,...,...,...,...,...
8755,2024-04-23 19:00:00,51.530000,0.145035,0,77.98,56.476050
8756,2024-04-23 20:00:00,53.850000,0.152725,0,77.98,55.473172
8757,2024-04-23 21:00:00,58.062974,0.160924,0,77.98,54.403865
8758,2024-04-23 22:00:00,53.740000,0.176247,0,68.36,48.588193


In [18]:
# Fetch the "mcp" series for the test window and derive the implied exchange rate
# as the ratio of the 'price' column to the 'priceEur' column.
start_date, end_date = pd.to_datetime("2023-04-25"), pd.to_datetime("2024-04-24")
exchange_test = convert_to_dataframe(eptr.call("mcp", start_date=start_date, end_date=end_date))
exchange_test['exchange_rate'] = exchange_test['price'].div(exchange_test['priceEur'])
# Strip the timezone so 'date' can be joined with the tz-naive frames built earlier.
exchange_test['date'] = pd.to_datetime(exchange_test['date']).dt.tz_localize(None)
exchange_test

Unnamed: 0,date,hour,price,priceUsd,priceEur,exchange_rate
0,2023-04-25 00:00:00,00:00,2600.00,134.15,122.57,21.212368
1,2023-04-25 01:00:00,01:00,2600.00,134.15,122.57,21.212368
2,2023-04-25 02:00:00,02:00,2235.58,115.35,105.39,21.212449
3,2023-04-25 03:00:00,03:00,1700.00,87.72,80.14,21.212877
4,2023-04-25 04:00:00,04:00,1700.00,87.72,80.14,21.212877
...,...,...,...,...,...,...
8779,2024-04-24 19:00:00,19:00,2700.00,83.08,77.99,34.619823
8780,2024-04-24 20:00:00,20:00,2700.00,83.08,77.99,34.619823
8781,2024-04-24 21:00:00,21:00,2700.00,83.08,77.99,34.619823
8782,2024-04-24 22:00:00,22:00,2700.00,83.08,77.99,34.619823


In [19]:
# Add the hour label, the 'price' column and the exchange rate for each timestamp.
test_df = test_df.merge(
    exchange_test[['date', 'hour', 'price', 'exchange_rate']],
    how='left',
    on='date',
)
test_df

Unnamed: 0,date,predicted_production,renewable_ratio,Workday,priceEur,forecasted_price_eur,hour,price,exchange_rate
0,2023-04-25 00:00:00,5.460000,0.055586,1,122.57,99.139640,00:00,2600.00,21.212368
1,2023-04-25 01:00:00,7.060000,0.066580,1,122.57,84.631126,01:00,2600.00,21.212368
2,2023-04-25 02:00:00,7.720000,0.081633,1,105.39,75.398830,02:00,2235.58,21.212449
3,2023-04-25 03:00:00,9.060000,0.089394,1,80.14,58.079089,03:00,1700.00,21.212877
4,2023-04-25 04:00:00,14.025820,0.085833,1,80.14,65.365217,04:00,1700.00,21.212877
...,...,...,...,...,...,...,...,...,...
8755,2024-04-23 19:00:00,51.530000,0.145035,0,77.98,56.476050,19:00,2700.00,34.624263
8756,2024-04-23 20:00:00,53.850000,0.152725,0,77.98,55.473172,20:00,2700.00,34.624263
8757,2024-04-23 21:00:00,58.062974,0.160924,0,77.98,54.403865,21:00,2700.00,34.624263
8758,2024-04-23 22:00:00,53.740000,0.176247,0,68.36,48.588193,22:00,2366.89,34.623903


In [20]:
# Hourly production forecast for one plant (selected by its 'eic' code) in the training window.
# NOTE(review): the window filter compares the raw 'time' strings against str(Timestamp)
# bounds, i.e. lexicographically, and the upper bound is midnight of the end date; the
# displayed result stops on the previous day — confirm that excluding the end date is intended.
production_train = pd.read_csv('AlgopolyPerformance-PerformanceReport_2024-04-25_06-16-54.csv')
start_date, end_date = str(train_start_date), str(train_end_date)

in_window = production_train['time'].between(start_date, end_date)
is_plant = production_train['eic'] == '40W000000010903W'

production_train = (
    production_train.loc[in_window & is_plant, ['time', 'forecast']]
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    # Largest forecast first within each timestamp, so keep='first' retains the maximum.
    .sort_values(by=['time', 'forecast'], ascending=[True, False])
    .drop_duplicates(subset=['time'], keep='first')
    .rename(columns={'time': 'date', 'forecast': 'predicted_production'})
    .assign(date=lambda frame: frame['date'].dt.tz_localize(None))
)

production_train

Unnamed: 0,date,predicted_production
748172,2022-05-23 00:00:00,5.490000
650335,2022-05-23 01:00:00,3.160000
63894,2022-05-23 02:00:00,1.400000
63895,2022-05-23 03:00:00,0.770000
258414,2022-05-23 04:00:00,0.750000
...,...,...
695083,2023-04-23 19:00:00,23.959069
302805,2023-04-23 20:00:00,30.160805
108415,2023-04-23 21:00:00,27.072721
10761,2023-04-23 22:00:00,20.504129


In [21]:
train_day_ahead

Unnamed: 0,date,renewable_ratio,priceEur,Difference_24,Difference_168,Workday,Workday_168,Workday_24,forecast,BV Forecast,price_lag_24,price_lag_168,forecasted_price_eur
0,2021-01-14 00:00:00,0.040737,35.26,1.16,2.17,1,1,1,27.40,4187.97,34.10,33.09,46.060030
1,2021-01-14 01:00:00,0.045864,35.13,4.79,0.94,1,1,1,30.10,3975.82,30.34,34.19,44.125388
2,2021-01-14 02:00:00,0.051034,34.87,2.32,5.17,1,1,1,37.30,3885.02,32.55,29.70,42.345044
3,2021-01-14 03:00:00,0.059953,34.58,7.66,5.35,1,1,1,41.00,3589.79,26.92,29.23,38.267237
4,2021-01-14 04:00:00,0.062077,34.57,6.45,7.32,1,1,1,52.60,3137.07,28.12,27.25,37.610058
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19939,2023-04-24 19:00:00,0.040206,122.57,43.37,1.00,1,1,0,119.81,3007.49,79.20,121.57,115.630050
19940,2023-04-24 20:00:00,0.036043,122.57,38.66,1.00,1,1,0,117.13,3003.73,83.91,121.57,118.422175
19941,2023-04-24 21:00:00,0.031918,122.57,42.76,1.00,1,1,0,111.04,2930.69,79.81,121.57,117.002163
19942,2023-04-24 22:00:00,0.036577,122.57,66.09,31.20,1,1,0,109.62,2679.69,56.48,91.37,90.715791


In [22]:
# Keep every day-ahead training row; hours without a production forecast get NaN.
train_df = train_day_ahead[
    ['date', 'renewable_ratio', 'Workday', 'priceEur', 'forecasted_price_eur']
].merge(production_train, how='left', on='date')

In [23]:
# Fetch the "mcp" series for the training window and derive the implied exchange rate
# as the ratio of the 'price' column to the 'priceEur' column.
start_date, end_date = pd.to_datetime("2021-01-14"), pd.to_datetime("2023-04-24")
exchange_train = convert_to_dataframe(eptr.call("mcp", start_date=start_date, end_date=end_date))
exchange_train['exchange_rate'] = exchange_train['price'].div(exchange_train['priceEur'])
# Strip the timezone so 'date' can be joined with the tz-naive frames built earlier.
exchange_train['date'] = pd.to_datetime(exchange_train['date']).dt.tz_localize(None)
exchange_train

Unnamed: 0,date,hour,price,priceUsd,priceEur,exchange_rate
0,2021-01-14 00:00:00,00:00,319.81,42.87,35.26,9.070051
1,2021-01-14 01:00:00,01:00,318.60,42.70,35.13,9.069172
2,2021-01-14 02:00:00,02:00,316.21,42.38,34.87,9.068254
3,2021-01-14 03:00:00,03:00,313.66,42.04,34.58,9.070561
4,2021-01-14 04:00:00,04:00,313.50,42.02,34.57,9.068557
...,...,...,...,...,...,...
19939,2023-04-24 19:00:00,19:00,2600.00,134.15,122.57,21.212368
19940,2023-04-24 20:00:00,20:00,2600.00,134.15,122.57,21.212368
19941,2023-04-24 21:00:00,21:00,2600.00,134.15,122.57,21.212368
19942,2023-04-24 22:00:00,22:00,2600.00,134.15,122.57,21.212368


In [24]:
# Add the hour label, the 'price' column and the exchange rate for each timestamp.
train_df = train_df.merge(
    exchange_train[['date', 'hour', 'price', 'exchange_rate']],
    how='left',
    on='date',
)
train_df

Unnamed: 0,date,renewable_ratio,Workday,priceEur,forecasted_price_eur,predicted_production,hour,price,exchange_rate
0,2021-01-14 00:00:00,0.040737,1,35.26,46.060030,,00:00,319.81,9.070051
1,2021-01-14 01:00:00,0.045864,1,35.13,44.125388,,01:00,318.60,9.069172
2,2021-01-14 02:00:00,0.051034,1,34.87,42.345044,,02:00,316.21,9.068254
3,2021-01-14 03:00:00,0.059953,1,34.58,38.267237,,03:00,313.66,9.070561
4,2021-01-14 04:00:00,0.062077,1,34.57,37.610058,,04:00,313.50,9.068557
...,...,...,...,...,...,...,...,...,...
19939,2023-04-24 19:00:00,0.040206,1,122.57,115.630050,,19:00,2600.00,21.212368
19940,2023-04-24 20:00:00,0.036043,1,122.57,118.422175,,20:00,2600.00,21.212368
19941,2023-04-24 21:00:00,0.031918,1,122.57,117.002163,,21:00,2600.00,21.212368
19942,2023-04-24 22:00:00,0.036577,1,122.57,90.715791,,22:00,2600.00,21.212368


In [25]:
# Stack the training rows on top of the test rows and renumber the index from 0.
result_df = pd.concat((train_df, test_df), axis=0, ignore_index=True)
result_df

Unnamed: 0,date,renewable_ratio,Workday,priceEur,forecasted_price_eur,predicted_production,hour,price,exchange_rate
0,2021-01-14 00:00:00,0.040737,1,35.26,46.060030,,00:00,319.81,9.070051
1,2021-01-14 01:00:00,0.045864,1,35.13,44.125388,,01:00,318.60,9.069172
2,2021-01-14 02:00:00,0.051034,1,34.87,42.345044,,02:00,316.21,9.068254
3,2021-01-14 03:00:00,0.059953,1,34.58,38.267237,,03:00,313.66,9.070561
4,2021-01-14 04:00:00,0.062077,1,34.57,37.610058,,04:00,313.50,9.068557
...,...,...,...,...,...,...,...,...,...
28699,2024-04-23 19:00:00,0.145035,0,77.98,56.476050,51.530000,19:00,2700.00,34.624263
28700,2024-04-23 20:00:00,0.152725,0,77.98,55.473172,53.850000,20:00,2700.00,34.624263
28701,2024-04-23 21:00:00,0.160924,0,77.98,54.403865,58.062974,21:00,2700.00,34.624263
28702,2024-04-23 22:00:00,0.176247,0,68.36,48.588193,53.740000,22:00,2366.89,34.623903


## Convert prices from TL to EUR in all_quantiles_data

In [26]:
# Combine the calendar date with the 'HH:MM' hour label into one hourly timestamp.
# Not re-runnable as written: a second run would append the hour label again.
all_quantiles_data['date'] = pd.to_datetime(
    all_quantiles_data['date'].astype(str) + ' ' + all_quantiles_data['hour']
)
all_quantiles_data

Unnamed: 0,date,hour,price,quantile,cumulative_lots
0,2021-01-14 02:00:00,02:00,288.99,0.25,1417
1,2021-01-14 02:00:00,02:00,290.00,0.50,0
2,2021-01-14 02:00:00,02:00,290.00,0.75,0
3,2021-01-14 03:00:00,03:00,270.00,0.25,911
4,2021-01-14 03:00:00,03:00,274.98,0.50,618
...,...,...,...,...,...
86242,2024-04-26 22:00:00,22:00,2705.00,0.50,2437
86243,2024-04-26 22:00:00,22:00,2724.00,0.75,1191
86244,2024-04-26 23:00:00,23:00,2700.00,0.25,1948
86245,2024-04-26 23:00:00,23:00,2700.00,0.50,1507


In [27]:
# One exchange-rate table spanning both the training and the test window.
exchange = pd.concat((exchange_train, exchange_test), ignore_index=True)
exchange = exchange.assign(date=pd.to_datetime(exchange['date']))
exchange

Unnamed: 0,date,hour,price,priceUsd,priceEur,exchange_rate
0,2021-01-14 00:00:00,00:00,319.81,42.87,35.26,9.070051
1,2021-01-14 01:00:00,01:00,318.60,42.70,35.13,9.069172
2,2021-01-14 02:00:00,02:00,316.21,42.38,34.87,9.068254
3,2021-01-14 03:00:00,03:00,313.66,42.04,34.58,9.070561
4,2021-01-14 04:00:00,04:00,313.50,42.02,34.57,9.068557
...,...,...,...,...,...,...
28723,2024-04-24 19:00:00,19:00,2700.00,83.08,77.99,34.619823
28724,2024-04-24 20:00:00,20:00,2700.00,83.08,77.99,34.619823
28725,2024-04-24 21:00:00,21:00,2700.00,83.08,77.99,34.619823
28726,2024-04-24 22:00:00,22:00,2700.00,83.08,77.99,34.619823


In [28]:
# Look up the exchange rate of each quantile row's timestamp; unmatched timestamps get NaN.
all_quantiles_data = all_quantiles_data.merge(
    exchange[['date', 'exchange_rate']],
    how='left',
    on='date',
)

In [29]:
# Express the quantile price in EUR by dividing by the exchange rate (NaN where the rate is missing).
all_quantiles_data['priceEUR'] = all_quantiles_data['price'].div(all_quantiles_data['exchange_rate'])
all_quantiles_data

Unnamed: 0,date,hour,price,quantile,cumulative_lots,exchange_rate,priceEUR
0,2021-01-14 02:00:00,02:00,288.99,0.25,1417,9.068254,31.868319
1,2021-01-14 02:00:00,02:00,290.00,0.50,0,9.068254,31.979697
2,2021-01-14 02:00:00,02:00,290.00,0.75,0,9.068254,31.979697
3,2021-01-14 03:00:00,03:00,270.00,0.25,911,9.070561,29.766626
4,2021-01-14 03:00:00,03:00,274.98,0.50,618,9.070561,30.315655
...,...,...,...,...,...,...,...
86242,2024-04-26 22:00:00,22:00,2705.00,0.50,2437,,
86243,2024-04-26 22:00:00,22:00,2724.00,0.75,1191,,
86244,2024-04-26 23:00:00,23:00,2700.00,0.25,1948,,
86245,2024-04-26 23:00:00,23:00,2700.00,0.50,1507,,


## Find similar days

In [30]:
# For every row, collect the calendar dates of the rows that come before it in result_df,
# share its 'hour' and 'Workday' values, and whose realised 'priceEur' lies within +/-15%
# of this row's 'forecasted_price_eur'.
#
# Rows are bucketed by (hour, Workday) once, so each row is compared only against the
# earlier rows of its own bucket instead of re-slicing and re-filtering the whole frame
# on every iteration. groupby keeps the original row order inside each bucket.
# Assumes result_df has a unique index (it is built with ignore_index=True).
similar_days_by_row = {row_label: [] for row_label in result_df.index}

for _, bucket in result_df.groupby(['hour', 'Workday'], sort=False):
    realised = bucket['priceEur'].to_numpy()
    forecast = bucket['forecasted_price_eur'].to_numpy()
    bucket_days = bucket['date'].dt.date.to_numpy()

    for position, row_label in enumerate(bucket.index):
        # Only rows strictly before the current one are candidates (the row itself is excluded).
        earlier = realised[:position]
        within_band = (earlier >= 0.85 * forecast[position]) & (earlier <= 1.15 * forecast[position])
        similar_days_by_row[row_label] = bucket_days[:position][within_band].tolist()

# Rows with a missing hour or Workday belong to no bucket and keep the empty default list.
similar_days_list = [similar_days_by_row[row_label] for row_label in result_df.index]

result_df['similar_days'] = similar_days_list


In [31]:
result_df

Unnamed: 0,date,renewable_ratio,Workday,priceEur,forecasted_price_eur,predicted_production,hour,price,exchange_rate,similar_days
0,2021-01-14 00:00:00,0.040737,1,35.26,46.060030,,00:00,319.81,9.070051,[]
1,2021-01-14 01:00:00,0.045864,1,35.13,44.125388,,01:00,318.60,9.069172,[]
2,2021-01-14 02:00:00,0.051034,1,34.87,42.345044,,02:00,316.21,9.068254,[]
3,2021-01-14 03:00:00,0.059953,1,34.58,38.267237,,03:00,313.66,9.070561,[]
4,2021-01-14 04:00:00,0.062077,1,34.57,37.610058,,04:00,313.50,9.068557,[]
...,...,...,...,...,...,...,...,...,...,...
28699,2024-04-23 19:00:00,0.145035,0,77.98,56.476050,51.530000,19:00,2700.00,34.624263,"[2021-07-15, 2021-07-19, 2021-07-20, 2021-07-2..."
28700,2024-04-23 20:00:00,0.152725,0,77.98,55.473172,53.850000,20:00,2700.00,34.624263,"[2021-05-14, 2021-05-15, 2021-05-19, 2021-07-1..."
28701,2024-04-23 21:00:00,0.160924,0,77.98,54.403865,58.062974,21:00,2700.00,34.624263,"[2021-05-14, 2021-05-15, 2021-05-19, 2021-07-1..."
28702,2024-04-23 22:00:00,0.176247,0,68.36,48.588193,53.740000,22:00,2366.89,34.623903,"[2021-05-14, 2021-05-15, 2021-05-19, 2023-04-2..."


In [32]:
# Rows of the test window, from test_start_date through the whole of test_end_date.
# test_end_date is a midnight Timestamp, so an inclusive `<=` against it could keep only the
# 00:00 row of the last day; compare against the start of the following day instead.
test_window_end = test_end_date + pd.Timedelta(days=1)
# .copy() makes days_to_solve an independent frame rather than a slice of result_df.
days_to_solve = result_df[
    (result_df["date"] >= test_start_date) & (result_df["date"] < test_window_end)
].copy()
days_to_solve

Unnamed: 0,date,renewable_ratio,Workday,priceEur,forecasted_price_eur,predicted_production,hour,price,exchange_rate,similar_days
19944,2023-04-25 00:00:00,0.055586,1,122.57,99.139640,5.460000,00:00,2600.00,21.212368,"[2021-10-23, 2021-10-24, 2021-12-24, 2021-12-2..."
19945,2023-04-25 01:00:00,0.066580,1,122.57,84.631126,7.060000,01:00,2600.00,21.212368,"[2021-10-16, 2021-10-22, 2021-10-24, 2021-10-2..."
19946,2023-04-25 02:00:00,0.081633,1,105.39,75.398830,7.720000,02:00,2235.58,21.212449,"[2021-08-05, 2021-10-21, 2021-10-23, 2021-10-2..."
19947,2023-04-25 03:00:00,0.089394,1,80.14,58.079089,9.060000,03:00,1700.00,21.212877,"[2021-07-06, 2021-07-18, 2021-07-30, 2021-07-3..."
19948,2023-04-25 04:00:00,0.085833,1,80.14,65.365217,14.025820,04:00,1700.00,21.212877,"[2021-08-01, 2021-08-03, 2021-08-04, 2021-08-0..."
...,...,...,...,...,...,...,...,...,...,...
28699,2024-04-23 19:00:00,0.145035,0,77.98,56.476050,51.530000,19:00,2700.00,34.624263,"[2021-07-15, 2021-07-19, 2021-07-20, 2021-07-2..."
28700,2024-04-23 20:00:00,0.152725,0,77.98,55.473172,53.850000,20:00,2700.00,34.624263,"[2021-05-14, 2021-05-15, 2021-05-19, 2021-07-1..."
28701,2024-04-23 21:00:00,0.160924,0,77.98,54.403865,58.062974,21:00,2700.00,34.624263,"[2021-05-14, 2021-05-15, 2021-05-19, 2021-07-1..."
28702,2024-04-23 22:00:00,0.176247,0,68.36,48.588193,53.740000,22:00,2366.89,34.623903,"[2021-05-14, 2021-05-15, 2021-05-19, 2023-04-2..."


In [33]:
# Distinct calendar days present in the test window, in order of first appearance.
unique_days = pd.unique(days_to_solve['date'].dt.date)

In [35]:
from collections import Counter

# Calendar day of every row, computed once and reused for each day's selection.
row_days = days_to_solve['date'].dt.date

# For each day to solve, pick the date that occurs most often across the
# 'similar_days' lists of that day's rows (None when all of those lists are empty).
matched_days = {}
for day in unique_days:
    candidate_lists = days_to_solve.loc[row_days == day, 'similar_days']
    date_counts = Counter(
        candidate for candidates in candidate_lists for candidate in candidates
    )
    # most_common(1) yields [(date, count)]; only the date is kept.
    matched_days[day] = date_counts.most_common(1)[0][0] if date_counts else None

for day, matched_day in matched_days.items():
    print(f"Day: {day}, Matched Day: {matched_day}")


Day: 2023-04-25, Matched Day: 2023-04-04
Day: 2023-04-26, Matched Day: 2023-04-19
Day: 2023-04-27, Matched Day: 2023-04-18
Day: 2023-04-28, Matched Day: 2022-02-13
Day: 2023-04-29, Matched Day: 2023-04-18
Day: 2023-04-30, Matched Day: 2022-01-06
Day: 2023-05-01, Matched Day: 2022-01-01
Day: 2023-05-02, Matched Day: 2023-04-25
Day: 2023-05-03, Matched Day: 2023-05-02
Day: 2023-05-04, Matched Day: 2023-04-29
Day: 2023-05-05, Matched Day: 2023-04-27
Day: 2023-05-06, Matched Day: 2021-10-23
Day: 2023-05-07, Matched Day: 2023-04-30
Day: 2023-05-08, Matched Day: 2023-04-08
Day: 2023-05-09, Matched Day: 2023-04-27
Day: 2023-05-10, Matched Day: 2023-05-09
Day: 2023-05-11, Matched Day: 2023-05-10
Day: 2023-05-12, Matched Day: 2023-05-11
Day: 2023-05-13, Matched Day: 2023-05-09
Day: 2023-05-14, Matched Day: 2021-10-29
Day: 2023-05-15, Matched Day: 2023-04-08
Day: 2023-05-16, Matched Day: 2023-05-11
Day: 2023-05-17, Matched Day: 2023-05-16
Day: 2023-05-18, Matched Day: 2023-05-17
Day: 2023-05-19,

In [36]:
# One row per solved day with its matched historical day, both as datetimes
# (a missing match becomes NaT).
matchings_df = pd.DataFrame(list(matched_days.items()), columns=['Day', 'Matched_Day'])
matchings_df = matchings_df.assign(
    Day=pd.to_datetime(matchings_df['Day']),
    Matched_Day=pd.to_datetime(matchings_df['Matched_Day']),
)

In [37]:
matchings_df

Unnamed: 0,Day,Matched_Day
0,2023-04-25,2023-04-04
1,2023-04-26,2023-04-19
2,2023-04-27,2023-04-18
3,2023-04-28,2022-02-13
4,2023-04-29,2023-04-18
...,...,...
360,2024-04-19,2023-11-19
361,2024-04-20,2024-04-18
362,2024-04-21,2024-04-20
363,2024-04-22,2023-11-19


In [38]:
all_quantiles_data

Unnamed: 0,date,hour,price,quantile,cumulative_lots,exchange_rate,priceEUR
0,2021-01-14 02:00:00,02:00,288.99,0.25,1417,9.068254,31.868319
1,2021-01-14 02:00:00,02:00,290.00,0.50,0,9.068254,31.979697
2,2021-01-14 02:00:00,02:00,290.00,0.75,0,9.068254,31.979697
3,2021-01-14 03:00:00,03:00,270.00,0.25,911,9.070561,29.766626
4,2021-01-14 03:00:00,03:00,274.98,0.50,618,9.070561,30.315655
...,...,...,...,...,...,...,...
86242,2024-04-26 22:00:00,22:00,2705.00,0.50,2437,,
86243,2024-04-26 22:00:00,22:00,2724.00,0.75,1191,,
86244,2024-04-26 23:00:00,23:00,2700.00,0.25,1948,,
86245,2024-04-26 23:00:00,23:00,2700.00,0.50,1507,,


## Solving Model

In [39]:
import pulp as pl
import pandas as pd

def setup_and_solve_trading_model(all_quantiles_data, production_wind, dayahead):
    """
    Set up and solve the linear programming problem for electricity trading optimization.

    For every hour in ``dayahead`` the model chooses a day-ahead quantity ``Q1[hour]`` and
    one intraday quantity ``Q2[(hour, quantile)]`` per quantile level (all non-negative)
    and maximizes ``sum(day-ahead price * Q1) + sum(intraday price * Q2)`` subject to:

    * per hour, ``Q1`` plus all ``Q2`` of that hour may not exceed the production forecast;
    * each ``Q2[(hour, quantile)]`` may not exceed that row's 'cumulative_lots'.

    Parameters:
    all_quantiles_data (pd.DataFrame): Intraday quantile rows with the columns 'hour',
        'quantile', 'price' and 'cumulative_lots'. Each (hour, quantile) pair must be unique.
    production_wind (pd.DataFrame): Production data with the columns 'hour' and 'forecast'.
    dayahead (pd.DataFrame): Day-ahead prices with the columns 'hour' and 'price'.

    Returns:
    dict or None: ``{'Variables': {name: value}, 'Objective Value': float}`` when the solver
        reports an optimal solution; otherwise None (the solver status is printed).
    """
    # Lookup tables keyed by hour / (hour, quantile). The 'hour' values of the three
    # frames must use the same labels for the lookups below to match.
    intraday_dict = all_quantiles_data.set_index(['hour', 'quantile']).to_dict('index')
    production_wind_dict = production_wind.set_index('hour')['forecast'].to_dict()
    dayahead_dict = dayahead.set_index('hour')['price'].to_dict()

    # Set up the model
    model = pl.LpProblem("Electricity_Trading", pl.LpMaximize)

    # Time periods and quantiles
    T = dayahead['hour'].unique()
    Q = all_quantiles_data['quantile'].unique()

    # Decision variables: day-ahead quantity per hour, intraday quantity per (hour, quantile)
    Q1 = pl.LpVariable.dicts("Q1", T, lowBound=0)
    Q2 = pl.LpVariable.dicts("Q2", [(t, q) for t in T for q in Q], lowBound=0)

    # Objective: revenue from both markets; (hour, quantile) pairs without intraday data are left out
    model += pl.lpSum([dayahead_dict[t] * Q1[t] for t in T if t in dayahead_dict] +
                      [intraday_dict[(t, q)]['price'] * Q2[(t, q)] for t in T for q in Q if (t, q) in intraday_dict])

    # Constraints
    for t in T:
        # NOTE(review): an hour missing from production_wind gets no production cap,
        # which leaves Q1 unbounded for that hour — confirm every hour has a forecast.
        if t in production_wind_dict:
            model += Q1[t] + pl.lpSum([Q2[(t, q)] for q in Q if (t, q) in intraday_dict]) <= production_wind_dict[t], f"Production_{t}"

        for q in Q:
            if (t, q) in intraday_dict:
                model += Q2[(t, q)] <= intraday_dict[(t, q)]['cumulative_lots'], f"Demand_{t}_{q}"

    # Solve the model
    model.solve()

    # Any status other than 'Optimal' (infeasible, unbounded, not solved, ...) yields no
    # result; report the actual status instead of always claiming infeasibility.
    status = pl.LpStatus[model.status]
    if status != 'Optimal':
        print(f"No optimal solution found (solver status: {status}).")
        return None

    # Collect results
    results = {'Variables': {v.name: v.varValue for v in model.variables()},
               'Objective Value': pl.value(model.objective)}

    return results


# TODO: review the section below ("BURAYA BAK" = "look here")

In [41]:
import pandas as pd
from datetime import datetime

# Price-cap table. Column names are Turkish: "Başlangıç tarihi" = start date,
# "Bitiş tarihi" = end date. Both are reduced to plain date objects.
max_price_df = pd.read_csv("fiyatlar.csv")
for date_column in ("Başlangıç tarihi", "Bitiş tarihi"):
    max_price_df[date_column] = pd.to_datetime(max_price_df[date_column]).dt.date

daily_results, results_list = [], []

def get_max_price_for_date(date, max_price_df):
    """
    Return the maximum price that applies on the given date.

    A row of `max_price_df` applies when `date` lies inside its
    ["Başlangıç tarihi", "Bitiş tarihi"] window (both ends inclusive).
    The "Azami fiyat" value of the first applicable row is returned;
    None is returned when no window covers the date.
    """
    starts_on_or_before = max_price_df["Başlangıç tarihi"] <= date
    ends_on_or_after = max_price_df["Bitiş tarihi"] >= date
    applicable_caps = max_price_df.loc[starts_on_or_before & ends_on_or_after, "Azami fiyat"]

    if applicable_caps.empty:
        return None  # Or a suitable default value
    return applicable_caps.values[0]

# Columns of the daily frame whose missing values are treated as zero.
filled_columns = ['price', 'forecasted_price_eur', 'exchange_rate']

# Solve one trading problem per row of `matchings_df`: the 'Day' supplies the
# production forecast, day-ahead price forecast and exchange rate, while the
# 'Matched_Day' supplies the intraday quantile table.
for index, row in matchings_df.iterrows():
    day = row['Day'].date()
    matched_day = row['Matched_Day'].date()

    # Explicit copies: the fills below must change these per-day frames, not a
    # temporary slice of the source frames (chained `fillna(inplace=True)` on a
    # slice raises SettingWithCopyWarning and is a no-op under copy-on-write).
    day_df = days_to_solve[days_to_solve['date'].dt.date == day].copy()
    day_df[filled_columns] = day_df[filled_columns].fillna(0)

    intraday_data = all_quantiles_data[all_quantiles_data["date"].dt.date == matched_day].copy()
    intraday_data['priceEUR'] = intraday_data['priceEUR'].fillna(0)

    # Attach the solved day's hourly exchange rate to the matched day's quantiles.
    intraday_data = pd.merge(intraday_data[['date', 'hour', 'price', 'quantile', 'cumulative_lots', 'priceEUR']],
                             day_df[['hour', 'exchange_rate']], on='hour', how='left')

    # Re-price the quantiles from their EUR price with that exchange rate.
    intraday_data["price"] = intraday_data["exchange_rate"] * intraday_data["priceEUR"]

    max_price = get_max_price_for_date(day, max_price_df)

    if max_price is not None:
        # Cap intraday prices at the maximum price valid on the solved day.
        intraday_data["price"] = intraday_data["price"].clip(upper=max_price)
    else:
        print(f"{day} için maksimum fiyat bulunamadı.")

    production_wind = pd.DataFrame({'hour': day_df['hour'],
                                    'forecast': day_df['predicted_production'].fillna(0)})

    dayahead = pd.DataFrame({'hour': day_df['hour'], 'price': day_df['forecasted_price_eur'] * day_df['exchange_rate']})
    if max_price is not None:
        # Apply the maximum cap to the day-ahead prices as well.
        dayahead['price'] = dayahead['price'].clip(upper=max_price)

    result = setup_and_solve_trading_model(intraday_data[['hour', 'quantile', 'price', 'cumulative_lots']],
                                           production_wind, dayahead)

    results_list.append({'date': day, 'results': result})
    daily_results.append(result)

results_df = pd.DataFrame(results_list)
results_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['price'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['forecasted_price_eur'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['exchange_rate'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  intraday_data['priceEUR']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  intraday_data['priceEUR'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['price'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['forecasted_price_eur'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['exchange_rate']

Unnamed: 0,date,results
0,2023-04-25,"{'Variables': {'Q1_00:00': 5.46, 'Q1_01:00': 7..."
1,2023-04-26,"{'Variables': {'Q1_00:00': 0.0, 'Q1_01:00': 38..."
2,2023-04-27,"{'Variables': {'Q1_00:00': 0.0, 'Q1_01:00': 0...."
3,2023-04-28,"{'Variables': {'Q1_00:00': 62.391755, 'Q1_01:0..."
4,2023-04-29,"{'Variables': {'Q1_00:00': 41.021926, 'Q1_01:0..."
...,...,...
360,2024-04-19,"{'Variables': {'Q1_00:00': 20.85, 'Q1_01:00': ..."
361,2024-04-20,"{'Variables': {'Q1_00:00': 0.0, 'Q1_01:00': 0...."
362,2024-04-21,"{'Variables': {'Q1_00:00': 1.08, 'Q1_01:00': 0..."
363,2024-04-22,"{'Variables': {'Q1_00:00': 48.86, 'Q1_01:00': ..."


In [43]:
from datetime import datetime, timedelta

# Flatten each day's solver output into one row: the day, every decision
# variable value and the objective value. The day label is taken from the date
# stored with each result in `results_list`, so rows stay correctly labelled
# even if the solved days are not a gap-free daily sequence.
structured_results = []

for entry in results_list:
    result = entry['results']
    if result is None:
        # The solver returned no optimal solution for this day; nothing to flatten.
        print(f"Skipping {entry['date']}: no optimal solution available.")
        continue

    day_dict = {'Date': entry['date'].strftime('%Y-%m-%d')}
    day_dict.update(result['Variables'])
    day_dict['Objective Value'] = result['Objective Value']
    structured_results.append(day_dict)

results_df = pd.DataFrame(structured_results)

# Expected revenue = sum of the per-day objective values.
expected_total_revenue = results_df['Objective Value'].sum()

In [44]:
# Display the sum of the per-day objective values from the optimisation runs.
expected_total_revenue

466702990.69976044

In [None]:
#results_df.to_csv('trading_optimization_results_rule_based_expected.csv', index=False)


In [45]:
# Display the wide results table: one row per solved day with its date,
# decision-variable values and objective value.
results_df

Unnamed: 0,Date,Q1_00:00,Q1_01:00,Q1_02:00,Q1_03:00,Q1_04:00,Q1_05:00,Q1_06:00,Q1_07:00,Q1_08:00,...,"Q2_('21:00',_0.25)","Q2_('21:00',_0.5)","Q2_('21:00',_0.75)","Q2_('22:00',_0.25)","Q2_('22:00',_0.5)","Q2_('22:00',_0.75)","Q2_('23:00',_0.25)","Q2_('23:00',_0.5)","Q2_('23:00',_0.75)",Objective Value
0,2023-04-25,5.460000,7.060000,0.000000,0.000000,0.000000,13.950000,14.630000,15.63,13.170000,...,0.0,0.0,30.650000,0.0,0.0,0.00,0.0,0.0,0.000000,1.108632e+06
1,2023-04-26,0.000000,38.432007,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,...,0.0,0.0,0.000000,0.0,0.0,12.75,0.0,0.0,13.110000,1.605967e+06
2,2023-04-27,0.000000,0.000000,0.000000,0.000000,2.360870,3.068171,0.000000,0.00,1.550000,...,0.0,0.0,65.484879,0.0,0.0,0.00,0.0,0.0,64.222108,1.594055e+06
3,2023-04-28,62.391755,59.919545,60.571263,59.798947,61.185766,61.828438,60.784790,0.00,63.447005,...,0.0,0.0,0.000000,0.0,0.0,0.00,0.0,0.0,0.000000,2.582320e+06
4,2023-04-29,41.021926,0.000000,0.000000,0.000000,0.000000,35.520859,36.961611,0.00,0.000000,...,0.0,0.0,35.735778,0.0,0.0,0.00,0.0,0.0,0.000000,1.922689e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2024-04-19,20.850000,20.448717,20.657458,0.000000,0.000000,12.780000,11.070000,9.61,9.560000,...,0.0,0.0,0.000000,0.0,0.0,14.10,0.0,0.0,15.750000,6.599851e+05
361,2024-04-20,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,...,0.0,0.0,1.780000,0.0,0.0,1.46,0.0,0.0,1.130000,7.668977e+05
362,2024-04-21,1.080000,0.000000,0.000000,0.000000,0.000000,3.340000,0.000000,0.00,0.000000,...,0.0,0.0,46.520000,0.0,0.0,47.66,0.0,0.0,49.368974,1.170832e+06
363,2024-04-22,48.860000,0.000000,0.000000,42.660144,32.760000,25.140000,21.340000,17.70,0.000000,...,0.0,0.0,0.000000,0.0,0.0,3.07,0.0,0.0,3.680000,6.299542e+05


In [46]:
import pandas as pd


# Reshape the wide per-day results into one row per (day, hour) day-ahead bid.
q1_columns = [name for name in results_df.columns if name.startswith('Q1')]

final_data = [
    {
        'date': day_row['Date'],
        'hour': q1_column.split('_')[1],  # text after the first underscore is the hour
        'day_ahead_bidded': day_row[q1_column],
        'Objective Value': day_row['Objective Value'],
    }
    for _, day_row in results_df.iterrows()
    for q1_column in q1_columns
]

final_dayahead_df = pd.DataFrame(final_data)

final_dayahead_df


Unnamed: 0,date,hour,day_ahead_bidded,Objective Value
0,2023-04-25,00:00,5.46,1.108632e+06
1,2023-04-25,01:00,7.06,1.108632e+06
2,2023-04-25,02:00,0.00,1.108632e+06
3,2023-04-25,03:00,0.00,1.108632e+06
4,2023-04-25,04:00,0.00,1.108632e+06
...,...,...,...,...
8755,2024-04-23,19:00,0.00,1.162612e+06
8756,2024-04-23,20:00,0.00,1.162612e+06
8757,2024-04-23,21:00,0.00,1.162612e+06
8758,2024-04-23,22:00,0.00,1.162612e+06


## Real Day Ahead Check

In [47]:
# Fetch the real day-ahead prices for the test window via the "mcp" call and
# turn the response's items into a DataFrame.
start_date, end_date = pd.to_datetime("2023-04-25"), pd.to_datetime("2024-04-23")
mcp_response = eptr.call("mcp", start_date=start_date, end_date=end_date)
real_dayahead_prices = convert_to_dataframe(mcp_response)

In [48]:
# Drop the EUR/USD price columns. `errors="ignore"` keeps the cell re-runnable
# after the columns have already been removed by a previous execution.
real_dayahead_prices = real_dayahead_prices.drop(columns=['priceEur', 'priceUsd'], errors='ignore')
# Strip the timezone so the timestamps can be merged with the naive bid timestamps.
real_dayahead_prices['date'] = pd.to_datetime(real_dayahead_prices['date']).dt.tz_localize(None)
real_dayahead_prices

Unnamed: 0,date,hour,price
0,2023-04-25 00:00:00,00:00,2600.00
1,2023-04-25 01:00:00,01:00,2600.00
2,2023-04-25 02:00:00,02:00,2235.58
3,2023-04-25 03:00:00,03:00,1700.00
4,2023-04-25 04:00:00,04:00,1700.00
...,...,...,...
8755,2024-04-23 19:00:00,19:00,2700.00
8756,2024-04-23 20:00:00,20:00,2700.00
8757,2024-04-23 21:00:00,21:00,2700.00
8758,2024-04-23 22:00:00,22:00,2366.89


In [49]:
# Build the hourly timestamp from the calendar day plus the "HH:MM" hour label.
# Normalising to midnight first (instead of concatenating strings) makes the
# cell safe to re-run once `date` already holds timestamps.
final_dayahead_df['date'] = (
    pd.to_datetime(final_dayahead_df['date']).dt.normalize()
    + pd.to_timedelta(final_dayahead_df['hour'] + ':00')
)
real_dayahead_prices['date'] = pd.to_datetime(real_dayahead_prices['date'])

# Pair each hourly bid with the real price of the same hour.
dayahead_df = pd.merge(real_dayahead_prices, final_dayahead_df, on="date")
dayahead_df

Unnamed: 0,date,hour_x,price,hour_y,day_ahead_bidded,Objective Value
0,2023-04-25 00:00:00,00:00,2600.00,00:00,5.46,1.108632e+06
1,2023-04-25 01:00:00,01:00,2600.00,01:00,7.06,1.108632e+06
2,2023-04-25 02:00:00,02:00,2235.58,02:00,0.00,1.108632e+06
3,2023-04-25 03:00:00,03:00,1700.00,03:00,0.00,1.108632e+06
4,2023-04-25 04:00:00,04:00,1700.00,04:00,0.00,1.108632e+06
...,...,...,...,...,...,...
8755,2024-04-23 19:00:00,19:00,2700.00,19:00,0.00,1.162612e+06
8756,2024-04-23 20:00:00,20:00,2700.00,20:00,0.00,1.162612e+06
8757,2024-04-23 21:00:00,21:00,2700.00,21:00,0.00,1.162612e+06
8758,2024-04-23 22:00:00,22:00,2366.89,22:00,0.00,1.162612e+06


In [50]:
# Revenue per hour = real price times the quantity bid day-ahead.
dayahead_df['revenue'] = dayahead_df['price'].mul(dayahead_df['day_ahead_bidded'])
dayahead_df

Unnamed: 0,date,hour_x,price,hour_y,day_ahead_bidded,Objective Value,revenue
0,2023-04-25 00:00:00,00:00,2600.00,00:00,5.46,1.108632e+06,14196.0
1,2023-04-25 01:00:00,01:00,2600.00,01:00,7.06,1.108632e+06,18356.0
2,2023-04-25 02:00:00,02:00,2235.58,02:00,0.00,1.108632e+06,0.0
3,2023-04-25 03:00:00,03:00,1700.00,03:00,0.00,1.108632e+06,0.0
4,2023-04-25 04:00:00,04:00,1700.00,04:00,0.00,1.108632e+06,0.0
...,...,...,...,...,...,...,...
8755,2024-04-23 19:00:00,19:00,2700.00,19:00,0.00,1.162612e+06,0.0
8756,2024-04-23 20:00:00,20:00,2700.00,20:00,0.00,1.162612e+06,0.0
8757,2024-04-23 21:00:00,21:00,2700.00,21:00,0.00,1.162612e+06,0.0
8758,2024-04-23 22:00:00,22:00,2366.89,22:00,0.00,1.162612e+06,0.0


## Real Intraday Check

In [51]:
import pandas as pd


# Parse every intraday variable column once. Names look like
# "Q2_('21:00',_0.25)": splitting on "_" gives "('21:00'," and "0.25)", which
# are cleaned with literal (non-regex) replacements so the result does not
# depend on pandas' `str.replace` regex default.
q2_columns = []
for column in results_df.columns:
    if not column.startswith('Q2'):
        continue
    text = column.split('_')
    hour = text[1].replace("(", "").replace(")", "").replace("'", "").replace(",", "")
    quantile = text[-1].replace(")", "")  # kept as text here
    q2_columns.append((column, hour, quantile))

# One row per (day, hour, quantile) intraday bid.
final_data = [
    {
        'date': row['Date'],
        'hour': hour,
        'quantile': quantile,
        'intraday_bidded': row[column],
        'Objective Value': row['Objective Value'],
    }
    for index, row in results_df.iterrows()
    for column, hour, quantile in q2_columns
]

final_intraday_df = pd.DataFrame(
    final_data,
    columns=['date', 'hour', 'quantile', 'intraday_bidded', 'Objective Value'],
)

final_intraday_df



Unnamed: 0,date,hour,quantile,intraday_bidded,Objective Value
0,2023-04-25,00:00,0.25,0.00,1.108632e+06
1,2023-04-25,00:00,0.5,0.00,1.108632e+06
2,2023-04-25,00:00,0.75,0.00,1.108632e+06
3,2023-04-25,01:00,0.25,0.00,1.108632e+06
4,2023-04-25,01:00,0.5,0.00,1.108632e+06
...,...,...,...,...,...
26275,2024-04-23,22:00,0.5,0.00,1.162612e+06
26276,2024-04-23,22:00,0.75,53.74,1.162612e+06
26277,2024-04-23,23:00,0.25,0.00,1.162612e+06
26278,2024-04-23,23:00,0.5,0.00,1.162612e+06


In [52]:
# Build the hourly timestamp from the calendar day plus the "HH:MM" hour label.
# Normalising to midnight first (instead of concatenating strings) makes the
# cell safe to re-run once `date` already holds timestamps.
final_intraday_df['date'] = (
    pd.to_datetime(final_intraday_df['date']).dt.normalize()
    + pd.to_timedelta(final_intraday_df['hour'] + ':00')
)
final_intraday_df = final_intraday_df.sort_values(by=['date', 'hour', 'quantile'])
final_intraday_df

Unnamed: 0,date,hour,quantile,intraday_bidded,Objective Value
0,2023-04-25 00:00:00,00:00,0.25,0.00,1.108632e+06
1,2023-04-25 00:00:00,00:00,0.5,0.00,1.108632e+06
2,2023-04-25 00:00:00,00:00,0.75,0.00,1.108632e+06
3,2023-04-25 01:00:00,01:00,0.25,0.00,1.108632e+06
4,2023-04-25 01:00:00,01:00,0.5,0.00,1.108632e+06
...,...,...,...,...,...
26275,2024-04-23 22:00:00,22:00,0.5,0.00,1.162612e+06
26276,2024-04-23 22:00:00,22:00,0.75,53.74,1.162612e+06
26277,2024-04-23 23:00:00,23:00,0.25,0.00,1.162612e+06
26278,2024-04-23 23:00:00,23:00,0.5,0.00,1.162612e+06


In [53]:
import pandas as pd

# For every row of `matchings_df`, take the matched day's intraday quantile
# table, re-price it with the solved day's exchange rate and stamp it with the
# solved day's date.
# NOTE(review): the solve loop reads `days_to_solve`, while this cell reads
# `result_df` — confirm both are intended.
intraday_columns = ['date', 'hour', 'price', 'quantile', 'cumulative_lots', 'priceEUR', 'exchange_rate']
filled_columns = ['price', 'forecasted_price_eur', 'exchange_rate']
daily_intraday_frames = []

for index, row in matchings_df.iterrows():
    day = row['Day'].date()
    matched_day = row['Matched_Day'].date()

    # Explicit copies so the NaN fills act on these per-day frames instead of
    # on a temporary slice (avoids SettingWithCopyWarning / copy-on-write no-ops).
    day_df = result_df[result_df['date'].dt.date == day].copy()
    day_df[filled_columns] = day_df[filled_columns].fillna(0)

    intraday_data = all_quantiles_data[all_quantiles_data["date"].dt.date == matched_day].copy()
    intraday_data['priceEUR'] = intraday_data['priceEUR'].fillna(0)

    intraday_data = pd.merge(intraday_data[['date', 'hour', 'price', 'quantile', 'cumulative_lots', 'priceEUR']],
                             day_df[['hour', 'exchange_rate']], on='hour', how='left')

    intraday_data["price"] = intraday_data["exchange_rate"] * intraday_data["priceEUR"]
    intraday_data["date"] = day

    daily_intraday_frames.append(intraday_data)

# Concatenate once: growing a frame inside the loop re-copies all earlier rows
# on every iteration and seeds the result with an empty all-object frame.
if daily_intraday_frames:
    real_intra_day_data = pd.concat(daily_intraday_frames)
else:
    real_intra_day_data = pd.DataFrame(columns=intraday_columns)

real_intra_day_data


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['price'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['forecasted_price_eur'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  day_df['exchange_rate'].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  intraday_data['priceEUR']

Unnamed: 0,date,hour,price,quantile,cumulative_lots,priceEUR,exchange_rate
0,2023-04-25,00:00,1934.428548,0.25,14705,91.193426,21.212368
1,2023-04-25,00:00,2004.133102,0.50,9920,94.479459,21.212368
2,2023-04-25,00:00,2035.169435,0.75,4716,95.942584,21.212368
3,2023-04-25,01:00,1627.453667,0.25,10512,76.721922,21.212368
4,2023-04-25,01:00,1638.342095,0.50,6984,77.235227,21.212368
...,...,...,...,...,...,...,...
67,2024-04-23,22:00,2095.818808,0.50,7433,60.530981,34.623903
68,2024-04-23,22:00,2116.233179,0.75,3714,61.120584,34.623903
69,2024-04-23,23:00,1973.839522,0.25,6843,57.006900,34.624572
70,2024-04-23,23:00,2034.886105,0.50,3994,58.770000,34.624572


In [54]:
# Discard every row that has a missing value in any column.
real_intra_day_data = real_intra_day_data.dropna(axis=0, how='any')
real_intra_day_data

Unnamed: 0,date,hour,price,quantile,cumulative_lots,priceEUR,exchange_rate
0,2023-04-25,00:00,1934.428548,0.25,14705,91.193426,21.212368
1,2023-04-25,00:00,2004.133102,0.50,9920,94.479459,21.212368
2,2023-04-25,00:00,2035.169435,0.75,4716,95.942584,21.212368
3,2023-04-25,01:00,1627.453667,0.25,10512,76.721922,21.212368
4,2023-04-25,01:00,1638.342095,0.50,6984,77.235227,21.212368
...,...,...,...,...,...,...,...
67,2024-04-23,22:00,2095.818808,0.50,7433,60.530981,34.623903
68,2024-04-23,22:00,2116.233179,0.75,3714,61.120584,34.623903
69,2024-04-23,23:00,1973.839522,0.25,6843,57.006900,34.624572
70,2024-04-23,23:00,2034.886105,0.50,3994,58.770000,34.624572


In [55]:
# Build the hourly timestamp from the calendar day plus the "HH:MM" hour label.
# Normalising to midnight first (instead of casting to text and concatenating)
# makes the cell safe to re-run, and a single conversion is enough.
real_intra_day_data['date'] = (
    pd.to_datetime(real_intra_day_data['date']).dt.normalize()
    + pd.to_timedelta(real_intra_day_data['hour'] + ':00')
)

real_intra_day_data

Unnamed: 0,date,hour,price,quantile,cumulative_lots,priceEUR,exchange_rate
0,2023-04-25 00:00:00,00:00,1934.428548,0.25,14705,91.193426,21.212368
1,2023-04-25 00:00:00,00:00,2004.133102,0.50,9920,94.479459,21.212368
2,2023-04-25 00:00:00,00:00,2035.169435,0.75,4716,95.942584,21.212368
3,2023-04-25 01:00:00,01:00,1627.453667,0.25,10512,76.721922,21.212368
4,2023-04-25 01:00:00,01:00,1638.342095,0.50,6984,77.235227,21.212368
...,...,...,...,...,...,...,...
67,2024-04-23 22:00:00,22:00,2095.818808,0.50,7433,60.530981,34.623903
68,2024-04-23 22:00:00,22:00,2116.233179,0.75,3714,61.120584,34.623903
69,2024-04-23 23:00:00,23:00,1973.839522,0.25,6843,57.006900,34.624572
70,2024-04-23 23:00:00,23:00,2034.886105,0.50,3994,58.770000,34.624572


In [56]:
# Give both frames a float `quantile` column so the merge keys compare equal.
for frame in (real_intra_day_data, final_intraday_df):
    frame['quantile'] = frame['quantile'].astype(float)

In [57]:

# Outer-join intraday prices with the bids on timestamp, hour and quantile;
# rows present on only one side keep NaN in the other side's columns.
final_intraday_df = real_intra_day_data.merge(
    final_intraday_df,
    how='outer',
    on=['date', 'hour', 'quantile'],
)
final_intraday_df

Unnamed: 0,date,hour,price,quantile,cumulative_lots,priceEUR,exchange_rate,intraday_bidded,Objective Value
0,2023-04-25 00:00:00,00:00,1934.428548,0.25,14705,91.193426,21.212368,0.0,1.108632e+06
1,2023-04-25 00:00:00,00:00,2004.133102,0.50,9920,94.479459,21.212368,0.0,1.108632e+06
2,2023-04-25 00:00:00,00:00,2035.169435,0.75,4716,95.942584,21.212368,0.0,1.108632e+06
3,2023-04-25 01:00:00,01:00,1627.453667,0.25,10512,76.721922,21.212368,0.0,1.108632e+06
4,2023-04-25 01:00:00,01:00,1638.342095,0.50,6984,77.235227,21.212368,0.0,1.108632e+06
...,...,...,...,...,...,...,...,...,...
26275,2023-12-02 03:00:00,03:00,,0.50,,,,,2.039265e+06
26276,2023-12-02 03:00:00,03:00,,0.75,,,,,2.039265e+06
26277,2023-12-02 04:00:00,04:00,,0.25,,,,,2.039265e+06
26278,2023-12-02 04:00:00,04:00,,0.50,,,,,2.039265e+06


In [58]:
# Keep only rows that are complete on both sides of the outer merge.
final_intraday_df = final_intraday_df.dropna(axis=0, how='any')
final_intraday_df

Unnamed: 0,date,hour,price,quantile,cumulative_lots,priceEUR,exchange_rate,intraday_bidded,Objective Value
0,2023-04-25 00:00:00,00:00,1934.428548,0.25,14705,91.193426,21.212368,0.00,1.108632e+06
1,2023-04-25 00:00:00,00:00,2004.133102,0.50,9920,94.479459,21.212368,0.00,1.108632e+06
2,2023-04-25 00:00:00,00:00,2035.169435,0.75,4716,95.942584,21.212368,0.00,1.108632e+06
3,2023-04-25 01:00:00,01:00,1627.453667,0.25,10512,76.721922,21.212368,0.00,1.108632e+06
4,2023-04-25 01:00:00,01:00,1638.342095,0.50,6984,77.235227,21.212368,0.00,1.108632e+06
...,...,...,...,...,...,...,...,...,...
26239,2024-04-23 22:00:00,22:00,2095.818808,0.50,7433,60.530981,34.623903,0.00,1.162612e+06
26240,2024-04-23 22:00:00,22:00,2116.233179,0.75,3714,61.120584,34.623903,53.74,1.162612e+06
26241,2024-04-23 23:00:00,23:00,1973.839522,0.25,6843,57.006900,34.624572,0.00,1.162612e+06
26242,2024-04-23 23:00:00,23:00,2034.886105,0.50,3994,58.770000,34.624572,0.00,1.162612e+06


In [59]:
# Order rows chronologically, then by hour label and quantile.
final_intraday_df = final_intraday_df.sort_values(['date', 'hour', 'quantile'], ascending=True)
final_intraday_df

Unnamed: 0,date,hour,price,quantile,cumulative_lots,priceEUR,exchange_rate,intraday_bidded,Objective Value
0,2023-04-25 00:00:00,00:00,1934.428548,0.25,14705,91.193426,21.212368,0.00,1.108632e+06
1,2023-04-25 00:00:00,00:00,2004.133102,0.50,9920,94.479459,21.212368,0.00,1.108632e+06
2,2023-04-25 00:00:00,00:00,2035.169435,0.75,4716,95.942584,21.212368,0.00,1.108632e+06
3,2023-04-25 01:00:00,01:00,1627.453667,0.25,10512,76.721922,21.212368,0.00,1.108632e+06
4,2023-04-25 01:00:00,01:00,1638.342095,0.50,6984,77.235227,21.212368,0.00,1.108632e+06
...,...,...,...,...,...,...,...,...,...
26239,2024-04-23 22:00:00,22:00,2095.818808,0.50,7433,60.530981,34.623903,0.00,1.162612e+06
26240,2024-04-23 22:00:00,22:00,2116.233179,0.75,3714,61.120584,34.623903,53.74,1.162612e+06
26241,2024-04-23 23:00:00,23:00,1973.839522,0.25,6843,57.006900,34.624572,0.00,1.162612e+06
26242,2024-04-23 23:00:00,23:00,2034.886105,0.50,3994,58.770000,34.624572,0.00,1.162612e+06


In [60]:
# Keep only the columns needed for the revenue check. `.copy()` gives the
# trimmed frame its own data, so later cells can add columns to it without
# triggering SettingWithCopyWarning.
final_intraday_df = final_intraday_df[["date", "hour", "price", "quantile", "intraday_bidded"]].copy()
final_intraday_df

Unnamed: 0,date,hour,price,quantile,intraday_bidded
0,2023-04-25 00:00:00,00:00,1934.428548,0.25,0.00
1,2023-04-25 00:00:00,00:00,2004.133102,0.50,0.00
2,2023-04-25 00:00:00,00:00,2035.169435,0.75,0.00
3,2023-04-25 01:00:00,01:00,1627.453667,0.25,0.00
4,2023-04-25 01:00:00,01:00,1638.342095,0.50,0.00
...,...,...,...,...,...
26239,2024-04-23 22:00:00,22:00,2095.818808,0.50,0.00
26240,2024-04-23 22:00:00,22:00,2116.233179,0.75,53.74
26241,2024-04-23 23:00:00,23:00,1973.839522,0.25,0.00
26242,2024-04-23 23:00:00,23:00,2034.886105,0.50,0.00


In [61]:
# Load the intraday trade records and drop the identifier/contract columns
# that the revenue check does not use.
unused_columns = ['contract_date', 'contract_hour', 'contractName', 'prefix', 'id']
all_intra_day_data = pd.read_csv("all_intraday_data.csv").drop(columns=unused_columns)

# Restrict to the test window (both bounds inclusive).
on_or_after_start = all_intra_day_data["date"] >= "2023-04-25"
on_or_before_end = all_intra_day_data["date"] <= "2024-04-23"
all_intra_day_data = all_intra_day_data[on_or_after_start & on_or_before_end]

# Combine the day and the hour label into one hourly timestamp.
all_intra_day_data['date'] = pd.to_datetime(
    all_intra_day_data['date'].astype(str) + ' ' + all_intra_day_data['hour']
)

all_intra_day_data = all_intra_day_data.reset_index(drop=True)


all_intra_day_data

Unnamed: 0,price,quantity,date,hour
0,2522.04,150,2023-04-25 00:00:00,00:00
1,2525.01,150,2023-04-25 00:00:00,00:00
2,2600.00,3,2023-04-25 00:00:00,00:00
3,2525.02,11,2023-04-25 00:00:00,00:00
4,2525.03,5,2023-04-25 00:00:00,00:00
...,...,...,...,...
3902846,2649.00,5,2024-04-23 23:00:00,23:00
3902847,2649.00,1,2024-04-23 23:00:00,23:00
3902848,2650.00,24,2024-04-23 23:00:00,23:00
3902849,2650.00,25,2024-04-23 23:00:00,23:00


In [62]:
# Order the trades by timestamp, hour label and then price.
all_intra_day_data = all_intra_day_data.sort_values(['date', 'hour', 'price'], ascending=True)
all_intra_day_data

Unnamed: 0,price,quantity,date,hour
1894,2302.02,9,2023-04-25 00:00:00,00:00
1895,2302.02,2,2023-04-25 00:00:00,00:00
1893,2302.03,1,2023-04-25 00:00:00,00:00
1858,2357.03,2,2023-04-25 00:00:00,00:00
1862,2359.68,2,2023-04-25 00:00:00,00:00
...,...,...,...,...
3902827,2714.90,9,2024-04-23 23:00:00,23:00
3902829,2714.90,9,2024-04-23 23:00:00,23:00
3902830,2714.90,4,2024-04-23 23:00:00,23:00
3902828,2715.00,2,2024-04-23 23:00:00,23:00


In [63]:
# Realised intraday revenue per bid: the bid quantity, capped by the total
# quantity traded at or above the bid price in the same hour, times the price.

# Own the frame before adding a column, so the write cannot land on a slice copy.
final_intraday_df = final_intraday_df.copy()
# Rows without a bid earn nothing; only non-zero bids are evaluated below.
final_intraday_df["revenue"] = 0.0

active_bids = final_intraday_df[final_intraday_df["intraday_bidded"] != 0]

# Group the trades by timestamp once, instead of scanning the whole trade
# table again for every single bid.
relevant_trades = all_intra_day_data[all_intra_day_data["date"].isin(active_bids["date"])]
trades_by_date = {date: trades for date, trades in relevant_trades.groupby("date")}

for index, row in active_bids.iterrows():
    price = row["price"]
    quantity = row["intraday_bidded"]
    trades = trades_by_date.get(row["date"])

    if trades is None:
        # No trades recorded for this hour: nothing could have been matched.
        threshold_quantity = 0
    else:
        threshold_quantity = trades.loc[trades["price"] >= price, "quantity"].sum()

    final_intraday_df.at[index, "revenue"] = min(threshold_quantity, quantity) * price
        

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_intraday_df.at[index, "revenue"] = 0


In [64]:
# Display the intraday bids together with their computed `revenue` column.
final_intraday_df

Unnamed: 0,date,hour,price,quantile,intraday_bidded,revenue
0,2023-04-25 00:00:00,00:00,1934.428548,0.25,0.00,0.000000
1,2023-04-25 00:00:00,00:00,2004.133102,0.50,0.00,0.000000
2,2023-04-25 00:00:00,00:00,2035.169435,0.75,0.00,0.000000
3,2023-04-25 01:00:00,01:00,1627.453667,0.25,0.00,0.000000
4,2023-04-25 01:00:00,01:00,1638.342095,0.50,0.00,0.000000
...,...,...,...,...,...,...
26239,2024-04-23 22:00:00,22:00,2095.818808,0.50,0.00,0.000000
26240,2024-04-23 22:00:00,22:00,2116.233179,0.75,53.74,113726.371032
26241,2024-04-23 23:00:00,23:00,1973.839522,0.25,0.00,0.000000
26242,2024-04-23 23:00:00,23:00,2034.886105,0.50,0.00,0.000000


In [65]:
# Total quantity bid on the intraday market across all rows.
intraday_bids = final_intraday_df.loc[:, "intraday_bidded"].sum()
intraday_bids

138068.101120792

In [66]:
# Display the day-ahead bids merged with the real prices and their revenue.
dayahead_df

Unnamed: 0,date,hour_x,price,hour_y,day_ahead_bidded,Objective Value,revenue
0,2023-04-25 00:00:00,00:00,2600.00,00:00,5.46,1.108632e+06,14196.0
1,2023-04-25 01:00:00,01:00,2600.00,01:00,7.06,1.108632e+06,18356.0
2,2023-04-25 02:00:00,02:00,2235.58,02:00,0.00,1.108632e+06,0.0
3,2023-04-25 03:00:00,03:00,1700.00,03:00,0.00,1.108632e+06,0.0
4,2023-04-25 04:00:00,04:00,1700.00,04:00,0.00,1.108632e+06,0.0
...,...,...,...,...,...,...,...
8755,2024-04-23 19:00:00,19:00,2700.00,19:00,0.00,1.162612e+06,0.0
8756,2024-04-23 20:00:00,20:00,2700.00,20:00,0.00,1.162612e+06,0.0
8757,2024-04-23 21:00:00,21:00,2700.00,21:00,0.00,1.162612e+06,0.0
8758,2024-04-23 22:00:00,22:00,2366.89,22:00,0.00,1.162612e+06,0.0


In [67]:
# Total quantity bid on the day-ahead market across all rows.
dayahead_bids = dayahead_df.loc[:, "day_ahead_bidded"].sum()
dayahead_bids

97046.8321485227

In [68]:
# Realised revenue is the intraday part plus the day-ahead part; print it next
# to the optimiser's expected total for comparison.
intraday_real_revenue = final_intraday_df['revenue'].sum()
dayahead_real_revenue = dayahead_df['revenue'].sum()
real_revenue = intraday_real_revenue + dayahead_real_revenue

print("Expected revenue: ", expected_total_revenue)
print("Real revenue: ", real_revenue)


Expected revenue:  466702990.69976044
Real revenue:  333959871.0173793


In [69]:
# Sum the day-ahead revenue per calendar month number.
# NOTE(review): grouping on the month number alone pools same-numbered months
# of different years (the price window above runs 2023-04-25 .. 2024-04-23, so
# April would mix both years) — confirm this is intended.
dayahead_timestamps = pd.to_datetime(dayahead_df['date'])
dayahead_df['date'] = dayahead_timestamps
dayahead_df['month'] = dayahead_timestamps.dt.month

dayahead_monthly_total_revenue = dayahead_df['revenue'].groupby(dayahead_df['month']).sum()

for label, value in (
    ("Monthly total revenue from dayahead: ", dayahead_monthly_total_revenue),
    ("Monthly avg revenue from dayahead: ", dayahead_monthly_total_revenue.mean()),
    ("Total yearly revenue from dayahead: ", dayahead_monthly_total_revenue.sum()),
):
    print(label, value)

Monthly total revenue from dayahead:  month
1     2.253853e+07
2     1.652949e+07
3     1.234295e+07
4     1.180215e+07
5     1.306201e+07
6     1.225177e+07
7     1.854405e+07
8     2.248172e+07
9     2.276214e+07
10    1.552905e+07
11    1.587986e+07
12    1.306380e+07
Name: revenue, dtype: float64
Monthly avg revenue from dayahead:  16398959.105132451
Total yearly revenue from dayahead:  196787509.2615894


In [70]:
# Derive the month with `assign`, which returns a new frame instead of writing
# columns onto a frame that may be a slice (the source of the
# SettingWithCopyWarning this cell used to emit).
final_intraday_df = final_intraday_df.assign(
    date=lambda frame: pd.to_datetime(frame['date']),
    month=lambda frame: frame['date'].dt.month,
)

# Sum the intraday revenue per calendar month number.
intraday_monthly_total_revenue = final_intraday_df.groupby('month')['revenue'].sum()

print("Monthly total revenue from intraday: ", intraday_monthly_total_revenue)
print("Monthly avg revenue from intraday: ", intraday_monthly_total_revenue.mean())
print("Total yearly revenue from intraday: ", intraday_monthly_total_revenue.sum())


Monthly total revenue from intraday:  month
1     1.829426e+07
2     8.899291e+06
3     6.917568e+06
4     1.659633e+07
5     7.088745e+06
6     6.336998e+06
7     1.423832e+07
8     1.712505e+07
9     1.520861e+07
10    7.555783e+06
11    1.039964e+07
12    8.511765e+06
Name: revenue, dtype: float64
Monthly avg revenue from intraday:  11431030.14631582
Total yearly revenue from intraday:  137172361.75578985


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_intraday_df['date'] = pd.to_datetime(final_intraday_df['date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_intraday_df['month'] = final_intraday_df['date'].dt.month


In [71]:
# Add the two monthly series (aligned on their shared index) and print the total.
total_monthly_revenue = dayahead_monthly_total_revenue.add(intraday_monthly_total_revenue)
print("Total revenue: ", total_monthly_revenue.sum())

Total revenue:  333959871.0173793


In [72]:
# Same monthly day-ahead summary, keyed by (month number, month name) so the
# printout can show names while staying in calendar order.
# NOTE(review): same-numbered months of different years are pooled — confirm intended.
dayahead_timestamps = pd.to_datetime(dayahead_df['date'])
dayahead_df['date'] = dayahead_timestamps
dayahead_df['month'] = dayahead_timestamps.dt.month
dayahead_df['month_name'] = dayahead_timestamps.dt.strftime('%B')  # %B gives full month name

dayahead_monthly_total_revenue = dayahead_df.groupby(['month', 'month_name'])['revenue'].sum()

print("Monthly revenues from dayahead:")
# Each key is a (month number, month name) pair; element 1 is the name.
for month, revenue in dayahead_monthly_total_revenue.items():
    print(f"{month[1]}: {revenue}")

for label, value in (
    ("Monthly avg revenue from dayahead:", dayahead_monthly_total_revenue.mean()),
    ("Total yearly revenue from dayahead:", dayahead_monthly_total_revenue.sum()),
):
    print(label, value)


Monthly revenues from dayahead:
January: 22538529.46119098
February: 16529489.728271931
March: 12342947.70241602
April: 11802147.232962163
May: 13062012.967370791
June: 12251772.844810445
July: 18544050.695106257
August: 22481719.06549262
September: 22762139.15338932
October: 15529047.671992743
November: 15879856.149226012
December: 13063796.589360137
Monthly avg revenue from dayahead: 16398959.105132451
Total yearly revenue from dayahead: 196787509.2615894


In [73]:
# Derive the month columns with `assign`, which returns a new frame instead of
# writing columns onto a frame that may be a slice (the source of the
# SettingWithCopyWarning this cell used to emit).
final_intraday_df = final_intraday_df.assign(
    date=lambda frame: pd.to_datetime(frame['date']),
    month=lambda frame: frame['date'].dt.month,
    month_name=lambda frame: frame['date'].dt.strftime('%B'),  # %B gives full month name
)

# Key by (month number, month name) so names print in calendar order.
intraday_monthly_total_revenue = final_intraday_df.groupby(['month', 'month_name'])['revenue'].sum()

print("Monthly revenues from intraday:")
# Each key is a (month number, month name) pair; element 1 is the name.
for month, revenue in intraday_monthly_total_revenue.items():
    print(f"{month[1]}: {revenue}")

print("Monthly avg revenue from intraday:", intraday_monthly_total_revenue.mean())
print("Total yearly revenue from intraday:", intraday_monthly_total_revenue.sum())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_intraday_df['date'] = pd.to_datetime(final_intraday_df['date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_intraday_df['month'] = final_intraday_df['date'].dt.month


Monthly revenues from intraday:
January: 18294262.2301929
February: 8899290.775790937
March: 6917567.797402608
April: 16596334.390676545
May: 7088745.05469615
June: 6336997.883285765
July: 14238318.926761685
August: 17125046.99604459
September: 15208613.227637002
October: 7555783.153742903
November: 10399636.572131302
December: 8511764.747427443
Monthly avg revenue from intraday: 11431030.14631582
Total yearly revenue from intraday: 137172361.75578985


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_intraday_df['month_name'] = final_intraday_df['date'].dt.strftime('%B')  # %B gives full month name


In [74]:
# Total quantity bid on the intraday market.
# NOTE(review): repeats the `intraday_bids` computation of an earlier cell.
intraday_bids = final_intraday_df.loc[:, "intraday_bidded"].sum()
intraday_bids

138068.101120792

In [75]:
# Total quantity bid on the day-ahead market.
# NOTE(review): repeats the `dayahead_bids` computation of an earlier cell.
dayahead_bids = dayahead_df.loc[:, "day_ahead_bidded"].sum()
dayahead_bids

97046.8321485227