## LMP predictions

This notebook shows how to access, navigate and analyze data from backtest predictions for the Day Ahead trading in MISO market. Predictions are calculated with model trained on data excluding the period for which predictions are given.

The few-line trading algorithm included at the end is entirely based on the DA, RT and trading probability predictions. There are several options in the code comments, allowing to turn off the usage of each quantity. You can also play with the parameters, e.g. change the probability threshold value and verify stability of the algorithm.

Contact us for any further questions!

contact@crystalball24.com

In [1]:
%matplotlib notebook
import matplotlib.pyplot as plt
import datetime
import numpy as np
import pandas as pd
import requests
import json
import io

### Read data from server

In [2]:
# your access configuration:
config_file = 'access_config.json'

with open(config_file, 'r') as f: config = json.load(f)
addr = config['address']
folder = config['bt_folder']

# backtest data file:
filename = 'miso_g9_2018_bt.csv'  # 10/2017 - 10/2018
#filename = 'miso_g9_2017_bt.csv' # 10/2016 - 10/2017

url_crystalball24 = 'http://{0}/{1}/{2}'.format(addr, folder, filename)
try:
    print('Downloading data...')
    r = requests.get(url_crystalball24, timeout=20)
    with io.StringIO(r.text) as f:
        bt = pd.read_csv(f)
    print('...done.')
except:
    print('Problem with reading from Crystalball24.com')
    
print(bt.columns.values)

Downloading data...
...done.
['date' 'hour' 'node' 'true_dalmp' 'true_spread' 'pred_dalmp' 'pred_rtlmp'
 'probability']


#### Reshape data for convenience, estimate predictions threshold

In [3]:
n_nodes = bt.node.nunique()
n_days = int(bt.shape[0] / n_nodes / 24)
print('Nodes:', n_nodes, ', days:', n_days)

dates = bt.date.values.reshape((n_days, 24, n_nodes))
hours = bt.hour.values.reshape((n_days, 24, n_nodes))
node = bt.node.values.reshape((n_days, 24, n_nodes))
true_dalmp = bt.true_dalmp.values.reshape((n_days, 24, n_nodes))
true_spread = bt.true_spread.values.reshape((n_days, 24, n_nodes))
pred_dalmp = bt.pred_dalmp.values.reshape((n_days, 24, n_nodes))
pred_rtlmp = bt.pred_rtlmp.values.reshape((n_days, 24, n_nodes))
probability = bt.probability.values.reshape((n_days, 24, n_nodes))

pmax = np.percentile(bt.probability.values, 99.8)
phigh = np.percentile(bt.probability.values, 94.0)
thr = 0.5 * (0.7 * pmax + phigh)
print('Use threshold', np.round(thr,2))

Nodes: 32 , days: 372
Use threshold 0.68


### Inspect one day / one node predictions

In [4]:
day = 249
n = 20

plt.figure(1, figsize=(7,7.5))

plt.subplot(211)
plt.title('DALMP and RTLMP predictions for node: {0}, date: {1}'.format(node[day,0,n], dates[day,0,n]), fontsize=10)
plt.plot(pred_dalmp[day,:,n], '-', color='red', linewidth=0.5, label='predicted DALMP')
plt.plot(true_dalmp[day,:,n], '.', color='red', markersize=4, label='actual DALMP')
plt.plot(pred_rtlmp[day,:,n], '-', color='green', linewidth=0.5, label='predicted RTLMP')
plt.plot(true_dalmp[day,:,n]+true_spread[day,:,0], '.', color='green', markersize=4, label='actual RTLMP')
plt.grid(color='grey', linestyle='-', linewidth=0.3)
plt.xlabel('hour')
plt.ylabel('price [$]')
plt.legend(loc='upper left')

plt.subplot(212)
plt.title('Prediction for making offers', fontsize=10)
plt.plot(probability[day,:,n], '-', color='blue', linewidth=0.75, label='probability')
plt.axhline(thr, color='red', linestyle='dotted', linewidth=1, label='threshold')
plt.grid(color='grey', linestyle='-', linewidth=0.3)
plt.xlabel('hour')
plt.ylabel('probability')
plt.ylim(-0.2,1)
plt.legend()

plt.tight_layout(pad=0.4, w_pad=0.75, h_pad=1.0)
plt.show()

<IPython.core.display.Javascript object>

### Examine all days / all nodes

In [5]:
true_rt = bt.true_dalmp.values+bt.true_spread.values
rt_pred_corr = np.corrcoef(true_rt, bt.pred_rtlmp.values)[0,1]
da_pred_corr = np.corrcoef(bt.true_dalmp.values, bt.pred_dalmp.values)[0,1]

plt.figure(2, figsize=(8.5,8))
ax = plt.subplot(221)
plt.title('RTLMP, c='+str(np.round(rt_pred_corr, 3)), fontsize=10)
plt.plot(true_rt, bt.pred_rtlmp.values, ',', color=(0.5, 0.5, 0.9))
plt.xlim(-50,200)
plt.ylim(-50,200)
plt.plot(ax.get_xlim(), ax.get_ylim(), color='black', alpha=0.7, linewidth=0.5, ls="--")
plt.xlabel('actual RTLMP')
plt.ylabel('prediction')

ax = plt.subplot(222)
plt.title('DALMP, c='+str(np.round(da_pred_corr, 3)), fontsize=10)
plt.plot(bt.true_dalmp.values, bt.pred_dalmp.values, ',', color=(0, 0.5, 0))
plt.xlim(-50,200)
plt.ylim(-50,200)
plt.plot(ax.get_xlim(), ax.get_ylim(), color='black', alpha=0.7, linewidth=0.5, ls="--")
plt.xlabel('actual DALMP')
plt.ylabel('prediction')

plt.subplot(223)
plt.title('Offer probability vs spread', fontsize=10)
plt.plot(bt.true_spread.values, bt.probability.values, ',', markersize=0.1, color=(0,0,0.5))
plt.grid(color='grey', linestyle='-', linewidth=0.3)
plt.xlim(-30,30)
plt.ylim(-1,1)
plt.xlabel('actual spread')
plt.ylabel('offer probability')

plt.subplot(224)
plt.title('Offer probability distribution', fontsize=10)
plt.hist(bt.probability.values, bins=300, range=(-1,1))
plt.axvline(thr, color='red', linestyle='dotted', linewidth=0.8)
plt.xlabel('offer probability')

plt.tight_layout(pad=0.4, w_pad=0.75, h_pad=1.0)
plt.show()

<IPython.core.display.Javascript object>

### Example trading parameters

In [6]:
# Initial capital, let's say $1M:
capital = 1e+6
# Safe, 10% exposure, so target using $100k each day:
exposure = 0.1

daily_volume = exposure * capital
max_virtual = 0.02 * daily_volume
threshold = thr # to ensure stability of result try also: thr +/- 0.05
offer_fee = 0.5
price_margin = 2
trade = -1 # offers gain on negative spread (use +1 for bids)

### Run backtest

In [7]:
pnl_daily = np.zeros(n_days, dtype=np.float32)
usd_daily = np.zeros(n_days, dtype=np.float32)
mw_daily = np.zeros(n_days, dtype=np.float32)
op_day = []

print('date\t\t desired\t executed\t total MW\t total $\t PNL\t\t [%]')
for d in range(n_days):
    op_day.append(dates[d,0,0])
    p = probability[d]
    da = pred_dalmp[d]
    rt = pred_rtlmp[d]
    
    tda = true_dalmp[d]
    tspread = true_spread[d]
    
    # just make offer on every hour/node:
    #go_idx = p > -1
    
    # pre-select hour/node with the confidence prediction
    go_idx = p > threshold
    
    go_count = go_idx.sum()
    if go_count == 0: continue
        
    # price taker: no use of RT prediction:
    #exec_idx = go_idx
        
    # use RT prediction for a simple price managment
    exec_idx = go_idx & (tda > rt + price_margin)
    
    # 0.1MW on each selected hour/node: no use of DA prediction
    #virtual_mw = 0.1 * da[exec_idx]
    
    # keep constant MW **value** each day: use DA prediction for a simple risk managment
    virtual_usd = min(daily_volume / go_count, max_virtual)
    virtual_mw = virtual_usd/da[exec_idx]

    pnl_daily[d] = trade * ((tspread[exec_idx] - offer_fee) * virtual_mw).sum()
    usd_daily[d] = (virtual_mw * tda[exec_idx] + offer_fee).sum()
    mw_daily[d] = (virtual_usd/da[exec_idx]).sum()
    
    print(op_day[d], '\t', go_count, '\t\t', exec_idx.sum(),
          '\t\t', np.round(mw_daily[d],1), '   \t', np.round(usd_daily[d],1),
          '   \t', np.round(pnl_daily[d],1), '   \t', np.round(100*pnl_daily[d]/capital,2))

date		 desired	 executed	 total MW	 total $	 PNL		 [%]
2017-10-05 	 21 		 21 		 798.7    	 41641.3    	 11622.7    	 1.16
2017-10-06 	 44 		 43 		 1581.1    	 79962.9    	 17692.1    	 1.77
2017-10-07 	 152 		 150 		 2029.8    	 93154.9    	 19408.2    	 1.94
2017-10-09 	 4 		 4 		 103.6    	 6274.9    	 -1914.3    	 -0.19
2017-10-10 	 114 		 114 		 1816.3    	 109859.6    	 47336.3    	 4.73
2017-10-11 	 5 		 5 		 233.9    	 13058.9    	 4980.9    	 0.5
2017-10-12 	 1 		 1 		 36.5    	 2001.5    	 531.2    	 0.05
2017-10-13 	 34 		 33 		 1350.4    	 83572.3    	 16938.6    	 1.69
2017-10-14 	 11 		 11 		 459.1    	 32971.4    	 6954.7    	 0.7
2017-10-15 	 9 		 9 		 360.6    	 20040.8    	 8323.1    	 0.83
2017-10-19 	 24 		 20 		 1043.7    	 36875.7    	 8386.2    	 0.84
2017-10-20 	 6 		 6 		 323.6    	 11950.9    	 4154.8    	 0.42
2017-10-23 	 98 		 98 		 2862.0    	 91615.1    	 22808.0    	 2.28
2017-10-27 	 160 		 158 		 2916.4    	 91955.8    	 16766.2    	 1.68
2017-11-05 	 1

In [8]:
tot_usd = np.round(np.cumsum(pnl_daily)[-1] * 1e-6,3)
tot_pct = np.round(100 * np.cumsum(pnl_daily)[-1] / capital,1)
max_loss_usd = np.round(np.amin(pnl_daily) * 1e-6,3)
max_loss_pct = np.round(100 * np.amin(pnl_daily) / capital,1)

days = np.arange(len(op_day))
ticks = np.arange(len(op_day), step=90)
labels = [op_day[d] for d in ticks]

plt.figure(3, figsize=(8,7))

plt.subplot(211)
plt.title('total: {0}M ({1}%),  max loss: {2}M ({3}%)'.format(tot_usd,tot_pct,max_loss_usd,max_loss_pct), fontsize=10)
plt.plot(1e-6 * np.cumsum(pnl_daily), '-', color='blue', linewidth=0.5, label='net PNL')
plt.plot(1e-6 * usd_daily, '-', color='red', linewidth=0.5, label='volume')
plt.grid(color='grey', linestyle='-', linewidth=0.3)
plt.xticks(ticks, labels)
plt.ylabel('$M')
plt.legend()

plt.subplot(212)
plt.title('GW daily', fontsize=10)
plt.plot(1e-3*mw_daily, '-', color='black', alpha=0.5, linewidth=0.5, label='volume')
plt.grid(color='grey', linestyle='-', linewidth=0.3)
plt.xticks(ticks, labels)
plt.ylabel('GW')
plt.ylim(0,5)
plt.legend()

plt.tight_layout(pad=0.4, w_pad=0.75, h_pad=1.0)
plt.show()

<IPython.core.display.Javascript object>