In [4]:
import pandas as pd
import numpy as np
import math

# Methodology
We implement the following methodology to create a uniform measure of forecasting ability:
- __Data setup__: identify the universe of assets and get the returns of each asset for each period;
- __Forecast model__: run the model to get a model/forecast score for each asset at the beginning of each period.

----

## Data Setup

*Below we generate normally distributed sample returns data; each value is the per-period percentage change of one of the underlying assets X0000 to X0099.*

### Investment periods

Sample periods: based on one rebalance per business day over 1 year.

__Task__: Replace the sample periods with your own. The period length of your model may differ from one day.

__Warning__: We keep things simple by not excluding non-trading days such as public holidays (e.g. 2018-01-01 and 2018-12-25 appear in the sample periods); you __should__ change this.

In [6]:
# Rebalance calendar: one entry per weekday of 2018. Public holidays are NOT
# removed (e.g. 2018-01-01 and 2018-12-25 are part of the index).
periods = pd.date_range(
    start='2018-01-01',
    end='2018-12-31',
    freq='B',          # business-day frequency
    name='period',
)
periods

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10',
               '2018-01-11', '2018-01-12',
               ...
               '2018-12-18', '2018-12-19', '2018-12-20', '2018-12-21',
               '2018-12-24', '2018-12-25', '2018-12-26', '2018-12-27',
               '2018-12-28', '2018-12-31'],
              dtype='datetime64[ns]', name='period', length=261, freq='B')

### Investable universe

Sample universe: set of unique identifiers for assets.

__Task__: Replace sample.

In [8]:
universe_size = 100
# Synthetic asset identifiers: 'X' followed by a zero-padded 4-digit counter.
universe = pd.Series([f'X{asset_no:04d}' for asset_no in range(universe_size)])
universe.head()

0    X0000
1    X0001
2    X0002
3    X0003
4    X0004
dtype: object

### Periodic returns of universe

Sample returns: just a randomised variable for now.

__Task__: Replace sample for each period in your model.

---

*__Definition__: Returns*

*Traditionally, simple returns are denoted with a capital R and log returns with a lower-case r. These are defined as:*

*R<sub>t</sub> = (P<sub>t</sub> – P<sub>t-1</sub>) / P<sub>t-1</sub> = P<sub>t</sub> / P<sub>t-1</sub> – 1*

*r<sub>t</sub> = log(P<sub>t</sub> / P<sub>t-1</sub>) = log(P<sub>t</sub>) – log(P<sub>t-1</sub>)*

*where P<sub>t</sub> is the price of the asset at time t.  We are defining the return from time t-1 to time t.  The log function here is the natural logarithm.*

---

In [11]:
# Seed NumPy's global RNG so that "Restart & Run All" regenerates the same sample
# data (and the same CSV); later np.random draws in the same run become
# repeatable as well.
random_seed = 42
np.random.seed(random_seed)

yearly_return = 0.05                     # for a sample 5% yearly growth
period_return = yearly_return / 260.0    # business days (approximate count per year)
period_volatility = 0.01                 # standard deviation of each simulated per-period return

# One vectorised draw of shape (number of periods, number of assets) instead of
# one separate draw per period.
returns = pd.DataFrame(
    data=np.random.normal(period_return, period_volatility, size=(len(periods), len(universe))),
    columns=universe,
    index=periods,
)
returns.head()

Unnamed: 0_level_0,X0000,X0001,X0002,X0003,X0004,X0005,X0006,X0007,X0008,X0009,...,X0090,X0091,X0092,X0093,X0094,X0095,X0096,X0097,X0098,X0099
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01,0.010908,0.000334,-0.000586,-0.006591,0.007305,-0.003786,0.005338,-0.001945,0.001255,0.013255,...,-0.014357,-0.012908,-0.005885,-0.009846,0.011709,0.00092,0.012995,0.011962,-0.000764,-0.001065
2018-01-02,0.007013,-0.003659,0.003218,-0.010403,0.020972,0.010219,0.000778,0.004748,0.003741,0.00732,...,0.007063,-0.004894,0.004412,0.007874,0.000113,0.009033,0.010397,-0.003984,0.005255,0.020058
2018-01-03,0.004695,-0.004861,0.019964,-0.001419,-0.005983,0.000125,-0.015753,0.013186,0.009251,0.000606,...,-0.002106,-0.005763,0.007291,0.026858,-0.003325,-0.021504,-0.022193,0.018268,-0.007994,0.010453
2018-01-04,-0.015665,-0.002899,0.011327,0.019781,0.004744,0.00289,-0.004013,0.006394,0.022979,0.015575,...,0.02114,-0.017054,0.012061,0.012999,-0.006308,0.007687,0.011646,0.010349,-0.006598,-0.002679
2018-01-05,0.001149,0.00596,-0.013397,0.002911,-0.018057,0.00373,0.007247,-0.012295,0.003293,-0.001851,...,0.00899,0.007926,-0.008188,0.003563,-0.010347,-0.001241,0.003493,0.007976,0.006351,0.000265


### Save Example Data

In [12]:
# Persist the sample returns (period index + one column per asset) next to the notebook.
returns.to_csv(path_or_buf='returns_example_data.csv')

---

# Forecast model

### Define forecast provider

Sample provider: assume we can predict returns with random noise.

__Task__: Replace sample function for each period in your model.

__Warning__: This is only an example. Your model __should not__ use returns or any other data from the period being forecast; use only data from previous periods, where relevant.

In [13]:
noise_factor = 10000.0
def forecast(period):
    """Return a sample forecast score per asset for ``period``.

    EXAMPLE ONLY: the score is the realised return of that same period plus
    Gaussian noise, so it looks ahead. A real model must replace this function
    and must not read data from the period it is forecasting.

    Parameters
    ----------
    period : label of the ``returns`` index
        The period to forecast (e.g. an element of ``periods``).

    Returns
    -------
    pandas.Series
        ``returns.loc[period]`` plus independent standard-normal noise scaled by
        ``noise_factor * period_return``, indexed like ``returns.loc[period]``.
    """
    realised = returns.loc[period]
    # One vectorised draw for all assets instead of a Python-level call per element.
    noise = np.random.normal(0, 1, size=realised.shape)
    return realised + noise_factor * period_return * noise

In [14]:
# Spot-check: forecast scores of the first five assets for the first period.
forecast(periods[0]).head(5)

X0000    0.781161
X0001    0.265538
X0002    0.890048
X0003    1.203040
X0004   -1.335270
Name: 2018-01-01 00:00:00, dtype: float64

### First pass check

Mean per-asset correlation of the forecasts against the realised returns: anything positive is good, and the closer to 1.0 the better!

In [23]:
# Forecast table: one row per period (one forecast() call each, in calendar
# order), one column per asset.
forecasts = pd.DataFrame(
    data=[forecast(period) for period in periods],
    index=periods,
)
# Correlate each forecast column with the matching returns column, then
# average the resulting per-asset correlations into a single number.
forecasts.corrwith(returns).mean()

0.005753698140956397

In [24]:
# Preview the first five periods of the forecast table.
forecasts.head(5)

Unnamed: 0_level_0,X0000,X0001,X0002,X0003,X0004,X0005,X0006,X0007,X0008,X0009,...,X0090,X0091,X0092,X0093,X0094,X0095,X0096,X0097,X0098,X0099
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01,-2.107066,0.143936,-2.454639,-0.585614,-1.370174,-0.757645,-2.195033,0.244954,1.977399,1.771116,...,-0.743485,0.348614,-0.431445,4.395474,1.325933,-3.754003,-1.197294,-1.719758,-0.176842,-2.394018
2018-01-02,2.617617,-0.924026,0.652346,2.629858,0.304073,-5.356134,0.52606,0.46786,1.664121,0.550999,...,2.414521,-0.107721,-0.661561,-2.636892,-1.109693,0.359117,-1.182784,-1.648104,0.7139,1.753632
2018-01-03,-2.346082,-1.027965,1.32511,3.547902,3.533142,-0.010182,-3.707546,-0.376756,-0.552376,-1.44166,...,0.563521,-0.202065,3.103828,-2.702157,-1.454736,0.858888,-1.388349,2.304567,-2.244748,0.863345
2018-01-04,1.345747,2.661427,2.02867,-0.575971,-2.096493,2.94054,3.307477,1.562727,0.999004,0.419512,...,-2.130224,1.784122,-0.804171,0.709902,0.771744,-1.388993,0.913326,-0.73308,-3.13946,-1.182151
2018-01-05,-1.819045,1.636994,-0.313652,1.11526,0.154416,-1.318836,-0.587513,-1.22716,0.57919,2.62204,...,0.140813,0.223872,0.813251,-1.462279,0.797072,-1.886018,2.467804,1.277751,4.153122,1.002937


In [25]:
# Persist the sample forecasts (period index + one column per asset) next to the notebook.
forecasts.to_csv(path_or_buf='forecast_example_data.csv')