# How to make new data for unit testing

(Almost) everything is contained in the DataMaker class

In [5]:
import DataHelper
import pandas as pd
import numpy as np

Three variables are needed to create a new row:

```python
holding_asset_row = {'caldt':year, 'cash':100*pct, 'equity':100*pct, 'bond':100*pct, 'security':100*pct}
daily_returns_ts = pd.Series(daily_returns_series, index=datetime_axis)
fund_mrnstar_row = {'caldt':year, 'lipser_class_name':string}
```

Example:

In [8]:
# Example holdings row for year 2020; the four asset percentages sum to 100.
holding_asset_row = dict(caldt=2020, cash=50, equity=25, bond=12.5, security=12.5)

In [9]:
# Nine fake daily returns in [0, 1), indexed by 2020-10-01 .. 2020-10-09.
# The generator is seeded so the series -- and every output derived from it --
# is identical on each Restart & Run All. Re-run the notebook to refresh any
# stored outputs that were produced before the seed was introduced.
daily_returns_ts = pd.Series(
    np.random.default_rng(2020).random(9),
    index=pd.to_datetime([f"2020-10-0{i}" for i in range(1, 10)]),
)
daily_returns_ts

2020-10-01    0.632268
2020-10-02    0.805323
2020-10-03    0.754230
2020-10-04    0.836431
2020-10-05    0.737519
2020-10-06    0.317066
2020-10-07    0.589445
2020-10-08    0.425249
2020-10-09    0.948570
dtype: float64

In [10]:
# Example classification row for year 2020; the class name is a placeholder.
# NOTE(review): the displayed cache.fund_mrnstar output contains an empty
# `lipper_class_name` column next to `lipser_class_name`, so this key looks
# like a typo for 'lipper_class_name' -- confirm against DataMaker before changing.
fund_mrnstar_row = dict(caldt=2020, lipser_class_name='No one cares')

In [13]:
# Second example fund: 100% cash in 2020.
holding_asset_row_2 = {'caldt':2020, 'cash':100, 'equity':0, 'bond':0, 'security':0}
# Nine fake daily returns in [0, 1) for 2020-10-01 .. 2020-10-09. A fixed seed
# keeps them reproducible across kernel restarts (re-run the notebook to
# refresh stored outputs produced before the seed was introduced).
daily_returns_ts_2 = pd.Series(
    np.random.default_rng(2021).random(9),
    index=pd.to_datetime([f"2020-10-0{i}" for i in range(1, 10)]),
)
# NOTE(review): 'lipser_class_name' looks like a typo for 'lipper_class_name'
# (the cache.fund_mrnstar output shows both columns) -- confirm against DataMaker.
fund_mrnstar_row_2 = {'caldt':2020, 'lipser_class_name':'No one cares'}

# There are 3 ways to add fake data

## 1. Add one at a time

In [15]:
# Obtain a maker, then register the two example funds with one call each.
maker = DataHelper.get_data_maker()

# Each fund is the (holding row, daily returns series, classification row) triple.
first_fund = (holding_asset_row, daily_returns_ts, fund_mrnstar_row)
second_fund = (holding_asset_row_2, daily_returns_ts_2, fund_mrnstar_row_2)

maker.add_fake_fund(*first_fund)
maker.add_fake_fund(*second_fund);  # trailing `;` hides the returned value

## 2. Bulk add by list of 3-tuples

In [16]:
# Obtain a maker, then hand it every fund at once as a list of
# (holding row, daily returns series, classification row) tuples.
maker = DataHelper.get_data_maker()

fake_funds = [
    (holding_asset_row, daily_returns_ts, fund_mrnstar_row),
    (holding_asset_row_2, daily_returns_ts_2, fund_mrnstar_row_2),
]
maker.bulkadd_fake_fund(fake_funds)

## 3. (Not important) Builder Design Pattern

In [19]:
maker = DataHelper.get_data_maker()

# add_fake_fund returns the maker itself, so the calls can be chained one
# after another. The trailing `;` hides the returned value.
(
    maker
    .add_fake_fund(holding_asset_row, daily_returns_ts, fund_mrnstar_row)
    .add_fake_fund(holding_asset_row_2, daily_returns_ts_2, fund_mrnstar_row_2)
);

# Once every fake fund has been added, convert to an actual DataCache

In [21]:
# Year passed to the conversion; it matches the `caldt` of the example rows.
clustering_year = 2020
preprocessor = DataHelper.get_data_preprocessor()

# Turn the maker's accumulated fake funds into the cache object inspected below.
cache = maker.convert_to_data_cache(preprocessor, clustering_year)

# Expected results

In [22]:
# Daily returns table: one row per date, one column per fake fund
# (columns 0 and 1 in the output below).
cache.returns

Unnamed: 0,0,1
2020-10-01,0.632268,0.967424
2020-10-02,0.805323,0.178519
2020-10-03,0.75423,0.58371
2020-10-04,0.836431,0.903183
2020-10-05,0.737519,0.841046
2020-10-06,0.317066,0.202601
2020-10-07,0.589445,0.786422
2020-10-08,0.425249,0.164523
2020-10-09,0.94857,0.958079


In [23]:
# Cumulative returns per fund. The displayed values are consistent with a
# running product of (1 + daily return), e.g. 1.632268 * 1.805323 ~= 2.946772.
cache.cumul_returns

Unnamed: 0,0,1
2020-10-01,1.632268,1.967424
2020-10-02,2.946772,2.318647
2020-10-03,5.169315,3.672064
2020-10-04,9.493091,6.98861
2020-10-05,16.494422,12.866349
2020-10-06,21.724237,15.473083
2020-10-07,34.529482,27.641462
2020-10-08,49.213101,32.189118
2020-10-09,95.895189,63.028839


In [24]:
# Holdings table: one row per fake fund. In the output below each fund has a
# `crsp_fundno` (0, 1) and the input year 2020 appears as the date 2020-12-31.
cache.holding_asset

Unnamed: 0,crsp_fundno,caldt,cash,equity,bond,security
0,0,2020-12-31,50,25,12.5,12.5
1,1,2020-12-31,100,0,0.0,0.0


In [25]:
# List of asset-type names; the output matches the four percentage keys of the
# holdings rows (cash, equity, bond, security).
cache.asset_type

['cash', 'equity', 'bond', 'security']

In [26]:
# Classification table: one row per fake fund.
# NOTE(review): the output has an empty `lipper_class_name` column alongside
# `lipser_class_name`; the 'lipser_class_name' key used in the input rows looks
# like a typo for 'lipper_class_name' -- confirm against DataMaker.
cache.fund_mrnstar

Unnamed: 0,crsp_fundno,caldt,lipper_class_name,lipser_class_name
0,0,2020-12-31,,No one cares
1,1,2020-12-31,,No one cares


In [27]:
# Dict keyed by fund number (presumably fund number -> ticker; verify against
# DataCache). For the fake funds the output maps each number to itself.
cache.fundno_ticker

{0: 0, 1: 1}