TODO:
- Figure out how to properly implement rebalancing
- Add user input reading to determine parameters

## Imports

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import datetime
import seaborn as sns
import math

### Parameters

In [3]:
# Tickers of ETFS and their portfolio weighings
products = {
          "SPY":0.6,    #SP500
          "QQQ":0.0,   #NASDAQ100
          "IWM":0.0,    #Russel2000
           #Bonds
          "BIL":0.0,     #0-3 month 
          "SHY":0.4,    #1-3 yr 
          "IEF":0.0,      #7-10yr
          "TLT":0.0       #20+yr
}
# Start and end dates for the test 
start_date = '2010-01-01'
end_date = '2020-01-01'

# Deposit frequency and amount
 # ["daily","weekly","monthly","quarterly","yearly"] -> [1,7,30,120,365]
frequency = 7  # Deposit frequency 
deposit = 100 # Deposit amount
  
# Rebalancing?
rebalancing = False
rebalancing_frequency = 120

### Function to build dataframe with required parameters

In [4]:
def create_df(products,start_date,end_date,frequency,deposit,rebalancing,rebalancing_frequency):
    # Get data from yahoo finance
    tickers_use = sorted([key for key,value in products.items() if value != 0])
    #dates = pd.date_range(start_date, end_date,frequency='B')

    data = yf.download(tickers_use, start=start_date, end=end_date)
    df = pd.DataFrame(data['Adj Close'])
    df.reset_index(inplace=True)

    # Add Transact, Deposit, Rebalancing Flags
    df['Deposit'] = 0
    df['Transact'] = False
    df['Rebalance'] = False


    for i in range(0,len(df),frequency):
        df.at[i,'Transact'] = True
        df.at[i,'Deposit'] = deposit

    for i in range(rebalancing_frequency,len(df),rebalancing_frequency):
        df.at[i,'Rebalance'] = True


    #Calculate amount of each instrument bought on given date
    units_bought = [f'{ticker}_units_bought' for ticker in tickers_use ]
    units_cum = [f'{ticker}_units_cum' for ticker in tickers_use ]
    units_value = [f'{ticker}_value' for ticker in tickers_use ]

    for i in range(len(units_bought)): # For each product
        p = tickers_use[i]
        weight = products[p] 
        df[units_bought[i]] =  df.Deposit * weight / df[tickers_use[i]]

        df[units_cum[i]] = df[units_bought[i]].cumsum()

        weight = products[tickers_use[i]] 
        df[units_value[i]] =  df[tickers_use[i]] * df[units_cum[i]]
        
        #if df['Rebalance'][i] == True: ## Commence rebalancing 

    #Portfolio value and cumulative deposits
    df['Portfolio_Value'] = df[units_value].sum(axis=1)
    df['Deposits_cum'] = df.Deposit.cumsum()
    df['OverallPnL%'] = (df['Portfolio_Value'] - df['Deposits_cum'])/ df['Deposits_cum']
    #Figure out smart way to do this
    df['DoDPnL%'] = 0
    df['DoDLogReturn'] = 0
    for i in range(1,len(df)):
        df.at[i,'DoDPnL%'] = (df.at[i,'Portfolio_Value'] - df.at[i-1,'Portfolio_Value']-df.at[i,'Deposit'])/df.at[i-1,'Portfolio_Value']
        df['DoDLogReturn'] =  np.log((df.at[i,'Portfolio_Value'] -df.at[i,'Deposit']) / (df.at[i-1,'Portfolio_Value']-df.at[i,'Deposit']))
    
    return df
    

In [5]:
df = create_df(products,start_date,end_date,frequency,deposit,rebalancing,rebalancing_frequency)

[*********************100%***********************]  2 of 2 completed


In [6]:
df.tail()

Unnamed: 0,Date,SHY,SPY,Deposit,Transact,Rebalance,SHY_units_bought,SHY_units_cum,SHY_value,SPY_units_bought,SPY_units_cum,SPY_value,Portfolio_Value,Deposits_cum,OverallPnL%,DoDPnL%,DoDLogReturn
2511,2019-12-24,82.794525,307.631134,0,False,False,0.0,183.802342,15217.827627,0.0,142.526135,43845.476665,59063.304292,35900,0.645217,5.4e-05,0.00177
2512,2019-12-26,82.823921,309.268768,0,False,False,0.0,183.802342,15223.230691,0.0,142.526135,44078.882349,59302.113041,35900,0.651869,0.004043,0.00177
2513,2019-12-27,82.882713,309.192047,100,True,False,0.48261,184.284952,15274.036819,0.194054,142.72019,44127.947575,59401.984394,36000,0.650055,-2e-06,0.00177
2514,2019-12-30,82.912071,307.487457,0,False,False,0.0,184.284952,15279.44704,0.0,142.72019,43884.668189,59164.115229,36000,0.643448,-0.004004,0.00177
2515,2019-12-31,82.902283,308.234436,0,False,False,0.0,184.284952,15277.643165,0.0,142.72019,43991.277139,59268.920304,36000,0.646359,0.001771,0.00177


### Calculate Performance Metrics

Using Portfolio log returns

In [7]:
r = df.DoDLogReturn.to_numpy()
std = np.sqrt(np.sum(np.square(r)))
std /= np.sqrt(251)
std

0.0056034824620875565

#### Portfolio Volatility as weighted sum of individual securities
- This assumes the portfolio weights hold up

Plotting and stuff