# Backtesting Strategy 1 - Use and throw monthly

In [2]:
import numpy as np
import pandas as pd 
import yfinance as yf
import matplotlib.pyplot as plt
import datetime as dt
import copy

### KPI Metrics

These are tweaked and slightly different from Performance measurement.ipynb
Best Strategy after a lot to trial and error is to write CAGR, volatility nd other kpi calculations related to your strategy. Usign generic functions at the present time will cause a lot of error.

In [3]:
def CAGR(DF, col, normalizeTime):
    df = DF.copy()
    cumret = (1+df[col]).cumprod()
    # print(cumret)
    n = len(df)/normalizeTime 
    CAGR = (cumret.iloc[-1])**(1/n) - 1
    return CAGR

In [4]:
def volatility(DF, col, normalizeTime):
    df = DF.copy()
    v = df[col].std() * np.sqrt(normalizeTime)
    return v

In [5]:
def sharpe(stock, col, normalizeTime, rf = 0.03):
    rp = CAGR(stock, col, normalizeTime)
    σ_p = volatility(stock, col, normalizeTime) 
    return (rp - rf)/σ_p  # x - μ/σ style 

In [6]:
# for sortino, we need to remove positive volatility 
# we use regular cagr for sortino. onpy σ changes

def sortino(df, col,normalizeTime, rf = 0.03):
    returns = df[col].pct_change()
    returns_neg = np.where(returns < 0 ,returns, 0)
    returns_neg = pd.Series(returns_neg[returns_neg!=0]) # no need to convert to nan values 
    rp = CAGR(df, col, normalizeTime) 
    σ_p = returns_neg.std() * np.sqrt(normalizeTime)
    return (rp - rf)/σ_p

In [7]:
def max_drawdown(df, col):
    # returns = df[col].pct_change()
    cum_returns = (returns + 1).cumprod()
    cum_roll_max = cum_returns.cummax()
    drawdown = cum_roll_max - cum_returns # wow why does this work?
    drawdown /= cum_roll_max
    return drawdown.max()

In [8]:
def calmar(DF, col, normalizeTime):
    return CAGR(DF, col, normalizeTime)/max_drawdown(DF, col)

---

## Get Data from yfinance 

In [9]:
# this is present day dow30. How about 5 years ago? 
# assume that this is the stock that was there 5 years ago. Not true 

### Survivorship bias ### 

dow30 = ["AMZN", "AXP", "AMGN",
        "AAPL","BA","CAT",
        "CSCO", "GS", "HD", "CVX",
        "HON","IBM","INTC","JNJ","KO",
        "JPM","MCD","MMM","MRK","MSFT",
        "NKE","PG","TRV","UNH","CRM",
        "VZ","V","WMT","DIS","DOW"]
tickers = dow30
print(len(dow30))

30


In [11]:
drop = []
ohlcv_mon = {}
attempt = 0

while len(tickers)>0 and attempt <= 5:
    tickers = [j for j in tickers if j not in drop]
    for i, ticker in enumerate(tickers):
        try:
            ohlcv_mon[ticker] = yf.download(tickers = ticker,
                                            start = dt.datetime.today() - dt.timedelta(365),   #5*365),
                                            end = dt.datetime.today())
            ohlcv_mon[ticker].dropna(inplace = True)
            drop.append(ticker)
        except:
            print(f" Failed to fetch data for ticker {ticker}. retrying ...")
            continue 
    attempt+=1

tickers = ohlcv_mon.keys()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [12]:
test_stock = "MSFT"
ohlcv_mon[test_stock].describe() # try 

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,250.0,250.0,250.0,250.0,250.0,250.0
mean,374.03552,377.044841,370.76676,374.08772,372.660456,23235860.0
std,38.445272,38.393518,38.329401,38.614518,39.260346,9127292.0
min,310.98999,314.299988,309.450012,312.140015,310.372955,10176600.0
25%,334.419991,337.474998,331.247513,334.344994,332.074242,17708350.0
50%,373.644989,375.944992,370.649994,374.375,373.011154,21183700.0
75%,410.475006,413.709991,406.644997,410.490005,409.691742,26181680.0
max,440.850006,443.399994,439.369995,442.570007,442.570007,78478200.0


In [13]:
ohlcv = copy.deepcopy(ohlcv_mon)

---

# Backtesting Start 

### Calculate Monthly returns

It is a good habit to use print which ticker we are using and parsing. 

In [14]:
monthly_returns = pd.DataFrame()
for ticker in tickers:
    print("Ticker : {}".format(ticker))
    ohlcv[ticker]["mon_ret"] = ohlcv[ticker]["Adj Close"].pct_change()
    monthly_returns[ticker] = ohlcv[ticker]["mon_ret"]
monthly_returns.dropna(inplace = True)

Ticker : AMZN
Ticker : AXP
Ticker : AMGN
Ticker : AAPL
Ticker : BA
Ticker : CAT
Ticker : CSCO
Ticker : GS
Ticker : HD
Ticker : CVX
Ticker : HON
Ticker : IBM
Ticker : INTC
Ticker : JNJ
Ticker : KO
Ticker : JPM
Ticker : MCD
Ticker : MMM
Ticker : MRK
Ticker : MSFT
Ticker : NKE
Ticker : PG
Ticker : TRV
Ticker : UNH
Ticker : CRM
Ticker : VZ
Ticker : V
Ticker : WMT
Ticker : DIS
Ticker : DOW


In [15]:
monthly_returns

Unnamed: 0_level_0,AMZN,AXP,AMGN,AAPL,BA,CAT,CSCO,GS,HD,CVX,...,NKE,PG,TRV,UNH,CRM,VZ,V,WMT,DIS,DOW
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-21,-0.007553,-0.008795,-0.006781,-0.005675,-0.001366,0.001368,-0.013579,-0.017084,-0.002355,0.007809,...,0.004017,0.008639,-0.001538,0.015680,-0.034014,0.000559,0.003444,0.001946,-0.012368,-0.000770
2023-06-22,0.042618,0.002025,0.011540,0.016525,-0.030507,-0.017144,0.005310,-0.016889,0.001596,-0.014463,...,0.004365,0.003413,-0.001597,0.004824,0.017654,0.000558,0.005017,0.008352,-0.001692,-0.009445
2023-06-23,-0.006300,-0.001129,-0.011103,-0.001711,-0.000973,-0.012219,-0.015454,-0.015239,-0.001228,-0.008451,...,-0.008510,-0.009936,-0.007371,-0.004279,-0.015003,-0.009484,0.005079,-0.001862,-0.004407,-0.000584
2023-06-26,-0.015464,-0.005712,-0.010480,-0.007553,0.000828,0.010706,-0.002384,-0.007467,0.023536,0.017575,...,0.020270,0.001010,-0.007656,0.004319,-0.012994,0.016897,-0.014158,-0.002637,0.006810,0.011487
2023-06-27,0.014529,0.006463,-0.009433,0.015059,0.018727,0.015784,0.012946,0.003490,0.019000,-0.003117,...,0.016735,0.009488,0.001160,0.007306,0.007041,0.012185,0.004596,-0.003612,0.004059,0.011935
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-10,0.014976,-0.000989,-0.000984,-0.019148,-0.000631,0.002037,0.000218,-0.002990,0.016910,0.004229,...,-0.005697,0.001736,0.005191,0.008784,-0.000041,-0.012946,-0.013026,0.016393,0.011818,0.005742
2024-06-11,0.000909,-0.034030,-0.013192,0.072649,-0.024300,-0.006978,-0.001745,-0.020461,0.009502,-0.001595,...,-0.001354,0.000777,-0.016854,0.002465,-0.003515,0.000247,-0.001345,-0.003435,-0.018201,0.001784
2024-06-12,-0.001816,-0.002182,0.001264,0.028578,-0.015256,0.004338,-0.003277,0.009971,0.025080,-0.014506,...,-0.022843,-0.013733,-0.010267,-0.006348,-0.021536,-0.016081,-0.015837,-0.006294,-0.000694,0.001781
2024-06-13,-0.016373,-0.008257,-0.008569,0.005491,-0.010784,-0.007210,-0.002192,-0.005237,0.010868,-0.008948,...,0.005337,0.007446,-0.000917,0.008579,-0.028711,0.000251,0.003218,0.005881,-0.007044,-0.003378


In [16]:
def pflio(DF, m = 6, drop = 3):

    df = DF.copy()
    portfolio = []
    returns = [0] 
    for i in range(len(df)):
        sorted_stocks = df.iloc[i,:].sort_values(ascending = False).index.values.tolist()
        pick = m - len(portfolio) # m - drop
        good_stocks = sorted_stocks[:pick]
        if(len(portfolio) == 0):
            portfolio = good_stocks
        else:
            sorted_portfolio = df[portfolio].iloc[1,:].sort_values(ascending = True).index.values.tolist()
            portfolio = sorted_portfolio[:drop] + good_stocks[:drop]
            returns.append(df[portfolio].iloc[i,:].mean())
            # print(*portfolio, ':', returns[-1])
        
    return pd.DataFrame(returns, columns = ["mon_ret"])
    

In [17]:
returns = pflio(monthly_returns, 6, 3)
print(CAGR(returns, "mon_ret", 12))
print(volatility(returns, "mon_ret", 12))
sharpe(returns, "mon_ret", 12)

0.058911585771860064
0.057868078496774784


0.4996119885589673

# Intraday trading strategies - Strategy 2

Intraday vs long term trading strategies exist. Choose your poison. Also, we can use these strategies to design long term strategies... 

## Resistance Breakout level 
1. price of the stock has breached a resistance price. We may define an upper and lower bound for the stock. These can be called resistance line and support line respectively. Bouncing b/w resistance and support lines. Stock will most likely go on an uptrend when resistance is breached. 
2. We need to choose high volume high activity stocks.
3. 1. Pre market movers - look at those.
4. Define breakout rule
5. define stop losses/exit strategy

alpha_vantage library for intraday - used

## Get Data from Alpha Vantage

In [8]:
from alpha_vantage.timeseries import TimeSeries
import pandas as pd 
import time

In [19]:
key_path = "/Users/achu/Documents/Quant/alpha_vantage_api-key.txt"
with open(key_path) as f:
    key = f.readlines()[1][:-1] # 2nd line contains the keypath, remove '\n'

In [None]:
ohlcv_intraday = copy.deepcopy(ohlcv_data)
tech_stocks = all_tickers

for ticker in ohlcv_intraday:
    print("Ticker : ", ticker)
    ohlcv_intraday[ticker].columns = ["Open","Low","High","Close","Volume"]
    ohlcv_intraday[ticker] = ohlcv_intraday[ticker].between_time("09:35", "16:00")

In [None]:
ohlcv_intraday["AAPL"]

In [None]:
class Resistance:
    
    def ATR(DF, n):
        " function to calculate true range and average true range "
        df = DF.copy()
        h_l = abs(df["High"] - df["Low"])
        h_pc = abs(df["High"] - df["Close"].shift(1))
        l_pc = abs(df["Low"] - df["Close"].shift(1))
        true_range = pd.DataFrame([h_l, h_pc, l_pc]).max(axis = 1, skipna = False)
        atr = true_range.rolling(n).mean()
        return atr
        
    def CAGR(DF, timescale):
        "function to calculate the cumulative annual growth rate of a trading strategy"
        df = DF.copy()
        cum_return = (1+df["ret"]).cumprod()
        n = len(df)/timescale 
        cagr = (cum_return.tolist()[-1])**(1/n) - 1
        return cagr 

    def Volatility(DF, timescale):
        "function to calculate annualized volatility "
        df = DF.copy()
        vol = df["ret"].std() * np.sqrt(timescale)
        return vol 

    def Sharpe(DF, timescale, rf = 0.03):
        "function to calculate sharpe ratio"
        df = DF.copy()
        vol = df["ret"].std() * np.sqrt(timescale)
        cagr = Resistance.CAGR(DF, timescale)
        return cagr - rf/vol

    def max_dd(DF, timescale):
        "maximum drawdown"
        df = DF.copy()
        cum_return = (1+df["ret"]).cumprod()
        cum_roll_max = cum_return.cummax()
        drawdown = cum_roll_max - cum_return 
        return drawdown.max() 


In [None]:
# initialize all data structures 

# params 
intraday_timescale = 252*78
ticker_signal = dict()
ticker_ret = dict()

for ticker, data in ohlcv_intraday.items():
    print("Calculating rolling max price and ATR for ticker : ", ticker)
    data["ATR"] = Resistance.ATR(data, 20) # 20 day moving average 
    data["roll_max_cp"] = data["High"].rolling(20).max()
    data["roll_min_cp"] = data["Low"].rolling(20).min()
    data["roll_max_vol"] = data.Volume.rolling(20).max() # try syntax
    
    ticker_signal[ticker] = ""
    ticker_ret[ticker] = [0]


In [None]:
ticker_signal

### Identifying the signals and calculating the daily returns (stop loss factored in) 

In [None]:
for ticker, stock_data in ohlcv_intraday.items():
    print("Calculating returns for ", ticker)
    nrows = len(stock_data)
    for i in range(1,nrows):
        prev_row = stock_data.iloc[i-1]
        row = stock_data.iloc[i]
        
        if ticker_signal[ticker] == "" :
            ticker_ret[ticker].append(0)
            if row["High"] >= row["roll_max_cp"] and row["Volume"] >= row["roll_max_vol"]:
                ticker_signal[ticker] = "Buy"
            elif row["Low"] <= row["roll_min_cp"] and row["Volume"] > 1.5*prev_row["roll_max_vol"]:
                ticker_signal[ticker] = "Sell"

        elif ticker_signal[ticker] == "Buy":
            if row["Low"] < row["Close"] - row["ATR"]:
                ticker_signal[ticker] = ""
                ticker_ret[ticker].append(row["Close"]/prev_row["Close"] - 1)
            elif row["Low"] <= row["roll_min_cp"] and row["Volume"] > 1.5*prev_row["roll_max_vol"]:
                ticker_signal[ticker] = "Sell"
                ticker_ret[ticker].append(row["Close"]/prev_row["Close"] - 1)
            else:
                ticker_ret[ticker].append(row["Close"]/prev_row["Close"] - 1)

        elif ticker_signal[ticker] == "Sell":
            if row["High"] > prev_row["Close"] + prev_row["ATR"]:
                ticker_signal = ""
                ticker_ret[ticker].append(row["Close"]/prev_row["Close"] - 1)
            elif row["High"] > row["roll_max_cp"] and roll["Volume"] > 1.5 * prev_row["Volume"]:
                ticker_signal[ticker] = "Buy"
                ticker_ret[ticker].append(row["Close"]/prev_row["Close"] - 1)
            else:
                ticker_ret[ticker].append(row["Close"]/prev_row["Close"] - 1)

    ohlcv_intraday[ticker]["ret"] = np.array(ticker_ret[ticker])

In [None]:
# calculate strategy using KPI

strategy_df = pd.DataFrame()
for ticker in ohlcv_intraday:
    strategy_df[ticker] = ohlcv_intraday[ticker]["ret"]
strategy_df["ret"] = strategy_df.mean(axis =  1)

Resistance.CAGR(strategy_df,  intraday_timescale)
Resistance.Sharpe(strategy_df, intraday_timescale, rf = 0.025)
Resistance.max_dd(strategy_df, intraday_timescale)

In [None]:
(1+strategy_df["ret"]).cumprod().plot()

On kichu's birthday, the strategy does exponetially good return, otherwise it is a very bad, losing strategy. 

# BackTesting Strategy III - Renko and OBV 

OBV : On Balance Volume - Volume signed on the daily return. If daily return is negative, volume becomes negative. 
Use renko from stocktrends 


In [2]:
from stocktrends import Renko
from alpha_vantage.timeseries import TimeSeries
import statsmodels.api as sm 

In [3]:
class Robv:
    "Renko - OBV strategy"

    def ATR(DF, n, timescale):
        df = DF.copy()
        h_l = abs(df["High"] - df["Low"])
        h_pc = abs(df["High"] - df["Close"].shift(1))
        l_pc = abs(df["Low"] - df["Close"].shift(1))
        tr = pd.DataFrame([h_l, h_pc, l_pc]).max(axis = 1)
        atr = tr.rolling(n).mean()
        return atr 
        
    def CAGR(DF, timescale):
        df = DF.copy()
        cum_ret = (1+ df["ret"]).cumprod()
        n = len(df)/timescale
        cagr = (cum_ret.tolist()[-1])**(1/n) - 1
        return cagr 

    def volatility(DF, timescale):
        df = DF.copy()
        v = df["ret"].std() * np.sqrt(timescale)
        return v 

    def sharpe(DF, timescale, rf):
        cagr = Robv.CAGR(DF, timescale)
        v = Robv.volatility(DF, timescale)
        return (cage - rf)/v

    def max_dd(DF, timescale):
        "max drawdown"
        cum_ret = (1+df["ret"]).cumprod()
        cum_max = cum_ret.cummax()
        dd = (cum_max - cum_ret)/cum_max
        return dd.max() 

    def slope(ser, n):
        "function to calculate the slope of n consecutive slopes on a plot "
        slopes = [0]*range(n-1) # why?
        for i in range(1, len(ser) + 1):
            y = ser[i-n: i]
            x = np.arange(n)
            y_scaled = (y - y.min())/(y.max() - y.min())
            x_scaled = (x - x.min())/(x.max() - x.min())
            model = sm.OLS(y_scaled, x_scaled)  # REMEMBER THIS - SLOPE OF A LINE from statsmodels  
            results = model.fit() # Same is used for machine learning 
            slopes.append(result.params[-1])
        slopes_angle = np.rad2deg(np.arctan(np.array(slopes)))
        return slopes_angle

    def renko_DF(DF): # HARDHARDHARD 
        "function to convert ohlc data into renko bricks"
        df = DF.copy()
        df.reset_index(inplace = True)
        df = df.iloc[:,list(range(6))]
        df.columns = ["date","open","high","low","close","volume"]
        df2 = Renko(df)
        df2.brick_size = max(0.5, round(ATR(df, 120, 252*78)[-1],
                                       0))
        renko_df = df2.get_bricks()
        renko_df["bar_num"] = np.where(renko_df["uptrend"] == True, 1,
                                      np.where(renko_df["uptrend"] == False, -1, 0)
                                      )

        # cumulative sum?
        for i in range(1, len(renko_df["bar_num"])):
            if renko_df["bar_num"][i] > 0 and renko_df["bar_num"][i - 1] > 0: 
                renko_df["bar_num"][i] += renko_df["bar_num"][i-1]
            elif renko_df["bar_num"][i] < 0 and renko_df["bar_num"][i] < 0 : 
                renko_df["bar_num"][i] += renko_df["bar_num"][i-1]
        renko_df.drop_duplicates(subset = "date", keep = "last", inplace = True)
        return renko_df 

    def OBV(DF):
        "function to calculate on balance volume "
        direction = np.where(df["ret"] > 0, 1, -1)
        direction[0] = 0 # why??
        obv = (direction * df["volume"]).cumsum()
        return obv 
        

In [4]:
tickers = ["MSFT","AAPL","META","AMZN","CSCO","INTC","VZ","IBM","QCOM","LYFT"]
ohlc_intraday = {} 
key = open("./alpha_vantage_api-key.txt").readlines()[-1][:-1] 
# print(key)

In [5]:
# GET DATA alphavantage intraday.

ts = TimeSeries(key = key, output_format = "pandas")
attempt = 0
drop = []

while len(tickers)!=0 and attempt <=3:
    ticker = [j for j in tickers if j not in drop]
    for i in range(len(tickers)):
    # try:
        ohlc_intraday[tickers[i]] = ts.get_intraday(symbol = tickers[i],
                                                   interval = "5min", 
                                                   outputsize = "full")[0]
        ohlc_intraday[tickers[i]].columns = ["Open","High","Low","Adj Close","Volume"]
        drop.append(tickers[i])
        # except:
        #     print(f"{ticker[i]} : failed to fetch data. Retrying.. ")
        #     continue
    attempt += 1

tickers = ohlc_intraday.keys()

ValueError: Thank you for using Alpha Vantage! Our standard API rate limit is 25 requests per day. Please subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly remove all daily rate limits.