In [23]:
#import libs

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Turn off the warning
pd.options.mode.chained_assignment = None  # default='warn'

In [24]:
#Function to import OLHCV data using yfinance, drop unwanted columns
def import_stonk(ticker_name,start,end):  
    ticker = yf.Ticker(ticker_name)
    df = ticker.history(start=start, end=end,autoadjust=True,interval="1d")
    df.drop(columns=['Dividends','Stock Splits','High','Low','Volume'],inplace=True)
    return df, ticker_name

In [25]:
#Converts the dataframe to a np array, to speed things up!
def convert_to_array(df,ticker_name):
    df.reset_index(drop=True, inplace=True)
    columns = list(df.columns)
    array = df.to_numpy()
    return array, ticker_name

In [26]:
# funtion to calculate an EMA of given window and adds to array in nth +1 position
def numpy_ewma_vectorized_v2(array, window):
    alpha = 2 /(window + 1.0)
    alpha_rev = 1-alpha
    n = array.shape[0]
    pows = alpha_rev**(np.arange(n+1))
    scale_arr = 1/pows[:-1]
    offset = array[0]*pows[1:]
    pw0 = alpha*alpha_rev**(n-1)
    mult = array*pw0*scale_arr
    cumsums = mult.cumsum()
    out = offset + cumsums*scale_arr[::-1]
    return out

In [27]:
#Function to adds the MACD and MACD signal in the nth + 1 and nth + 2 columns
def add_MACD(ticker_name,array,close_price_column,short_period,long_period,signal_period):
    ewma_short = numpy_ewma_vectorized_v2(array[:,close_price_column], short_period)
    ewma_long = numpy_ewma_vectorized_v2(array[:,close_price_column], long_period)
    MACD = ewma_long - ewma_short
    MACD_signal = numpy_ewma_vectorized_v2(MACD, 6)
    array = np.column_stack((array,MACD,MACD_signal))
    return array, ticker_name

In [28]:
#Function to convert the MACD and MACD signals into a single trading signal
def add_trading_signal(array,MACD_column,MACD_signal_column,ticker_name,weighting_list):
    
    signal_less_MACD = array[:,MACD_signal_column] - array[:,MACD_column]
    signal_less_MACD = signal_less_MACD/(np.amax(signal_less_MACD)-np.amin(signal_less_MACD))
    signal_less_MACD_diff = np.diff(signal_less_MACD,n=1)
    signal_less_MACD_diff = np.insert(signal_less_MACD_diff,0,0,axis=0)
    MACD_norm = array[:,MACD_column]/(np.amax(array[:,MACD_column])-np.amin(array[:,MACD_column]))
    trading_signal = (weighting_list[0]*MACD_norm - 
                      weighting_list[1]*signal_less_MACD + 
                      weighting_list[2]*signal_less_MACD_diff)
    array = np.column_stack((array,trading_signal))
    return array, ticker_name
    

In [46]:
#Function to calculate the returns in a trading period, returns the total profit/loss
def calculate_returns(array,ticker_name, buy_threshold, open_column,signal_column,hold_period):
    sig_up = array[:,signal_column]>buy_threshold
    sig_dwn = array[:,signal_column]<=buy_threshold
    
    price=np.insert(array[:,open_column],-1,0,axis=0)
    price=np.delete(price,0,0)

    forward_returns = np.diff(price,n=hold_period)
    forward_returns = np.insert(forward_returns,-1,0,axis=0)
    
    pnlx = (forward_returns*sig_up-forward_returns*sig_dwn).sum()

    return pnlx,price,ticker_name

In [47]:
#Test the functions with a single example
ticker_name = "GOLD"
start = "2017-01-01"
end = "2019-12-31"
close_price_column = 1
short_period=3
long_period = 24
signal_period = 6
split = 0.8
MACD_column = 2
MACD_signal_column = 3
weighting_list = [0.25,0.5,0.25]
buy_threshold = -0.1
open_column = 0
signal_column = 4
hold_period = 1

df, ticker_name = import_stonk(ticker_name,start=start,end=end)
array, ticker_name = convert_to_array(df,ticker_name)
array, ticker_name = add_MACD(ticker_name,array,close_price_column,short_period,long_period,signal_period)
array,ticker_name = add_trading_signal(array,MACD_column,MACD_signal_column,ticker_name,weighting_list)
pnlx,price, ticker_name = calculate_returns(array,ticker_name, buy_threshold, open_column,signal_column,hold_period)


print(pnlx)

-2.510198562631105


In [65]:
#Create function to optimise
def function_to_optimize(x, array):
    short_period = int(x[0])
    long_period =  int(x[1])
    signal_period = int(x[2])
    
    buynhold = array[-1,0] - array[0,0]
    
    MACD_weighting = 0.25
    signal_less_MACD_weighting = 0.5
    signal_less_MACD_diff_weighting = 0.25
    
    MACD_column = 2
    MACD_signal_column = 3
    buy_threshold = 0
    open_column = 0
    signal_column = 4
    hold_period = 1
    ticker_name = 'GOLD'

    array, ticker_name = add_MACD(ticker_name,array,close_price_column,short_period,long_period,signal_period)
    array,ticker_name = add_trading_signal(array,MACD_column,MACD_signal_column,ticker_name,weighting_list)
    pnlx,price, ticker_name = calculate_returns(array,ticker_name, buy_threshold, open_column,signal_column,hold_period)
    
    return  pnlx.sum()/buynhold

#Function to run optimisation
def optimize_periods(ticker_name, start, end,short_range, long_range, signal_range):
    
    df, ticker_name = import_stonk(ticker_name,start,end)
    array, ticker_name = convert_to_array(df,ticker_name)
    
    #generate MACD's to try
    test_periods = []
    for i in range(short_range[0],short_range[1]+1):
        for j in range(long_range[0],long_range[1]+1):
            for k in range(signal_range[0],signal_range[1]+1):
                if i >= j:
                    continue
        
                test_periods.append([i,j,k])
        
    return_ratios = []
    
    for i in range(len(test_periods)):
        return_ratio = function_to_optimize(test_periods[i], array)
        #print(test_periods[i], return_ratio)
        return_ratios.append(return_ratio)
    
    best_period = test_periods[return_ratios.index(max(return_ratios))]
    best_ratio = max(return_ratios)
    return best_period, best_ratio


In [60]:
#test optimizer with single stock
ticker_name = 'GOLD'
start = "2015-01-01"
end = "2015-12-31"
short_range = [3,12]
long_range = [5,30]
signal_range = [3,10]

optimize_periods(ticker_name, start, end,short_range, long_range,signal_range)


([6, 7, 3], 0.06496060540651753)

In [62]:
#Import more libraries
%pylab inline --no-import-all
import bs4 as bs
import pickle
import requests
#import datetime as dt
#import os
#import pandas_datareader.data as web
#import pytz
#from pytz import timezone
#import warnings

#warnings.filterwarni#ngs('ignore')
#yf.pdr_override()

Populating the interactive namespace from numpy and matplotlib


In [63]:
#Function to grab all the SnP 500 tickers from wikipedia
def save_sp500_tickers():
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text,"lxml")
    table = soup.find('table',{'class':'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        tickers.append(ticker)
    tickers = list(map(lambda s: s.strip(), tickers))
    with open("sp500tickers.pickle","wb") as f:
        pickle.dump(tickers,f)
    
    return tickers

In [64]:
#Let's see what's in there...
tickers = save_sp500_tickers()
print(tickers)

['MMM', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI', 'ADBE', 'AMD', 'AAP', 'AES', 'AFL', 'A', 'APD', 'AKAM', 'ALK', 'ALB', 'ARE', 'ALXN', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'ABC', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'ANTM', 'AON', 'AOS', 'APA', 'AAPL', 'AMAT', 'APTV', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'BKR', 'BLL', 'BAC', 'BK', 'BAX', 'BDX', 'BRK.B', 'BBY', 'BIO', 'BIIB', 'BLK', 'BA', 'BKNG', 'BWA', 'BXP', 'BSX', 'BMY', 'AVGO', 'BR', 'BF.B', 'CHRW', 'COG', 'CDNS', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CTLT', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'CNC', 'CNP', 'CERN', 'CF', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CTXS', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CMA', 'CAG', 'CXO', 'COP', 'ED', 'STZ', 'COO', 'CPRT', 'GLW', 'CTVA', 'COST', 'CCI', 'CSX', 'CMI', 'CVS', 'DHI', 'DHR', 'DRI', 'DVA', 'DE', 'DAL'

In [67]:
#Loop the optimiser through the tickers and saving the result as a pickle file:
start = "2017-01-01"
end = "2019-12-31"
short_range = [3,15]
long_range = [5,30]
signal_range = [3,10]

optimal_MACD_dict = {}

for ticker in tickers:
    try:
        periods, return_ratio = optimize_periods(ticker, start,end,short_range, long_range,signal_range)
        optimal_MACD_dict[ticker]=[periods,return_ratio]
        print("{} complete".format(ticker))
    except KeyError: continue

pickle.dump( optimal_MACD_dict, open( "save.p", "wb" ) )
pd.DataFrame.from_dict(optimal_MACD_dict).head(10)

MMM complete
ABT complete
ABBV complete
ABMD complete
ACN complete
ATVI complete
ADBE complete
AMD complete
AAP complete
AES complete
AFL complete
A complete
APD complete
AKAM complete
ALK complete
ALB complete
ARE complete
ALXN complete
ALGN complete
ALLE complete
LNT complete
ALL complete
GOOGL complete
GOOG complete
MO complete
AMZN complete
AMCR complete
AEE complete
AAL complete
AEP complete
AXP complete
AIG complete
AMT complete
AWK complete
AMP complete
ABC complete
AME complete
AMGN complete
APH complete
ADI complete
ANSS complete
ANTM complete
AON complete
AOS complete
APA complete
AAPL complete
AMAT complete
APTV complete
ADM complete
ANET complete
AJG complete
AIZ complete
T complete
ATO complete
ADSK complete
ADP complete
AZO complete
AVB complete
AVY complete
BKR complete
BLL complete
BAC complete
BK complete
BAX complete
BDX complete
- BRK.B: No data found, symbol may be delisted
BBY complete
BIO complete
BIIB complete
BLK complete
BA complete
BKNG complete
BWA complete
B

Unnamed: 0,MMM,ABT,ABBV,ABMD,ACN,ATVI,ADBE,AMD,AAP,AES,...,WYNN,XEL,XRX,XLNX,XYL,YUM,ZBRA,ZBH,ZION,ZTS
0,"[3, 7, 3]","[3, 11, 3]","[4, 30, 3]","[4, 8, 3]","[3, 5, 3]","[4, 22, 3]","[4, 9, 3]","[3, 5, 3]","[10, 11, 3]","[15, 18, 3]",...,"[3, 12, 3]","[4, 5, 3]","[3, 5, 3]","[3, 5, 3]","[4, 6, 3]","[4, 12, 3]","[5, 28, 3]","[3, 5, 3]","[3, 14, 3]","[4, 7, 3]"
1,25.6987,3.92019,6.00162,11.6708,4.36939,5.72772,3.7344,2.94038,9.28387,0.789541,...,5.59976,0.0294714,4.77839,6.5649,6.65991,0.883095,0.511215,5.78094,11.9104,3.50079
