In [1]:
from datetime import datetime, timedelta
import multiprocessing as mp
import os
import pickle
import sys

import numpy as np
import pandas as pd
import yfinance as yf

In [2]:
# Base data path; in this notebook it is only referenced from commented-out
# code in process_batch (f'{DATA}/tmp').
DATA = './data'
# The day after today, as a datetime.date; passed as `end` to yf.download.
TOMORROW = (datetime.now() + timedelta(1)).date()
# Length of the download window, counted in 365-day years.
YEARS_OF_DATA = 50
# History requirement used by get_min_start(), counted in 365-day years.
MIN_YEARS = 10
# First day of the download window; passed as `start` to yf.download.
START = TOMORROW - timedelta(YEARS_OF_DATA * 365)

In [24]:
symbols = ['AAPL', 'JNJ', 'KO', 'TSLA', 'ADBL', 'ATX', 'BMC']

In [25]:
manual_symbols = ['JNJ', 'KO']

In [26]:
def get_min_start(end_date=None, min_years=None):
    """Return the latest date by which a symbol must already have data.

    The cutoff is ``min_years`` 365-day years before ``end_date``. If the
    cutoff lands on a Saturday or Sunday it is moved back to the preceding
    Friday.

    Args:
        end_date: Reference ``datetime.date``. Defaults to the module-level
            ``TOMORROW``.
        min_years: Required years of history. Defaults to the module-level
            ``MIN_YEARS``.

    Returns:
        A ``datetime.date`` that never falls on a Saturday or Sunday.
    """
    if end_date is None:
        end_date = TOMORROW
    if min_years is None:
        min_years = MIN_YEARS
    # must have at least `min_years` years of data
    min_start = end_date - timedelta(min_years * 365)
    # date.weekday() numbers Monday as 0 ... Friday as 4, Saturday as 5,
    # Sunday as 6, so anything above 4 is a weekend day.
    days_past_friday = min_start.weekday() - 4
    if days_past_friday > 0:
        min_start -= timedelta(days_past_friday)
    return min_start

In [27]:
min_start = get_min_start()
min_start

datetime.date(2014, 5, 2)

In [28]:
def process_batch(dfs, batch, manual_symbols, min_start):
    """Download one batch of symbols and append its Sharpe ratios to ``dfs``.

    The batch is downloaded, ``min_start`` is moved back to a date present in
    the downloaded index, symbols whose history starts after that date are
    dropped (except those in ``manual_symbols``), and the per-symbol Sharpe
    ratios are appended to ``dfs``.

    Failures are reported with ``print`` and leave ``dfs`` unchanged, so one
    bad batch does not abort the remaining batches.

    Args:
        dfs: List that accumulates one Sharpe-ratio Series per batch.
        batch: Sequence of ticker symbols to process together.
        manual_symbols: Symbols exempt from the minimum-history filter.
        min_start: Latest acceptable first-data date (``datetime.date``).

    Returns:
        The same ``dfs`` list that was passed in.
    """
    # Resolve the label once so the error handler cannot itself fail on an
    # empty batch.
    label = batch[0] if len(batch) > 0 else '<empty batch>'
    try:
        print(label, end='\r')
        data = download_data(batch)
        min_start = adjust_min_date(min_start, data.index)
        data = filter_by_min_date(data, manual_symbols, min_start)
        # Keep the result; previously it was computed and then discarded.
        dfs.append(get_sharpes(data))
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        print(f'Unexpected failure for batch {label}\n{e}')
    return dfs

In [29]:
def download_data(symbols):
    """Download adjusted-close prices for ``symbols`` between START and TOMORROW.

    The 'Adj Close' column group is selected from the ``yf.download`` result,
    the index is converted to datetimes and sorted, columns with no data at
    all are removed, and remaining gaps are forward-filled.

    Args:
        symbols: Ticker symbols passed straight to ``yf.download``.

    Returns:
        The cleaned price frame (one column per symbol that returned data),
        or ``None`` if anything failed; the failure is reported with
        ``print``.
    """
    try:
        data = (
            yf
            .download(symbols, start=START, end=TOMORROW)
            .rename(columns={'Adj Close': 'AdjClose'}))['AdjClose']
        data.index = pd.to_datetime(data.index)
        data = data.sort_index()
        # Drop symbols for which every row is missing (e.g. failed downloads).
        data = data.loc[:, data.isnull().sum() != len(data)]
        # ffill() replaces the deprecated fillna(method='ffill', inplace=True)
        # and avoids mutating a .loc slice in place.
        return data.ffill()
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        print(f'Failed to download data:\n{e}')
        return None

In [30]:
def adjust_min_date(min_date, dates):
    """Move ``min_date`` back one day at a time until it appears in ``dates``.

    Args:
        min_date: Starting ``datetime.date``; returned unchanged if it is
            already present in ``dates``.
        dates: Collection of timestamps to match against (for example the
            index of the downloaded price frame).

    Returns:
        The latest date on or before ``min_date`` that is present in
        ``dates``, with the same type as ``min_date``.

    Raises:
        ValueError: If ``dates`` is empty or holds no date on or before
            ``min_date``. Without this bound the backward walk would never
            find a match.
    """
    if len(dates) == 0:
        raise ValueError('dates is empty; cannot adjust min_date')
    earliest = pd.to_datetime(dates).min()
    while pd.to_datetime(min_date) not in dates:
        if pd.to_datetime(min_date) < earliest:
            raise ValueError(
                f'no date on or before the requested minimum; '
                f'earliest available is {earliest}')
        min_date -= timedelta(1)
    return min_date

In [31]:
symbols

['AAPL', 'JNJ', 'KO', 'TSLA', 'ADBL', 'ATX', 'BMC']

In [32]:
data = download_data(symbols)
data.head()

[*********************100%***********************]  7 of 7 completed

2 Failed downloads:
- BMC: No data found for this date range, symbol may be delisted
- ATX: No data found for this date range, symbol may be delisted


Unnamed: 0_level_0,AAPL,ADBL,JNJ,KO,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1974-05-13,,,0.779484,0.257509,
1974-05-14,,,0.788222,0.25691,
1974-05-15,,,0.791717,0.257509,
1974-05-16,,,0.791717,0.252718,
1974-05-17,,,0.784727,0.244334,


In [12]:
min_start = adjust_min_date(min_start, data.index)
min_start

datetime.date(2014, 5, 2)

In [13]:
def filter_by_min_date(data, manual_symols, min_date):
    """Drop columns whose first valid observation comes after ``min_date``.

    Args:
        data: Price frame with a datetime index and one column per symbol.
        manual_symols: Symbols that are always kept, whatever their history.
            (The parameter keeps its original spelling so existing keyword
            callers continue to work.) ``None`` is treated as empty.
        min_date: Latest acceptable date for a column's first non-null value.

    Returns:
        A new frame without the columns that start too late or contain no
        data at all. The input frame is not modified.
    """
    # Use the argument; the body previously read a global of a similar name,
    # so the value passed by the caller was silently ignored.
    exempt = set(manual_symols or ())
    cutoff = pd.to_datetime(min_date)
    too_short = []
    for col in data.columns:
        if col in exempt:
            continue
        first_valid = data[col].first_valid_index()
        # first_valid is None for an all-NaN column: no history at all.
        if first_valid is None or first_valid > cutoff:
            too_short.append(col)
    return data.drop(columns=too_short)

In [14]:
data = filter_by_min_date(data, manual_symbols, min_start)
data.head()

Unnamed: 0_level_0,AAPL,JNJ,KO,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1974-05-13,,0.779484,0.021601,
1974-05-14,,0.788222,0.02155,
1974-05-15,,0.791717,0.021601,
1974-05-16,,0.791717,0.021199,
1974-05-17,,0.784727,0.020495,


In [15]:
def get_sharpes(data):
    """Compute one Sharpe ratio per column of ``data``.

    Each column is converted to daily returns with ``get_daily_returns`` and
    reduced to a single number with ``get_sharpe``.

    Returns:
        A Series of Sharpe ratios indexed by the column names of ``data``.
    """
    tickers = list(data)
    ratios = [get_sharpe(get_daily_returns(data[ticker])) for ticker in tickers]
    return pd.Series(ratios, index=tickers)

In [16]:
def get_daily_returns(x):
    """Return simple period-over-period returns of the series ``x``.

    Each return is ``x[t] / x[t-1] - 1``. The result is one element shorter
    than ``x`` and is indexed by ``x.index[1:]``.
    """
    prices = np.asarray(x)
    # Element t of the result compares price t+1 against price t.
    changes = prices[1:] / prices[:-1] - 1
    return pd.Series(changes, index=x.index[1:])

In [17]:
def get_sharpe(returns, window=None):
    """Return the Sharpe ratio of ``returns``, scaled by ``sqrt(252)``.

    Args:
        returns: Series of periodic returns; NaN entries are ignored.
        window: If given and smaller than ``len(returns)``, only the last
            ``window`` entries are used. The window is applied before NaNs
            are removed. ``None`` uses every entry.

    Returns:
        ``sqrt(252) * mean / std`` of the selected non-NaN returns.
    """
    total = len(returns)
    span = total if window is None else window
    if total > span:
        returns = returns[total - span:total]
    valid = returns[~np.isnan(returns)]
    return np.sqrt(252) * valid.mean() / valid.std()

In [18]:
sharpes = get_sharpes(data)

In [19]:
sharpes

AAPL    0.611620
JNJ     0.579999
KO      0.767146
TSLA    0.877062
dtype: float64

In [20]:
s2 = pd.Series([0.1213, 0.2996], index=['X', 'Q'])
s2

X    0.1213
Q    0.2996
dtype: float64

In [21]:
sl = [sharpes, s2]

In [22]:
sh = pd.concat(sl)
sh

AAPL    0.611620
JNJ     0.579999
KO      0.767146
TSLA    0.877062
X       0.121300
Q       0.299600
dtype: float64

In [23]:
pd.DataFrame(sh.sort_values(ascending=False), columns=['sharpe'])

Unnamed: 0,sharpe
TSLA,0.877062
KO,0.767146
AAPL,0.61162
JNJ,0.579999
Q,0.2996
X,0.1213
