In [2]:
import warnings
# Silence every warning category for the rest of the session. This also hides
# deprecation notices emitted by the libraries used in later cells.
warnings.filterwarnings('ignore')

In [3]:
import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that some numeric stacks raise on import — confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

## Diversification of the portfolio

## Tickers

In [4]:
# Parse tickers.txt into a dict mapping each list name to its ticker symbols.
# Every non-blank line is expected to look like:
#     name = ['AAA', 'BBB', ...]
with open('tickers.txt', 'r') as file:
    lines = file.readlines()

portfolio = {}

for line in lines:
    line = line.strip()
    if not line:
        # Blank lines (e.g. spacing between groups) carry no assignment; skip
        # them instead of failing on the tuple unpacking below.
        continue
    key, value = line.split('=', 1)
    list_name = key.strip()
    raw_items = value.strip().strip('[]').replace("'", "").split(', ')
    # An empty list literal ("[]") splits into [''], so drop empty entries
    # rather than keeping a bogus empty ticker symbol.
    items = [item for item in raw_items if item]
    portfolio[list_name] = items

# NOTE(review): 'efts' looks like a typo for 'etfs' — confirm against the keys
# actually used in tickers.txt before changing it.
portfolio_columns = "stocks efts coins crypto futures".split(' ')

### Historical data

In [5]:
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt

`yfinance.download` returns an `Adj Close` column, which the output of `Ticker.history` (as called below) does not include, so it is extracted first.

In [6]:
# Fetch 5-minute bars for a single ticker over the last 59 days and keep only
# the adjusted close. `now`, `start`, `stock` and `df` are reused by the next cell.
# NOTE(review): presumably 59 days keeps the request inside the provider's
# intraday history window — confirm.
now = datetime.now()
start = now - timedelta(days=59)

stock = 'BYDDY'

df = yf.download(
    stock,
    start=start.strftime('%Y-%m-%d'),
    end=now.strftime('%Y-%m-%d'),
    interval='5m',
)['Adj Close']
display(df)

[*********************100%%**********************]  1 of 1 completed


Datetime
2024-01-22 09:30:00-05:00    49.400002
2024-01-22 09:35:00-05:00    49.480000
2024-01-22 09:40:00-05:00    49.349998
2024-01-22 09:45:00-05:00    49.490002
2024-01-22 09:50:00-05:00    49.580002
                               ...    
2024-03-18 15:35:00-04:00    54.939999
2024-03-18 15:40:00-04:00    54.900002
2024-03-18 15:45:00-04:00    54.880001
2024-03-18 15:50:00-04:00    54.880001
2024-03-18 15:55:00-04:00    54.889999
Name: Adj Close, Length: 3092, dtype: float64

`yfinance.Ticker.history` provides the rest of the historical market data. It is then inner-joined with the previous `Adj Close` series on `Datetime`.

In [7]:
# Fetch the OHLCV history for the same ticker and window, then inner-join the
# previously downloaded 'Adj Close' series on the 'Datetime' index.
stock_ticker = yf.Ticker(stock)
stock_hist = (
    stock_ticker
    .history(
        start=start.strftime('%Y-%m-%d'),
        end=now.strftime('%Y-%m-%d'),
        interval='5m',
    )
    .reset_index()
    .set_index('Datetime')
)
# After the join the timestamp is no longer needed: move it out of the index
# and discard it, leaving a plain positional index.
stock_hist = (
    pd.concat([stock_hist, df], axis=1, join='inner')
    .reset_index()
    .drop(columns='Datetime')
)
stock_hist.sample(3)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Adj Close
2984,53.450001,53.450001,53.389999,53.389999,4699,0.0,0.0,53.389999
2915,53.349998,53.360001,53.310001,53.349998,5141,0.0,0.0,53.349998
1336,46.779999,46.794998,46.650002,46.66,4168,0.0,0.0,46.66


In [8]:
import numpy as np 

def historical(ticker):
    """Build a feature table of 5-minute bars for ``ticker`` over the last 58 days.

    The 'Adj Close' series from ``yf.download`` is concatenated column-wise
    with the frame from ``yf.Ticker(ticker).history``. Two derived columns are
    added: 'returns' (percentage change of 'Adj Close') and 'day sin' (sine of
    the bar's POSIX timestamp with a one-day period).

    Note: a later cell defines ``historical`` again; once that cell has run,
    the later definition replaces this one.

    Args:
        ticker: Symbol passed to ``yf.download`` and ``yf.Ticker``.

    Returns:
        dict: ``DataFrame.to_dict()`` of the result, i.e.
        ``{column: {row_label: value}}``. Row labels are not renumbered after
        incomplete rows are dropped.
    """
    end_dt = datetime.now()
    begin_dt = end_dt - timedelta(days=58)
    window = dict(
        end=end_dt.strftime('%Y-%m-%d'),
        start=begin_dt.strftime('%Y-%m-%d'),
        interval='5m',
    )

    adj_close = yf.download(ticker, progress=False, **window)['Adj Close']
    bars = yf.Ticker(ticker).history(**window).reset_index().set_index('Datetime')

    merged = pd.concat([bars, adj_close], axis=1)
    merged['returns'] = merged['Adj Close'].pct_change()
    # 's' is a scratch column with the timestamp in seconds; it only feeds the
    # daily sine feature and is dropped again below.
    merged['s'] = merged.index.map(pd.Timestamp.timestamp)
    merged['day sin'] = np.sin(merged['s'] * (2 * np.pi / 60 / 60 / 24))

    # Forward-fill gaps, then drop whatever rows are still incomplete.
    flat = (
        merged
        .reset_index()
        .drop(columns=['s', 'Datetime'])
        .ffill()
        .dropna()
    )
    return flat.to_dict()

Apply it to every ticker in the portfolio.

In [27]:
from tqdm import tqdm

def historical(ticker):
    """Return 5-minute price features for ``ticker`` over the last 58 days.

    Combines the 'Adj Close' series from ``yf.download`` with the frame from
    ``yf.Ticker(ticker).history`` and adds 'returns' (percentage change of
    'Adj Close') and 'day sin' (sine of the bar's POSIX timestamp with a
    one-day period). Gaps are forward-filled, incomplete rows are dropped and
    the rows are renumbered from zero. The timestamp, 'Dividends',
    'Stock Splits' and, when present, 'Capital Gains' columns are removed.

    Args:
        ticker: Symbol passed to ``yf.download`` and ``yf.Ticker``.

    Returns:
        dict: ``DataFrame.to_dict()`` of the result, i.e.
        ``{column: {row_number: value}}``.
    """
    date_fmt = '%Y-%m-%d'
    now = datetime.now()
    end_str = now.strftime(date_fmt)
    start_str = (now - timedelta(days=58)).strftime(date_fmt)

    adj_close = yf.download(
        ticker, end=end_str, start=start_str, interval='5m', progress=False
    )['Adj Close']
    history = yf.Ticker(ticker).history(end=end_str, start=start_str, interval='5m')
    history = history.reset_index().set_index('Datetime')

    frame = pd.concat([history, adj_close], axis=1)
    frame['returns'] = frame['Adj Close'].pct_change()
    # Scratch column: timestamp in seconds, used only for the daily sine.
    frame['s'] = frame.index.map(pd.Timestamp.timestamp)
    frame['day sin'] = np.sin(frame['s'] * (2 * np.pi / 60 / 60 / 24))

    # Clean first, then renumber, so the returned row keys are contiguous.
    frame = frame.ffill().dropna().reset_index()

    unwanted = ['s', 'Datetime', 'Dividends', 'Stock Splits']
    # 'Capital Gains' is not present for every ticker, hence the membership check.
    if 'Capital Gains' in frame.columns:
        unwanted.append('Capital Gains')

    return frame.drop(columns=unwanted).to_dict()


def get_data():
    """Load the ticker groups from ``tickers.txt`` and fetch history for each ticker.

    Every non-blank line of ``tickers.txt`` is expected to look like
    ``name = ['AAA', 'BBB']``. Spaces are removed from the group name and
    empty entries (as produced by an empty list literal) are ignored.

    Returns:
        tuple: ``(portfolio, portfolio_hist)``.
        ``portfolio`` maps each group name to its list of ticker symbols.
        ``portfolio_hist`` maps each group name to a DataFrame whose columns
        are a sorted ``(ticker, feature)`` MultiIndex built from
        ``historical(ticker)``; rows with any missing value are dropped. A
        group without tickers maps to an empty DataFrame.

    Raises:
        ValueError: If a non-blank line contains no ``=``.
    """
    with open('tickers.txt', 'r') as file:
        lines = file.readlines()

    portfolio = {}
    for line in lines:
        line = line.strip()
        if not line:
            # Blank lines carry no assignment; skip them instead of failing
            # on the tuple unpacking below.
            continue
        key, value = line.split('=', 1)
        raw_items = value.strip().strip('[]').replace("'", "").split(', ')
        # "[]" splits into ['']; drop empty entries so no empty ticker is fetched.
        portfolio[key.strip().replace(' ', '')] = [item for item in raw_items if item]

    portfolio_hist = {}
    total = sum(len(tickers) for tickers in portfolio.values())

    # The context manager closes the progress bar even if a download raises.
    with tqdm(total=total) as pbar:
        for group, tickers in portfolio.items():
            if not tickers:
                # pd.concat rejects an empty mapping; keep the group visible.
                portfolio_hist[group] = pd.DataFrame()
                continue

            # Plain dict of {ticker: {feature: {row: value}}}; avoids storing
            # dict objects inside DataFrame cells just to read them back.
            per_ticker = {}
            for stck in tickers:
                per_ticker[stck] = historical(stck)
                pbar.update(1)

            # NOTE(review): rows of different tickers are matched by row
            # number (historical() discards the timestamp), not by time —
            # confirm this positional alignment is intended.
            stacked = pd.concat(
                {
                    (stck, feature): pd.Series(values)
                    for stck, features in per_ticker.items()
                    for feature, values in features.items()
                },
                axis=0,
            )
            hist = pd.DataFrame(stacked.unstack().transpose())
            hist = hist.sort_index(axis=1)
            portfolio_hist[group] = hist.dropna()

    return portfolio, portfolio_hist

# get_data() returns (ticker lists per group, history frame per group):
# df_dict receives the former and df the latter.
df_dict, df = get_data()

100%|██████████| 31/31 [00:03<00:00,  8.17it/s]


In [31]:
# Feature columns for the MRNA ticker within the 'stocks' group.
df['stocks']['MRNA']

Unnamed: 0,Adj Close,Close,High,Low,Open,Volume,day sin,returns
0,101.720001,101.720001,101.980003,101.000000,101.889999,110615.0,-0.625923,-0.001616
1,102.750000,102.750000,102.950401,101.430000,101.739998,124620.0,-0.642788,0.010126
2,102.279999,102.279999,102.969902,101.985001,102.889999,93119.0,-0.659346,-0.004574
3,101.889999,101.889999,102.480003,101.550003,102.290001,76433.0,-0.675590,-0.003813
4,101.230003,101.230003,102.139900,101.221901,101.900002,36890.0,-0.691513,-0.006478
...,...,...,...,...,...,...,...,...
3086,104.875000,104.875000,104.891998,104.709999,104.885002,14724.0,-0.999762,-0.000429
3087,104.779999,104.779999,104.853996,104.699997,104.809998,10345.0,-1.000000,-0.000906
3088,104.860001,104.860001,104.889999,104.767403,104.779999,12916.0,-0.999762,0.000764
3089,104.559998,104.559998,104.894997,104.550003,104.839996,20782.0,-0.999048,-0.002861
