## get_stock_correlations - 
1. Use the barchart api to retrieve stock historical data;
2. Create a correlation matrix from the historical data; 
3. Get standard deviations from the historical data;
4. Create long/short position information for each security retrieved
5. Create a portolio VaR using all of the above

## Imports

In [1]:
import pandas as pd
import numpy as np
import os
import sys
import datetime
from os.path import expanduser
from numpy import linalg as LA

### Add to sys.path
Within the barchart_api module, references like: ```from barchartapi import barchart_api``` will not work unless you append to sys.path the following folders:
1. project working directory (which holds barchart_api.py),
2. project directory,
3. workspace directory.

In [2]:
WORKSPACE_DIR = '../..' 
PROJECT_DIR = f'{WORKSPACE_DIR}/barchartapi' 
WORKING_DIR = f'{PROJECT_DIR}/barchartapi' 
if WORKING_DIR not in sys.path:
    sys.path.append(WORKING_DIR)
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)
if WORKSPACE_DIR not in sys.path:
    sys.path.append(WORKSPACE_DIR)
import barchart_api as bcapi

___
### Create paths to where you will store the history data

In [3]:
# DROPBOX_MARKET_DATA_DIR = expanduser('~/Dropbox/market_data')
# STOCKS_DIR = f'{DROPBOX_MARKET_DATA_DIR}/stocks'
# !ls {STOCKS_DIR}
STOCKS_DIR = f'./temp_folder/stocks'
try:
    os.makedirs(STOCKS_DIR)
except:
    pass

___
### Create an instance of BcHist
1. must set bar_type to minutes, hour or daily
2. must set interval
 * for minutes, set interval to 1, 5, 15, 30 or 60
 * for other types, see https://www.barchart.com/ondemand/api/getHistory

In [4]:
# set this to 'free' or 'paid'
endpoint = 'free' # free or paid

# set the bar_type and the interval
bar_type='minutes' # minutes, daily, monthly
interval=30 # 1,5,15,30,60

# create an instance 
api_key = open(f'./temp_folder/{endpoint}_api_key.txt','r').read()
endpoint_type=f'{endpoint}_url'
bch = bcapi.BcHist(api_key, bar_type=bar_type, interval=interval,endpoint_type = endpoint_type)


Barchart OnDemand Client: https://marketdata.websol.barchart.com/


___
### Main Loop to fetch history data from Barchart

#### Inputs
On each call to BcHist.get_history, provide:
 * a beginning yyyymmdd integer
 * an ending yyyymmdd integer
 * a short name like:
  * a stock symbol like: SPY,USO,IBM, etc
  * a futures symbol like: CLJ18, GCG19, ESH17, etc

#### Outputs
The get_history method will return a tuple, where:
 * tup[0] provides a status

* tup[1] provides a pandas DataFrame of data, or None

In [142]:
#  set a date range
beg_yyyymmdd = 20181201
end_yyyymmdd = 20190119

# true will cause barchartapi to refetch csv data, 
#   false will cause it to bypass any short_name that already has a csv file in the path sn_path below
delete_old_csv_file = True 

# set up stocks to get
#   below are some example lists of stocks
spydr_short_names = ['XLE','XLU','XLK','XLB','XLP','XLY','XLI','XLC','XLV','XLF']
commodity_etf_short_names = ['USO','UNG','DBC','DBA','GLD','USCI']
currency_etf_short_names = ['FXY','FXE','FXB','FXF','FXC','FXA']
my_portfolio_short_names = ['XLE','SPY','USO','GLD','XLU']

# set short_names to the desired list to get
short_names = my_portfolio_short_names

for short_name in short_names:
    sn_path = f'{STOCKS_DIR}/{short_name}.csv'
    if os.path.isfile(sn_path):
        if delete_old_csv_file:
            os.remove(sn_path)
        else:
            print(f'BYPASSING: {short_name}')
            continue
    print(f'get_history: {short_name} BEGIN {datetime.datetime.now()}')
    tup = bch.get_history(short_name, beg_yyyymmdd, end_yyyymmdd)
    print(f'get_history: {short_name} WRITING DATA {datetime.datetime.now()}')
    tup[1].to_csv(sn_path,index=False)
    print(f'get_history: {short_name} END {datetime.datetime.now()}')
    

get_history: XLE BEGIN 2019-01-20 17:57:39.703238
get_history: XLE WRITING DATA 2019-01-20 17:57:40.625474
get_history: XLE END 2019-01-20 17:57:40.630977
get_history: SPY BEGIN 2019-01-20 17:57:40.631149
get_history: SPY WRITING DATA 2019-01-20 17:57:41.236581
get_history: SPY END 2019-01-20 17:57:41.240028
get_history: USO BEGIN 2019-01-20 17:57:41.240267
get_history: USO WRITING DATA 2019-01-20 17:57:41.843409
get_history: USO END 2019-01-20 17:57:41.847209
get_history: GLD BEGIN 2019-01-20 17:57:41.847481
get_history: GLD WRITING DATA 2019-01-20 17:57:42.458120
get_history: GLD END 2019-01-20 17:57:42.461847
get_history: XLU BEGIN 2019-01-20 17:57:42.461936
get_history: XLU WRITING DATA 2019-01-20 17:57:42.986371
get_history: XLU END 2019-01-20 17:57:42.990372


___
### Do something interesting with the returned data, like create a portfolio VaR

In [143]:
df_portfolio = None
for short_name in short_names:
    sn_path = f'{STOCKS_DIR}/{short_name}.csv'
    df_temp = pd.read_csv(sn_path)
    df_temp = df_temp[['timestamp','close']]
    df_temp = df_temp.rename(columns={'close':f'{short_name}'})
    if df_portfolio is None:
        df_portfolio = df_temp.copy()
    else:
        df_portfolio = df_portfolio.merge(df_temp,how='inner',on='timestamp')
df_corr = df_portfolio.corr()
df_corr

Unnamed: 0,XLE,SPY,USO,GLD,XLU
XLE,1.0,0.974765,0.968302,0.705846,0.147043
SPY,0.974765,1.0,0.933751,0.658581,0.22055
USO,0.968302,0.933751,1.0,0.661278,0.124426
GLD,0.705846,0.658581,0.661278,1.0,-0.398839
XLU,0.147043,0.22055,0.124426,-0.398839,1.0


In [144]:
fn = f'{STOCKS_DIR}/{"_".join(df_corr.columns.values)}.csv'
df_corr.to_csv(fn,index=False)

In [145]:
df_portfolio.head()

Unnamed: 0,timestamp,XLE,SPY,USO,GLD,XLU
0,2018-12-20T09:30:00-05:00,57.99749,248.9562,9.87,119.06,53.71378
1,2018-12-20T10:00:00-05:00,58.21912,248.8468,9.835,119.19,53.97159
2,2018-12-20T10:30:00-05:00,58.10647,249.1351,9.81,118.77,54.36822
3,2018-12-20T11:00:00-05:00,57.6012,247.5345,9.68,118.98,54.64586
4,2018-12-20T11:30:00-05:00,57.11574,246.2222,9.74,119.14,54.45746


In [146]:
cols = list(set(list(df_portfolio.columns.values))-set(['timestamp']))
bars_per_day = 8*2
perc_of_day = 1/bars_per_day
perc_of_year = perc_of_day/256
std_series = df_portfolio[cols].pct_change().iloc[1:].std()/perc_of_year**.5
df_std = pd.DataFrame({'stdev':list(std_series.values),'symbol':list(std_series.index.values)})
df_std

Unnamed: 0,stdev,symbol
0,0.477034,USO
1,0.104514,GLD
2,0.264489,SPY
3,0.241231,XLU
4,0.332847,XLE


#### Enter Positions

In [147]:
positions_tuple = [
    ('USO',2000),
    ('GLD',-300),
    ('SPY',0),
    ('XLU',-300),
    ('XLE',300)
]

symbols = [t[0] for t in positions_tuple]
positions = [t[1] for t in positions_tuple]

df_positions = pd.DataFrame({'symbol':symbols,'position':positions})[['symbol','position']]
df_positions

Unnamed: 0,symbol,position
0,USO,2000
1,GLD,-300
2,SPY,0
3,XLU,-300
4,XLE,300


#### Get current prices

In [148]:
cols = list(filter(lambda c:'time' not in c,df_portfolio.columns.values))
vals = df_portfolio.iloc[-1:][cols].as_matrix().reshape(-1)
df_prices = pd.DataFrame({'symbol':cols,'price':vals})[['symbol','price']]
df_prices

Unnamed: 0,symbol,price
0,XLE,63.84
1,SPY,266.42
2,USO,11.33
3,GLD,121.04
4,XLU,53.14


#### Merge position, prices, stds

In [149]:
df_positions_2 = df_positions.merge(df_prices,how='inner',on='symbol')
df_positions_3 = df_positions_2.merge(df_std,how='inner',on='symbol')
df_positions_3

Unnamed: 0,symbol,position,price,stdev
0,USO,2000,11.33,0.477034
1,GLD,-300,121.04,0.104514
2,SPY,0,266.42,0.264489
3,XLU,-300,53.14,0.241231
4,XLE,300,63.84,0.332847


#### Create position VaR's

In [150]:
df_positions_3['unit_var'] = df_positions_3.apply(lambda r: r.price * r.stdev * 2.333 * (1/256)**.5 / r.price,axis=1 )
df_positions_3['position_var'] = df_positions_3.apply(lambda r: r.unit_var * r.position * r.price ,axis=1 )
df_positions_3

Unnamed: 0,symbol,position,price,stdev,unit_var,position_var
0,USO,2000,11.33,0.477034,0.069557,1576.17273
1,GLD,-300,121.04,0.104514,0.015239,-553.375917
2,SPY,0,266.42,0.264489,0.038566,0.0
3,XLU,-300,53.14,0.241231,0.035175,-560.752737
4,XLE,300,63.84,0.332847,0.048533,929.509729


#### Create portfolio VaR

In [151]:
# create an spy standard deviation that is the historical average
var_days = 1
spy_usual_daily_std = .16 
port_variance = df_positions_3.position_var.astype(float).as_matrix().T @ df_corr.astype(float).as_matrix() @ df_positions_3.position_var.astype(float).as_matrix()
port_var = port_variance**.5 * var_days**.5
spy_unit_var = float(df_positions_3[df_positions_3.symbol=='SPY'].unit_var) * spy_usual_daily_std / spy_curr_stdev
spy_curr_stdev = float(df_positions_3[df_positions_3.symbol=='SPY'].stdev)
sp_dollar_equiv = port_var / spy_unit_var * var_days**.5



In [152]:
print(f'portolio VaR: {round(port_var,2)}')
print(f'Equivalent S&P position (in dollars): {round(sp_dollar_equiv,2)}')


portolio VaR: 1431.54
Equivalent S&P position (in dollars): 65633.4


## END