## Analysis of portfolio vs SPY

In [2]:
import numpy as np
import os
import pandas as pd
from mpl_finance import candlestick_ohlc
import pandas_datareader.data as pdr
import datetime
import sys
import re
import sel_scrape as sela

if os.path.abspath('.')  not in sys.path:
    if os.path.abspath('.') not in sys.path:
        sys.path.append(os.path.abspath('.'))
if os.path.abspath('..')  not in sys.path:
    if os.path.abspath('..') not in sys.path:
        sys.path.append(os.path.abspath('..'))

from tqdm import tqdm
%matplotlib inline
import plot_utilities as pu
from plotly.offline import  init_notebook_mode, iplot
init_notebook_mode(connected=True)
import pathlib
SYSTEM_HOME = pathlib.Path.home()


Barchart OnDemand Client: https://marketdata.websol.barchart.com/
Barchart OnDemand Client: https://marketdata.websol.barchart.com/


In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
import importlib
# importlib.reload(pu)

### Read portfolio

In [5]:
PORTFOLIO_PATH = f'{SYSTEM_HOME}/test_portfolio_jrh.csv'
df_port = pd.read_csv(PORTFOLIO_PATH)

### Get Market Data for the portfolio


In [6]:
YEARS = 5
df_all = None
for i in range(len(df_port)):
    symbol = df_port.iloc[i].symbol
    shares = df_port.iloc[i].position
    df_temp = pu.get_yahoo(symbol,days_to_fetch=365*YEARS)
    df_temp = df_temp[['trade_date','close']].rename(columns={'close':symbol})
    df_temp[symbol] = df_temp[symbol]*shares
    if df_all is None:
        df_all = df_temp.copy()
    else:
        df_all = df_all.merge(df_temp,on='trade_date',how='inner')
        


get_history: RRGIX FETCHING DATA 2019-10-11 08:34:26.998786
get_history: DHLRX FETCHING DATA 2019-10-11 08:34:27.842167
get_history: BAFSX FETCHING DATA 2019-10-11 08:34:28.562088
get_history: HASCX FETCHING DATA 2019-10-11 08:34:29.168947
get_history: HACAX FETCHING DATA 2019-10-11 08:34:29.809083
get_history: JVMIX FETCHING DATA 2019-10-11 08:34:30.401163
get_history: MMIIX FETCHING DATA 2019-10-11 08:34:31.022553
get_history: MEDIX FETCHING DATA 2019-10-11 08:34:31.622862
get_history: NUVBX FETCHING DATA 2019-10-11 08:34:32.247746
get_history: EMGNX FETCHING DATA 2019-10-11 08:34:33.172793
get_history: FARCX FETCHING DATA 2019-10-11 08:34:33.818102
get_history: OAYIX FETCHING DATA 2019-10-11 08:34:34.710902
get_history: ODVYX FETCHING DATA 2019-10-11 08:34:35.344837
get_history: OIGYX FETCHING DATA 2019-10-11 08:34:36.253745
get_history: PCRIX FETCHING DATA 2019-10-11 08:34:36.777416
get_history: PRFDX FETCHING DATA 2019-10-11 08:34:37.460788
get_history: PRINX FETCHING DATA 2019-10

### Do prelim analysis, calculating sharpe ratio of who time period

In [7]:
df_all2 = df_all.copy()
cols = [c for c in df_all2.columns.values if 'trade_' not in c]
df_all2['tot'] = df_all2.apply(lambda r:sum([r[c] for c in cols]),axis=1)
df_all2['port_pctchg'] = df_all2.tot.pct_change()
m = df_all2.port_pctchg.mean()
s = df_all2.port_pctchg.std()
r = m/s * 256**.5
{'daily_mean_return':m,'daily_std':s,'yearl_mean_return':m*256,'yearly_std':s*256**.5,'yearly_sharpe_ratio':r}

{'daily_mean_return': 0.0003555231583040982,
 'daily_std': 0.005967925737924191,
 'yearl_mean_return': 0.09101392852584914,
 'yearly_std': 0.09548681180678706,
 'yearly_sharpe_ratio': 0.9531570570186322}

### Rolling annualized return, std and  sharpe

In [8]:
ROLLING_PERIOD = 100 # business days
df_all3 = df_all2[['trade_date','tot','port_pctchg']].copy()
df_all3['rolling_annualized_std'] = df_all3.port_pctchg.rolling(ROLLING_PERIOD).std() * 256**.5
df_all3['rolling_annualized_mean'] = df_all3.port_pctchg.rolling(ROLLING_PERIOD).mean() * 256
df_all3['rolling_annualized_sharpe'] = df_all3.rolling_annualized_mean / df_all3.rolling_annualized_std
df_all4 = df_all3[['trade_date','tot','rolling_annualized_mean','rolling_annualized_std','rolling_annualized_sharpe']]
df_all4.tail(100)


Unnamed: 0,trade_date,tot,rolling_annualized_mean,rolling_annualized_std,rolling_annualized_sharpe
619,20190521,121830.610446,0.341814,0.098640,3.465256
620,20190522,121441.057776,0.323918,0.098824,3.277734
621,20190523,120143.891741,0.293707,0.100656,2.917939
622,20190524,120549.032694,0.287050,0.100419,2.858519
623,20190528,119994.464239,0.278826,0.100758,2.767301
624,20190529,119350.276871,0.300874,0.098340,3.059527
625,20190530,119542.990711,0.239295,0.090027,2.658023
626,20190531,118708.746098,0.202391,0.090289,2.241586
627,20190603,118665.144882,0.181958,0.089630,2.030096
628,20190604,120307.254671,0.197223,0.091354,2.158888


### Plot time-slices of rolling shape ratios using bar graph


In [9]:
VIEW_SLICE = 100
# for i in range(int(len(df_all4)/VIEW_SLICE)):
#     b = i*VIEW_SLICE
#     e = min((i+1)*VIEW_SLICE,len(df_all4)-1)
#     df_view = df_all4.iloc[b:e][['trade_date','rolling_annualized_sharpe']]
#     iplot(pu.plotly_pandas(df_view,x_column='trade_date',bar_plot=True))

### Plot portfolio value and rolling sharpe ratio over whole period

In [10]:
df_to_plot = df_all4[['trade_date','tot','rolling_annualized_sharpe']].copy()
t = 'portfolio value vs sharpe ratio'
p = pu.plotly_pandas(df_to_plot,x_column='trade_date',use_secondary_yaxis=True,plot_title=t)
iplot(p)

###  Do analysis of SPY

In [11]:
df_spy = pu.get_yahoo('SPY',days_to_fetch=365*YEARS)
df_spy2 = df_spy[['trade_date','close']].rename(columns={'close':'spy'})

get_history: SPY FETCHING DATA 2019-10-11 08:34:44.475016


In [12]:
df_compare = df_all4[['trade_date','tot']].merge(df_spy2[['trade_date','spy']],on='trade_date',how='inner')

In [13]:
df_compare['port_pct'] = df_compare.tot / df_compare.iloc[0].tot
df_compare['spy_pct'] = df_compare.spy / df_compare.iloc[0].spy
df_compare['port_pctchg'] = df_compare.tot.pct_change()
df_compare['spy_pctchg'] = df_compare.spy.pct_change()
port_mean = df_compare.port_pctchg.mean() * 256
port_std = df_compare.port_pctchg.std() * 256**.5
spy_mean = df_compare.spy_pctchg.mean() * 256
spy_std = df_compare.spy_pctchg.std() * 256**.5
{'port_mean':port_mean,'port_std':port_std,'spy_mean':spy_mean,'spy_std':spy_std,
 'port_sharpe':port_mean/port_std,'spy_sharpe':spy_mean/spy_std}

{'port_mean': 0.09101392852584914,
 'port_std': 0.09548681180678706,
 'spy_mean': 0.13116850623625395,
 'spy_std': 0.13170945177253168,
 'port_sharpe': 0.9531570570186322,
 'spy_sharpe': 0.995892887495941}

### Plot portfolio value vs SPY value over whole time period

In [14]:
df_to_plot = df_compare[['trade_date','port_pct','spy_pct']].copy()
t = 'growth of portfolio vs growth of SPY'
iplot(pu.plotly_pandas(df_to_plot,x_column='trade_date',use_secondary_yaxis=True,plot_title=t))

### Do Dividend analysis of portfolio

#### Get dividends from yahoo actions

In [15]:
dict_div = {}
df_div = None
for i in range(len(df_port)):
    symbol = df_port.iloc[i].symbol
    shares = df_port.iloc[i].position
    df_div2 = pu.pdr.DataReader(symbol, 'yahoo-actions').sort_index()
    df_div2['trade_date'] = df_div2.index.year*100*100 + df_div2.index.month*100 + df_div2.index.day
    df_div2.index = range(len(df_div2))
    df_div3 = df_div2.merge(df_all[['trade_date',symbol]],on='trade_date',how='inner')
    df_div3['close'] = (df_div3[symbol] / shares)
    df_div3['dividend'] = df_div3.value
    df_div3['shares'] = shares
    dict_div[symbol] = df_div3.copy()


In [16]:
dict_div2 = {}
for symbol in dict_div.keys():
    dict_div2[symbol] = dict_div[symbol][['trade_date','close','dividend','shares']]

#### Calculate avergage annual dividend rates for each symbol, and then for whole portfolio

In [17]:
syms = []
rates = []
for symbol in dict_div2.keys():
    dft = dict_div2[symbol]
    dft = dft[dft.trade_date<20190000]
    dft2 = dft[['trade_date','dividend','close']].groupby('trade_date',as_index=False).agg({'dividend': 'sum', 'close': 'mean'})
    rate = dft2.apply(lambda r:r.dividend/r.close,axis=1).mean()
    syms.append(symbol)
    rates.append(rate)
avgs = [df_all[c].mean() for c in syms]
df_div_rate = pd.DataFrame({'symbol':syms,'div_rate':rates,'avg_close':avgs})
portfolio_dividend_rate = df_div_rate.apply(lambda r:r.div_rate * r.avg_close,axis=1).sum() / df_div_rate.avg_close.sum()
print(f'portfolio dividend rate = {portfolio_dividend_rate}')

portfolio dividend rate = 0.03127285085359904


#### Do Dividend analysis of SPY

In [18]:
df_div_spy = pu.pdr.DataReader('SPY', 'yahoo-actions').sort_index()
df_div_spy['trade_date'] = df_div_spy.index.year*100*100 + df_div_spy.index.month*100 + df_div_spy.index.day
df_div_spy.index = range(len(df_div_spy))
df_div_spy = df_div_spy[(df_div_spy.trade_date>20160000) & (df_div_spy.trade_date<20190000)]

In [19]:
df_div_spy['year'] = df_div_spy.trade_date.astype(str).str.slice(0,4).astype(int)
df_div_spy_agg = df_div_spy[['year','value']].groupby('year',as_index=False).agg({'value':sum})
df_spy2 = df_spy[df_spy.trade_date>20160000].copy()
df_spy2['year'] = df_spy2.trade_date.astype(str).str.slice(0,4).astype(int)
df_spy_close_avg = df_spy2[['year','close']].groupby('year',as_index=False).mean()
df_spy_div = df_div_spy_agg.merge(df_spy_close_avg,on='year',how='inner')
df_spy_div['div_rate'] = df_spy_div.value/df_spy_div.close
df_spy_div

Unnamed: 0,year,value,close,div_rate
0,2016,4.539,196.401009,0.023111
1,2017,4.802,234.186343,0.020505
2,2018,5.101,267.391619,0.019077


### Get Expense Ratios for each portfolio member using web scrapes of Yahoo Finance

In [20]:
sel = sela.SelScrape()

In [21]:
syms = []
expense_ratios = []
for symbol in dict_div2.keys():
    try:
        print(f'getting symbol {symbol}')
        sel.goto(f'https://finance.yahoo.com/quote/{symbol}/profile?p={symbol}')
        xp = "//span[contains(text(),'Annual Report Expense Ratio')]/parent::*/parent::span/following-sibling::span"
        p = sel.findxpath(xpath=xp)['value'] 
        v = float(re.findall('[.0-9]+',p[0].text)[0])
        syms.append(symbol)
        expense_ratios.append(v)
    except Exception as e:
        print(symbol,e)
df_expense_ratios = pd.DataFrame({'symbol':syms,'expense_ratio':expense_ratios})
df_expense_ratios

getting symbol RRGIX
getting symbol DHLRX
getting symbol BAFSX
getting symbol HASCX
getting symbol HACAX
getting symbol JVMIX
getting symbol MMIIX
getting symbol MEDIX
getting symbol NUVBX
getting symbol EMGNX
getting symbol FARCX
getting symbol OAYIX
getting symbol ODVYX
getting symbol OIGYX
getting symbol PCRIX
getting symbol PRFDX
getting symbol PRINX
getting symbol PRSMX
getting symbol TRBCX
getting symbol RPMGX
getting symbol LTMIX
getting symbol FOSBX


Unnamed: 0,symbol,expense_ratio
0,RRGIX,0.95
1,DHLRX,0.67
2,BAFSX,0.99
3,HASCX,0.88
4,HACAX,0.66
5,JVMIX,0.86
6,MMIIX,0.67
7,MEDIX,0.82
8,NUVBX,0.47
9,EMGNX,1.2


In [22]:
aer = df_expense_ratios.expense_ratio.mean()
print(f'average expense ratio = {aer}')

average expense ratio = 0.8113636363636363


## END