In [1]:
import ffn
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt

In [2]:
# Get the list of S&P 100 constituents from Wikipedia.
table = pd.read_html('https://en.wikipedia.org/wiki/S%26P_100')
# Pick the constituents table by its 'Symbol' column instead of a fixed
# position, which silently breaks whenever the page layout changes.
df = next((candidate for candidate in table if 'Symbol' in candidate.columns), None)
if df is None:
    raise ValueError("No table with a 'Symbol' column found on the S&P 100 page")
# Keep a local snapshot of the constituents table.
df.to_csv("S&P100-Info.csv")

In [3]:
# Preview the first five constituents
df.head(n=5)

Unnamed: 0,Symbol,Name,Sector
0,AAPL,Apple Inc.,Information Technology
1,ABBV,AbbVie Inc.,Health Care
2,ABT,Abbott Laboratories,Health Care
3,ACN,Accenture,Information Technology
4,ADBE,Adobe Inc.,Information Technology


In [4]:
# Ticker symbols, each coerced to a plain string
stocks = df['Symbol'].map(str)
stocks

0      AAPL
1      ABBV
2       ABT
3       ACN
4      ADBE
       ... 
96       VZ
97      WBA
98      WFC
99      WMT
100     XOM
Name: Symbol, Length: 101, dtype: object

In [5]:
# Materialise the ticker Series as a plain Python list
sp100 = list(stocks)

In [6]:
# Drop BRK.B from the ticker list. Filtering (rather than list.remove) keeps
# this cell safe to re-run: it does not raise when the ticker is already gone.
# NOTE(review): presumably excluded because the dotted symbol does not resolve
# with the price provider — confirm.
sp100 = [ticker for ticker in sp100 if ticker != 'BRK.B']

In [9]:
# Probe each ticker on its own so that a symbol the provider cannot serve is
# reported by name. The fetched frame is discarded on purpose: assigning it to
# `data` would overwrite the full price table with a single ticker.
failed_tickers = {}
for ticker in sp100:
    try:
        ffn.get(ticker, start="2020-01-01")
    except Exception as exc:  # not a bare except: Ctrl-C must still interrupt the loop
        # Keep the exception so the cause can be inspected afterwards.
        failed_tickers[ticker] = exc
        print('There is a problem with {}'.format(ticker))

In [7]:
# Fetch prices for every ticker in sp100 in one call, starting 2020-01-01.
data = ffn.get(sp100, start = '2020-01-01')
# Original note: "the pull from these dates only went back to 2019".
# NOTE(review): that does not match the 2020-01-01 start requested above — confirm
# which date range this note refers to.

In [8]:
# Preview the first five rows of the price table
data.head(n=5)

Unnamed: 0_level_0,aapl,abbv,abt,acn,adbe,aig,amgn,amt,amzn,avgo,...,unh,unp,ups,usb,v,vz,wba,wfc,wmt,xom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,74.333511,83.871666,85.256981,206.125336,334.429993,49.306526,231.917572,224.252289,1898.01001,305.916046,...,286.745422,177.634338,112.318962,56.20261,189.656342,57.256145,56.046688,51.509617,116.500679,64.56012
2020-01-03,73.61084,83.075554,84.217628,205.782028,331.809998,48.925495,230.343124,224.360229,1874.969971,298.135071,...,283.843658,176.377136,112.251648,55.547543,188.14798,56.646542,56.046688,51.193367,115.472214,64.041092
2020-01-06,74.197395,83.731171,84.658859,204.438293,333.709991,48.963596,232.110748,224.301346,1902.880005,297.689056,...,285.814087,174.866547,111.751549,54.788048,187.741135,56.52462,56.530506,50.886707,115.237137,64.532799
2020-01-07,73.848442,83.25351,84.18821,200.024429,333.390015,48.687344,229.927765,219.521866,1906.859985,296.664276,...,284.088776,173.541122,111.559204,54.2659,187.244965,55.896252,56.245903,50.465046,114.169487,64.004662
2020-01-08,75.036385,83.843567,84.531395,200.416824,337.869995,49.2589,230.101654,221.425797,1891.969971,292.963501,...,290.078552,175.441559,112.193939,54.151974,190.450211,55.999416,52.963551,50.618382,113.777695,63.039452


In [9]:
# Save the downloaded price table to disk
data.to_csv(path_or_buf="Stock Data.csv")

In [12]:
# Daily log returns, with rows containing missing values dropped
log_returns = data.to_log_returns()
returns_daily = log_returns.dropna()
# Annualise the mean daily return using a 250-day year
returns_annual = 250 * returns_daily.mean()
# Row count of the daily frame and entry count of the annual series
(len(returns_daily), len(returns_annual))

(315, 100)

In [16]:
# Rank tickers by annualised return, best first.
# Named returns_sorted (not `sorted`) so the built-in sorted() stays usable
# in every later cell.
returns_sorted = returns_annual.sort_values(ascending=False)
returns_sorted

tsla    1.659382
nvda    0.665173
pypl    0.639596
fdx     0.499260
amzn    0.419428
          ...   
spg    -0.130906
cop    -0.131922
rtx    -0.140825
ba     -0.195366
wfc    -0.208682
Length: 100, dtype: float64

In [None]:
# Covariance matrix of daily returns, then scaled to a 250-day year
cov_daily = returns_daily.cov()
cov_annual = 250 * cov_daily

In [None]:
# Accumulators for the simulated portfolios: returns, volatility,
# Sharpe ratio and the weight vector of each imaginary portfolio
port_returns, port_volatility, sharpe_ratio, stock_weights = [], [], [], []

In [None]:
# Simulation size: how many random portfolios to draw,
# and how many assets each one holds (one per ticker in sp100)
num_portfolios = 50_000
num_assets = len(sp100)

In [None]:
# Fix NumPy's global random state so the simulation is reproducible
np.random.seed(seed=42)

In [None]:
# Populate the result lists with each simulated portfolio's return, risk,
# Sharpe ratio and weights.

# Empty the lists first so that re-running this cell replaces the previous
# simulation instead of silently appending a second batch to it.
for results in (port_returns, port_volatility, sharpe_ratio, stock_weights):
    results.clear()

# Convert the pandas inputs once; they do not change inside the loop.
expected_returns = returns_annual.to_numpy()
cov_matrix = cov_annual.to_numpy()

for _ in range(num_portfolios):
    # Random non-negative weights, normalised to sum to 1
    weights = np.random.random(num_assets)
    weights /= np.sum(weights)
    returns = np.dot(weights, expected_returns)
    # Portfolio standard deviation: sqrt(w' * Cov * w)
    volatility = np.sqrt(np.dot(weights, np.dot(cov_matrix, weights)))
    # Return per unit of volatility (no risk-free rate is subtracted)
    sharpe = returns / volatility
    sharpe_ratio.append(sharpe)
    port_returns.append(returns)
    port_volatility.append(volatility)
    stock_weights.append(weights)

In [None]:
# Map each summary metric name to its list of per-portfolio values
metric_names = ('Returns', 'Volatility', 'Sharpe Ratio')
metric_values = (port_returns, port_volatility, sharpe_ratio)
portfolio = dict(zip(metric_names, metric_values))

In [None]:
# Extend the dictionary with one "<ticker> Weight" entry per ticker, holding
# that ticker's weight in every simulated portfolio
portfolio.update({
    '{} Weight'.format(symbol): [vector[position] for vector in stock_weights]
    for position, symbol in enumerate(sp100)
})

In [None]:
# One row per simulated portfolio, one column per dictionary key
spdf = pd.DataFrame(data=portfolio)

In [None]:
# Desired column layout: summary metrics first, then one weight column per ticker
metric_columns = ['Returns', 'Volatility', 'Sharpe Ratio']
weight_columns = ['{} Weight'.format(stock) for stock in sp100]
column_order = metric_columns + weight_columns

In [None]:
# Arrange the columns in the order defined by column_order
spdf = spdf.loc[:, column_order]

In [None]:
%matplotlib inline
plt.style.use('seaborn-dark')
spdf.plot.scatter(x='Volatility', y='Returns', c='Sharpe Ratio',
                cmap='RdYlGn', edgecolors='black', figsize=(10, 10), grid=True)
plt.xlabel('Volatility (Std. Deviation)')
plt.ylabel('Expected Returns')
plt.title('Efficient Frontier')
plt.show()

In [None]:
# Same risk/return scatter, drawn through the Axes object directly
fig, ax = plt.subplots(figsize=(8, 6))
x1 = spdf['Volatility']
y1 = spdf['Returns']
sharpe_colors = spdf['Sharpe Ratio']
ax.scatter(x1, y1, c=sharpe_colors, cmap='RdYlGn', edgecolors='black')
ax.grid(True)
ax.set_xlabel('Volatility (Std. Deviation)')
ax.set_ylabel('Expected Returns')
ax.set_title('Efficient Frontier')

In [None]:
# Find the minimum volatility and maximum Sharpe ratio among the simulated
# portfolios in spdf
min_volatility = spdf['Volatility'].min()
max_sharpe = spdf['Sharpe Ratio'].max()

# Use those values to select the two special portfolios (kept as DataFrames)
sharpe_portfolio = spdf.loc[spdf['Sharpe Ratio'] == max_sharpe]
min_variance_port = spdf.loc[spdf['Volatility'] == min_volatility]

# Plot all portfolios, then mark max Sharpe (red) and min volatility (blue).
# The 'seaborn-dark' style was renamed to 'seaborn-v0_8-dark' in matplotlib 3.6
# and the old name was later removed, so pick whichever name this install has.
dark_style = 'seaborn-v0_8-dark' if 'seaborn-v0_8-dark' in plt.style.available else 'seaborn-dark'
plt.style.use(dark_style)
spdf.plot.scatter(x='Volatility', y='Returns', c='Sharpe Ratio',
                cmap='RdYlGn', edgecolors='black', figsize=(10, 8), grid=True)
plt.scatter(x=sharpe_portfolio['Volatility'], y=sharpe_portfolio['Returns'], c='red', marker='D', s=200)
plt.scatter(x=min_variance_port['Volatility'], y=min_variance_port['Returns'], c='blue', marker='D', s=200 )
plt.xlabel('Volatility (Std. Deviation)')
plt.ylabel('Expected Returns')
plt.title('Efficient Frontier')
plt.show()

In [None]:
# Transpose the minimum-volatility portfolio, apply the ".2%" format via
# as_format, and write the result to CSV
min_variance_table = min_variance_port.T
min_variance_formatted = min_variance_table.as_format(".2%")
min_variance_formatted.to_csv("min_var_sp100_2019_to_recent.csv")

In [None]:
# Transpose the maximum-Sharpe portfolio, apply the ".2%" format via
# as_format, and write the result to CSV
sharpe_table = sharpe_portfolio.T
sharpe_formatted = sharpe_table.as_format(".2%")
sharpe_formatted.to_csv("max_sharp_sp100_2019_to_recent.csv")