In [2]:
import ffn
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# List the matplotlib style sheet names available in this environment;
# the plotting cells further down select one of them by name via plt.style.use().
print(plt.style.available)

In [3]:
# Get the list of S&P 100 stocks from Wikipedia.
# pd.read_html returns a list of DataFrames parsed from the page.
table = pd.read_html('https://en.wikipedia.org/wiki/S%26P_100')

# The constituents table is selected by position, which silently breaks if the
# page layout changes. Check for the 'Symbol' column that the following cells
# rely on, so a layout change fails here with a clear message instead of
# saving (and later using) the wrong table.
df = table[2]
if 'Symbol' not in df.columns:
    raise ValueError(
        "Table 2 on the S&P 100 Wikipedia page has no 'Symbol' column "
        "(columns found: {}); the page layout has probably changed.".format(list(df.columns))
    )

# Keep a local snapshot of the table that was scraped.
df.to_csv("S&P100-Info.csv")

In [None]:
# Pull the 'Symbol' column out of the scraped table; `stocks` is a pandas Series
# of ticker symbols that the next cell turns into a plain Python list.
stocks = df['Symbol']
# Display the series for a visual check.
stocks

In [5]:
# Materialise the ticker symbols held in `stocks` as a plain Python list, in the same order.
sp100 = list(stocks)

In [6]:
# Berkshire shares (BRK.B) do not pull through Yahoo Finance, so drop that ticker.
# A filter is used instead of list.remove() so that the cell is idempotent:
# re-running it, or running it when the ticker is not in the list, no longer
# raises ValueError.
sp100 = [ticker for ticker in sp100 if ticker != 'BRK.B']

In [None]:
# Iterate through the sp100 list to see if there are any tickers that have "issues".
# Each ticker is requested on its own so a failure can be attributed to it.
# `except Exception` is used rather than a bare `except:` so that interrupting
# the kernel (KeyboardInterrupt) still stops this long-running loop, and the
# underlying error is reported instead of being discarded.
for ticker in sp100:
    try:
        # The result is discarded; the real pull happens in the next cell.
        ffn.get(ticker, start='2024-06-01')
    except Exception as exc:
        print('There is a problem with {}: {}'.format(ticker, exc))

In [None]:
# Use the ffn library to pull data for every ticker in sp100 in a single call,
# starting from 2024-06-01. The later cells treat `data` as a DataFrame with one
# column per ticker (presumably daily prices; verify against ffn.get's docs).
data = ffn.get(sp100, start = '2024-06-01')

In [None]:
# Show the last rows of the data pull as a quick sanity check on the most recent dates.
data.tail()

In [None]:
# Daily log returns for every ticker in the pull (via ffn's to_log_returns),
# with rows that contain missing values dropped.
returns_daily = (
    data
    .to_log_returns()
    .dropna()
)
# Annualise the mean daily return by scaling with 250 (trading days per year).
returns_annual = 250 * returns_daily.mean()
# Sizes of both results, displayed for verification:
# number of daily rows and number of annualised entries.
(len(returns_daily), len(returns_annual))

In [11]:
# Rank the tickers by annualised return, highest first, and hold the ranking
# as a single-column DataFrame indexed by ticker.
sort = returns_annual.sort_values(ascending=False).to_frame()

In [None]:
# The index labels of the first 20 rows of the ranking are the 20 tickers
# with the highest annualised return.
top_20 = sort.head(20).index
print(top_20)

In [None]:
# Restrict the data pull to the top-20 columns, kept in ranking order.
top_20_df = data[list(top_20)]
top_20_df.head()

In [14]:
# Save the top-20 subset to CSV. Note: despite "Returns" in the file name,
# top_20_df holds the top-20 columns of the raw `data` pull, not computed returns.
top_20_df.to_csv("SP100 top 20 Returns Starting Jun 2024.csv")

In [15]:
# Recompute the daily log returns on the top-20 subset only (rows with missing
# values dropped), then annualise the mean with 250 trading days per year.
top_20_ret_daily = (
    top_20_df
    .to_log_returns()
    .dropna()
)
top_20_ret_ann = 250 * top_20_ret_daily.mean()

In [None]:
#view the annualized returns of the top 20 S&P100 stocks
# View the annualized returns of the top 20 S&P 100 stocks (one value per ticker).
top_20_ret_ann

In [None]:
# Draw a correlation heatmap of the top-20 daily returns on a 12x12 figure
# (plot_corr_heatmap is not a core pandas method; presumably an ffn extension - confirm).
top_20_ret_daily.plot_corr_heatmap(figsize=(12,12))

### You can see that there is a lot of correlation among the stocks in this basket

In [17]:
# Covariance matrix of the top-20 daily returns, and the same matrix
# scaled to a yearly horizon (250 trading days in a year).
top_20_cov_daily = top_20_ret_daily.cov()
top_20_cov_annual = 250 * top_20_cov_daily

In [18]:
# Four independent, initially empty accumulators filled by the simulation loop:
# portfolio return, portfolio volatility, return/volatility ratio, and weight vector.
port_returns, port_volatility, sharpe_ratio, stock_weights = [], [], [], []

In [19]:
# Simulation dimensions:
#   num_portfolios - how many random weight combinations to generate
#   num_assets     - how many tickers each combination allocates across
num_portfolios = 50000
num_assets = len(top_20)

In [20]:
# Seed NumPy's global random generator so the random weights drawn by the
# simulation loop are the same on every fresh run. Re-run this cell before
# re-running the simulation cell if identical draws are needed again.
np.random.seed(42)

In [21]:
# Populate the result lists with each random portfolio's return, risk and weights.
#
# The lists are re-created here so that re-running this cell replaces the
# previous simulation instead of appending a second batch to it (which would
# double the number of rows in the DataFrame built from these lists).
port_returns = []
port_volatility = []
sharpe_ratio = []
stock_weights = []

# Loop-invariant inputs: convert the pandas objects to plain arrays once
# instead of on every iteration.
mean_returns = np.asarray(top_20_ret_ann)
cov_matrix = np.asarray(top_20_cov_annual)

# Draws come from NumPy's global generator; its state (see the seed cell)
# determines the weights produced here.
for _ in range(num_portfolios):
    # Random non-negative weights, normalised so that they sum to 1.
    weights = np.random.random(num_assets)
    weights /= np.sum(weights)
    # Portfolio annual return: weighted sum of the annualised asset returns.
    returns = np.dot(weights, mean_returns)
    # Portfolio volatility: sqrt(w' * Cov * w) using the annualised covariance.
    volatility = np.sqrt(np.dot(weights, np.dot(cov_matrix, weights)))
    # Return per unit of volatility; no risk-free rate is subtracted.
    sharpe = returns / volatility
    sharpe_ratio.append(sharpe)
    port_returns.append(returns)
    port_volatility.append(volatility)
    stock_weights.append(weights)

In [22]:
# Column-oriented results: the three summary metrics first, followed by one
# "<ticker> Weight" column per top-20 ticker (in top_20 order).
portfolio = {
    'Returns': port_returns,
    'Volatility': port_volatility,
    'Sharpe Ratio': sharpe_ratio,
}

# For the ticker at position `idx`, gather that position from every weight vector.
portfolio.update(
    (symbol + ' Weight', [allocation[idx] for allocation in stock_weights])
    for idx, symbol in enumerate(top_20)
)

In [23]:
# Build the results DataFrame from the `portfolio` dictionary: one row per
# simulated portfolio, with the metric columns and one weight column per ticker.
sp_top_20 = pd.DataFrame(portfolio)

In [None]:
# Show the last few simulated portfolios as a sanity check on the frame's shape and values.
sp_top_20.tail()

In [25]:
# Desired column arrangement: the three summary metrics first,
# then the per-ticker weight columns in top_20 order.
metric_columns = ['Returns', 'Volatility', 'Sharpe Ratio']
column_order = metric_columns + [stock + ' Weight' for stock in top_20]

In [26]:
# Reorder the DataFrame columns according to column_order; `final` is the frame
# used by all plotting and portfolio-selection cells that follow.
final = sp_top_20[column_order]

In [None]:
# Scatter plot of all simulated portfolios: volatility on x, return on y,
# each point coloured by its Sharpe Ratio column (red = low, green = high with 'RdYlGn').
%matplotlib inline
plt.style.use('seaborn-v0_8-darkgrid')
final.plot.scatter(x='Volatility', y='Returns', c='Sharpe Ratio',
                cmap='RdYlGn', edgecolors='black', figsize=(10, 10), grid=True)
# Labels and title are applied through pyplot, i.e. to the current axes.
plt.xlabel('Volatility (Std. Deviation)')
plt.ylabel('Expected Returns')
plt.title('Efficient Frontier')
plt.show()

In [None]:
# Same volatility/return scatter, drawn with the explicit fig/ax interface
# instead of the pandas plotting wrapper. No colorbar is added in this version.
fig, ax = plt.subplots(figsize=(8,6))
# Points are coloured by the Sharpe Ratio column using the 'RdYlGn' colormap.
ax.scatter(final["Volatility"],final['Returns'],
           c = final['Sharpe Ratio'],
          cmap='RdYlGn',
          edgecolors = 'black',
          )
ax.grid(True)
ax.set_xlabel('Volatility (Std. Deviation)')
ax.set_ylabel('Expected Returns')
ax.set_title('Efficient Frontier')

In [None]:
# Find the min-volatility and max-Sharpe values in the results frame.
min_volatility = final['Volatility'].min()
max_sharpe = final['Sharpe Ratio'].max()

# Locate the two special portfolios by row label (idxmax / idxmin) rather than
# by comparing floats for equality: this always yields exactly one row each,
# even when several simulated portfolios tie (e.g. duplicated simulations).
# The double brackets keep each result a one-row DataFrame.
sharpe_portfolio = final.loc[[final['Sharpe Ratio'].idxmax()]]
min_variance_port = final.loc[[final['Volatility'].idxmin()]]

# Plot the frontier, then overlay the max-Sharpe portfolio (red diamond) and
# the min-volatility portfolio (blue diamond) on the same axes.
plt.style.use('seaborn-v0_8-darkgrid')
final.plot.scatter(x='Volatility', y='Returns', c='Sharpe Ratio',
                cmap='RdYlGn', edgecolors='black', figsize=(10, 8), grid=True)
plt.scatter(x=sharpe_portfolio['Volatility'], y=sharpe_portfolio['Returns'], c='red', marker='D', s=100)
plt.scatter(x=min_variance_port['Volatility'], y=min_variance_port['Returns'], c='blue', marker='D', s=100 )
plt.xlabel('Volatility (Std. Deviation)')
plt.ylabel('Expected Returns')
plt.title('Efficient Frontier')
plt.show()

In [31]:
# Export the minimum-volatility portfolio: transposed so each metric/weight is a row,
# passed through as_format(".2%") (presumably an ffn helper that renders values as
# two-decimal percentages - confirm), then written to CSV.
min_variance_port.T.as_format(".2%").to_csv("min_var_top_20_sp100_june01-2024_to_recent.csv")

In [32]:
# Export the maximum-Sharpe portfolio: transposed so each metric/weight is a row,
# passed through as_format(".2%") (presumably an ffn helper that renders values as
# two-decimal percentages - confirm), then written to CSV.
sharpe_portfolio.T.as_format(".2%").to_csv("max_sharp_top_20_sp100_jun01-2024_to_recent.csv")