In [1]:
import numpy as np
import itertools
import pandas as pd
import matplotlib.pyplot as plt
#import scienceplots
plt.rcParams['figure.figsize'] = (10,10)
#plt.style.use(['science','grid','notebook'])
import plotly.express as px
import yfinance as yf
import pandas_datareader as pdr
import datetime as dt

from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
sp500_list = pd.read_html(url)
sp500_list = sp500_list[0]
sp500_list = sp500_list.drop(columns=['GICS Sub-Industry','Headquarters Location', 'Date added','CIK','Founded'], axis=1)
sp500_list.rename(columns={'GICS Sector':'Sector'},inplace=True)

Unnamed: 0,Symbol,Security,Sector
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ACN,Accenture,Information Technology
...,...,...,...
498,XYL,Xylem Inc.,Industrials
499,YUM,Yum! Brands,Consumer Discretionary
500,ZBRA,Zebra Technologies,Information Technology
501,ZBH,Zimmer Biomet,Health Care


In [6]:
sp500_list = sp500_list.sort_values(by='Symbol', ascending=True)

In [7]:
sp500_list

Unnamed: 0,Symbol,Security,Sector
9,A,Agilent Technologies,Health Care
39,AAPL,Apple Inc.,Information Technology
3,ABBV,AbbVie,Health Care
11,ABNB,Airbnb,Consumer Discretionary
2,ABT,Abbott Laboratories,Health Care
...,...,...,...
498,XYL,Xylem Inc.,Industrials
499,YUM,Yum! Brands,Consumer Discretionary
501,ZBH,Zimmer Biomet,Health Care
500,ZBRA,Zebra Technologies,Information Technology


In [8]:
end = dt.datetime.now()
start = end - dt.timedelta(days=252)

ticker_list = [i for i in sp500_list['Symbol']]

for i in range(len(ticker_list)):
    if ticker_list[i] == 'BRK.B':
        ticker_list[i] = 'BRK-B'
    elif ticker_list[i] == 'BF.B':
        ticker_list[i] = 'BF-B'

In [11]:
stocks = yf.download(ticker_list, start=start, end=end)['Close']

[*********************100%%**********************]  503 of 503 completed


In [13]:
stocks.shape

(175, 503)

In [14]:
log_return = np.log(stocks / stocks.shift(1)).replace(np.nan, 0)

annual_return = log_return.mean() * 252
annual_volatility = log_return.std() * np.sqrt(252)

stock_data = pd.DataFrame()
stock_data['Price'] = stocks.tail(1).T
stock_data['Symbol'] = list(sp500_list['Symbol'])
stock_data['Security'] = list(sp500_list['Security'])
stock_data['Sector'] = list(sp500_list['Sector'])
stock_data['Return'] = list(round(annual_return, 3))
stock_data['Volatility'] = list(round(annual_volatility, 3))
stock_data['SharpeRatio'] = list(round((annual_return / annual_volatility), 3))

In [15]:
stock_data

Unnamed: 0_level_0,Price,Symbol,Security,Sector,Return,Volatility,SharpeRatio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,130.309998,A,Agilent Technologies,Health Care,-0.019,0.260,-0.073
AAPL,225.910004,AAPL,Apple Inc.,Information Technology,0.307,0.244,1.258
ABBV,203.869995,ABBV,AbbVie,Health Care,0.195,0.213,0.915
ABNB,134.789993,ABNB,Airbnb,Consumer Discretionary,-0.179,0.322,-0.556
ABT,113.370003,ABT,Abbott Laboratories,Health Care,-0.075,0.184,-0.408
...,...,...,...,...,...,...,...
XYL,121.779999,XYL,Xylem Inc.,Industrials,-0.045,0.215,-0.208
YUM,131.160004,YUM,Yum! Brands,Consumer Discretionary,-0.077,0.162,-0.473
ZBH,106.919998,ZBH,Zimmer Biomet,Health Care,-0.272,0.224,-1.218
ZBRA,381.970001,ZBRA,Zebra Technologies,Information Technology,0.473,0.271,1.749


In [16]:
px.scatter(stock_data,
           x='Volatility',
           y='Return',
           hover_data=['Sector','Security'],
           hover_name='Symbol',
           title='S&P 500 stocks by Volatility/Return',
           color='Sector',
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [17]:
X = stock_data[['Return', 'Volatility']]
inertia_list = []

for k in range(1,16):
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    inertia_list.append(kmeans.inertia_)

px.line(inertia_list,
        title='Elbow Curve',
        width=750,
        height=500,
        labels={
            "index":"Number of Clusters",
            "value":"Sum of Squared Error"
        },
        template='plotly_dark'
        ).update_layout(showlegend=False, title_x=0.5)

In [19]:
silhouette = []

for k in range(2,16):
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    silhouette.append(silhouette_score(X, kmeans.labels_))

In [21]:
kmeans = KMeans(n_clusters=8).fit(X)
labels = kmeans.labels_
stock_data['KMeansCluster'] = labels

In [23]:
px.scatter(stock_data,
           x='Volatility',
           y='Return',
           hover_data=['Sector','Security'],
           hover_name='Symbol',
           title='S&P 500 stocks by Volatility/Return',
           color='KMeansCluster',
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [24]:
px.line(silhouette,
        title='Silhouette Curve',
        width=750,
        height=500,
        labels={
            "index":"Number of Clusters",
            "value":"Sum of Squared Error"
        },
        template='plotly_dark'
        ).update_layout(showlegend=False, title_x=0.5)

In [25]:
dividends = []

for i in ticker_list:
    info = yf.Ticker(i).info
    div = info.get('trailingAnnualDividendYield')
    dividends.append(div)

In [33]:
stock_data['Dividends'] = list(dividends)

dividend_stock_data = stock_data[stock_data['Dividends'] > 0]

In [34]:
dividend_stock_data

Unnamed: 0_level_0,Price,Symbol,Security,Sector,Return,Volatility,SharpeRatio,KMeansCluster,Dividends
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A,130.309998,A,Agilent Technologies,Health Care,-0.019,0.260,-0.073,1,0.007096
ABBV,203.869995,ABBV,AbbVie,Health Care,0.195,0.213,0.915,0,0.030769
ABT,113.370003,ABT,Abbott Laboratories,Health Care,-0.075,0.184,-0.408,1,0.019222
ACN,344.820007,ACN,Accenture,Information Technology,-0.130,0.253,-0.512,1,0.015437
ADI,223.110001,ADI,Analog Devices,Information Technology,0.230,0.339,0.681,0,0.015731
...,...,...,...,...,...,...,...,...,...
XOM,116.779999,XOM,ExxonMobil,Energy,0.169,0.191,0.883,6,0.032222
XYL,121.779999,XYL,Xylem Inc.,Industrials,-0.045,0.215,-0.208,1,0.010828
YUM,131.160004,YUM,Yum! Brands,Consumer Discretionary,-0.077,0.162,-0.473,1,0.009558
ZBH,106.919998,ZBH,Zimmer Biomet,Health Care,-0.272,0.224,-1.218,4,0.008742


In [36]:
px.scatter(dividend_stock_data,
           x='Volatility',
           y='Return',
           hover_data=['Sector','Security'],
           hover_name='Symbol',
           title='S&P 500 dividend stocks by Volatility/Return',
           color='Sector',
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [40]:

px.scatter(dividend_stock_data[dividend_stock_data['Price'] < 40],
           x='Volatility',
           y='Return',
           hover_data=['Sector','Security'],
           hover_name='Symbol',
           title='S&P 500 dividend stocks by Volatility/Return',
           color='Sector',
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [42]:
dividend_stock_data[dividend_stock_data['Price'] < 40].sort_values(by='SharpeRatio', ascending=False)

Unnamed: 0_level_0,Price,Symbol,Security,Sector,Return,Volatility,SharpeRatio,KMeansCluster,Dividends
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
KMI,24.51,KMI,Kinder Morgan,Energy,0.506,0.169,2.987,3,0.046635
NI,35.16,NI,NiSource,Utilities,0.428,0.148,2.888,3,0.030403
FOX,38.959999,FOX,Fox Corporation (Class B),Communication Services,0.492,0.175,2.807,3,0.01335
T,22.540001,T,AT&T,Communication Services,0.423,0.194,2.183,3,0.050409
DOC,22.450001,DOC,Healthpeak Properties,Real Estate,0.432,0.221,1.957,3,0.052817
PPL,32.560001,PPL,PPL Corporation,Utilities,0.28,0.153,1.831,0,0.030862
RF,23.870001,RF,Regions Financial Corporation,Financials,0.384,0.234,1.641,3,0.040316
BKR,38.080002,BKR,Baker Hughes,Energy,0.349,0.256,1.363,0,0.022139
GEN,29.110001,GEN,Gen Digital,Information Technology,0.394,0.29,1.358,3,0.01858
PCG,20.219999,PCG,PG&E Corporation,Utilities,0.247,0.182,1.356,0,0.001486
