In [1]:
import numpy as np
import itertools
import pandas as pd
import matplotlib.pyplot as plt
#import scienceplots
plt.rcParams['figure.figsize'] = (10,10)
#plt.style.use(['science','grid','notebook'])
import plotly.express as px
import yfinance as yf
import pandas_datareader as pdr
import datetime as dt

from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score

In [4]:
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
sp500_list = pd.read_html(url)
sp500_list = sp500_list[0]
sp500_list = sp500_list.drop(columns=['GICS Sub-Industry','Headquarters Location', 'Date added','CIK','Founded'], axis=1)
sp500_list.rename(columns={'GICS Sector':'Sector'},inplace=True)
sp500_list = sp500_list.sort_values(by='Symbol', ascending=True)

In [5]:
end = dt.datetime.now()
start = end - dt.timedelta(days=252)

ticker_list = [i for i in sp500_list['Symbol']]

for i in range(len(ticker_list)):
    if ticker_list[i] == 'BRK.B':
        ticker_list[i] = 'BRK-B'
    elif ticker_list[i] == 'BF.B':
        ticker_list[i] = 'BF-B'

In [6]:
stocks = yf.download(ticker_list, start=start, end=end)['Close']

[*********************100%%**********************]  503 of 503 completed


In [13]:
stocks.shape

(175, 503)

In [7]:
log_return = np.log(stocks / stocks.shift(1)).replace(np.nan, 0)

annual_return = log_return.mean() * 252
annual_volatility = log_return.std() * np.sqrt(252)

stock_data = pd.DataFrame()
stock_data['Price'] = stocks.tail(1).T
stock_data['Symbol'] = list(sp500_list['Symbol'])
stock_data['Security'] = list(sp500_list['Security'])
stock_data['Sector'] = list(sp500_list['Sector'])
stock_data['Return'] = list(round(annual_return, 3))
stock_data['Volatility'] = list(round(annual_volatility, 3))
stock_data['SharpeRatio'] = list(round((annual_return / annual_volatility), 3))

In [15]:
stock_data

Unnamed: 0_level_0,Price,Symbol,Security,Sector,Return,Volatility,SharpeRatio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,130.309998,A,Agilent Technologies,Health Care,-0.019,0.260,-0.073
AAPL,225.910004,AAPL,Apple Inc.,Information Technology,0.307,0.244,1.258
ABBV,203.869995,ABBV,AbbVie,Health Care,0.195,0.213,0.915
ABNB,134.789993,ABNB,Airbnb,Consumer Discretionary,-0.179,0.322,-0.556
ABT,113.370003,ABT,Abbott Laboratories,Health Care,-0.075,0.184,-0.408
...,...,...,...,...,...,...,...
XYL,121.779999,XYL,Xylem Inc.,Industrials,-0.045,0.215,-0.208
YUM,131.160004,YUM,Yum! Brands,Consumer Discretionary,-0.077,0.162,-0.473
ZBH,106.919998,ZBH,Zimmer Biomet,Health Care,-0.272,0.224,-1.218
ZBRA,381.970001,ZBRA,Zebra Technologies,Information Technology,0.473,0.271,1.749


In [8]:
px.scatter(stock_data,
           x='Volatility',
           y='Return',
           hover_data=['Sector','Security'],
           hover_name='Symbol',
           title='S&P 500 stocks by Volatility/Return',
           color='Sector',
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [9]:
X = stock_data[['Return', 'Volatility']]
inertia_list = []

for k in range(1,16):
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    inertia_list.append(kmeans.inertia_)

px.line(inertia_list,
        title='Elbow Curve',
        width=750,
        height=500,
        labels={
            "index":"Number of Clusters",
            "value":"Sum of Squared Error"
        },
        template='plotly_dark'
        ).update_layout(showlegend=False, title_x=0.5)

In [10]:
silhouette = []

for k in range(2,16):
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    silhouette.append(silhouette_score(X, kmeans.labels_))

In [14]:
kmeans = KMeans(n_clusters=7).fit(X)
labels = kmeans.labels_
stock_data['KMeansCluster'] = labels

In [15]:
px.scatter(stock_data,
           x='Volatility',
           y='Return',
           hover_data=['Sector','Security'],
           hover_name='Symbol',
           title='S&P 500 stocks by Volatility/Return',
           color='KMeansCluster',
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [16]:
px.line(silhouette,
        title='Silhouette Curve',
        width=750,
        height=500,
        labels={
            "index":"Number of Clusters",
            "value":"Sum of Squared Error"
        },
        template='plotly_dark'
        ).update_layout(showlegend=False, title_x=0.5)

In [17]:
dividends = []

for i in ticker_list:
    info = yf.Ticker(i).info
    div = info.get('trailingAnnualDividendYield')
    dividends.append(div)

In [18]:
stock_data['Dividends'] = list(dividends)

dividend_stock_data = stock_data[stock_data['Dividends'] > 0]

In [19]:
dividend_stock_data

Unnamed: 0_level_0,Price,Symbol,Security,Sector,Return,Volatility,SharpeRatio,KMeansCluster,Dividends
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A,138.000000,A,Agilent Technologies,Health Care,0.063,0.268,0.236,4,0.007160
ABBV,202.789993,ABBV,AbbVie,Health Care,0.186,0.212,0.876,0,0.030412
ABT,118.620003,ABT,Abbott Laboratories,Health Care,-0.010,0.192,-0.053,4,0.019405
ACN,347.125000,ACN,Accenture,Information Technology,-0.119,0.253,-0.473,2,0.015515
ADI,225.000000,ADI,Analog Devices,Information Technology,0.241,0.338,0.714,0,0.016225
...,...,...,...,...,...,...,...,...,...
XOM,117.589996,XOM,ExxonMobil,Energy,0.178,0.191,0.932,0,0.032197
XYL,121.510002,XYL,Xylem Inc.,Industrials,-0.048,0.215,-0.222,2,0.011578
YUM,132.570007,YUM,Yum! Brands,Consumer Discretionary,-0.061,0.162,-0.376,2,0.009721
ZBH,108.099998,ZBH,Zimmer Biomet,Health Care,-0.255,0.223,-1.142,2,0.008979


In [20]:
px.scatter(dividend_stock_data,
           x='Volatility',
           y='Return',
           hover_data=['Sector','Security'],
           hover_name='Symbol',
           title='S&P 500 dividend stocks by Volatility/Return',
           color='Sector',
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [21]:

px.scatter(dividend_stock_data[dividend_stock_data['Price'] < 40],
           x='Volatility',
           y='Return',
           hover_data=['Sector','Security'],
           hover_name='Symbol',
           title='S&P 500 dividend stocks by Volatility/Return',
           color='Sector',
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [29]:
dividend_stock_data[dividend_stock_data['Price'] < 40].sort_values(by='SharpeRatio', ascending=False).head(20)

Unnamed: 0_level_0,Price,Symbol,Security,Sector,Return,Volatility,SharpeRatio,KMeansCluster,Dividends
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
KMI,24.6,KMI,Kinder Morgan,Energy,0.508,0.169,3.009,6,0.046716
NI,35.139999,NI,NiSource,Utilities,0.425,0.148,2.874,6,0.030148
FOX,38.849998,FOX,Fox Corporation (Class B),Communication Services,0.485,0.175,2.774,6,0.013347
T,22.42,T,AT&T,Communication Services,0.413,0.194,2.135,6,0.049246
DOC,22.715,DOC,Healthpeak Properties,Real Estate,0.446,0.22,2.025,6,0.053452
PPL,32.860001,PPL,PPL Corporation,Utilities,0.292,0.153,1.909,0,0.030559
RF,23.950001,RF,Regions Financial Corporation,Financials,0.387,0.233,1.657,6,0.040637
PCG,20.299999,PCG,PG&E Corporation,Utilities,0.251,0.182,1.383,0,0.001484
BKR,37.830002,BKR,Baker Hughes,Energy,0.337,0.255,1.321,0,0.021796
GEN,28.76,GEN,Gen Digital,Information Technology,0.375,0.29,1.293,6,0.017176


In [33]:
chosen_stock_ticker = [i for i in range(dividend_stock_data['Symbol'], 15)]
chosen_stock = yf.download(chosen_stock_ticker, start=start, end=end)['Close']

TypeError: 'Series' object cannot be interpreted as an integer

In [None]:
dividend_stock_data[(dividend_stock_data['Price'] < 40) & (dividend_stock_data['SharpeRatio'] > 0.5)].sort_values(by='SharpeRatio', ascending=False)


Unnamed: 0_level_0,Price,Symbol,Security,Sector,Return,Volatility,SharpeRatio,KMeansCluster,Dividends
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
KMI,24.6,KMI,Kinder Morgan,Energy,0.508,0.169,3.009,6,0.046716
NI,35.139999,NI,NiSource,Utilities,0.425,0.148,2.874,6,0.030148
FOX,38.849998,FOX,Fox Corporation (Class B),Communication Services,0.485,0.175,2.774,6,0.013347
T,22.42,T,AT&T,Communication Services,0.413,0.194,2.135,6,0.049246
DOC,22.715,DOC,Healthpeak Properties,Real Estate,0.446,0.22,2.025,6,0.053452
PPL,32.860001,PPL,PPL Corporation,Utilities,0.292,0.153,1.909,0,0.030559
RF,23.950001,RF,Regions Financial Corporation,Financials,0.387,0.233,1.657,6,0.040637
PCG,20.299999,PCG,PG&E Corporation,Utilities,0.251,0.182,1.383,0,0.001484
BKR,37.830002,BKR,Baker Hughes,Energy,0.337,0.255,1.321,0,0.021796
GEN,28.76,GEN,Gen Digital,Information Technology,0.375,0.29,1.293,6,0.017176
