<a href="https://colab.research.google.com/github/Zozz98/Finance-Economics/blob/main/sandbox.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IMPORTS

In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import plotly.express as px
import yfinance as yf
import pandas_datareader as pdr
import datetime as dt

from sklearn.cluster import KMeans

# GET DATA FROM WIKIPEDIA

In [39]:
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
sp500_list = pd.read_html(url)
sp500_list = sp500_list[0]
sp500_list = sp500_list.drop(columns=['GICS Sub-Industry','Headquarters Location', 'Date added','CIK','Founded'], axis=1)
sp500_list.rename(columns={'GICS Sector':'Sector'},inplace=True)

# LOAD CLOSE PRICES FROM WIKIPEDIA DATA

In [40]:
end = dt.datetime.now()
start = end - dt.timedelta(days=252)

ticker_list = [i for i in sp500_list['Symbol']]

for i in range(len(ticker_list)):
    if ticker_list[i] == 'BRK.B':
        ticker_list[i] = 'BRK-B'
    elif ticker_list[i] == 'BF.B':
        ticker_list[i] = 'BF-B'

In [41]:
stocks = yf.download(ticker_list, start=start, end=end)['Close']

[*********************100%***********************]  503 of 503 completed


# LOG RETURN AND SET COLUMNS

In [42]:
log_return = np.log(stocks / stocks.shift(1)).replace(np.nan, 0)

annual_return = log_return.mean() * 252
annual_volatility = log_return.std() * np.sqrt(252)

stock_data = pd.DataFrame()
stock_data['Symbol'] = ticker_list
stock_data['Security'] = sp500_list['Security']
stock_data['Sector'] = sp500_list['Sector']
stock_data['Return'] = list(round(annual_return, 3))
stock_data['Volatility'] = list(round(annual_volatility, 3))
stock_data['Ratio'] = list(round((annual_return / annual_volatility), 3))

# VOLATILITY-RETURN SCATTER PLOT

In [43]:
px.scatter(stock_data, 
           x='Volatility', 
           y='Return', 
           hover_data=['Sector'],
           hover_name='Symbol', 
           title='S&P 500 stocks by Volatility/Return', 
           color='Ratio',
           width=750,
           height=500).update_layout(title_x=0.5)

# FIND NUMBER OF CLUSTERS WITH ELBOW-METHOD

In [44]:
X = stock_data[['Return', 'Volatility']]
inertia_list = []
for k in range(2,16):
    kmeans = KMeans(n_clusters=k, n_init='auto')
    kmeans.fit(X)
    inertia_list.append(kmeans.inertia_)

px.line(inertia_list, 
        title='Elbow Curve',
        width=750,
        height=500,
        labels={
            "index":"Number of Clusters",
            "value":"Sum of Squared Error"
        }).update_layout(showlegend=False, title_x=0.5)

# KMEAN MODEL FIT

In [45]:
kmeans = KMeans(n_clusters=2, n_init='auto').fit(X)
labels = kmeans.labels_
stock_data['Cluster Label'] = labels

# PLOT STOCKS BY CLUSTERS

In [92]:
px.scatter(X, 
           x='Volatility', 
           y='Return', 
           color=labels,
           title='KMeans Clusters',
           hover_name=stock_data['Symbol'],
           width=750,
           height=500).update_layout(title_x=0.5)

In [83]:
dividends = []

for i in ticker_list:
    info = yf.Ticker(i).info
    div = info.get('trailingAnnualDividendYield')
    dividends.append(div)

[0.05910891,
 0.01655251,
 0.019279953,
 0.042344324,
 0.013921486,
 0.0,
 0.02249182,
 0.0,
 0.02128055,
 0.07247482,
 0.031334624,
 0.02368421,
 0.0074620466,
 0.023404857,
 0.0,
 0.0,
 0.0072433962,
 0.039247885,
 0.0,
 0.014856738,
 0.032490637,
 0.030654073,
 0.0,
 0.0,
 0.08248337,
 0.0,
 0.04817013,
 0.0,
 0.02871157,
 0.0,
 0.038070466,
 0.012591082,
 0.022563018,
 0.03152988,
 0.017813433,
 0.015765907,
 0.010888351,
 0.006021306,
 0.035932202,
 0.010411376,
 0.017963076,
 0.0,
 0.0071732798,
 0.025436046,
 0.005173771,
 0.008147545,
 0.0,
 0.0,
 0.0,
 0.010188587,
 0.021851001,
 0.0689441,
 0.024244493,
 0.0,
 0.0,
 0.03449573,
 0.017452007,
 0.0,
 0.024151437,
 0.015128593,
 0.029511534,
 0.01942219,
 0.02752729,
 0.014165207,
 0.006932409,
 0.0,
 0.04704017,
 0.0,
 0.0038905775,
 0.0,
 0.028957728,
 0.03317319,
 0.0,
 0.0,
 0.014551681,
 0.07195301,
 0.0,
 0.03406998,
 0.0,
 0.018286346,
 0.0068568727,
 0.01190366,
 0.026542095,
 0.02524483,
 0.0,
 0.0,
 0.033730682,
 0.032

In [91]:
for (i,j) in zip(ticker_list, dividends):
    print(f'ticker: {i}, dividend: {j}')

ticker: MMM, dividend: 0.05910891
ticker: AOS, dividend: 0.01655251
ticker: ABT, dividend: 0.019279953
ticker: ABBV, dividend: 0.042344324
ticker: ACN, dividend: 0.013921486
ticker: ATVI, dividend: 0.0
ticker: ADM, dividend: 0.02249182
ticker: ADBE, dividend: 0.0
ticker: ADP, dividend: 0.02128055
ticker: AAP, dividend: 0.07247482
ticker: AES, dividend: 0.031334624
ticker: AFL, dividend: 0.02368421
ticker: A, dividend: 0.0074620466
ticker: APD, dividend: 0.023404857
ticker: AKAM, dividend: 0.0
ticker: ALK, dividend: 0.0
ticker: ALB, dividend: 0.0072433962
ticker: ARE, dividend: 0.039247885
ticker: ALGN, dividend: 0.0
ticker: ALLE, dividend: 0.014856738
ticker: LNT, dividend: 0.032490637
ticker: ALL, dividend: 0.030654073
ticker: GOOGL, dividend: 0.0
ticker: GOOG, dividend: 0.0
ticker: MO, dividend: 0.08248337
ticker: AMZN, dividend: 0.0
ticker: AMCR, dividend: 0.04817013
ticker: AMD, dividend: 0.0
ticker: AEE, dividend: 0.02871157
ticker: AAL, dividend: 0.0
ticker: AEP, dividend: 0.0380

In [94]:
stock_data['Dividend'] = dividends

In [95]:
stock_data

Unnamed: 0,Symbol,Security,Sector,Return,Volatility,Ratio,Cluster Label,Dividend
0,MMM,3M,Industrials,-0.084,0.303,-0.279,0,0.059109
1,AOS,A. O. Smith,Industrials,0.349,0.408,0.855,1,0.016553
2,ABT,Abbott,Health Care,-1.284,0.626,-2.052,0,0.019280
3,ABBV,AbbVie,Health Care,0.390,0.299,1.303,1,0.042344
4,ACN,Accenture,Information Technology,0.036,0.219,0.166,0,0.013921
...,...,...,...,...,...,...,...,...
498,YUM,Yum! Brands,Consumer Discretionary,0.328,0.188,1.747,1,0.013176
499,ZBRA,Zebra Technologies,Information Technology,0.376,0.222,1.693,1,0.000000
500,ZBH,Zimmer Biomet,Health Care,0.086,0.466,0.186,0,0.007214
501,ZION,Zions Bancorporation,Financials,-0.675,0.691,-0.977,0,0.051127


In [100]:
print(yf.Ticker('AAPL').info['dividendYield'])
print(yf.Ticker('AAPL').info['trailingAnnualDividendYield'])

0.0053
0.005173771
