<a href="https://colab.research.google.com/github/Zozz98/Finance-Economics/blob/main/sandbox.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IMPORTS

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import plotly.express as px
import yfinance as yf
import pandas_datareader as pdr
import datetime as dt

from sklearn.cluster import KMeans

# GET DATA FROM WIKIPEDIA

In [2]:
# Scrape the S&P 500 constituents table from Wikipedia.
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# read_html returns one DataFrame per HTML table on the page; the notebook
# treats the first one as the constituents list.
sp500_list = pd.read_html(url)[0]

# Keep only the columns used further down, selected by name. Selecting what
# is needed (rather than dropping a hard-coded list of everything else)
# keeps working when Wikipedia renames or adds an unrelated column.
sp500_list = (
    sp500_list[['Symbol', 'Security', 'GICS Sector']]
    .rename(columns={'GICS Sector': 'Sector'})
)

# LOAD CLOSE PRICES FROM YAHOO FINANCE FOR THE WIKIPEDIA TICKERS

In [3]:
# Price-history window: ends now and starts 252 calendar days earlier.
# NOTE(review): 252 is the conventional number of *trading* days per year,
# but timedelta counts calendar days, so this window covers less than one
# trading year -- confirm whether days=365 was intended.
end = dt.datetime.now()
start = end - dt.timedelta(days=252)

# The Wikipedia table writes share-class tickers with a dot (BRK.B, BF.B)
# while the download below expects a dash (BRK-B, BF-B). Normalise every
# symbol instead of special-casing two of them, so a newly added
# dual-class constituent is handled as well.
ticker_list = [symbol.replace('.', '-') for symbol in sp500_list['Symbol']]

In [4]:
# Fetch the price history of every constituent for the [start, end] window
# and keep only the 'Close' field of the result.
stocks = yf.download(
    tickers=ticker_list,
    start=start,
    end=end,
)['Close']

[*********************100%***********************]  503 of 503 completed


# LOG RETURN AND SET COLUMNS

In [5]:
# Daily log returns per ticker. Missing observations (the first row, and any
# day on which a ticker has no price) stay NaN so that mean()/std() skip
# them instead of counting them as artificial 0% returns.
log_return = np.log(stocks / stocks.shift(1))

# Annualise using 252 periods per year.
annual_return = log_return.mean() * 252
annual_volatility = log_return.std() * np.sqrt(252)

stock_data = pd.DataFrame()
stock_data['Symbol'] = ticker_list
stock_data['Security'] = sp500_list['Security']
stock_data['Sector'] = sp500_list['Sector']

# The statistics are indexed by the columns of `stocks`, whose order is not
# guaranteed to match ticker_list (which follows the Wikipedia table).
# Reindex by symbol so each metric lands on its own row rather than being
# assigned purely by position.
# Tickers without any usable price data fall back to 0 for Return and
# Volatility so that NaN does not reach the KMeans cells further down.
stock_data['Return'] = (
    annual_return.reindex(ticker_list).fillna(0).round(3).to_numpy()
)
stock_data['Volatility'] = (
    annual_volatility.reindex(ticker_list).fillna(0).round(3).to_numpy()
)
# Return per unit of volatility; left as NaN where it cannot be computed.
stock_data['Ratio'] = (
    (annual_return / annual_volatility).reindex(ticker_list).round(3).to_numpy()
)

# VOLATILITY-RETURN SCATTER PLOT

In [12]:
# Interactive risk/return map: one point per stock, coloured by its
# return-to-volatility ratio, with symbol and sector shown on hover.
risk_return_fig = px.scatter(
    data_frame=stock_data,
    x='Volatility',
    y='Return',
    color='Ratio',
    hover_name='Symbol',
    hover_data=['Sector'],
    title='S&P 500 stocks by Volatility/Return',
    template='plotly_dark',
    width=750,
    height=500,
)
# update_layout returns the figure, so the cell still renders it.
risk_return_fig.update_layout(title_x=0.5)

# FIND NUMBER OF CLUSTERS WITH ELBOW-METHOD

In [11]:
# Feature matrix for clustering.
# NOTE(review): Return and Volatility are clustered unscaled -- confirm that
# is intended, since KMeans is distance based.
X = stock_data[['Return', 'Volatility']]

# Fit one model per candidate k and record its inertia (within-cluster sum
# of squared distances). random_state is fixed so the curve is identical on
# every Restart & Run All.
k_values = list(range(2, 16))
inertia_list = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, n_init='auto', random_state=42)
    kmeans.fit(X)
    inertia_list.append(kmeans.inertia_)

# Plot inertia against the actual k. Plotting the bare list would put list
# positions 0..13 on an axis labelled "Number of Clusters", shifting the
# elbow two clusters to the left.
px.line(x=k_values,
        y=inertia_list,
        title='Elbow Curve',
        width=750,
        height=500,
        labels={
            "x":"Number of Clusters",
            "y":"Sum of Squared Error"
        },
        template='plotly_dark'
        ).update_layout(showlegend=False, title_x=0.5)

# KMEANS MODEL FIT

In [13]:
# Final model with two clusters on the (Return, Volatility) features.
# random_state is fixed so that cluster assignments -- and which cluster is
# labelled 0 or 1 -- are the same on every run of the notebook.
kmeans = KMeans(n_clusters=2, n_init='auto', random_state=42).fit(X)
labels = kmeans.labels_
# Store the assignment next to each stock's other attributes.
stock_data['Cluster Label'] = labels

# PLOT STOCKS BY CLUSTERS

In [14]:
# Risk/return scatter coloured by cluster membership.
# Cluster ids are categories, not quantities: passing them as strings makes
# Plotly use a discrete colour per cluster with a legend, instead of a
# continuous colour bar spanning meaningless in-between values.
px.scatter(X,
           x='Volatility',
           y='Return',
           color=labels.astype(str),
           labels={'color': 'Cluster'},
           title='KMeans Clusters',
           hover_name=stock_data['Symbol'],
           width=750,
           height=500,
           template='plotly_dark'
           ).update_layout(title_x=0.5)

In [15]:
# Collect the trailing annual dividend yield of each ticker, in ticker_list
# order. A ticker whose info has no such field contributes None.
dividends = []

for symbol in ticker_list:
    ticker_info = yf.Ticker(symbol).info
    dividends.append(ticker_info.get('trailingAnnualDividendYield'))

In [17]:
# Print each ticker next to the dividend value fetched for it.
for symbol, dividend_value in zip(ticker_list, dividends):
    print(f'Ticker: {symbol}, DividendRate: {dividend_value}')

Ticker: MMM, DividendRate: 0.05750891
Ticker: AOS, DividendRate: 0.016451566
Ticker: ABT, DividendRate: 0.018663112
Ticker: ABBV, DividendRate: 0.042534404
Ticker: ACN, DividendRate: 0.013373692
Ticker: ATVI, DividendRate: 0.0
Ticker: ADM, DividendRate: 0.022129828
Ticker: ADBE, DividendRate: 0.0
Ticker: ADP, DividendRate: 0.020726794
Ticker: AAP, DividendRate: 0.08582462
Ticker: AES, DividendRate: 0.03015356
Ticker: AFL, DividendRate: 0.023222476
Ticker: A, DividendRate: 0.007176441
Ticker: APD, DividendRate: 0.022733526
Ticker: AKAM, DividendRate: 0.0
Ticker: ALK, DividendRate: 0.0
Ticker: ALB, DividendRate: 0.0068641463
Ticker: ARE, DividendRate: 0.039455224
Ticker: ALGN, DividendRate: 0.0
Ticker: ALLE, DividendRate: 0.014124768
Ticker: LNT, DividendRate: 0.03227907
Ticker: ALL, DividendRate: 0.031119958
Ticker: GOOGL, DividendRate: 0.0
Ticker: GOOG, DividendRate: 0.0
Ticker: MO, DividendRate: 0.084258206
Ticker: AMZN, DividendRate: 0.0
Ticker: AMCR, DividendRate: 0.04737354
Ticker:

In [18]:
# Attach the fetched dividend values as a new 'Dividend' column. The
# assignment is positional: `dividends` and stock_data['Symbol'] are both
# built from ticker_list, so entry i belongs to row i.
stock_data['Dividend'] = dividends

In [37]:
# Split the universe into stocks with a zero dividend value and stocks with
# a positive one. Rows whose 'Dividend' is missing match neither filter.
no_dividend_shares = stock_data.loc[stock_data['Dividend'] == 0].count()
dividend_shares = stock_data.loc[stock_data['Dividend'] > 0].count()

# .count() yields one non-null count per column, indexed by column name.
# Read the 'Symbol' entry by label: integer keys on a label-indexed Series
# are deprecated positional look-ups in recent pandas.
print(f"Number of shares does not have Dividend: {no_dividend_shares['Symbol']}")
print(f"Number of shares does have Dividend: {dividend_shares['Symbol']}")

Number of shares does not have Dividend: 105
Number of shares does have Dividend: 398


0.0053
0.005173771
