
# Stonks
## Import needed packages

In [123]:
import pandas as pd
import numpy as np
import datetime as dt
import pandas_datareader.data as pdr
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from tqdm.notebook import tqdm
# yf.pdr_override()

## Functions to get stock data and to plot share prices

In [124]:
def get_fundamentals(tickers):
    '''Gets the fundamentals data for given tickers and produces a clean dataframe from it'''
    
    tickers_data = {}
    fundamentals = ['forwardPE',
                    'forwardEps',
                    'sector',
                    'fullTimeEmployees',
                    'country',
                    'twoHundredDayAverage',
                    'averageDailyVolume10Day',
                    'trailingPE',
                    'marketCap',
                    'priceToSalesTrailing12Months',
                    'trailingEps',
                    'priceToBook',
                    'earningsQuarterlyGrowth',
                    'pegRatio']
    # Loop all tickers and get some interesting fundamentals.
    for ticker in tqdm(tickers):
        ticker_object = yf.Ticker(ticker)

        #convert info() output from dictionary to dataframe
        new_info = { key:value for (key,value) in ticker_object.info.items() if key in fundamentals}
        temp = pd.DataFrame.from_dict(new_info, orient="index")
        temp.reset_index(inplace=True)
        if len(temp.columns) == 2:
            temp.columns = ["Attribute", "Value"]
            # add (ticker, dataframe) to main dictionary
            tickers_data[ticker] = temp


    combined_data = pd.concat(tickers_data).reset_index().drop(columns="level_1").rename(columns={'level_0': 'Ticker'})
    combined_data = combined_data.pivot(index='Ticker', columns='Attribute', values='Value').reset_index()
    combined_data = combined_data.rename_axis(None, axis=1).infer_objects()
    combined_data.dropna(inplace=True) # Drop if any fundamental is NA
    return combined_data

In [125]:
def get_data(mode="test"):
    '''Fetches stock tickers and fundamentals data from Yahoo or csv'''
    if mode == "test":
        # Tickers for lighter computing
        tickers =['FB','AMZN', 'AAPL', 'NFLX', 'GOOGL', 'MSFT']
        fundamentals = get_fundamentals(tickers)
    elif mode == "all":
        #Get all tickers from csv, if no csv in directory -> scrape them from wikipedia
        SP500_fileName = "SP500_symbols.csv"
        if not os.path.isfile(SP500_fileName):
            tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
            tickers = tickers[0]["Symbol"]
            tickers.to_csv(SP500_fileName)
        else:
            tickers = pd.read_csv(SP500_fileName)

        # Get all fundamentals from csv, if no csv in directory -> scrape them from yahoo
        fundamentals_fileName = "SP500_fundamentals.csv"
        if not os.path.isfile(fundamentals_fileName):
            fundamentals = get_fundamentals(tickers)
            fundamentals.to_csv(fundamentals_fileName)
        else:
            fundamentals = pd.read_csv(fundamentals_fileName)
    else:
        print("Select mode")
        return 0

    return tickers,fundamentals


In [126]:
def monitor_stock(stock_df,stockName):
    '''Creates an interactive Plotly figure to monitor the share prices and volumes of given stocks'''
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, 
               vertical_spacing=0.03, 
               row_width=[0.2, 0.7])


    fig.add_trace(go.Candlestick(x = stock_df.index, 
                                                   open = stock_df[('Open',    stockName)], 
                                                   high = stock_df[('High',    stockName)], 
                                                   low = stock_df[('Low',    stockName)], 
                                                   close = stock_df[('Close',    stockName)],showlegend=False,name="Price"))
    fig.update_xaxes(row=1, col=1,
        title_text = '',
        rangeslider_visible = False,
        rangeselector = dict(
            buttons = list([
                dict(count = 1, label = '1M', step = 'month', stepmode = 'backward'),
                dict(count = 6, label = '6M', step = 'month', stepmode = 'backward'),
                dict(count = 1, label = 'YTD', step = 'year', stepmode = 'todate'),
                dict(count = 1, label = '1Y', step = 'year', stepmode = 'backward'),
                dict(step = 'all')])))
    
    fig.add_trace(go.Bar(x = stock_df.index, y=stock_df[('Volume',    stockName)], showlegend=False,name="Volume",marker=dict(color="rgba(0,0,0.8,0.66)")),row=2, col=1)

 
    
    fig.update_layout(
        width=1280,
        height=800,
        title = {
            'text': stockName +' STOCK MONITOR',
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
            plot_bgcolor =  "rgba(1,1,1,0.05)")
    
    fig.update_yaxes(title_text ='Close Price', tickprefix = '$',row=1,col=1)
    fig.update_yaxes(title_text = 'Volume',row=2,col=1)
    fig.show()

## Monitor one example stock

In [127]:
start = dt.datetime(2020,1,1)
end = dt.datetime.now()
stocks = pdr.DataReader(['FB'], 'yahoo', start, end)
stocks.describe()

Attributes,Adj Close,Close,High,Low,Open,Volume
Symbols,FB,FB,FB,FB,FB,FB
count,333.0,333.0,333.0,333.0,333.0,333.0
mean,244.967778,244.967778,248.528169,241.260961,244.831111,21874220.0
std,39.429188,39.429188,39.737288,39.453673,39.601401,9626643.0
min,146.009995,146.009995,148.179993,137.100006,139.75,6702000.0
25%,217.789993,217.789993,218.770004,213.520004,216.520004,15427900.0
50%,256.839996,256.839996,263.070007,254.820007,258.399994,19528900.0
75%,273.570007,273.570007,277.76001,269.420013,273.470001,25117700.0
max,313.089996,313.089996,315.880005,310.329987,314.850006,76343900.0


In [128]:
monitor_stock(stocks,"FB")

## Get fundamentals data for companies

In [129]:
# tickers,fundamentals = get_data("test")
tickers,fundamentals = get_data("all")
fundamentals

Unnamed: 0.1,Unnamed: 0,Ticker,averageDailyVolume10Day,country,earningsQuarterlyGrowth,forwardEps,forwardPE,fullTimeEmployees,marketCap,pegRatio,priceToBook,priceToSalesTrailing12Months,sector,trailingEps,trailingPE,twoHundredDayAverage
0,0,A,1396414.0,United States,0.462,4.36,30.917430,16400.0,4.107329e+10,2.91,8.555471,7.427358,Healthcare,2.597,51.906048,119.774414
1,2,AAP,920185.0,United States,0.168,11.66,16.949400,40000.0,1.289753e+10,1.57,3.654468,1.276185,Consumer Cyclical,7.140,27.679274,164.982210
2,3,AAPL,79115057.0,United States,0.293,4.72,28.300850,147000.0,2.242554e+12,2.00,33.938007,7.624235,Technology,3.687,36.229996,125.668380
3,4,ABBV,4930885.0,United States,-0.987,13.87,8.069935,47000.0,1.975374e+11,2.15,15.109342,4.312667,Healthcare,2.720,41.150734,103.048010
4,6,ABMD,231200.0,United States,-0.106,4.91,71.521390,1536.0,1.588377e+10,4.02,12.589898,19.538795,Healthcare,4.387,80.047870,304.002440
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,495,XLNX,2203175.0,United States,0.055,3.30,39.854546,4891.0,3.232223e+10,5.20,12.408718,10.587797,Technology,2.499,52.629050,129.577700
357,498,XYL,849575.0,United States,0.254,3.09,36.006474,15600.0,2.002702e+10,2.34,6.760649,4.107264,Industrials,1.400,79.471430,96.721260
358,501,ZBRA,271150.0,United States,0.178,16.95,29.297934,8800.0,2.656775e+10,3.02,12.383114,5.972966,Technology,9.350,53.112297,381.658600
359,502,ZION,1163275.0,United States,22.000,4.33,12.759815,9682.0,9.049950e+09,-0.32,1.228433,3.232125,Financial Services,4.919,11.231957,43.150295


In [130]:
fundamentals

Unnamed: 0.1,Unnamed: 0,Ticker,averageDailyVolume10Day,country,earningsQuarterlyGrowth,forwardEps,forwardPE,fullTimeEmployees,marketCap,pegRatio,priceToBook,priceToSalesTrailing12Months,sector,trailingEps,trailingPE,twoHundredDayAverage
0,0,A,1396414.0,United States,0.462,4.36,30.917430,16400.0,4.107329e+10,2.91,8.555471,7.427358,Healthcare,2.597,51.906048,119.774414
1,2,AAP,920185.0,United States,0.168,11.66,16.949400,40000.0,1.289753e+10,1.57,3.654468,1.276185,Consumer Cyclical,7.140,27.679274,164.982210
2,3,AAPL,79115057.0,United States,0.293,4.72,28.300850,147000.0,2.242554e+12,2.00,33.938007,7.624235,Technology,3.687,36.229996,125.668380
3,4,ABBV,4930885.0,United States,-0.987,13.87,8.069935,47000.0,1.975374e+11,2.15,15.109342,4.312667,Healthcare,2.720,41.150734,103.048010
4,6,ABMD,231200.0,United States,-0.106,4.91,71.521390,1536.0,1.588377e+10,4.02,12.589898,19.538795,Healthcare,4.387,80.047870,304.002440
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,495,XLNX,2203175.0,United States,0.055,3.30,39.854546,4891.0,3.232223e+10,5.20,12.408718,10.587797,Technology,2.499,52.629050,129.577700
357,498,XYL,849575.0,United States,0.254,3.09,36.006474,15600.0,2.002702e+10,2.34,6.760649,4.107264,Industrials,1.400,79.471430,96.721260
358,501,ZBRA,271150.0,United States,0.178,16.95,29.297934,8800.0,2.656775e+10,3.02,12.383114,5.972966,Technology,9.350,53.112297,381.658600
359,502,ZION,1163275.0,United States,22.000,4.33,12.759815,9682.0,9.049950e+09,-0.32,1.228433,3.232125,Financial Services,4.919,11.231957,43.150295


## Perform mMDS, PCA and other analysis

In [131]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import random

## Correlation matrix of fundamentals

In [132]:
num_features = fundamentals.select_dtypes(include=np.number).columns.tolist()
num_fundamentals = fundamentals.loc[:, num_features]
num_fundamentals.corr()



Unnamed: 0.1,Unnamed: 0,averageDailyVolume10Day,earningsQuarterlyGrowth,forwardEps,forwardPE,fullTimeEmployees,marketCap,pegRatio,priceToBook,priceToSalesTrailing12Months,trailingEps,trailingPE,twoHundredDayAverage
Unnamed: 0,1.0,-0.069508,-0.069952,0.024241,0.070459,-0.044088,-0.053973,-0.099849,0.055545,0.027104,-0.009849,0.106626,-0.016244
averageDailyVolume10Day,-0.069508,1.0,0.009132,-0.075621,-0.045201,0.174832,0.513975,-0.013119,-0.018856,-0.005346,-0.090925,0.056349,-0.089315
earningsQuarterlyGrowth,-0.069952,0.009132,1.0,-0.014438,-0.001762,-0.017186,-0.009521,-0.021581,-0.002095,-0.023458,-0.004017,0.002974,-0.014314
forwardEps,0.024241,-0.075621,-0.014438,1.0,-0.061658,0.129174,0.204648,-0.003764,0.039254,-0.038132,0.895078,-0.039734,0.856532
forwardPE,0.070459,-0.045201,-0.001762,-0.061658,1.0,-0.059681,0.032161,0.03418,0.035999,0.464413,-0.042759,0.286862,0.094719
fullTimeEmployees,-0.044088,0.174832,-0.017186,0.129174,-0.059681,1.0,0.468574,-0.008552,0.102182,-0.189661,0.100412,0.057773,0.338314
marketCap,-0.053973,0.513975,-0.009521,0.204648,0.032161,0.468574,1.0,-0.004191,0.050795,0.14024,0.189741,0.051809,0.405944
pegRatio,-0.099849,-0.013119,-0.021581,-0.003764,0.03418,-0.008552,-0.004191,1.0,-0.002172,0.091691,0.00406,0.005498,0.011621
priceToBook,0.055545,-0.018856,-0.002095,0.039254,0.035999,0.102182,0.050795,-0.002172,1.0,0.129334,0.031355,0.045832,0.09893
priceToSalesTrailing12Months,0.027104,-0.005346,-0.023458,-0.038132,0.464413,-0.189661,0.14024,0.091691,0.129334,1.0,-0.03777,0.28019,0.180293


## Scatter matrix of numerical fundamentals

In [133]:
# fig = px.scatter_matrix(fundamentals,
#     dimensions=fundamentals.select_dtypes(include=np.number).columns.tolist(),
#     color="sector")
# fig.update_layout(width=1280,
#                     height=800)
# fig.update_traces(diagonal_visible=False)
# fig.show()

## PCA

In [134]:
def pca_on_fundamentals(data):
    '''Performs PCA on the numeric values of the fundamentals dataset'''
    features = data.select_dtypes(include=np.number).columns.tolist()
    x = data.loc[:, features].values
    x = StandardScaler().fit_transform(x)
    pd.DataFrame(data = x, columns = features).head()
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(x)
    principalDf = pd.DataFrame(data = principalComponents, columns = ['PC1', 'PC2'])
    return principalDf

def plot_pca(data):
    '''Plots the PCA onto two dimensions using interactive Plotly scatterplot'''
    principalDf = pca_on_fundamentals(data)
    rand_colours = color = [
        "#" + "".join([random.choice("0123456789ABCDEF") for j in range(6)])
        for i in range(len(pd.unique(data['sector'])))
    ]
    colours = {i:rand_colours[k] for k,i in enumerate(pd.unique(data['sector']))}

    col_df = pd.DataFrame.from_dict(colours,orient='index').reset_index()
    col_df.columns = ["sector", "Colour"]
    t = pd.merge(data,col_df,how="left")

    fig = go.Figure(go.Scatter(
        x=principalDf["PC1"],
        y=principalDf["PC2"],
        mode='markers',
        text=t["Ticker"],
        marker_color=t['Colour'],
        marker_size=t["forwardPE"]
        ))

    fig.update_layout(
        width=1280,
        height=800,
        title = {
            'text': 'PCA of tickers',
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
            plot_bgcolor =  "rgba(1,1,1,0.05)")

    fig.show()

In [135]:
# pca = pca_on_fundamentals(t)
plot_pca(fundamentals)