
# Stonks
## Import needed packages

In [39]:
import pandas as pd
import numpy as np
import datetime as dt
import pandas_datareader.data as pdr
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from tqdm.notebook import tqdm
# yf.pdr_override()

## Functions to get stock data and to plot share prices

In [40]:
def get_fundamentals(tickers):
    '''Fetch a fixed set of fundamentals from Yahoo Finance, one row per ticker.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols passed one by one to ``yf.Ticker``.

    Returns
    -------
    pandas.DataFrame
        A ``Ticker`` column followed by one column per fundamental that was
        present for at least one ticker (columns in alphabetical order, rows
        sorted by ticker). Rows with any missing value are dropped, so the
        frame only contains tickers with a complete set of fundamentals.
        If nothing usable was retrieved, an empty frame with only a
        ``Ticker`` column is returned.
    '''
    fundamentals = ['forwardPE',
                    'forwardEps',
                    'sector',
                    'fullTimeEmployees',
                    'country',
                    'twoHundredDayAverage',
                    'averageDailyVolume10Day',
                    'trailingPE',
                    'marketCap',
                    'priceToSalesTrailing12Months',
                    'trailingEps',
                    'priceToBook',
                    'earningsQuarterlyGrowth',
                    'pegRatio']
    wanted = set(fundamentals)

    # Collect {ticker: {attribute: value}}; the wide frame is built once below.
    records = {}
    for ticker in tqdm(tickers):
        info = yf.Ticker(ticker).info or {}
        row = {key: value for key, value in info.items() if key in wanted}
        # A ticker with none of the wanted keys would be removed by the final
        # dropna anyway; skipping it here avoids building a malformed row.
        if row:
            records[ticker] = row

    if not records:
        return pd.DataFrame(columns=["Ticker"])

    combined_data = pd.DataFrame.from_dict(records, orient="index")
    # Sort rows by ticker and columns by attribute name for a stable layout.
    combined_data = combined_data.sort_index().sort_index(axis=1)
    combined_data = combined_data.rename_axis("Ticker").reset_index()
    combined_data = combined_data.infer_objects()
    # Keep only tickers for which every collected fundamental is available.
    return combined_data.dropna()

In [41]:
def get_data(mode="test"):
    '''Return tickers and their fundamentals, using CSV caches in "all" mode.

    Parameters
    ----------
    mode : str
        ``"test"`` uses a short hard-coded ticker list and always queries
        Yahoo through ``get_fundamentals``.
        ``"all"`` uses the S&P 500 constituents: symbols are read from
        ``SP500_symbols.csv`` (scraped from Wikipedia and cached if the file
        is missing) and fundamentals from ``SP500_fundamentals.csv``
        (fetched through ``get_fundamentals`` and cached if missing).

    Returns
    -------
    tuple
        ``(tickers, fundamentals)``; ``tickers`` is a list in "test" mode and
        a pandas Series of symbols in "all" mode.
        For any other mode a message is printed and ``0`` is returned instead
        of a tuple (kept for backward compatibility).
    '''
    if mode == "test":
        # Tickers for lighter computing
        tickers = ['FB', 'AMZN', 'AAPL', 'NFLX', 'GOOGL', 'MSFT']
        fundamentals = get_fundamentals(tickers)
    elif mode == "all":
        # Get all tickers from csv, if no csv in directory -> scrape them from wikipedia
        SP500_fileName = "SP500_symbols.csv"
        # `not`, not `~`: bitwise-inverting a bool yields -1 / -2, both truthy,
        # which made the cache branch unreachable.
        if not os.path.isfile(SP500_fileName):
            tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
            tickers = tables[0]["Symbol"]
            tickers.to_csv(SP500_fileName, index=False, header=True)
        else:
            # Select the column so iteration yields symbols rather than the
            # CSV's column names; also works for caches written with an index.
            tickers = pd.read_csv(SP500_fileName)["Symbol"]

        # Get all fundamentals from csv, if no csv in directory -> scrape them from yahoo
        fundamentals_fileName = "SP500_fundamentals.csv"
        if not os.path.isfile(fundamentals_fileName):
            fundamentals = get_fundamentals(tickers)
            fundamentals.to_csv(fundamentals_fileName, index=False)
        else:
            fundamentals = pd.read_csv(fundamentals_fileName)
            # Caches written with the row index carry it back as a numeric
            # "Unnamed: 0" column, which would leak into numeric analyses.
            fundamentals = fundamentals.drop(columns="Unnamed: 0", errors="ignore")
    else:
        print("Select mode")
        return 0

    return tickers, fundamentals


In [42]:
def monitor_stock(stock_df,stockName):
    '''Display an interactive two-row Plotly figure for one stock.

    The top row is a candlestick chart built from the Open/High/Low/Close
    columns, the bottom row a bar chart of Volume. Columns of ``stock_df``
    are looked up with ``(attribute, stockName)`` tuple keys and the frame's
    index is used as the shared x axis. The figure is shown, not returned.
    '''
    dates = stock_df.index

    def column(attribute):
        # Columns are addressed as (attribute, symbol) pairs.
        return stock_df[(attribute, stockName)]

    price_trace = go.Candlestick(
        x=dates,
        open=column('Open'),
        high=column('High'),
        low=column('Low'),
        close=column('Close'),
        showlegend=False,
        name="Price",
    )
    volume_trace = go.Bar(
        x=dates,
        y=column('Volume'),
        showlegend=False,
        name="Volume",
        marker={"color": "rgba(0,0,0.8,0.66)"},
    )

    # Quick-zoom buttons shown above the price chart.
    range_buttons = [
        {"count": 1, "label": '1M', "step": 'month', "stepmode": 'backward'},
        {"count": 6, "label": '6M', "step": 'month', "stepmode": 'backward'},
        {"count": 1, "label": 'YTD', "step": 'year', "stepmode": 'todate'},
        {"count": 1, "label": '1Y', "step": 'year', "stepmode": 'backward'},
        {"step": 'all'},
    ]

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.03,
        row_width=[0.2, 0.7],
    )

    # Added without row/col, exactly as before, so it lands on the figure's default axes.
    fig.add_trace(price_trace)
    fig.update_xaxes(
        row=1,
        col=1,
        title_text='',
        rangeslider_visible=False,
        rangeselector={"buttons": range_buttons},
    )
    fig.add_trace(volume_trace, row=2, col=1)

    title_settings = {
        'text': stockName +' STOCK MONITOR',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
    fig.update_layout(
        width=1280,
        height=800,
        title=title_settings,
        plot_bgcolor="rgba(1,1,1,0.05)",
    )

    fig.update_yaxes(title_text='Close Price', tickprefix='$', row=1, col=1)
    fig.update_yaxes(title_text='Volume', row=2, col=1)
    fig.show()

## Monitor one example stock

In [43]:
# Price history window: from 1 January 2020 up to the moment this cell is run,
# so the downloaded data (and the statistics below) change between runs.
start = dt.datetime(2020,1,1)
end = dt.datetime.now()
# Request the FB symbol from pandas-datareader's 'yahoo' source; `stocks` is
# reused by the monitor_stock() call in a later cell.
stocks = pdr.DataReader(['FB'], 'yahoo', start, end)
# Summary statistics of every downloaded column.
stocks.describe()

Attributes,Adj Close,Close,High,Low,Open,Volume
Symbols,FB,FB,FB,FB,FB,FB
count,333.0,333.0,333.0,333.0,333.0,333.0
mean,244.967778,244.967778,248.528169,241.260961,244.831111,21874220.0
std,39.429188,39.429188,39.737288,39.453673,39.601401,9626643.0
min,146.009995,146.009995,148.179993,137.100006,139.75,6702000.0
25%,217.789993,217.789993,218.770004,213.520004,216.520004,15427900.0
50%,256.839996,256.839996,263.070007,254.820007,258.399994,19528900.0
75%,273.570007,273.570007,277.76001,269.420013,273.470001,25117700.0
max,313.089996,313.089996,315.880005,310.329987,314.850006,76343900.0


In [44]:
# Render the candlestick + volume monitor for FB from the frame downloaded above.
monitor_stock(stocks,"FB")

## Get fundamentals data for companies

In [45]:
# "test" mode uses the short hard-coded ticker list inside get_data();
# switch to the commented-out "all" call below for the S&P 500 list.
tickers,fundamentals = get_data("test")
# tickers,fundamentals = get_data("all")
# Bare last expression: display the resulting fundamentals table.
fundamentals

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




Unnamed: 0,Ticker,averageDailyVolume10Day,country,earningsQuarterlyGrowth,forwardEps,forwardPE,fullTimeEmployees,marketCap,pegRatio,priceToBook,priceToSalesTrailing12Months,sector,trailingEps,trailingPE,twoHundredDayAverage
0,AAPL,79115057,United States,0.293,4.72,28.30085,147000,2242554363904,2.0,33.938007,7.624235,Technology,3.687,36.229996,125.66838
1,AMZN,3147085,United States,1.21,66.22,52.227425,1298000,1743488679936,1.86,18.624727,4.516061,Consumer Cyclical,41.83,82.67989,3200.9167
2,FB,15936985,United States,0.527,13.6,22.580883,58604,872550957056,1.25,6.819898,10.150073,Communication Services,10.09,30.436075,276.72485
3,GOOGL,1464928,United States,1.623,91.17,25.87518,139995,1597159768064,1.31,6.882824,8.120518,Communication Services,75.04,31.4371,1899.859
4,MSFT,23481314,United States,0.438,8.28,30.743961,163000,1917236543488,1.8,14.258667,11.98505,Technology,7.338,34.69065,228.32088
5,NFLX,8994142,United States,1.407,12.98,39.023113,9400,224592494592,1.16,17.430744,8.509984,Communication Services,8.263,61.29977,520.63983


## Perform mMDS, PCA and other analysis

In [46]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import random

## Correlation matrix of fundamentals

In [60]:
# Keep only the numeric columns (text columns such as sector/country are excluded),
# then show their pairwise correlation matrix.
num_features = fundamentals.select_dtypes(include=np.number).columns.tolist()
num_fundamentals = fundamentals.loc[:, num_features]
num_fundamentals.corr()



Unnamed: 0,averageDailyVolume10Day,earningsQuarterlyGrowth,forwardEps,forwardPE,fullTimeEmployees,marketCap,pegRatio,priceToBook,priceToSalesTrailing12Months,trailingEps,trailingPE,twoHundredDayAverage
averageDailyVolume10Day,1.0,-0.733512,-0.581191,-0.317175,-0.268612,0.522419,0.605373,0.82077,0.07613,-0.549527,-0.34619,-0.55338
earningsQuarterlyGrowth,-0.733512,1.0,0.756296,0.378378,0.210698,-0.42858,-0.53924,-0.415627,-0.449889,0.741528,0.425739,0.637586
forwardEps,-0.581191,0.756296,1.0,0.248741,0.468722,0.147087,-0.134522,-0.414488,-0.539626,0.986778,0.237797,0.847922
forwardPE,-0.317175,0.378378,0.248741,1.0,0.827425,-0.037782,0.291294,0.255429,-0.692093,0.104597,0.98129,0.684149
fullTimeEmployees,-0.268612,0.210698,0.468722,0.827425,1.0,0.316191,0.482226,0.137035,-0.754076,0.328082,0.79125,0.851673
marketCap,0.522419,-0.42858,0.147087,-0.037782,0.316191,1.0,0.871335,0.439414,-0.133879,0.152538,-0.178431,0.157905
pegRatio,0.605373,-0.53924,-0.134522,0.291294,0.482226,0.871335,1.0,0.704876,-0.254828,-0.188505,0.164142,0.124011
priceToBook,0.82077,-0.415627,-0.414488,0.255429,0.137035,0.439414,0.704876,1.0,-0.348626,-0.455964,0.223927,-0.174876
priceToSalesTrailing12Months,0.07613,-0.449889,-0.539626,-0.692093,-0.754076,-0.133879,-0.254828,-0.348626,1.0,-0.429268,-0.73699,-0.780442
trailingEps,-0.549527,0.741528,0.986778,0.104597,0.328082,0.152538,-0.188505,-0.455964,-0.429268,1.0,0.090268,0.75178


## Scatter matrix of numerical fundamentals

In [61]:
# fig = px.scatter_matrix(fundamentals,
#     dimensions=fundamentals.select_dtypes(include=np.number).columns.tolist(),
#     color="sector")
# fig.update_layout(width=1280,
#                     height=800)
# fig.update_traces(diagonal_visible=False)
# fig.show()

In [47]:
def pca_on_fundamentals(data):
    '''Project the numeric columns of ``data`` onto two principal components.

    Numeric columns are selected by dtype, standardised with
    ``StandardScaler`` and reduced with a 2-component ``PCA``.

    Parameters
    ----------
    data : pandas.DataFrame
        Fundamentals table; non-numeric columns are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns ``PC1`` and ``PC2``, one row per input row in the same order,
        with a fresh default integer index (the index of ``data`` is not kept).

    Raises
    ------
    ValueError
        If ``data`` has fewer than two rows or fewer than two numeric columns,
        since two components cannot be computed from it.
    '''
    features = data.select_dtypes(include=np.number).columns.tolist()
    x = data.loc[:, features].values

    # Fail early with a readable message instead of an opaque estimator error.
    n_rows, n_features = x.shape
    if min(n_rows, n_features) < 2:
        raise ValueError(
            "PCA with 2 components needs at least 2 rows and 2 numeric columns, "
            "got {} row(s) and {} numeric column(s)".format(n_rows, n_features)
        )

    x = StandardScaler().fit_transform(x)
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(x)
    principalDf = pd.DataFrame(data = principalComponents, columns = ['PC1', 'PC2'])
    return principalDf

def plot_pca(data):
    '''Show an interactive Plotly scatter of the tickers in PCA space.

    Each row of ``data`` becomes one marker positioned at its (PC1, PC2)
    coordinates from ``pca_on_fundamentals``. Markers are labelled with the
    ``Ticker`` column, coloured per ``sector`` with a randomly drawn hex
    colour (colours therefore differ between runs unless ``random`` is
    seeded) and sized by ``forwardPE``. The figure is shown, not returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Ticker``, ``sector`` and ``forwardPE`` columns in
        addition to the numeric columns used for the PCA.
    '''
    principalDf = pca_on_fundamentals(data)

    # One random hex colour per distinct sector.
    sectors = pd.unique(data['sector'])
    colours = {
        sector: "#" + "".join(random.choice("0123456789ABCDEF") for _ in range(6))
        for sector in sectors
    }
    marker_colours = data['sector'].map(colours)

    # Marker sizes must be non-negative; a negative forward P/E would make
    # Plotly reject the whole trace, so such values are drawn with size 0.
    marker_sizes = data["forwardPE"].clip(lower=0)

    # principalDf keeps the row order of `data`, so the per-row series below
    # line up with the PCA coordinates by position.
    fig = go.Figure(go.Scatter(
        x=principalDf["PC1"],
        y=principalDf["PC2"],
        mode='markers',
        text=data["Ticker"],
        marker_color=marker_colours,
        marker_size=marker_sizes
        ))

    fig.update_layout(
        width=1280,
        height=800,
        title = {
            'text': 'PCA of tickers',
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
            plot_bgcolor =  "rgba(1,1,1,0.05)")

    fig.show()

In [62]:
# pca = pca_on_fundamentals(t)
# Plot the two-component PCA of the fundamentals table loaded by get_data() above.
plot_pca(fundamentals)