In [75]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import altair as alt
import matplotlib

matplotlib.style.use("ggplot")
from ipywidgets import interact
import numpy as np
import matplotlib as mpl
import arrow
from matplotlib import animation, rc
from IPython.display import HTML, display
from datetime import timedelta
import pandas_datareader.data as data


%matplotlib inline

# Lets take a stab at some stock analysis!

In [79]:
# We would like all available data from 01/01/2000 until 12/31/2016.
start_date = "2015-01-01"
end_date = "2020-12-31"

# User pandas_reader.data.DataReader to load the desired data. As simple as that.
panel_data = data.DataReader(
    "FB;AMZN;SNAP;AAPL;QQQ;GOOG;MSFT".split(";"), "yahoo", start_date, end_date
)
print("Sample from the full dataset")
display(panel_data.head(1))
df_original = panel_data["Close"]
print("Sample of data from close")
df = df_original.copy()
display(df.head(3))

Sample from the full dataset
Sample of data from close


Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,...,Open,Open,Open,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Symbols,FB,AMZN,SNAP,AAPL,QQQ,GOOG,MSFT,FB,AMZN,SNAP,...,QQQ,GOOG,MSFT,FB,AMZN,SNAP,AAPL,QQQ,GOOG,MSFT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2015-01-02,78.449997,308.519989,,24.898552,97.672798,523.373108,41.539135,78.449997,308.519989,,...,103.760002,527.561584,46.66,18177500.0,2783200.0,,212818400.0,31314600.0,1447500.0,27913900.0


Symbols,FB,AMZN,SNAP,AAPL,QQQ,GOOG,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-01-02,78.449997,308.519989,,27.3325,102.940002,523.373108,46.759998
2015-01-05,77.190002,302.190002,,26.5625,101.43,512.463013,46.330002
2015-01-06,76.150002,295.290009,,26.565001,100.07,500.585632,45.650002


In [80]:
from numpy.lib.function_base import disp
import arrow

earliest = arrow.utcnow().shift(months=-12).date()
df = df_original.copy()[
    earliest:
]  # note this is destructive, probably good to keep an original around as a best practice

df.index = df.index.astype(
    str
)  # when transposing dates to columns, easier to operate in strings.

first_day = df.reset_index().iloc[0, 0]
last_day = df.reset_index().iloc[-1, 0]

display(f"Returns from:{first_day}, to:{last_day}")

returns = df.iloc[
    [0, -1]
].T  # first and last row, and turn into columns for easy manipulation
returns["delta"] = returns[last_day] - returns[first_day]
returns["pcnt_change"] = returns.delta / returns[first_day]
display(returns)

# returns.diff = returns.[] _[0] - _[1]
print(
    "QQ: Should sum daily %% change ==  pcnt_change of total returns -- because it doesn't??"
)
df.pct_change().sum()

'Returns from:2019-12-24, to:2020-12-24'

Date,2019-12-24,2020-12-24,delta,pcnt_change
Symbols,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FB,205.119995,267.399994,62.279999,0.303627
AMZN,1789.209961,3172.689941,1383.47998,0.773235
SNAP,15.61,50.150002,34.540002,2.212684
AAPL,71.067497,131.970001,60.902504,0.856967
QQQ,211.919998,309.559998,97.639999,0.46074
GOOG,1343.560059,1738.849976,395.289917,0.294211
MSFT,157.380005,222.75,65.369995,0.415364


QQ: Should sum daily %% change ==  pcnt_change of total returns -- because it doesn't??


Symbols
FB      0.370602
AMZN    0.647662
SNAP    1.445446
AAPL    0.728053
QQQ     0.443329
GOOG    0.331577
MSFT    0.444180
dtype: float64

In [81]:
from numpy.core.defchararray import encode
from altair.vegalite.v4.schema.channels import Tooltip

# Look at correlation
print("Correlations between stocks")
print("  NOTE: Need to correlate on percentage change, not abosolute price")
df = df_original.copy()
df.columns.name = None
corr = df.pct_change(1).corr()  # compute correlation on percent change
display(corr)
corr = df.corr().reset_index().melt(id_vars="index")
# display(corr)
height_in_inches = 40

base = (
    alt.Chart(corr)
    .properties(
        width=8 * height_in_inches,
        height=8 * height_in_inches,
    )
    .encode(
        x="index:O",
        y="variable:O",
    )
)

c = base.mark_rect().encode(tooltip="value;index;variable".split(";"), color="value:Q")


# Configure text
text = base.mark_text(baseline="middle").encode(text=alt.Text("value:Q", format="0.2f"))

display(c + text)

Correlations between stocks
  NOTE: Need to correlate on percentage change, not abosolute price


Unnamed: 0,FB,AMZN,SNAP,AAPL,QQQ,GOOG,MSFT
FB,1.0,0.599847,0.308828,0.574991,0.73039,0.668796,0.607162
AMZN,0.599847,1.0,0.267441,0.552496,0.734215,0.65041,0.654383
SNAP,0.308828,0.267441,1.0,0.239857,0.340149,0.292625,0.300977
AAPL,0.574991,0.552496,0.239857,1.0,0.823706,0.605544,0.682758
QQQ,0.73039,0.734215,0.340149,0.823706,1.0,0.811557,0.878047
GOOG,0.668796,0.65041,0.292625,0.605544,0.811557,1.0,0.72893
MSFT,0.607162,0.654383,0.300977,0.682758,0.878047,0.72893,1.0
