# Portfolio EDA

Performs exploratory data analysis on the data scraped from the API.

In [1]:
import os
from datetime import datetime
import plotly.express as px
import pandas as pd
import numpy as np

### Load Portfolio Data

Load the adjusted-close data, parse the `date` column into a timezone-naive `datetime` column, and set it as the (sorted) index.

In [2]:
# Wide adjusted-close table: one column per ticker, indexed by a
# timezone-naive datetime. Built as a single chain so re-running the cell
# never operates on a half-transformed frame.
stocks_adj_close_filename = os.path.join("output", "stocks_adj_close.csv")
stocks_adj_close_df = (
    pd.read_csv(stocks_adj_close_filename)
    .assign(
        # Parse the offset-bearing timestamp strings, then drop the timezone
        # so rows can be looked up with plain `datetime` objects.
        datetime=lambda raw: pd.to_datetime(
            raw["date"], format='%Y-%m-%dT%H:%M:%S%z'
        ).dt.tz_localize(None)
    )
    .drop(columns="date")
    .set_index("datetime")
    .sort_index()
)
stocks_adj_close_df

Unnamed: 0_level_0,AAPL,AMZN,GOOG,MSFT,NVDA,TSLA
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-04-01,114.025919,370.255,27.0537,36.618003,0.5063,187.59
2015-04-02,115.007873,372.250,26.7032,36.231320,0.5075,191.00
2015-04-06,116.870832,377.040,26.7647,37.359895,0.5221,203.10
2015-04-07,115.641095,374.410,26.7775,37.346406,0.5268,203.25
2015-04-08,115.264833,381.200,27.0063,37.247487,0.5306,207.67
...,...,...,...,...,...,...
2025-03-06,235.330000,200.700,174.2100,396.890000,110.5700,263.45
2025-03-07,239.070000,199.250,175.5500,393.310000,112.6900,262.67
2025-03-10,227.480000,194.540,167.8100,380.160000,106.9800,222.15
2025-03-11,220.840000,196.590,165.9800,380.450000,108.7500,230.58


Quick check: did the index get set properly?

In [3]:
# Look up a single row by a plain datetime to confirm the datetime index works.
check_date = datetime(2015, 4, 1)
stocks_adj_close_df.loc[check_date, :]

AAPL    114.025919
AMZN    370.255000
GOOG     27.053700
MSFT     36.618003
NVDA      0.506300
TSLA    187.590000
Name: 2015-04-01 00:00:00, dtype: float64

In [4]:
# Long-format price table indexed by (datetime, symbol). `set_index` with two
# keys builds the MultiIndex and removes both columns in one step, replacing
# the manual MultiIndex construction and in-place drops.
stocks_long_filename = os.path.join("output", "stocks_long.csv")
stocks_long_df = (
    pd.read_csv(stocks_long_filename)
    .assign(
        # Parse the offset-bearing timestamp strings, then drop the timezone
        # so rows can be looked up with plain `datetime` objects.
        datetime=lambda raw: pd.to_datetime(
            raw["date"], format='%Y-%m-%dT%H:%M:%S%z'
        ).dt.tz_localize(None)
    )
    .drop(columns="date")
    .set_index(["datetime", "symbol"])
    .sort_index()
)
stocks_long_df

Unnamed: 0_level_0,Unnamed: 1_level_0,adj_open,adj_high,adj_low,adj_close,open,high,low,close,split_factor
datetime,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-04-01,AAPL,114.549016,114.824330,112.970549,114.025919,124.82,125.12,123.10,124.250,1.0
2015-04-01,AMZN,372.100000,373.160000,368.340000,370.255000,372.10,373.16,368.34,370.255,1.0
2015-04-01,GOOG,27.430000,27.557000,26.975000,27.053700,548.60,551.14,539.50,542.560,1.0
2015-04-01,MSFT,36.510091,36.653973,36.249305,36.618003,40.60,40.76,40.31,40.720,1.0
2015-04-01,NVDA,0.506353,0.508040,0.497676,0.506300,21.01,21.08,20.65,21.010,1.0
...,...,...,...,...,...,...,...,...,...,...
2025-03-12,AMZN,200.720000,201.520000,195.290000,198.890000,200.72,201.52,195.29,198.890,1.0
2025-03-12,GOOG,168.470000,169.530000,165.480000,169.000000,168.47,169.53,165.48,169.000,1.0
2025-03-12,MSFT,382.950000,385.216500,378.950700,383.270000,382.95,385.22,378.95,383.270,1.0
2025-03-12,NVDA,114.120000,116.760000,112.880000,115.740000,114.12,116.76,112.88,115.740,1.0


Quick check: does the MultiIndex work?

In [5]:
# Partial lookup on the first index level: all symbols for one date.
check_date = datetime(2015, 4, 1)
stocks_long_df.loc[check_date, :]

Unnamed: 0_level_0,adj_open,adj_high,adj_low,adj_close,open,high,low,close,split_factor
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,114.549016,114.82433,112.970549,114.025919,124.82,125.12,123.1,124.25,1.0
AMZN,372.1,373.16,368.34,370.255,372.1,373.16,368.34,370.255,1.0
GOOG,27.43,27.557,26.975,27.0537,548.6,551.14,539.5,542.56,1.0
MSFT,36.510091,36.653973,36.249305,36.618003,40.6,40.76,40.31,40.72,1.0
NVDA,0.506353,0.50804,0.497676,0.5063,21.01,21.08,20.65,21.01,1.0
TSLA,188.7,192.3,186.05,187.59,188.7,192.3,186.05,187.59,1.0


In [6]:
# Full-key lookup: a single (date, symbol) row.
row_key = (datetime(2015, 4, 1), 'AMZN')
stocks_long_df.loc[row_key, :]

adj_open        372.100
adj_high        373.160
adj_low         368.340
adj_close       370.255
open            372.100
high            373.160
low             368.340
close           370.255
split_factor      1.000
Name: (2015-04-01 00:00:00, AMZN), dtype: float64

### Visualize adjusted close prices

In [7]:
# Tickers to chart, in display order.
ADJ_CLOSE_SYMBOLS = ("AAPL", "AMZN", "GOOG", "MSFT", "NVDA", "TSLA")


def plot_adj_close(prices_df: pd.DataFrame, symbol: str):
    """Build an interactive line chart of one ticker's adjusted close.

    Args:
        prices_df: Frame holding one column per ticker. The x-axis is left to
            Plotly's default for a frame passed without an explicit ``x``.
        symbol: Name of the column in ``prices_df`` to plot on the y-axis.

    Returns:
        The figure returned by ``px.line``; the caller decides when to show it.
    """
    return px.line(prices_df, y=symbol, title=f"Interactive {symbol} Plot")


# One chart per ticker. `symbol` and `fig` are deliberately module-level loop
# variables so that, as with the original per-ticker cells, they end up bound
# to the last ticker plotted.
for symbol in ADJ_CLOSE_SYMBOLS:
    fig = plot_adj_close(stocks_adj_close_df, symbol)
    fig.show()

In [14]:
# Keep only the rows whose split_factor differs from 1.0.
# NOTE(review): in the rendered output, adj_open/adj_high/adj_low are NaN for
# AMZN on 2022-06-06 and TSLA on 2022-08-25 — worth checking upstream.
is_split_row = stocks_long_df["split_factor"].ne(1.0)
split_factor_rows = stocks_long_df.loc[is_split_row]
split_factor_rows

Unnamed: 0_level_0,Unnamed: 1_level_0,adj_open,adj_high,adj_low,adj_close,open,high,low,close,split_factor
datetime,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-04-27,GOOG,28.1695,28.2975,27.660005,27.7685,563.39,565.95,553.2001,555.37,1.003
2020-08-31,AAPL,127.360025,130.774128,125.782749,128.817508,127.58,131.0,126.0,129.04,4.0
2020-08-31,TSLA,444.61,500.14,440.11,498.32,444.61,500.14,440.11,498.32,5.0
2021-07-20,NVDA,18.695813,18.803616,18.130846,18.5784,187.3,188.38,181.64,186.12,4.0
2022-06-06,AMZN,,,,124.79,125.245,128.99,123.81,124.79,20.0
2022-07-18,GOOG,113.44,114.8,109.3,109.91,113.44,114.8,109.3,109.91,20.0
2022-08-25,TSLA,,,,296.07,302.36,302.96,291.6,296.07,3.0
2024-06-10,NVDA,120.360045,123.08982,117.000323,121.78,120.37,123.1,117.01,121.79,10.0
