# Portfolio EDA

Performs exploratory data analysis on the data scraped from the API.

In [13]:
import os
from datetime import datetime
import plotly.express as px
import pandas as pd

### Load Portfolio Data

Load the data, convert the date column to a datetime column, and set datetime as the index.

In [17]:
# Read the wide adjusted-close table (one column per ticker symbol).
stocks_adj_close_filename = os.path.join("output", "stocks_adj_close.csv")
adj_close_raw = pd.read_csv(stocks_adj_close_filename)

# Parse the offset-qualified "date" strings, then strip the timezone so the
# resulting timestamps are naive.
adj_close_timestamps = pd.to_datetime(
    adj_close_raw["date"], format='%Y-%m-%dT%H:%M:%S%z'
).dt.tz_localize(None)

# Replace the string "date" column with the parsed "datetime" column, use it
# as the index, and order the rows chronologically.
stocks_adj_close_df = (
    adj_close_raw
    .assign(datetime=adj_close_timestamps)
    .drop(columns="date")
    .set_index("datetime")
    .sort_index()
)
stocks_adj_close_df

Unnamed: 0_level_0,AAPL,AMZN,GOOG,MSFT,NVDA,TSLA
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-04-01,114.025919,370.255,27.0537,36.618003,0.5063,187.59
2015-04-02,115.007873,372.250,26.7032,36.231320,0.5075,191.00
2015-04-06,116.870832,377.040,26.7647,37.359895,0.5221,203.10
2015-04-07,115.641095,374.410,26.7775,37.346406,0.5268,203.25
2015-04-08,115.264833,381.200,27.0063,37.247487,0.5306,207.67
...,...,...,...,...,...,...
2025-03-06,235.330000,200.700,174.2100,396.890000,110.5700,263.45
2025-03-07,239.070000,199.250,175.5500,393.310000,112.6900,262.67
2025-03-10,227.480000,194.540,167.8100,380.160000,106.9800,222.15
2025-03-11,220.840000,196.590,165.9800,380.450000,108.7500,230.58


Quick check: did the index get set properly?

In [10]:
# The load step strips the timezone (tz_localize(None)), so the index holds
# naive timestamps. Look the row up with a naive datetime: a tz-aware key such
# as "2015-04-01T00:00:00+0000" does not match a naive DatetimeIndex and is
# rejected with KeyError by recent pandas versions.
stocks_adj_close_df.loc[datetime(2015, 4, 1), :]

AAPL    114.025919
AMZN    370.255000
GOOG     27.053700
MSFT     36.618003
NVDA      0.506300
TSLA    187.590000
Name: 2015-04-01 00:00:00+00:00, dtype: float64

In [18]:
# Read the long-format table (one row per date/symbol pair).
stocks_long_filename = os.path.join("output", "stocks_long.csv")
long_raw = pd.read_csv(stocks_long_filename)

# Parse the offset-qualified "date" strings and strip the timezone so the
# timestamps are naive, then swap the string column for the parsed one.
long_timestamps = pd.to_datetime(
    long_raw["date"], format='%Y-%m-%dT%H:%M:%S%z'
).dt.tz_localize(None)
long_frame = long_raw.assign(datetime=long_timestamps).drop(columns="date")

# Build the (datetime, symbol) index from the two key columns.
multi_index = pd.MultiIndex.from_arrays(
    [long_frame["datetime"], long_frame["symbol"]],
    names=["datetime", "symbol"],
)

# Move the key columns out of the data and into the index, then sort so that
# label-based lookups on the MultiIndex work on an ordered index.
stocks_long_df = (
    long_frame
    .drop(columns=["datetime", "symbol"])
    .set_axis(multi_index, axis=0)
    .sort_index()
)
stocks_long_df

Unnamed: 0_level_0,Unnamed: 1_level_0,adj_open,adj_high,adj_low,adj_close
datetime,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-04-01,AAPL,114.549016,114.824330,112.970549,114.025919
2015-04-01,AMZN,372.100000,373.160000,368.340000,370.255000
2015-04-01,GOOG,27.430000,27.557000,26.975000,27.053700
2015-04-01,MSFT,36.510091,36.653973,36.249305,36.618003
2015-04-01,NVDA,0.506353,0.508040,0.497676,0.506300
...,...,...,...,...,...
2025-03-12,AMZN,200.720000,201.520000,195.290000,198.890000
2025-03-12,GOOG,168.470000,169.530000,165.480000,169.000000
2025-03-12,MSFT,382.950000,385.216500,378.950700,383.270000
2025-03-12,NVDA,114.120000,116.760000,112.880000,115.740000


Quick check: does the MultiIndex work?

In [21]:
# Selecting on the first index level only should return one row per symbol
# for that trading day.
first_trading_day = datetime(2015, 4, 1)
stocks_long_df.loc[first_trading_day, :]

Unnamed: 0_level_0,adj_open,adj_high,adj_low,adj_close
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAPL,114.549016,114.82433,112.970549,114.025919
AMZN,372.1,373.16,368.34,370.255
GOOG,27.43,27.557,26.975,27.0537
MSFT,36.510091,36.653973,36.249305,36.618003
NVDA,0.506353,0.50804,0.497676,0.5063
TSLA,188.7,192.3,186.05,187.59


In [22]:
# A full (datetime, symbol) key should resolve to a single row, returned as a
# Series of the price columns.
amzn_first_day_key = (datetime(2015, 4, 1), 'AMZN')
stocks_long_df.loc[amzn_first_day_key, :]

adj_open     372.100
adj_high     373.160
adj_low      368.340
adj_close    370.255
Name: (2015-04-01 00:00:00, AMZN), dtype: float64