In [1]:
# Environment setup: upgrade pip and install matplotlib into the kernel's environment.
%pip install --upgrade pip
%pip install matplotlib
import sys
# Install nbformat with the kernel's own interpreter (sys.executable).
# NOTE(review): presumably needed by Plotly's notebook renderer for fig.show() - confirm.
!{sys.executable} -m pip install "nbformat>=4.2.0"
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from pathlib import Path
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load the cleaned ETF dataset; the path is relative to the notebook's working directory.
path = "../data/cleaned_etf_data.csv"
df = pd.read_csv(filepath_or_buffer=path)


In [3]:
# Parse every date-like column into pandas datetimes.
# (End_Quarter is presumably stored as YYYY-MM text - verify against the CSV.)
for date_column in ("Date", "Mid_Quarter", "End_Quarter", "Month_Year"):
    df[date_column] = pd.to_datetime(df[date_column])

# Give the remaining calendar/identifier columns explicit dtypes.
# "category" is used for Ticker to save memory on the repeated symbols.
column_dtypes = {
    "Year": int,
    "Quarter": str,
    "Week": "uint32",
    "Ticker": "category",
}
for column_name, target_dtype in column_dtypes.items():
    df[column_name] = df[column_name].astype(target_dtype)

In [4]:
# Build two working subsets of the full frame:
#   sub_df  - sector ETFs plus the benchmark (SPY, used as the S&P 500 benchmark)
#   sub_df2 - sector ETFs only, without the benchmark
# Both keep only rows from 1998 onward (Year > 1997), because most of these ETFs
# were not available before that year.
etfs = ["QQQ", "XLE", "XLV", "XLF", "VNQ", "XLI"]
etfs_and_benchmark = ["SPY", *etfs]

# Shared row filter, computed once and reused for both subsets.
from_1998_onward = df["Year"].astype(int) > 1997

sub_df = df[df["Ticker"].isin(etfs_and_benchmark) & from_1998_onward].copy()
sub_df2 = df[df["Ticker"].isin(etfs) & from_1998_onward].copy()

In [5]:
# One fixed color per industry so a sector is drawn in the same color in every chart.
industry_colors = dict(
    [
        ("Technology", "#1f77b4"),        # blue
        ("Energy", "#ff7f0e"),            # orange
        ("Healthcare", "#2ca02c"),        # green
        ("Financials", "#d62728"),        # red
        ("Real Estate", "#9467bd"),       # purple
        ("Industrials", "#8c564b"),       # brown
        ("Market Benchmark", "#000000"),  # black
    ]
)

# Mark major macroeconomic events on a figure.
def add_macroeconomic_annotations(fig, row=1, col=1, events=None):
    """Draw a dotted vertical line and a text label for each macroeconomic event.

    Parameters
    ----------
    fig : plotly figure
        Figure to annotate. It is modified in place.
    row, col : int, default 1
        Subplot coordinates forwarded to ``fig.add_vline``. The label is placed
        on the same axes as the line that was added.
    events : iterable of dict, optional
        Events to mark. Each dict needs ``"x"`` (the date on the x axis) and
        ``"label"`` (the text to show); ``"xshift"`` (horizontal label offset in
        pixels) is optional and defaults to 0. When omitted, four built-in
        events are used (dot-com crash, global financial crisis, COVID-19
        crash, 2022 inflation and rate hikes).

    Returns
    -------
    The same ``fig`` object, so calls can be chained.
    """
    if events is None:
        events = [
            {"x": "2002-09-01", "label": "Dot-Com Bubble Crash", "xshift": 0},
            {"x": "2009-03-01", "label": "Global Financial Crisis", "xshift": 0},
            {"x": "2020-03-01", "label": "COVID-19 Crash", "xshift": -40},
            {"x": "2022-06-01", "label": "Inflation & Rate Hikes", "xshift": 50},
        ]

    for event in events:
        shapes_before = len(fig.layout.shapes)

        # Dotted black vertical line at the event date.
        fig.add_vline(
            x=event["x"],
            line_dash="dot",
            line_color="black",
            row=row,
            col=col
        )

        # Reuse the axis references of the line that was just added so the label
        # sits on the same subplot. If no shape was added, or it carries no
        # references, fall back to the first x axis and the top of the figure.
        new_shapes = fig.layout.shapes[shapes_before:]
        if new_shapes:
            label_xref = new_shapes[0].xref or "x"
            label_yref = new_shapes[0].yref or "paper"
        else:
            label_xref, label_yref = "x", "paper"

        # Text label anchored just above the top edge (y=1) of the target area.
        fig.add_annotation(
            x=event["x"],
            y=1,
            xref=label_xref,
            yref=label_yref,
            text=event["label"],
            showarrow=False,
            yanchor="bottom",
            xshift=event.get("xshift", 0),
            font=dict(size=12)
        )

    return fig

In [6]:
# Monthly mean of the normalized adjusted close per industry (benchmark included):
# first as a wide table, then melted back to long form for plotly express.
pivot_table1 = sub_df.pivot_table(
    values="Adj Close Normalized 0 - 100",
    index="Month_Year",
    columns="Industry",
    aggfunc="mean",
)
pivot_long = pivot_table1.reset_index().melt(
    id_vars="Month_Year", var_name="Industry", value_name="Normalized Price"
)

# Human-readable axis labels for the two plotted columns.
axis_labels = {
    "Normalized Price": "Normalized Avg Adj Close (0–100)",
    "Month_Year": "Month-Year",
}

# Horizontal legend, centered and pushed below the x axis.
legend_below_plot = dict(
    orientation="h", yanchor="bottom", y=-0.5, xanchor="center", x=0.5
)

fig = px.line(
    pivot_long,
    x="Month_Year",
    y="Normalized Price",
    color="Industry",
    color_discrete_map=industry_colors,
    title="From Crisis to Recovery: Which Sectors Outperformed?",
    labels=axis_labels,
)
fig.update_layout(legend=legend_below_plot)

# Overlay the macroeconomic event markers.
add_macroeconomic_annotations(fig)

fig.show()

In [7]:
# Two stacked charts of industry drawdowns (sector ETFs only, no benchmark):
#   row 1 - average drawdown over time, one line per industry
#   row 2 - distribution of those monthly averages, one box per industry

pivot_table3 = sub_df2.pivot_table(
    index="Month_Year",
    columns="Industry",
    values="Drawdown",
    aggfunc="mean"
)

# Long form of the same table (one row per month/industry) for the box plots.
drawdown_long = pivot_table3.reset_index().melt(
    id_vars='Month_Year',
    var_name='Industry',
    value_name='Drawdown'
)

# Subplot layout with 2 rows (line chart on top, box plot on bottom).
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=False,
    subplot_titles=("Average Industry Drawdown Over Time", "Drawdown Distribution by Industry"),
    vertical_spacing=0.15
)

# Move the first subplot title up (presumably to keep it clear of the event
# labels drawn along the top edge of row 1).
fig.layout.annotations[0].update(y=1.05)

# Line traces (one per industry). legendgroup ties each line to the matching
# box trace so a single legend entry toggles both.
for industry in pivot_table3.columns:
    fig.add_trace(
        go.Scatter(
            x=pivot_table3.index,
            y=pivot_table3[industry],
            mode='lines',
            line=dict(color=industry_colors.get(industry, "#999")),
            name=industry,
            legendgroup=industry
        ),
        row=1, col=1
    )

# Added after the line traces: add_vline skips empty subplots by default.
add_macroeconomic_annotations(fig, row=1, col=1)

# Box traces (one per industry). They are hidden from the legend because the
# line trace of the same legendgroup already provides the entry; without this
# every industry is listed twice.
for industry in drawdown_long['Industry'].unique():
    fig.add_trace(
        go.Box(
            y=drawdown_long[drawdown_long['Industry'] == industry]['Drawdown'],
            name=industry,
            line=dict(color=industry_colors.get(industry, "#999")),
            legendgroup=industry,
            showlegend=False
        ),
        row=2, col=1
    )

# Figure size and legend placement: vertical legend to the right of the plots.
fig.update_layout(
    height=800,
    showlegend=True,
    legend=dict(
        orientation="v",
        yanchor="top",
        y=.75,
        xanchor="left",
        x=1.05               # just outside the right edge of the plot area
    )
)

fig.update_xaxes(title_text="Time", row=1, col=1)
fig.update_yaxes(title_text="Avg Drawdown (%)", row=1, col=1)
fig.update_yaxes(title_text="Drawdown (%)", row=2, col=1)

fig.show()

In [8]:
# Question 2: how do rising interest rates impact different industries?
# Do some sectors thrive while others decline?
# Left axis: normalized industry performance; right axis: federal funds rate.

pivot3 = sub_df2.pivot_table(
    values="Adj Close Normalized 0 - 100",
    index="Month_Year",
    columns="Industry",
    aggfunc="mean",
)

# The rate is not industry-specific, so it is averaged per month over all rows.
fed_funds_rate = sub_df2.groupby("Month_Year")["Federal Funds Rate"].mean()

fig = go.Figure()

add_macroeconomic_annotations(fig, row=1, col=1)

# One line per industry on the left y axis.
for industry, industry_series in pivot3.items():
    industry_trace = go.Scatter(
        x=industry_series.index,
        y=industry_series,
        mode="lines",
        name=industry,
        yaxis="y1",
        line=dict(color=industry_colors.get(industry, "#999")),
    )
    fig.add_trace(industry_trace)

# Federal funds rate on the right y axis, drawn as a solid black line.
rate_trace = go.Scatter(
    x=fed_funds_rate.index,
    y=fed_funds_rate,
    mode="lines",
    name="Federal Funds Rate",
    yaxis="y2",
    line=dict(color="black", dash="solid"),
)
fig.add_trace(rate_trace)

# Left y axis: normalized industry performance.
performance_axis = dict(
    title="Normalized Industry Performance",
    side="left",
    title_standoff=10,  # small gap between the axis title and the tick labels
)

# Right y axis: federal funds rate, overlaid on the same plot area.
rate_axis = dict(
    title="Federal Funds Rate (%)",
    overlaying="y",
    side="right",
    showgrid=False,
    title_standoff=25,  # larger gap between the axis title and the tick labels
)

# Horizontal legend, centered below the x axis.
legend_below_plot = dict(
    yanchor="bottom",
    y=-0.5,
    xanchor="center",
    x=0.5,
    orientation="h",
)

fig.update_layout(
    title="How Do Rising Interest Rates Impact Different Industries?",
    xaxis=dict(title="Year-Month"),
    yaxis=performance_axis,
    yaxis2=rate_axis,
    legend=legend_below_plot,
)
fig.show()


In [9]:
# Question 4: how does inflation affect different sectors? Are certain industries inflation-proof?
# Left axis: normalized industry performance; right axis: quarterly inflation rate.

# Quarterly means of CPI (and of the normalized price), keyed by quarter end.
quarterly_cpi = sub_df2.groupby("End_Quarter").agg({
    "Inflation (CPI)": "mean",
    "Adj Close Normalized 0 - 100": "mean"
}).reset_index()

# Quarter-over-quarter percentage change of the mean CPI.
# The first quarter has no predecessor, so its value is NaN.
quarterly_cpi["Inflation Rate (%)"] = quarterly_cpi["Inflation (CPI)"].pct_change() * 100

# Monthly mean of the normalized adjusted close per industry.
pivot_table4 = sub_df2.pivot_table(
    index="Month_Year",
    columns="Industry",
    values="Adj Close Normalized 0 - 100",
    aggfunc="mean"
)

fig = go.Figure()

add_macroeconomic_annotations(fig, row=1, col=1)

# Industry performance lines (left y axis).
for industry in pivot_table4.columns:
    fig.add_trace(go.Scatter(
        x=pivot_table4.index,
        y=pivot_table4[industry],
        mode="lines",
        name=industry,
        yaxis="y1",
        line=dict(color=industry_colors.get(industry, "#999"))
    ))

# Quarterly inflation rate (right y axis), drawn as a solid black line.
fig.add_trace(go.Scatter(
    x=quarterly_cpi['End_Quarter'],
    y=quarterly_cpi["Inflation Rate (%)"],
    mode="lines",
    name="Inflation",
    yaxis="y2",
    line=dict(color="black", dash="solid")
))

# Layout: titles, both y axes and the legend position.
fig.update_layout(
    title="How Do Rising Inflation Rates Impact Different Industries?",
    xaxis=dict(title="Year-Month"),

    # Left y axis: normalized industry performance.
    yaxis=dict(
        title="Normalized Industry Performance",
        side="left",
        title_standoff=10  # small gap between the axis title and the tick labels
    ),

    # Right y axis: quarterly inflation rate. The plotted series is the
    # percentage change of CPI, not the CPI level, so the title says so.
    yaxis2=dict(
        title="Quarterly Inflation Rate (% change in CPI)",
        overlaying="y",
        side="right",
        showgrid=False,
        title_standoff=25  # larger gap between the axis title and the tick labels
    ),

    # Horizontal legend, centered below the x axis.
    legend=dict(
        yanchor="bottom",
        y=-0.5,
        xanchor="center",
        x=.5,
        orientation="h"
    ),
)

fig.show()