In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import os
pio.templates.default = "plotly_white"


In [None]:
# Read the raw price file and convert the Date column to datetimes in one
# chain; the bare `df` on the last line makes the frame the cell's output.
df = pd.read_csv('../data/raw/SP500_USA.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)
df

In [None]:
# Print the frame's structure: column names, dtypes and non-null counts.
df.info()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Missing values per column (isna is the same check as isnull).
df.isna().sum()

In [None]:
# Number of rows that repeat an earlier row in every column.
df.duplicated().sum()

In [None]:
# Descriptive statistics for the columns pandas selects by default.
df.describe()

In [None]:
# Dataset overview: covered period, number of tickers and the ticker list.
banner = 40*"="
print(banner)
print("OVERVIEW")
print(banner, "\n")

# Look up both ends of the date range once and reuse them below.
first_date, last_date = df['Date'].min(), df['Date'].max()
print(f"Date range : {first_date.strftime('%Y-%m-%d')} to {last_date.strftime('%Y-%m-%d')}")
total_days = (last_date - first_date).days
total_years = total_days / 365.25  # 365.25 averages in leap years
print(f"Total Days :  {total_days} days")
print(f"Total Years : {total_years:.2f} years")

number_co = df['Ticker'].nunique()
print(f"Number of companies : {number_co}\n")

print("COMPANIES : \n")

companies = sorted(df['Ticker'].unique())

# Print the sorted tickers five per row.
for start in range(0, len(companies), 5):
    row = companies[start:start+5]
    print(", ".join(row))


### Plot 1 : US Market Growth with Major Crisis Periods Highlighted

In [None]:
# Plot 1 : US Market Growth with Major Crisis Periods Highlighted
#
# df carries a Ticker column with several companies, so a market-level series
# is built first. Normalising the raw rows against the very first row would mix
# prices of unrelated tickers into one line.

df = df.sort_values(["Date", "Ticker"])

# Wide price table: one row per date, one column per ticker.
prices = df.pivot_table(index="Date", columns="Ticker", values="Adj Close")

# Equal-weighted market return = average of the tickers' daily returns.
# fill_method=None leaves gaps as NaN instead of forward-filling prices, and
# mean(axis=1) skips tickers that have no return on a given date.
market_return = prices.pct_change(fill_method=None).mean(axis=1)

# The first date has no previous close; a 0 return there anchors the index at 100.
market_index = (1 + market_return.fillna(0)).cumprod() * 100

# Keep the "Normalized" column on df: each row now carries the market index
# of its own date.
df["Normalized"] = df["Date"].map(market_index)

plt.style.use("seaborn-v0_8-whitegrid")

fig, ax = plt.subplots(figsize=(12,6))

ax.plot(market_index.index, market_index.values, linewidth=2.2)

# Shaded crisis windows. The bounds are converted to Timestamps so they are in
# the same units as the datetime x-axis; plain strings are not guaranteed to be
# understood by matplotlib's date converter.
crisis_periods = [
    ("2007-10-01", "2009-06-01", "red", "2008 Global Financial Crisis"),
    ("2020-02-01", "2020-06-01", "orange", "COVID Crash"),
]
for span_start, span_end, span_color, span_label in crisis_periods:
    ax.axvspan(pd.Timestamp(span_start), pd.Timestamp(span_end),
               alpha=0.4, color=span_color, label=span_label)

ax.set_title("US Market Growth with Major Crisis Periods Highlighted",
             fontsize=15, fontweight="bold")

ax.set_xlabel("Year")
ax.set_ylabel("Index Level (Base 100)")

# Thousands separators on the y-axis tick labels.
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, p: format(int(x), ",")))

ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

ax.legend()
plt.tight_layout()

# Save before showing: the figure must still be open when savefig runs.
folder = "../output/output_us/figures"
os.makedirs(folder, exist_ok=True)
filepath = os.path.join(folder, "Plot_1_US_Market_Growth_with_Major_Crisis_Period_Highlighted.png")
plt.savefig(filepath, dpi=600, bbox_inches='tight', facecolor='white')
plt.show()


### Plot 1 : US Market Growth with Major Crisis Periods Highlighted
(Interactive Version)

In [None]:
# Interactive version of Plot 1.
#
# The market index is computed here from the per-ticker prices, giving exactly
# one point per date. Plotting the row-level frame directly would draw every
# ticker's rows as a single trace.
market_index = (
    df.pivot_table(index="Date", columns="Ticker", values="Adj Close")
    .pct_change(fill_method=None)   # per-ticker daily returns, gaps stay NaN
    .mean(axis=1)                   # equal-weighted average across tickers
    .fillna(0)                      # first date has no return -> index starts at 100
    .add(1)
    .cumprod()
    .mul(100)
)

fig = go.Figure()

# Line
fig.add_trace(go.Scatter(
    x=market_index.index,
    y=market_index.values,
    mode="lines",
    line=dict(width=2.2),
    name="US Market"
))

# 2008 Crisis
fig.add_vrect(
    x0="2007-10-01", x1="2009-06-01",
    fillcolor="red",
    opacity=0.4,
    layer="below",
    line_width=0,
    annotation_text="2008 Global Financial Crisis",
    annotation_position="top left"
)

# COVID
fig.add_vrect(
    x0="2020-02-01", x1="2020-06-01",
    fillcolor="orange",
    opacity=0.4,
    layer="below",
    line_width=0,
    annotation_text="COVID Crash",
    annotation_position="top left"
)

fig.update_layout(
    title="US Market Growth with Major Crisis Periods Highlighted",
    template="simple_white",
    xaxis_title="Year",
    yaxis_title="Index Level (Base 100)",
    hovermode="x unified"
)

fig.show()

# Export a standalone HTML report; plotly.js is loaded from the CDN rather
# than embedded in the file.
folder = "../output/output_us/reports"
os.makedirs(folder, exist_ok=True)
filepath = os.path.join(folder, "Plot_1_US_Market_Growth_with_Major_Crisis_Period_Highlighted.html")
fig.write_html(filepath, include_plotlyjs="cdn")

### Plot 2 : US Market Drawdown

In [None]:
# Plot 2 : US Market Drawdown
#
# df holds rows for several tickers, so returns are taken per ticker and then
# averaged per date. A plain pct_change over the date-sorted rows would compare
# prices of different companies.

df = df.sort_values(["Date", "Ticker"])

# Wide price table: one row per date, one column per ticker.
prices = df.pivot_table(index="Date", columns="Ticker", values="Adj Close")

# Equal-weighted market return; the first date stays NaN (no previous close).
market = pd.DataFrame({"Return": prices.pct_change(fill_method=None).mean(axis=1)})

# Treat the missing first return as 0 so the cumulative series starts at 1.0
# and the first observation can count as a peak.
market["Cum_Return"] = (1 + market["Return"].fillna(0)).cumprod()
market["Running_Max"] = market["Cum_Return"].cummax()
market["Drawdown"] = market["Cum_Return"] / market["Running_Max"] - 1

# Keep the same columns on df: every row receives the market-level value of
# its date.
for column in ["Return", "Cum_Return", "Running_Max", "Drawdown"]:
    df[column] = df["Date"].map(market[column])

plt.style.use("seaborn-v0_8-whitegrid")

fig, ax = plt.subplots(figsize=(12,6))

# Drawdown line
ax.plot(market.index, market["Drawdown"],
        linewidth=1.8)

# Shaded crisis windows. The bounds are converted to Timestamps so they are in
# the same units as the datetime x-axis.
crisis_periods = [
    ("2007-10-01", "2009-06-01", "red", "2008 Global Financial Crisis"),
    ("2020-02-01", "2020-06-01", "orange", "COVID Crash"),
]
for span_start, span_end, span_color, span_label in crisis_periods:
    ax.axvspan(pd.Timestamp(span_start), pd.Timestamp(span_end),
               alpha=0.4, color=span_color, label=span_label)

# Formatting
ax.set_title("US Market Drawdown Over Time",
             fontsize=15, fontweight="bold")

ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("Drawdown", fontsize=12)

# Drawdown values are fractions (e.g. -0.25); show them as percentages.
ax.yaxis.set_major_formatter(ticker.PercentFormatter(1.0))

# Zero line = market at its running peak.
ax.axhline(0, linestyle="--", linewidth=1)

# Clean borders
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Legend below the axes so it does not cover the curve.
ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=2)

plt.tight_layout()
plt.show()