In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Notebook-wide plot styling: seaborn's white-grid theme plus a wide
# default figure size for every figure that does not set its own.
sns.set_theme(style="whitegrid")
plt.rcParams.update({"figure.figsize": (15, 6)})

# Data locations, relative to the notebook's working directory
RAW_PATH = "../data/raw"
PROCESSED_PATH = "../data/processed"

# Discover the available tickers from the raw CSV file names.
# - os.listdir() returns entries in arbitrary order, so the list is sorted to
#   keep the "first" ticker stable across runs and machines.
# - os.path.splitext() removes only the trailing extension; str.replace()
#   would also strip a ".csv" that appears in the middle of a file name.
files = sorted(
    os.path.splitext(f)[0] for f in os.listdir(RAW_PATH) if f.endswith('.csv')
)
print(f"Available Tickers: {files}")

In [None]:
def load_data(ticker, raw_path=None, processed_path=None):
    """Load the raw and processed CSV files for a single ticker.

    Parameters
    ----------
    ticker : str
        File stem of the ticker. Reads ``<raw_path>/<ticker>.csv`` and
        ``<processed_path>/<ticker>_processed.csv``.
    raw_path : str, optional
        Directory containing the raw CSV. Defaults to the module-level
        ``RAW_PATH`` when omitted.
    processed_path : str, optional
        Directory containing the processed CSV. Defaults to the module-level
        ``PROCESSED_PATH`` when omitted.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``raw_df`` indexed by its ``Date`` column parsed to datetimes, and
        ``proc_df`` exactly as read from disk (default integer index).

    Raises
    ------
    FileNotFoundError
        If either CSV file does not exist.
    KeyError
        If the raw CSV has no ``Date`` column.
    """
    # Resolve the defaults at call time so later changes to the globals are honoured
    if raw_path is None:
        raw_path = RAW_PATH
    if processed_path is None:
        processed_path = PROCESSED_PATH

    # Raw file: parse the Date column and use it as the index
    raw_df = pd.read_csv(os.path.join(raw_path, f"{ticker}.csv"))
    raw_df['Date'] = pd.to_datetime(raw_df['Date'])
    raw_df = raw_df.set_index('Date')

    # Processed file: returned as-is, keeping the positional integer index
    proc_df = pd.read_csv(os.path.join(processed_path, f"{ticker}_processed.csv"))

    return raw_df, proc_df

# Pick a stock to analyze (e.g., the first one in your list)
if not files:
    # Fail with an actionable message instead of a bare IndexError from files[0]
    raise FileNotFoundError(f"No .csv files found in {RAW_PATH}")
CURRENT_TICKER = files[0]
raw_df, proc_df = load_data(CURRENT_TICKER)

# Quick summary of what was loaded: ticker, covered date span, and row count
print(f"Loaded {CURRENT_TICKER}")
print(f"Date Range: {raw_df.index.min().date()} to {raw_df.index.max().date()}")
print(f"Total Trading Days: {len(raw_df)}")

In [None]:
# Two stacked panels sharing one date axis; the price panel gets a 3:1
# height ratio over the volume panel.
fig, (price_ax, volume_ax) = plt.subplots(
    nrows=2,
    ncols=1,
    figsize=(15, 10),
    sharex=True,
    gridspec_kw={'height_ratios': [3, 1]},
)
trading_dates = raw_df.index

# Top panel: closing price drawn as a line
price_ax.plot(trading_dates, raw_df['Close'], label='Close Price', color='navy', linewidth=1.5)
price_ax.set_title(f"{CURRENT_TICKER} - Daily Closing Price", fontsize=14)
price_ax.set_ylabel("Price ($)")
price_ax.legend()

# Bottom panel: volume drawn as translucent bars
volume_ax.bar(trading_dates, raw_df['Volume'], color='gray', alpha=0.5, label='Volume')
volume_ax.set(ylabel="Volume", xlabel="Date")
volume_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Processed (normalized) Close and Volume columns, plotted against the
# processed frame's row index on a single explicit Axes.
norm_fig, norm_ax = plt.subplots(figsize=(15, 6))

# Draw the two feature series; Volume is more transparent so Close stays readable
norm_ax.plot(proc_df['Close'], label='Normalized Close', alpha=0.9, linewidth=1)
norm_ax.plot(proc_df['Volume'], label='Normalized Volume', alpha=0.5, linewidth=1)

norm_ax.set_title(
    f"Normalized Input Features for {CURRENT_TICKER} (StandardScaler Output)",
    fontsize=14,
)
# Dashed reference line at zero (the zero-mean level)
norm_ax.axhline(0, color='black', linestyle='--', linewidth=1)
norm_ax.set_ylabel("Standard Deviations (Z-Score)")
norm_ax.set_xlabel("Time Step (Index)")
norm_ax.legend()
plt.show()

In [None]:
# Rolling window length in rows; named once so the computation and the
# axis label cannot drift apart.
VOL_WINDOW = 20

# Calculate Daily Returns (percentage change of Close from the previous row)
raw_df['Returns'] = raw_df['Close'].pct_change()
# Calculate Rolling Volatility (standard deviation of returns over the window)
raw_df['Volatility'] = raw_df['Returns'].rolling(window=VOL_WINDOW).std()

fig, ax1 = plt.subplots(figsize=(15, 6))

# Left y-axis: daily returns, faint so the volatility line stands out
color = 'tab:red'
ax1.set_xlabel('Date')
ax1.set_ylabel('Daily Returns', color=color)
ax1.plot(raw_df.index, raw_df['Returns'], color=color, alpha=0.3, label='Daily Returns')
ax1.tick_params(axis='y', labelcolor=color)

# Create a second y-axis for Volatility (shares the x-axis with ax1)
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel(f'{VOL_WINDOW}-Day Rolling Volatility', color=color)
ax2.plot(raw_df.index, raw_df['Volatility'], color=color, linewidth=2, label='Volatility (Risk)')
ax2.tick_params(axis='y', labelcolor=color)

# Both lines carry labels, but each lives on a different Axes; merge the
# handles into a single legend so the labels are actually displayed.
returns_handles, returns_labels = ax1.get_legend_handles_labels()
vol_handles, vol_labels = ax2.get_legend_handles_labels()
ax2.legend(returns_handles + vol_handles, returns_labels + vol_labels, loc='upper left')

plt.title(f"Market Regime Analysis: Returns vs. Volatility ({CURRENT_TICKER})")
fig.tight_layout()
plt.show()