# Task 1: Preprocess and Explore Financial Data
This notebook loads historical financial data, cleans it, performs exploratory data analysis (EDA), and computes key financial risk metrics.

## Import necessary libraries

In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.stats import norm
from sklearn.preprocessing import MinMaxScaler

# Set plot style for better visuals
sns.set(style='whitegrid')

## Fetch Historical Financial Data

In [3]:
# Define tickers and date range
tickers = ["TSLA", "BND", "SPY"]
start_date = "2015-01-01"
end_date = "2025-01-31"

# Fetch data using yfinance
data = yf.download(tickers, start=start_date, end=end_date)
print(f"✅ Data Fetched: {data.shape[0]} rows")

# Extract Adjusted Close prices
adj_close = data['Adj Close']

# Save raw data
adj_close.to_csv('data/raw/adj_close.csv')

# Display first few rows
adj_close.head()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  3 of 3 completed


✅ Data Fetched: 2535 rows


KeyError: 'Adj Close'


## Data Cleaning and Understanding

In [None]:
# Check basic statistics
print("Basic Statistics:")
display(adj_close.describe())

# Check data types
print("\nData Types:")
display(adj_close.dtypes)

# Check for missing values
print("\nMissing Values:")
display(adj_close.isnull().sum())

# Handle missing values with forward fill
adj_close = adj_close.ffill()

# Verify no missing values remain
print("\nMissing Values After Cleaning:")
display(adj_close.isnull().sum())

# Normalize the Data

In [None]:
# Normalize the data using MinMaxScaler
scaler = MinMaxScaler()
normalized_adj_close = pd.DataFrame(scaler.fit_transform(adj_close), 
                                    columns=adj_close.columns, 
                                    index=adj_close.index)

# Save normalized data
normalized_adj_close.to_csv('data/processed/normalized_adj_close.csv')

# Display first few rows
normalized_adj_close.head()

## Visualize Closing Prices (EDA)

In [None]:
# Plot adjusted close prices
adj_close.plot(subplots=True, figsize=(10, 8), title="Adjusted Close Prices")
plt.tight_layout()
plt.savefig('figs/adj_close.png')
plt.show()

## Calculate and Plot Daily Returns (EDA)

In [None]:
# Calculate daily percentage change
daily_returns = adj_close.pct_change()

# Plot daily returns
daily_returns.plot(subplots=True, figsize=(10, 8), title="Daily Percentage Change")
plt.tight_layout()
plt.savefig('figs/daily_returns.png')
plt.show()

## Analyze Volatility with Rolling Statistics (EDA)

In [None]:
# Plot rolling mean and standard deviation for each ticker
for ticker in tickers:
    rolling_mean = adj_close[ticker].rolling(window=30).mean()
    rolling_std = adj_close[ticker].rolling(window=30).std()
    
    plt.figure(figsize=(10, 4))
    plt.plot(adj_close[ticker], label=f'{ticker} Adj Close')
    plt.plot(rolling_mean, label='30-Day Rolling Mean')
    plt.plot(rolling_std, label='30-Day Rolling Std')
    plt.title(f'{ticker} Rolling Statistics')
    plt.legend()
    plt.savefig(f'figs/rolling_stats_{ticker}.png')
    plt.show()

## Outlier Detection (EDA)

In [None]:
# Detect outliers in daily returns
for ticker in tickers:
    returns = adj_close[ticker].pct_change()
    mean_return = returns.mean()
    std_return = returns.std()
    outliers = returns[(returns < mean_return - 3*std_return) | 
                       (returns > mean_return + 3*std_return)]
    print(f"Outliers in {ticker} Daily Returns:\n{outliers.dropna()}\n")

## Seasonality and Trends (EDA)

In [None]:
# Decompose time series for seasonality and trends
for ticker in tickers:
    decomposition = seasonal_decompose(adj_close[ticker], model='multiplicative', period=252)
    decomposition.plot()
    plt.suptitle(f'{ticker} Time Series Decomposition', y=1.05)
    plt.savefig(f'figs/decomposition_{ticker}.png')
    plt.show()

## Analyze Volatility

In [None]:
# Plot 30-day rolling volatility
rolling_std = adj_close.rolling(window=30).std()
rolling_std.plot(subplots=True, figsize=(10, 8), title="30-Day Rolling Volatility")
plt.tight_layout()
plt.savefig('figs/rolling_volatility.png')
plt.show()

## Calculate Risk Metrics

In [None]:
# Calculate VaR and Sharpe Ratio
for ticker in tickers:
    returns = adj_close[ticker].pct_change().dropna()
    
    # Value at Risk (VaR) at 95% confidence
    VaR_95 = norm.ppf(0.05, returns.mean(), returns.std())
    
    # Sharpe Ratio (assuming 2% risk-free rate)
    annual_return = (1 + returns.mean())**252 - 1
    annual_volatility = returns.std() * np.sqrt(252)
    risk_free_rate = 0.02
    sharpe_ratio = (annual_return - risk_free_rate) / annual_volatility
    
    print(f"\nRisk Metrics for {ticker}:")
    print(f"  - VaR (95%): {VaR_95:.4f}")
    print(f"  - Sharpe Ratio: {sharpe_ratio:.4f}")