# TSA Chapter 0: Real Financial Data Analysis

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch0/TSA_ch0_real_data/TSA_ch0_real_data.ipynb)

This notebook demonstrates real financial data analysis:
- S&P 500, Bitcoin, Gold, EUR/RON exchange rates
- Airline passengers dataset (a synthetic series generated to mimic the classic dataset)
- Data loading from Yahoo Finance and FRED
- Time series patterns in real data

In [None]:
!pip install yfinance pandas_datareader statsmodels matplotlib pandas numpy scipy -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Style configuration: shared palette, matplotlib defaults, RNG seed and chart output folder
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
)

_rc_overrides = {
    # Transparent backgrounds, both on screen and in saved files
    'axes.facecolor': 'none',
    'figure.facecolor': 'none',
    'savefig.transparent': True,
    # Axes frame: left/bottom spines only, thin dark edges, no grid
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.edgecolor': '#333333',
    'axes.linewidth': 0.8,
    'axes.grid': False,
    # Font sizes
    'font.size': 10,
    'axes.titlesize': 11,
    'axes.labelsize': 10,
    'legend.fontsize': 8,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    # Lines: default width, and colours cycling through the palette in definition order
    'lines.linewidth': 1.5,
    'axes.prop_cycle': plt.cycler(color=list(COLORS.values())),
}
plt.rcParams.update(_rc_overrides)

# Fixed seed so the synthetic series below are reproducible from a fresh kernel
np.random.seed(42)

# Secondary output folder used by save_chart, resolved relative to the working directory.
# NOTE(review): dirname() plus the two '..' parts climbs three levels above the working
# directory in total - confirm that is where the lecture's charts folder lives.
_cwd_parent = os.path.dirname(os.path.abspath('.'))
CHARTS_DIR = os.path.join(_cwd_parent, '..', '..', 'charts')

def save_chart(fig, name):
    """Save ``fig`` as ``<name>.pdf`` and ``<name>.png``, plus a best-effort copy in CHARTS_DIR.

    Parameters
    ----------
    fig : object exposing ``savefig(path, **kwargs)`` (e.g. a matplotlib Figure)
        The figure to write out.
    name : str
        Base file name without extension; used relative to the working directory
        and again underneath ``CHARTS_DIR``.

    The working-directory files are always written, and errors there propagate.
    The CHARTS_DIR copy is optional: it is skipped quietly when that directory does
    not exist (e.g. on Colab), and any other failure is reported instead of being
    silently swallowed. It never aborts the notebook.
    """
    save_kwargs = dict(bbox_inches='tight', transparent=True, dpi=150)
    extensions = ('pdf', 'png')

    for ext in extensions:
        fig.savefig(f'{name}.{ext}', **save_kwargs)

    # Also save to main charts directory for the lecture (best effort)
    try:
        if os.path.isdir(CHARTS_DIR):
            charts_path = os.path.join(CHARTS_DIR, name)
            for ext in extensions:
                fig.savefig(f'{charts_path}.{ext}', **save_kwargs)
    except Exception as exc:
        # Keep the notebook running, but make the failure visible
        print(f'Could not save {name} to the charts directory: {exc}')

    print(f'Saved: {name}.pdf + .png')

def add_legend_below(ax, ncol=3):
    """Attach a frameless legend to ``ax``, centred beneath the plot area in ``ncol`` columns."""
    # Anchor point in axes coordinates: horizontally centred, slightly below the bottom edge
    anchor_below_axes = (0.5, -0.12)
    ax.legend(
        frameon=False,
        ncol=ncol,
        loc='upper center',
        bbox_to_anchor=anchor_below_axes,
    )

In [None]:
# Load real data with fallback to synthetic
# S&P 500
try:
    import yfinance as yf
    sp500 = yf.download('^GSPC', start='2020-01-01', end='2024-12-31', progress=False)['Close'].squeeze()
    sp500_loaded = True
    print('S&P 500 data loaded from Yahoo Finance')
except Exception as e:
    print(f'Yahoo Finance failed: {e}. Using synthetic data.')
    sp500_loaded = False
    t = np.arange(1250)
    sp500 = pd.Series(4700 * np.cumprod(1 + np.random.randn(1250) * 0.01 + 0.0004),
                      index=pd.date_range('2020-01-01', periods=1250, freq='B'))

# Bitcoin
try:
    btc = yf.download('BTC-USD', start='2020-01-01', end='2024-12-31', progress=False)['Close'].squeeze()
    btc_loaded = True
    print('Bitcoin data loaded from Yahoo Finance')
except Exception as e:
    print(f'BTC failed: {e}. Using synthetic data.')
    btc_loaded = False
    btc = pd.Series(30000 * np.cumprod(1 + np.random.randn(1250) * 0.03 + 0.001),
                    index=pd.date_range('2020-01-01', periods=1250, freq='B'))

# EUR/RON
try:
    eurron = yf.download('EURRON=X', start='2020-01-01', end='2024-12-31', progress=False)['Close'].squeeze()
    eurron_loaded = True
    print('EUR/RON data loaded from Yahoo Finance')
except Exception as e:
    print(f'EUR/RON failed: {e}. Using synthetic data.')
    eurron_loaded = False
    eurron = pd.Series(4.87 + np.cumsum(np.random.randn(1250) * 0.002),
                       index=pd.date_range('2020-01-01', periods=1250, freq='B'))

# Airline passengers (synthetic - classic dataset)
t_air = np.arange(144)
trend_air = 100 + 1.8 * t_air + 0.005 * t_air**2
seasonal_air = 1 + 0.15 * np.sin(2*np.pi*t_air/12) + 0.08 * np.cos(4*np.pi*t_air/12)
airline = pd.Series(trend_air * seasonal_air * (1 + np.random.randn(144) * 0.02),
                    index=pd.date_range('1949-01', periods=144, freq='ME'))
print('Airline passengers data generated (classic dataset)')

In [None]:
# Chart: ch1_motivation_everywhere
# 2x2 overview of the four series: S&P 500, BTC, EUR/RON, airline passengers
fig, axes = plt.subplots(2, 2, figsize=(10, 5.5))

# One entry per panel in row-major order: (series, palette key, title, y-axis label)
panels = [
    (sp500, 'blue', 'S&P 500 (Finance)', 'Price'),
    (btc, 'orange', 'Bitcoin (Crypto)', 'Price (USD)'),
    (eurron, 'green', 'EUR/RON (Exchange Rate)', 'Rate'),
    (airline, 'red', 'Airline Passengers (Classic)', 'Passengers'),
]

for ax, (panel_series, panel_color, panel_title, panel_ylabel) in zip(axes.flat, panels):
    ax.plot(panel_series.index, panel_series.values, color=COLORS[panel_color], linewidth=1.2)
    ax.set_title(panel_title, fontweight='bold')
    ax.set_ylabel(panel_ylabel)
    ax.tick_params(axis='x', rotation=30)

# Only the bottom row carries an x-axis label
for ax in axes[1, :]:
    ax.set_xlabel('Date')

fig.suptitle('Time series are everywhere', fontweight='bold', fontsize=13, y=1.02)
fig.tight_layout()
save_chart(fig, 'ch1_motivation_everywhere')
plt.show()

In [None]:
# Chart: timeseries_definition
# Top panel: S&P 500 price level; bottom panel: its daily percentage returns
fig, axes = plt.subplots(2, 1, figsize=(8, 5), gridspec_kw={'height_ratios': [2, 1]})
ax_price, ax_returns = axes

# Price level
ax_price.plot(sp500.index, sp500.values, color=COLORS['blue'], linewidth=1.2)
ax_price.set_title('S&P 500 Daily Price', fontweight='bold')
ax_price.set_ylabel('Price')

# Returns in percent; the first observation has no predecessor and is dropped
returns = 100 * sp500.pct_change().dropna()
# Green bars for strictly positive days, red for everything else
colors_ret = np.where(returns.gt(0).to_numpy(), COLORS['green'], COLORS['red'])
ax_returns.bar(returns.index, returns.values, color=colors_ret, width=1, alpha=0.7)
ax_returns.axhline(y=0, color='black', linewidth=0.5)
ax_returns.set_title('Daily Returns (%)', fontweight='bold')
ax_returns.set_xlabel('Date')
ax_returns.set_ylabel('%')

for ax in axes:
    ax.tick_params(axis='x', rotation=30)

fig.tight_layout()
save_chart(fig, 'timeseries_definition')
plt.show()

In [None]:
# Chart: multiple_assets
# S&P 500, Bitcoin and Gold rebased to 100 (top), S&P 500 / Bitcoin daily log returns (bottom).
# Gold is downloaded when possible and simulated otherwise.
fig, axes = plt.subplots(2, 1, figsize=(8, 5.5))
ax_level, ax_logret = axes

# Normalize to 100 at the first observation of each series
sp_norm = sp500.div(sp500.iloc[0]).mul(100)
btc_norm = btc.div(btc.iloc[0]).mul(100)

# Gold (try loading, fallback to synthetic)
try:
    import yfinance as yf
    gold = yf.download('GC=F', start='2020-01-01', end='2024-12-31', progress=False)['Close'].squeeze()
    gold_norm = gold.div(gold.iloc[0]).mul(100)
    print('Gold data loaded from Yahoo Finance')
except Exception as e:
    print(f'Gold failed: {e}. Using synthetic data.')
    # Geometric random walk starting near 100, aligned with the S&P 500 dates
    gold_vals = 100 * np.cumprod(1 + np.random.randn(len(sp500)) * 0.008 + 0.0003)
    gold_norm = pd.Series(gold_vals, index=sp500.index)

# Indexed levels
for level_series, level_color, level_label in (
    (sp_norm, 'blue', 'S&P 500'),
    (btc_norm, 'orange', 'Bitcoin'),
    (gold_norm, 'green', 'Gold'),
):
    ax_level.plot(level_series.index, level_series.values,
                  color=COLORS[level_color], linewidth=1.2, label=level_label)
ax_level.set_title('Financial assets normalized (base 100)', fontweight='bold')
ax_level.set_ylabel('Indexed Value')
add_legend_below(ax_level, ncol=3)

# Log returns in percent
sp_ret = np.log(sp500.div(sp500.shift(1))).dropna().mul(100)
btc_ret = np.log(btc.div(btc.shift(1))).dropna().mul(100)

for ret_series, ret_color, ret_label in (
    (sp_ret, 'blue', 'S&P 500'),
    (btc_ret, 'orange', 'Bitcoin'),
):
    ax_logret.plot(ret_series.index, ret_series.values,
                   color=COLORS[ret_color], linewidth=0.6, alpha=0.7, label=ret_label)
ax_logret.axhline(y=0, color='black', linewidth=0.5)
ax_logret.set_title('Daily log returns (%)', fontweight='bold')
ax_logret.set_xlabel('Date')
ax_logret.set_ylabel('Return (%)')
add_legend_below(ax_logret, ncol=2)

for ax in axes:
    ax.tick_params(axis='x', rotation=30)

# Reserve a little room at the bottom of the figure when laying out
fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'multiple_assets')
plt.show()

In [None]:
# Chart: ch1_motivation_forecast
# Airline passengers: first 120 observations as history, last 24 as the forecast window
fig, ax = plt.subplots(figsize=(10, 4.5))

# Split the airline series positionally
train = airline.iloc[:120]
test = airline.iloc[120:]

# Simple forecast: trend + seasonal extrapolation over the hold-out positions
t_f = np.arange(120, 144)
trend_f = 100 + 1.8 * t_f + 0.005 * t_f**2
seasonal_f = 1 + 0.15 * np.sin(2*np.pi*t_f/12) + 0.08 * np.cos(4*np.pi*t_f/12)
forecast = trend_f * seasonal_f

# Band half-width widens linearly from 10 to 50 across the horizon
ci_width = np.linspace(10, 50, len(t_f))
band_lower = forecast - ci_width
band_upper = forecast + ci_width

ax.plot(train.index, train.values, color=COLORS['blue'], label='Observed data', linewidth=1.5)
ax.plot(test.index, forecast, color=COLORS['red'], linewidth=2, linestyle='--', label='Forecast')
ax.fill_between(test.index, band_lower, band_upper,
                color=COLORS['red'], alpha=0.15, label='95% confidence interval')

# Dotted marker at the last training date, annotated near the top of the axes
forecast_start = train.index[-1]
ax.axvline(x=forecast_start, color=COLORS['gray'], linestyle=':', linewidth=1, alpha=0.7)
y_top = ax.get_ylim()[1]
label_y = y_top * 0.95 if y_top > 0 else 500
ax.text(forecast_start, label_y, ' Forecast\n horizon', fontsize=8, color=COLORS['gray'])

ax.set_title('Time series forecasting', fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Passengers')
add_legend_below(ax, ncol=3)

fig.tight_layout()
save_chart(fig, 'ch1_motivation_forecast')
plt.show()

In [None]:
# Chart: ch1_motivation_components
# Airline passengers split into multiplicative trend, seasonal factor and residual
fig, axes = plt.subplots(4, 1, figsize=(10, 6), sharex=True)
ax_obs, ax_trend, ax_seasonal, ax_resid = axes

# Components evaluated on the 144 time steps of the airline series
t = np.arange(144)
trend_comp = 100 + 1.8 * t + 0.005 * t**2
seasonal_comp = 0.15 * np.sin(2*np.pi*t/12) + 0.08 * np.cos(4*np.pi*t/12)
seasonal_factor = 1 + seasonal_comp
# Relative deviation of the series from trend x seasonal factor
noise_comp = airline.values / (trend_comp * seasonal_factor) - 1

# Observed series
ax_obs.plot(airline.index, airline.values, color=COLORS['blue'], linewidth=1.2)
ax_obs.set_title('Original: $X_t = T_t \\times S_t \\times \\varepsilon_t$', fontweight='bold', fontsize=10)
ax_obs.set_ylabel('$X_t$')

# Trend
ax_trend.plot(airline.index, trend_comp, color=COLORS['red'], linewidth=1.5)
ax_trend.set_title('Trend ($T_t$)', fontweight='bold', fontsize=10)
ax_trend.set_ylabel('$T_t$')

# Seasonal factor, with a dotted reference line at the neutral value 1
ax_seasonal.plot(airline.index, seasonal_factor, color=COLORS['green'], linewidth=1.2)
ax_seasonal.axhline(y=1, color='black', linewidth=0.5, linestyle=':')
ax_seasonal.set_title('Seasonal factor ($S_t$)', fontweight='bold', fontsize=10)
ax_seasonal.set_ylabel('$S_t$')

# Residual factor, same reference line at 1
ax_resid.plot(airline.index, 1 + noise_comp, color=COLORS['gray'], linewidth=0.8)
ax_resid.axhline(y=1, color='black', linewidth=0.5, linestyle=':')
ax_resid.set_title('Residuals ($\\varepsilon_t$)', fontweight='bold', fontsize=10)
ax_resid.set_ylabel('$\\varepsilon_t$')
ax_resid.set_xlabel('Date')

fig.tight_layout()
save_chart(fig, 'ch1_motivation_components')
plt.show()