# TSA Chapter 0: Time Series Definition and Basic Concepts

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch0/TSA_ch0_definition/TSA_ch0_definition.ipynb)

This notebook demonstrates the fundamental characteristics of time series data:
- Time series definition and temporal ordering
- Autocorrelation (ACF)
- Common patterns: trend, seasonal, cyclic, random
- Data type comparison: cross-sectional vs time series vs panel
- Moving averages for trend extraction

In [None]:
!pip install matplotlib numpy scipy statsmodels pandas -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Style configuration
# Shared palette; insertion order also defines the default colour cycle below.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
)

# Transparent canvas so exported charts sit on any slide background.
_CANVAS_STYLE = {
    'axes.facecolor': 'none',
    'figure.facecolor': 'none',
    'savefig.transparent': True,
}

# Minimal frame: left/bottom spines only, no grid.
_FRAME_STYLE = {
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    'axes.edgecolor': '#333333',
    'axes.linewidth': 0.8,
}

# Font sizes for the individual text elements.
_TEXT_STYLE = {
    'font.size': 10,
    'axes.titlesize': 11,
    'axes.labelsize': 10,
    'legend.fontsize': 8,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
}

# Default line width and colour cycle taken from the palette.
_LINE_STYLE = {
    'lines.linewidth': 1.5,
    'axes.prop_cycle': plt.cycler('color', [*COLORS.values()]),
}

plt.rcParams.update({**_CANVAS_STYLE, **_FRAME_STYLE, **_TEXT_STYLE, **_LINE_STYLE})

np.random.seed(42)

# Lecture-wide charts directory, expressed relative to the working directory.
# NOTE(review): dirname() already steps to the parent of the working directory
# and the two '..' components climb two more levels -- confirm that this is the
# intended location of 'charts' in the repository layout.
_cwd_parent = os.path.dirname(os.path.abspath('.'))
CHARTS_DIR = os.path.join(_cwd_parent, '..', '..', 'charts')

def save_chart(fig, name):
    """Save *fig* as ``<name>.pdf`` and ``<name>.png`` and mirror both into CHARTS_DIR.

    Parameters
    ----------
    fig : object exposing ``savefig(path, **kwargs)`` (a matplotlib Figure)
        The figure to export.
    name : str
        File stem, without extension, used for every exported file.

    The copies in the working directory are always written. The mirror into
    ``CHARTS_DIR`` is best effort: it is skipped quietly when that directory
    does not exist (e.g. on Colab), and an I/O failure while writing into an
    existing directory is reported instead of being discarded.
    """
    export_opts = dict(bbox_inches='tight', transparent=True, dpi=150)
    formats = ('pdf', 'png')

    for ext in formats:
        fig.savefig(f'{name}.{ext}', **export_opts)

    # Also save to main charts directory for the lecture
    if os.path.isdir(CHARTS_DIR):
        try:
            for ext in formats:
                fig.savefig(os.path.join(CHARTS_DIR, f'{name}.{ext}'), **export_opts)
        except OSError as err:
            # The local copies above already exist, so report and carry on.
            print(f'Could not mirror {name} into {CHARTS_DIR}: {err}')
    print(f'Saved: {name}.pdf + .png')

def add_legend_below(ax, ncol=3):
    """Attach a frameless legend centred underneath *ax*, laid out in *ncol* columns."""
    # Anchor the legend's top-centre just below the bottom edge of the axes.
    placement = {
        'loc': 'upper center',
        'bbox_to_anchor': (0.5, -0.12),
    }
    ax.legend(ncol=ncol, frameon=False, **placement)

In [None]:
# Chart: ch1_def_timeseries
# Time series definition with real US unemployment data and ACF
# (a simulated random walk is used, and labelled as such, if the download fails)
fig, axes = plt.subplots(2, 1, figsize=(8, 5))

try:
    # Locate the date column by position instead of by header name, so the
    # download does not depend on how that header happens to be spelled.
    unemp_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?id=UNRATE', parse_dates=[0])
    date_col = unemp_df.columns[0]
    unemp_df['UNRATE'] = pd.to_numeric(unemp_df['UNRATE'], errors='coerce')
    unemp_df = unemp_df.dropna().tail(50)
    if unemp_df.empty:
        raise ValueError('UNRATE download contained no usable observations')
    t = np.arange(len(unemp_df))
    y = unemp_df['UNRATE'].values
    x_labels = unemp_df[date_col].values
    use_dates = True
    print(f'US Unemployment loaded: {len(y)} observations')
except Exception as exc:
    # Any download/parsing problem ends up here; say so instead of silently
    # swapping in simulated data.
    print(f'UNRATE download unavailable ({exc}); using a simulated random walk')
    np.random.seed(42)
    t = np.arange(1, 51)
    y = np.cumsum(np.random.randn(50) * 0.5) + 10
    x_labels = t
    use_dates = False

# Observations to call out, and how many steps to the right the label sits.
if use_dates:
    labelled_obs, label_shift = (10, 25, 40), 3
    series_name = 'US Unemployment Rate'
    y_axis_label = '$X_t$ (%)'
else:
    labelled_obs, label_shift = (10, 20, 30), 2
    series_name = 'Simulated Random Walk'
    y_axis_label = '$X_t$'

axes[0].plot(x_labels, y, color=COLORS['blue'], linewidth=1.5, marker='o', markersize=3)
last_idx = len(y) - 1
for obs in labelled_obs:
    # The title indexes the series as t = 1..T, so X_obs lives at array
    # position obs - 1 in both the real-data and the simulated case.
    idx = obs - 1
    if idx > last_idx:
        continue
    axes[0].annotate(f'$X_{{{obs}}}$', xy=(x_labels[idx], y[idx]),
                     xytext=(x_labels[min(idx + label_shift, last_idx)], y[idx] + 0.8),
                     arrowprops=dict(arrowstyle='->', color=COLORS['red'], lw=1),
                     fontsize=9, color=COLORS['red'])
if use_dates:
    axes[0].tick_params(axis='x', rotation=30)

# Name the series that is actually plotted rather than always claiming FRED data.
axes[0].set_title('Time Series: $\\{X_t\\}_{t=1}^{T}$ (' + series_name + ')', fontweight='bold')
axes[0].set_xlabel('Time ($t$)')
axes[0].set_ylabel(y_axis_label)

# ACF: rho_k = sum_t (x_t - mean)(x_{t+k} - mean) / sum_t (x_t - mean)^2
n = len(y)
y_centered = y - np.mean(y)
total_ss = np.sum(y_centered**2)
max_lag = min(15, n - 1)  # never ask for more lags than the sample supports
lags = np.arange(max_lag + 1)
acf_vals = [1.0] + [np.sum(y_centered[:n-k] * y_centered[k:]) / total_ss
                    for k in range(1, max_lag + 1)]

axes[1].bar(lags, acf_vals, color=COLORS['blue'], alpha=0.7, width=0.6)
axes[1].axhline(y=0, color='black', linewidth=0.5)
# Approximate 95% band for the sample ACF of white noise.
ci = 1.96 / np.sqrt(n)
axes[1].axhline(y=ci, color=COLORS['red'], linestyle='--', linewidth=0.8, alpha=0.7)
axes[1].axhline(y=-ci, color=COLORS['red'], linestyle='--', linewidth=0.8, alpha=0.7)
axes[1].set_title('Autocorrelation Function (ACF)', fontweight='bold')
axes[1].set_xlabel('Lag ($k$)')
axes[1].set_ylabel('$\\rho_k$')

fig.tight_layout()
save_chart(fig, 'ch1_def_timeseries')
plt.show()

In [None]:
# Chart: ch1_ts_patterns
# Common patterns in time series (real data where available)
np.random.seed(42)
# Fallback panels draw from their own generator, so the simulated cyclic and
# white-noise panels below look the same whether or not the downloads succeed.
fallback_rng = np.random.RandomState(7)
fig, axes = plt.subplots(2, 2, figsize=(10, 5.5))
t = np.arange(100)


def _fred_tail(series_id, n_obs):
    """Download a FRED series and return (dates, values) for its last *n_obs* valid rows.

    The date column is located by position rather than by header name, so the
    download does not depend on how that header is spelled. Raises on any
    download/parsing problem or when no usable observation remains.
    """
    frame = pd.read_csv(f'https://fred.stlouisfed.org/graph/fredgraph.csv?id={series_id}',
                        parse_dates=[0])
    frame[series_id] = pd.to_numeric(frame[series_id], errors='coerce')
    frame = frame.dropna().tail(n_obs)
    if frame.empty:
        raise ValueError(f'{series_id} download contained no usable observations')
    return frame[frame.columns[0]], frame[series_id]


# Trend: Real US GDP from FRED
try:
    gdp_dates, gdp_level = _fred_tail('GDPC1', 100)
except Exception as exc:
    print(f'GDPC1 download unavailable ({exc}); using a simulated trend')
    axes[0, 0].plot(t, 20 + 0.5 * t + fallback_rng.randn(100) * 2, color=COLORS['blue'], linewidth=1.2)
    axes[0, 0].set_title('Trend', fontweight='bold')
else:
    axes[0, 0].plot(gdp_dates, gdp_level, color=COLORS['blue'], linewidth=1.2)
    axes[0, 0].set_title('Trend (US Real GDP)', fontweight='bold')
    axes[0, 0].tick_params(axis='x', rotation=30)
axes[0, 0].set_ylabel('$X_t$')

# Seasonal: Real FRED retail sales
# NOTE(review): this panel needs the *not seasonally adjusted* series; RSXFSN
# is used for that reason, since the adjusted RSXFS has the seasonal swings
# removed. Confirm the series IDs against their FRED pages.
try:
    ret_dates, ret_level = _fred_tail('RSXFSN', 100)
except Exception as exc:
    print(f'RSXFSN download unavailable ({exc}); using a simulated seasonal series')
    axes[0, 1].plot(t, 50 + 15 * np.sin(2 * np.pi * t / 12) + fallback_rng.randn(100) * 1.5,
                    color=COLORS['green'], linewidth=1.2)
    axes[0, 1].set_title('Seasonal', fontweight='bold')
else:
    axes[0, 1].plot(ret_dates, ret_level, color=COLORS['green'], linewidth=1.2)
    axes[0, 1].set_title('Seasonal (US Retail Sales)', fontweight='bold')
    axes[0, 1].tick_params(axis='x', rotation=30)
axes[0, 1].set_ylabel('$X_t$')

# Cyclic (synthetic - conceptual: business cycle)
axes[1, 0].plot(t, 50 + 10 * np.sin(2 * np.pi * t / 40) + np.random.randn(100) * 2,
                color=COLORS['orange'], linewidth=1.2)
axes[1, 0].set_title('Cyclic (simulated business cycle)', fontweight='bold')
axes[1, 0].set_xlabel('Time')
axes[1, 0].set_ylabel('$X_t$')

# Random (white noise - conceptual)
axes[1, 1].plot(t, np.random.randn(100) * 5 + 50, color=COLORS['gray'], linewidth=1.0)
axes[1, 1].set_title('Random (White Noise)', fontweight='bold')
axes[1, 1].set_xlabel('Time')
axes[1, 1].set_ylabel('$X_t$')

fig.suptitle('Common Patterns in Time Series', fontweight='bold', fontsize=12, y=1.02)
fig.tight_layout()
save_chart(fig, 'ch1_ts_patterns')
plt.show()

In [None]:
# Chart: data_types_comparison
# Cross-sectional vs Time Series vs Panel Data (all three panels are simulated)
np.random.seed(42)
fig, axes = plt.subplots(1, 3, figsize=(10, 3.5))
ax_cross, ax_series, ax_panel = axes

# Cross-sectional: many units observed once (consumption against income)
x_cs = np.random.randn(50) * 10 + 50
y_cs = 0.5 * x_cs + np.random.randn(50) * 5 + 10
ax_cross.scatter(x_cs, y_cs, color=COLORS['blue'], alpha=0.6, s=30, edgecolors='white', linewidth=0.5)
ax_cross.set_title('Cross-sectional Data', fontweight='bold')
ax_cross.set(xlabel='Income', ylabel='Consumption')

# Time series: one unit observed repeatedly (linear trend + 12-step sine + noise)
t = np.arange(60)
y_ts = 100 + 0.5 * t + 8 * np.sin(2 * np.pi * t / 12) + np.random.randn(60) * 2
ax_series.plot(t, y_ts, color=COLORS['green'], linewidth=1.2)
ax_series.set_title('Time Series', fontweight='bold')
ax_series.set(xlabel='Time ($t$)', ylabel='$X_t$')

# Panel data: several units observed repeatedly, each with its own growth rate
panel_specs = [
    ('Country A', COLORS['blue'], 15),
    ('Country B', COLORS['red'], 20),
    ('Country C', COLORS['orange'], 25),
]
for country, shade, growth in panel_specs:
    trajectory = 100 + growth * np.log1p(t) + np.random.randn(60) * 3
    ax_panel.plot(t, trajectory, color=shade, linewidth=1.2, label=country)
ax_panel.set_title('Panel Data', fontweight='bold')
ax_panel.set(xlabel='Time', ylabel='GDP per capita')
add_legend_below(ax_panel, ncol=3)

fig.tight_layout()
save_chart(fig, 'data_types_comparison')
plt.show()

In [None]:
# Chart: ch1_def_moving_average
# Moving average with 3 window sizes on real retail sales data
# (a simulated trend + seasonal series is used if the download fails)
np.random.seed(42)
fig, ax = plt.subplots(figsize=(8, 4.5))

try:
    # Only the values are plotted (against an integer time index), so the date
    # column is left unparsed and the download does not depend on its header.
    ret_ma_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?id=RSXFS')
    ret_ma_df['RSXFS'] = pd.to_numeric(ret_ma_df['RSXFS'], errors='coerce')
    ret_ma_vals = ret_ma_df.dropna(subset=['RSXFS']).tail(120)
    if ret_ma_vals.empty:
        raise ValueError('RSXFS download contained no usable observations')
    t_ma = np.arange(len(ret_ma_vals))
    y_ma = ret_ma_vals['RSXFS'].values.astype(float)
    data_label = 'US Retail Sales'
except Exception as exc:
    # Report the substitution instead of silently plotting simulated data.
    print(f'RSXFS download unavailable ({exc}); using simulated data')
    t_ma = np.arange(120)
    trend_ma = 50 + 0.3 * t_ma
    y_ma = trend_ma + 10 * np.sin(2 * np.pi * t_ma / 12) + np.random.randn(120) * 3
    data_label = 'Original Data'

ax.plot(t_ma, y_ma, color=COLORS['gray'], linewidth=0.8, alpha=0.5, label=data_label)

for w, c, name in [(5, COLORS['green'], 'MA(5)'),
                    (12, COLORS['blue'], 'MA(12)'),
                    (24, COLORS['red'], 'MA(24)')]:
    if w > len(y_ma):
        continue  # a window longer than the series has no complete average
    kernel = np.ones(w) / w
    ma = np.convolve(y_ma, kernel, mode='valid')
    # ma[j] averages observations j .. j+w-1, whose midpoint is j + (w-1)/2.
    # Using w // 2 instead would place even windows half a step too far right.
    centers = t_ma[:len(ma)] + (w - 1) / 2
    ax.plot(centers, ma, color=c, linewidth=1.5, label=name)

ax.set_title('Centered Moving Average: Smoothing Effect', fontweight='bold')
ax.set_xlabel('Time ($t$)')
ax.set_ylabel('$\\hat{T}_t$')
add_legend_below(ax, ncol=4)

fig.tight_layout()
save_chart(fig, 'ch1_def_moving_average')
plt.show()

In [None]:
# Chart: ch1_moving_average
# Three panels with different window sizes on real retail sales data
# (a simulated trend + seasonal series is used if the download fails)
np.random.seed(42)
fig, axes = plt.subplots(1, 3, figsize=(10, 3.5))

try:
    # Only the values are plotted (against an integer time index), so the date
    # column is left unparsed and the download does not depend on its header.
    ret_mp_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?id=RSXFS')
    ret_mp_df['RSXFS'] = pd.to_numeric(ret_mp_df['RSXFS'], errors='coerce')
    ret_mp_vals = ret_mp_df.dropna(subset=['RSXFS']).tail(100)
    if ret_mp_vals.empty:
        raise ValueError('RSXFS download contained no usable observations')
    t_mp = np.arange(len(ret_mp_vals))
    y_mp = ret_mp_vals['RSXFS'].values.astype(float)
except Exception as exc:
    # Report the substitution instead of silently plotting simulated data.
    print(f'RSXFS download unavailable ({exc}); using simulated data')
    t_mp = np.arange(100)
    y_mp = 50 + 0.3 * t_mp + 8 * np.sin(2 * np.pi * t_mp / 12) + np.random.randn(100) * 3

for ax, w, title in zip(axes, [3, 7, 15],
                         ['Small Window (MA-3)', 'Medium Window (MA-7)', 'Large Window (MA-15)']):
    ax.plot(t_mp, y_mp, color=COLORS['gray'], linewidth=0.6, alpha=0.5, label='Original')
    if w <= len(y_mp):  # a window longer than the series has no complete average
        kernel = np.ones(w) / w
        ma = np.convolve(y_mp, kernel, mode='valid')
        # ma[j] averages observations j .. j+w-1; plot it at the window midpoint.
        centers = t_mp[:len(ma)] + (w - 1) / 2
        ax.plot(centers, ma, color=COLORS['blue'], linewidth=1.5, label=f'MA({w})')
    ax.set_title(title, fontweight='bold', fontsize=9)
    ax.set_xlabel('Time')
    add_legend_below(ax, ncol=2)

axes[0].set_ylabel('$X_t$')
# Leave a little room at the bottom for the legends placed below each axes.
fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'ch1_moving_average')
plt.show()