## Time Series Analysis

* **What is Time Series Analysis?**
    * It's a way to understand data that is collected over time, like stock prices, temperature readings, or sales figures.
    * The order of the data points matters a lot! What happened yesterday often influences what happens today.

* **Why do we do it?**
    * We want to find patterns, trends, or repeating cycles in the data.
    * The main goal is often to predict future values (forecasting).

* **Think of it like this:**
    * Data might generally go up or down over time (a *trend*).
    * It might have regular ups and downs depending on the time of year or day (like holiday sales - that's *seasonality*).
    * There's also random, unpredictable variation (*noise*).
    * Time series analysis helps us break down and understand these different parts to make better predictions.

## Specialized Analytical Techniques
Time series analysis requires tailored models that capture temporal dependencies and patterns, which are often hidden when the data is treated as a static, unordered dataset.
Example Techniques:
- Autoregressive Integrated Moving Average (ARIMA): Captures linear dependencies.
- Exponential Smoothing Models: Address trends and seasonal components.
- Decomposition Methods: Split time series into trend, seasonal, and noise components for clearer insights.

# Why Time Series Isn't Just Another Dataset

## Setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.seasonal import seasonal_decompose
import yfinance as yf
import warnings
warnings.filterwarnings('ignore')

# Set plotting style
# NOTE(review): the 'seaborn-v0_8' style name presumably needs a recent matplotlib — confirm against the installed version
plt.style.use('seaborn-v0_8')
# Default figure size for every figure that does not pass its own figsize
plt.rcParams['figure.figsize'] = (14, 6)

## Step 1: Challenges in Analyzing Time Series Data

In [2]:
def create_autocorrelated_data(ar_coef=0.8, n=100, seed=42):
    """Generate an AR(1) series: x[i] = ar_coef * x[i-1] + e[i], e ~ N(0, 1).

    Args:
        ar_coef: Weight applied to the previous observation.
        n: Number of observations to generate; ``0`` yields an empty array.
        seed: Seed for the private random generator, so equal arguments
            always produce the same series.

    Returns:
        A NumPy float array of length ``n``.
    """
    # A private RandomState produces the same draws as np.random.seed(seed)
    # followed by np.random.normal(), but does not reset the global generator
    # that other functions in this notebook draw from.
    rng = np.random.RandomState(seed)
    ar_data = np.zeros(n)
    if n == 0:
        return ar_data
    ar_data[0] = rng.normal()
    for i in range(1, n):
        ar_data[i] = ar_coef * ar_data[i - 1] + rng.normal()
    return ar_data

def plot_autocorrelation(ar_coef, n_samples, max_lags):
    """Plot a generated autocorrelated series above its autocorrelation function.

    Args:
        ar_coef: AR coefficient passed to ``create_autocorrelated_data``.
        n_samples: Number of daily observations to generate.
        max_lags: Largest lag requested for the ACF plot; it is reduced to
            ``n_samples - 1`` when the series is too short to support it.
    """
    ar_data = create_autocorrelated_data(ar_coef, n_samples)
    dates = pd.date_range(start='2020-01-01', periods=n_samples, freq='D')
    ts_auto = pd.Series(ar_data, index=dates, name='Autocorrelated Series')

    # Lag k compares observations k steps apart, so it needs at least k + 1
    # points; the sliders allow max_lags == n_samples (50 and 50).
    lags = min(max_lags, n_samples - 1)

    fig, ax = plt.subplots(2, 1, figsize=(14, 10))

    # Plot time series
    ax[0].plot(ts_auto)
    ax[0].set_title(f'Time Series with Autocorrelation Coefficient = {ar_coef}')
    ax[0].set_xlabel('Date')
    ax[0].set_ylabel('Value')

    # Plot ACF
    plot_acf(ts_auto, lags=lags, ax=ax[1])
    ax[1].set_title('Autocorrelation Function (ACF)')

    plt.tight_layout()
    plt.show()

    print(f"AR coefficient: {ar_coef} - Higher values create stronger dependency between observations")
    print("Imagine traditional models like trying to understand the flow of a river from just snapshots. "
          "Time series analysis traces the current, capturing the river's journey and rhythm.")

def interactive_autocorrelation():
    """Display sliders for the AR coefficient, sample size and ACF lag count.

    Every slider change clears the output area and redraws the
    autocorrelation plots with the current slider values.
    """
    coef_ctrl = widgets.FloatSlider(value=0.8, min=0, max=0.99, step=0.01, description='AR Coefficient:')
    size_ctrl = widgets.IntSlider(value=100, min=50, max=300, step=10, description='Sample Size:')
    lag_ctrl = widgets.IntSlider(value=20, min=5, max=50, step=5, description='Max ACF Lags:')
    controls = [coef_ctrl, size_ctrl, lag_ctrl]

    panel = widgets.VBox(controls)
    canvas = widgets.Output()

    def redraw(*_):
        # Render into the Output widget so each redraw replaces the last one.
        with canvas:
            clear_output(wait=True)
            plot_autocorrelation(coef_ctrl.value, size_ctrl.value, lag_ctrl.value)

    for control in controls:
        control.observe(redraw, 'value')

    display(panel, canvas)
    redraw()

def create_trend_seasonal_data(trend_slope=0.03, seasonal_amp=5.0, noise_level=1.0, n_days=730, seed=None):
    """Generate a daily series made of a linear trend, a yearly sine wave and noise.

    Args:
        trend_slope: Increase of the trend component per day.
        seasonal_amp: Amplitude of the sine wave (period of 365 days).
        noise_level: Standard deviation of the Gaussian noise.
        n_days: Number of daily observations, starting at 2020-01-01.
        seed: Optional seed for reproducible noise. ``None`` (the default)
            draws from NumPy's global random generator.

    Returns:
        A ``(dates, components)`` tuple: ``dates`` is a daily DatetimeIndex and
        ``components`` maps 'trend', 'seasonality', 'noise' and 'combined'
        to NumPy arrays of length ``n_days``.
    """
    dates = pd.date_range(start='2020-01-01', periods=n_days, freq='D')
    day_index = np.arange(n_days, dtype=float)

    # slope * t keeps the day-to-day step equal to trend_slope; a linspace up
    # to slope * n_days over n_days points would step by slope * n / (n - 1).
    trend = trend_slope * day_index
    seasonality = seasonal_amp * np.sin(2 * np.pi * day_index / 365)  # Yearly seasonality

    rng = np.random if seed is None else np.random.RandomState(seed)
    noise = rng.normal(0, noise_level, n_days)  # Random noise

    # Create component series
    components = {
        'trend': trend,
        'seasonality': seasonality,
        'noise': noise,
        'combined': trend + seasonality + noise,
    }

    return dates, components

def plot_trend_seasonality(trend_slope, seasonal_amp, noise_level, show_components):
    """Plot a generated trend + seasonality + noise series.

    With ``show_components`` set, the combined series and each of its three
    components get their own panel. Otherwise the full series is drawn above
    a close-up of the first half of 2021.
    """
    dates, components = create_trend_seasonal_data(trend_slope, seasonal_amp, noise_level)
    ts = pd.Series(components['combined'], index=dates)

    if not show_components:
        fig, (full_ax, zoom_ax) = plt.subplots(2, 1, figsize=(14, 10))

        full_ax.plot(dates, components['combined'])
        full_ax.set_title('Time Series with Trend and Seasonality')
        full_ax.set_xlabel('Date')
        full_ax.set_ylabel('Value')

        # A shorter window makes the seasonal shape easier to see
        mask = (dates >= '2021-01-01') & (dates <= '2021-06-30')
        zoom_ax.plot(dates[mask], ts[mask])
        zoom_ax.set_title('6-Month Period Showing Seasonal Pattern')
        zoom_ax.set_xlabel('Date')
        zoom_ax.set_ylabel('Value')
    else:
        panels = [
            ('combined', 'Combined Time Series'),
            ('trend', 'Trend Component'),
            ('seasonality', 'Seasonal Component'),
            ('noise', 'Noise Component'),
        ]
        fig, axes = plt.subplots(4, 1, figsize=(14, 14))
        for axis, (key, title) in zip(axes, panels):
            axis.plot(dates, components[key])
            axis.set_title(title)

    plt.tight_layout()
    plt.show()

def interactive_trend_seasonality():
    """Display controls for trend slope, seasonal amplitude, noise level and component view.

    Every control change clears the output area and redraws the
    trend/seasonality plots with the current settings.
    """
    slope_ctrl = widgets.FloatSlider(value=0.03, min=0, max=0.1, step=0.005, description='Trend Slope:')
    amp_ctrl = widgets.FloatSlider(value=5.0, min=0, max=10, step=0.5, description='Seasonal Amplitude:')
    noise_ctrl = widgets.FloatSlider(value=1.0, min=0, max=3, step=0.1, description='Noise Level:')
    parts_toggle = widgets.Checkbox(value=False, description='Show Components')
    controls = [slope_ctrl, amp_ctrl, noise_ctrl, parts_toggle]

    panel = widgets.VBox(controls)
    canvas = widgets.Output()

    def redraw(*_):
        # Render into the Output widget so each redraw replaces the last one.
        with canvas:
            clear_output(wait=True)
            plot_trend_seasonality(slope_ctrl.value, amp_ctrl.value,
                                   noise_ctrl.value, parts_toggle.value)

    for control in controls:
        control.observe(redraw, 'value')

    display(panel, canvas)
    redraw()

# Entry point for Step 1: shows both challenge explorers in a tabbed widget
def explore_time_series_challenges():
    """Display a two-tab widget: autocorrelation and trend/seasonality explorers."""
    sections = [
        ('Autocorrelation', "<h3>Autocorrelation Exploration</h3>", interactive_autocorrelation),
        ('Trend & Seasonality', "<h3>Trend and Seasonality Exploration</h3>", interactive_trend_seasonality),
    ]

    tab = widgets.Tab()
    tab.children = [
        widgets.VBox([widgets.HTML(heading), widgets.Output(layout={'height': '800px'})])
        for _title, heading, _launch in sections
    ]
    for position, (title, _heading, _launch) in enumerate(sections):
        tab.set_title(position, title)

    display(tab)

    # Run each explorer inside the Output widget of its own tab.
    for pane, (_title, _heading, launch) in zip(tab.children, sections):
        with pane.children[1]:
            launch()

# Run this section (comment out the call below to skip it):
explore_time_series_challenges()

Tab(children=(VBox(children=(HTML(value='<h3>Autocorrelation Exploration</h3>'), Output(layout=Layout(height='…

## Step 2: Interactive Time Series Visualization


In [3]:
#added during screencast
def load_stock_data(ticker='AAPL', start_date='2018-01-01', end_date='2023-01-01'):
    """Download historical stock data with yfinance.

    Args:
        ticker: Ticker symbol passed to ``yf.download``.
        start_date: Start of the requested range.
        end_date: End of the requested range.

    Returns:
        The object returned by ``yf.download``, with a redundant ticker
        column level removed, or ``None`` when the download raised.
    """
    try:
        data = yf.download(ticker, start=start_date, end=end_date)
    except Exception as e:
        # Best-effort loader for interactive use: report and let callers check for None.
        print(f"Error loading data: {e}")
        return None

    # yfinance can return two-level columns (field, ticker) even for a single
    # ticker. Dropping the ticker level keeps data['Close'] a Series, which
    # the plotting code expects.
    columns = getattr(data, 'columns', None)
    if (isinstance(columns, pd.MultiIndex) and columns.nlevels == 2
            and columns.get_level_values(1).nunique() == 1):
        data = data.droplevel(1, axis=1)

    return data

def plot_stock_data(ticker='AAPL', start_date='2018-01-01', end_date='2023-01-01',
                   ma_window=30, plot_type='line'):
    """Download stock data and draw it with the selected visualization.

    Args:
        ticker: Ticker symbol to load.
        start_date: Start of the date range.
        end_date: End of the date range.
        ma_window: Moving-average window for the 'line' plot; ``0`` disables it.
        plot_type: One of 'line', 'resampled', 'yearly', 'volume', 'returns'.
            Any other value prints a message and draws nothing.
    """
    data = load_stock_data(ticker, start_date, end_date)

    if data is None or len(data) == 0:
        print(f"No data available for {ticker} from {start_date} to {end_date}")
        return

    if plot_type == 'line':
        plt.figure(figsize=(14, 6))
        data['Close'].plot()
        if ma_window > 0:
            data['Close'].rolling(window=ma_window).mean().plot(linewidth=2, label=f'{ma_window}-Day MA')
            plt.legend()
        plt.title(f'{ticker} Stock Closing Prices')
        plt.xlabel('Date')
        plt.ylabel('Price (USD)')
        plt.grid(True)

    elif plot_type == 'resampled':
        fig, axes = plt.subplots(3, 1, figsize=(14, 12))

        data['Close'].resample('W').mean().plot(ax=axes[0])
        axes[0].set_title('Weekly Average')
        axes[0].grid(True)

        data['Close'].resample('M').mean().plot(ax=axes[1])
        axes[1].set_title('Monthly Average')
        axes[1].grid(True)

        data['Close'].resample('Q').mean().plot(ax=axes[2])
        axes[2].set_title('Quarterly Average')
        axes[2].grid(True)

    elif plot_type == 'yearly':
        yearly_data = data['Close'].groupby([data.index.year, data.index.dayofyear]).first().unstack(0)
        # Pass the axes explicitly: DataFrame.plot() without ax opens its own
        # figure, which would leave a separately created figure blank.
        fig, ax = plt.subplots(figsize=(14, 6))
        yearly_data.plot(ax=ax)
        plt.title(f'{ticker} Year-over-Year Comparison')
        plt.xlabel('Day of Year')
        plt.ylabel('Price (USD)')
        plt.legend([str(year) for year in yearly_data.columns])
        plt.grid(True)

    elif plot_type == 'volume':
        fig, ax1 = plt.subplots(figsize=(14, 6))

        ax1.plot(data.index, data['Close'], 'b-')
        ax1.set_xlabel('Date')
        ax1.set_ylabel('Price (USD)', color='b')
        ax1.tick_params(axis='y', labelcolor='b')

        # Second y-axis so volume does not flatten the price line
        ax2 = ax1.twinx()
        ax2.fill_between(data.index, 0, data['Volume'], color='r', alpha=0.3)
        ax2.set_ylabel('Volume', color='r')
        ax2.tick_params(axis='y', labelcolor='r')

        plt.title(f'{ticker} Price and Volume')

    elif plot_type == 'returns':
        # pct_change() yields fractions; scale so the values match the '%' axis labels
        returns = data['Close'].pct_change().dropna() * 100

        plt.figure(figsize=(14, 12))

        plt.subplot(211)
        plt.plot(returns)
        plt.title(f'{ticker} Daily Returns')
        plt.ylabel('Return %')
        plt.grid(True)

        plt.subplot(212)
        returns.hist(bins=50)
        plt.title('Returns Distribution')
        plt.xlabel('Return %')
        plt.grid(True)

    else:
        # Nothing was drawn, so do not fall through and show an empty figure
        print(f"Unknown plot type: {plot_type!r}")
        return

    plt.tight_layout()
    plt.show()

    print("Visualization is akin to setting the stage—revealing time's passage and the storyline we aim to decode.")

def interactive_stock_visualization():
    """Display controls for ticker, date range and plot type, plus an update button.

    The plot is drawn once on display and again on each click of
    'Update Plot'; changing a control alone does not redraw.
    """
    # Stock selection
    ticker_input = widgets.Text(value='AAPL', description='Ticker:')

    # Date range
    start_date_picker = widgets.DatePicker(description='Start Date:', value=pd.to_datetime('2018-01-01'))
    end_date_picker = widgets.DatePicker(description='End Date:', value=pd.to_datetime('2023-01-01'))

    # Visualization options
    ma_slider = widgets.IntSlider(value=30, min=0, max=200, step=5, description='MA Window:')

    plot_dropdown = widgets.Dropdown(
        options=[
            ('Line Plot', 'line'),
            ('Resampled', 'resampled'),
            ('Yearly Comparison', 'yearly'),
            ('Price & Volume', 'volume'),
            ('Returns', 'returns')
        ],
        value='line',
        description='Plot Type:'
    )

    # Quick ticker selection
    popular_tickers = widgets.Dropdown(
        options=['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'NVDA', 'SPY', 'QQQ'],
        value='AAPL',
        description='Quick Select:'
    )

    # Connect popular ticker dropdown to text input
    def update_ticker(*args):
        ticker_input.value = popular_tickers.value

    popular_tickers.observe(update_ticker, 'value')

    out = widgets.Output()

    # Update function
    def update_plot(*args):
        with out:
            clear_output(wait=True)
            start, end = start_date_picker.value, end_date_picker.value
            # A cleared date picker holds None, which has no strftime()
            if start is None or end is None:
                print("Please select both a start date and an end date.")
                return
            plot_stock_data(
                ticker_input.value,
                start.strftime('%Y-%m-%d'),
                end.strftime('%Y-%m-%d'),
                ma_slider.value,
                plot_dropdown.value
            )

    # Create the update button
    update_button = widgets.Button(description='Update Plot')
    update_button.on_click(update_plot)

    # Create UI layout: the three control rows, then the button underneath
    ticker_box = widgets.HBox([ticker_input, popular_tickers])
    date_box = widgets.HBox([start_date_picker, end_date_picker])
    option_box = widgets.HBox([plot_dropdown, ma_slider])
    controls = widgets.VBox([ticker_box, date_box, option_box])
    ui = widgets.VBox([controls, update_button])

    display(ui, out)
    update_plot()  # Initial plot

# Main function to run the interactive visualization explorer
def explore_visualizations():
    """Launch the interactive stock visualization widget."""
    interactive_stock_visualization()

# Run this section (comment out the call below to skip it):
explore_visualizations()

VBox(children=(VBox(children=(HBox(children=(Text(value='AAPL', description='Ticker:'), Dropdown(description='…

Output()

# What Makes Time Series Special: Trends, Seasonality & More

In [4]:
#added during screencast
def run_time_series_models(data, model_type, p=1, d=1, q=1, trend='add', seasonal='add',
                          seasonal_period=365, forecast_steps=30):
    """Fit the selected model to ``data`` and forecast ``forecast_steps`` days ahead.

    Args:
        data: Series whose index ends in a timestamp; forecast dates start
            one day after the last index value.
        model_type: 'ARIMA' or 'Exponential Smoothing'. Any other value
            selects the simple moving average.
        p, d, q: ARIMA order. ``p`` doubles as the moving-average window.
        trend, seasonal: Component types passed to ExponentialSmoothing.
        seasonal_period: Season length passed to ExponentialSmoothing.
        forecast_steps: Number of daily steps to forecast.

    Returns:
        ``(forecast, results, model_name, forecast_dates)``. ``results`` is the
        fitted model, or ``None`` for the moving average, which has no fit.
    """
    dates = data.index
    last_date = dates[-1]
    forecast_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=forecast_steps, freq='D')

    if model_type == 'ARIMA':
        model = ARIMA(data, order=(p, d, q))
        results = model.fit()
        forecast = results.forecast(steps=forecast_steps)
        model_name = f'ARIMA({p},{d},{q})'

    elif model_type == 'Exponential Smoothing':
        model = ExponentialSmoothing(data, trend=trend, seasonal=seasonal, seasonal_periods=seasonal_period)
        results = model.fit()
        forecast = results.forecast(forecast_steps)
        model_name = f'ETS({trend},{seasonal})'

    else:  # Simple Moving Average
        # p is reused as the window size. A valid ARIMA order of p=0 would
        # give an empty window and an all-NaN forecast, so use at least 1.
        lookback = max(1, p)
        sma = data.rolling(window=lookback).mean()
        model_name = f'SMA({lookback})'

        # For SMA, we just extend the last value for forecasting
        forecast = pd.Series([sma.iloc[-1]] * forecast_steps, index=forecast_dates)
        results = None

    return forecast, results, model_name, forecast_dates

def plot_model_results(data, model_type, p=1, d=1, q=1, trend='add', seasonal='add',
                     seasonal_period=365, forecast_steps=30, history_points=100):
    """Draw the tail of ``data`` together with the selected model's forecast.

    At most ``history_points`` of the most recent observations are shown.
    For ARIMA, the first table of the fit summary is printed below the plot.
    """
    forecast, results, model_name, forecast_dates = run_time_series_models(
        data, model_type, p=p, d=d, q=q, trend=trend, seasonal=seasonal,
        seasonal_period=seasonal_period, forecast_steps=forecast_steps)

    # Limit the history so the forecast stays visible next to it
    recent = data[-history_points:] if history_points < len(data) else data

    plt.figure(figsize=(14, 6))
    plt.plot(recent, label='Historical Data')
    plt.plot(forecast_dates, forecast, label=f'{model_name} Forecast', color='red', linestyle='--')

    plt.title(f'{model_name} Model Forecast')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # Only the ARIMA fit summary is printed
    if model_type == 'ARIMA' and results is not None:
        print(results.summary().tables[0])

def decompose_time_series(data, period=365, model_type='additive'):
    """Plot ``data`` and its seasonal decomposition as four stacked panels.

    The panels show the original series, then the trend, seasonal and
    residual components returned by ``seasonal_decompose``.
    """
    parts = seasonal_decompose(data, model=model_type, period=period)

    panels = (
        (data, 'Original Time Series'),
        (parts.trend, 'Trend Component'),
        (parts.seasonal, 'Seasonal Component'),
        (parts.resid, 'Residual Component'),
    )

    plt.figure(figsize=(14, 10))
    for row, (series, title) in enumerate(panels, start=1):
        plt.subplot(len(panels), 1, row)
        plt.plot(series)
        plt.title(title)

    plt.tight_layout()
    plt.show()

def interactive_time_series_models():
    """Display controls for choosing a forecasting model and its parameters.

    A synthetic trend + seasonality series is generated once. Every control
    change refits the selected model and redraws the forecast. If fitting
    raises ValueError or LinAlgError, the message is printed in the output
    area instead of a traceback.
    """
    # Generate synthetic data or load real data
    dates, components = create_trend_seasonal_data()
    data = pd.Series(components['combined'], index=dates)

    # Create widget controls
    model_dropdown = widgets.Dropdown(
        options=['ARIMA', 'Exponential Smoothing', 'Simple Moving Average'],
        value='ARIMA',
        description='Model:'
    )

    # ARIMA parameters
    p_slider = widgets.IntSlider(value=1, min=0, max=5, step=1, description='p:')
    d_slider = widgets.IntSlider(value=1, min=0, max=2, step=1, description='d:')
    q_slider = widgets.IntSlider(value=1, min=0, max=5, step=1, description='q:')

    # ETS parameters
    trend_dropdown = widgets.Dropdown(
        options=['add', 'mul', None],
        value='add',
        description='Trend:'
    )
    seasonal_dropdown = widgets.Dropdown(
        options=['add', 'mul', None],
        value='add',
        description='Seasonal:'
    )
    period_slider = widgets.IntSlider(value=365, min=7, max=365, step=1, description='Period:')

    # General parameters
    forecast_slider = widgets.IntSlider(value=30, min=7, max=100, step=1, description='Forecast Days:')
    history_slider = widgets.IntSlider(value=100, min=30, max=500, step=10, description='History Points:')

    # Layout for conditional parameters
    arima_box = widgets.VBox([p_slider, d_slider, q_slider])
    ets_box = widgets.VBox([trend_dropdown, seasonal_dropdown, period_slider])

    # Create tabs for different model parameters
    model_params_tab = widgets.Tab()
    model_params_tab.children = [arima_box, ets_box]
    model_params_tab.set_title(0, 'ARIMA Parameters')
    model_params_tab.set_title(1, 'ETS Parameters')

    # Select the parameter tab that matches the chosen model
    def update_tabs(*args):
        # The moving average also uses the ARIMA tab because p is its window size
        is_ets = model_dropdown.value == 'Exponential Smoothing'
        model_params_tab.selected_index = 1 if is_ets else 0

    model_dropdown.observe(update_tabs, 'value')

    # Create main UI
    ui = widgets.VBox([
        model_dropdown,
        model_params_tab,
        widgets.HBox([forecast_slider, history_slider])
    ])

    out = widgets.Output()

    # Update function for model visualization
    def update_model(*args):
        with out:
            clear_output(wait=True)
            try:
                plot_model_results(
                    data,
                    model_dropdown.value,
                    p_slider.value,
                    d_slider.value,
                    q_slider.value,
                    trend_dropdown.value,
                    seasonal_dropdown.value,
                    period_slider.value,
                    forecast_slider.value,
                    history_slider.value
                )
            except (ValueError, np.linalg.LinAlgError) as err:
                # Some selectable settings cannot be fitted to this data, e.g.
                # 'mul' components when the series has non-positive values.
                print(f"Could not fit {model_dropdown.value} with the current settings: {err}")

    # Connect events
    for control in (model_dropdown, p_slider, d_slider, q_slider,
                    trend_dropdown, seasonal_dropdown, period_slider,
                    forecast_slider, history_slider):
        control.observe(update_model, 'value')

    display(ui, out)
    update_tabs()
    update_model()

def interactive_decomposition():
    """Display controls for the decomposition period and model type.

    A synthetic trend + seasonality series is generated once. Every control
    change redraws the decomposition. If the decomposition raises
    ValueError, the message is printed in the output area instead of a
    traceback.
    """
    # Generate synthetic data
    dates, components = create_trend_seasonal_data()
    data = pd.Series(components['combined'], index=dates)

    # Create controls
    period_slider = widgets.IntSlider(value=365, min=7, max=365, step=1, description='Period:')
    model_dropdown = widgets.Dropdown(
        options=['additive', 'multiplicative'],
        value='additive',
        description='Model:'
    )

    ui = widgets.VBox([period_slider, model_dropdown])
    out = widgets.Output()

    def update_decomposition(*args):
        with out:
            clear_output(wait=True)
            try:
                decompose_time_series(data, period_slider.value, model_dropdown.value)
            except ValueError as err:
                # The synthetic series starts near zero, so it usually has
                # non-positive values that the multiplicative model rejects.
                print(f"Could not decompose with the '{model_dropdown.value}' model: {err}")

    period_slider.observe(update_decomposition, 'value')
    model_dropdown.observe(update_decomposition, 'value')

    display(ui, out)
    update_decomposition()

def explore_analytical_techniques():
    """Display a two-tab widget: forecasting models and time series decomposition."""
    sections = [
        ('Forecasting', "<h3>Forecasting Models</h3>", interactive_time_series_models),
        ('Decomposition', "<h3>Time Series Decomposition</h3>", interactive_decomposition),
    ]

    tab = widgets.Tab()
    tab.children = [
        widgets.VBox([widgets.HTML(heading), widgets.Output(layout={'height': '800px'})])
        for _title, heading, _launch in sections
    ]
    for position, (title, _heading, _launch) in enumerate(sections):
        tab.set_title(position, title)

    display(tab)

    # Run each explorer inside the Output widget of its own tab.
    for pane, (_title, _heading, launch) in zip(tab.children, sections):
        with pane.children[1]:
            launch()

# Run this section (comment out the call below to skip it):
explore_analytical_techniques()

Tab(children=(VBox(children=(HTML(value='<h3>Forecasting Models</h3>'), Output(layout=Layout(height='800px')))…