# Time Series Analysis & Forecasting Project

This notebook demonstrates time series analysis and forecasting, including:
- Data preprocessing and visualization
- Time series decomposition
- Model development using SARIMA
- Forecasting and evaluation

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error
from datetime import datetime, timedelta

# Set style for visualizations.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and
# later releases dropped the old names, so use whichever one is available.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette('deep')

In [None]:
def generate_sample_data(n_periods=365, seed=None):
    """
    Generate sample time series data with trend, seasonality, and noise

    The series is the sum of a linear trend rising from 0 to 10 across the
    whole range, a sine wave of amplitude 5 with a 365-day period, and
    standard-normal noise.

    Parameters:
    -----------
    n_periods : int
        Number of daily time periods to generate, starting at 2023-01-01
    seed : int, optional
        Seed for the noise component. When given, repeated calls return
        identical data. When None (the default), the noise is drawn from
        NumPy's global random state.

    Returns:
    --------
    pandas.DataFrame
        A single 'value' column indexed by a daily DatetimeIndex named 'date'
    """
    # Build the index directly (rather than via a column + set_index) so it
    # keeps its daily frequency information.
    dates = pd.date_range(start='2023-01-01', periods=n_periods, freq='D',
                          name='date')

    # A dedicated generator is used only when a seed is requested, so
    # unseeded calls still honour any global np.random.seed() set by callers.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Generate components
    trend = np.linspace(0, 10, n_periods)
    seasonal = 5 * np.sin(2 * np.pi * np.arange(n_periods) / 365)  # Yearly seasonality
    noise = rng.normal(0, 1, n_periods)

    # Combine components
    return pd.DataFrame({'value': trend + seasonal + noise}, index=dates)

# Generate sample data.
# Three years of daily observations are used because the yearly (365-day)
# seasonal decomposition later in the notebook needs at least two complete
# cycles of data.
df = generate_sample_data(n_periods=3 * 365)
df.head()

In [None]:
# Draw the raw series on a single wide axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['value'])
ax.set_title('Time Series Data')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
plt.show()

# Decompose the time series into trend, seasonal and residual components
seasonal_period = 365  # length of the yearly cycle, in daily observations
min_observations = 2 * seasonal_period

if len(df) < min_observations:
    # seasonal_decompose raises ValueError unless the series covers two
    # complete cycles; report that clearly instead of aborting the notebook.
    decomposition = None
    print(f'Skipping decomposition: need at least {min_observations} '
          f'observations for period {seasonal_period}, got {len(df)}')
else:
    decomposition = seasonal_decompose(df['value'], period=seasonal_period)

    # One stacked panel per component, in the conventional order
    panels = (
        ('Observed', decomposition.observed),
        ('Trend', decomposition.trend),
        ('Seasonal', decomposition.seasonal),
        ('Residual', decomposition.resid),
    )
    fig, axes = plt.subplots(len(panels), 1, figsize=(12, 10))
    for ax, (title, component) in zip(axes, panels):
        component.plot(ax=ax)
        ax.set_title(title)
    plt.tight_layout()
    plt.show()

In [None]:
# Split data into train and test sets (chronological 80/20 split)
train_size = int(len(df) * 0.8)
train = df.iloc[:train_size]
test = df.iloc[train_size:]

# Model the yearly cycle with Fourier terms passed as exogenous regressors.
# A seasonal order with period 12 describes a 12-day cycle on this daily
# index, which the data does not contain, and a seasonal period of 365 is
# impractical for SARIMAX's seasonal ARIMA terms.
seasonal_period = 365
fourier_order = 2  # number of sine/cosine harmonic pairs
# Computed over the full series so the phase stays continuous from the
# training window into the forecast window.
time_step = np.arange(len(df))
fourier_terms = pd.DataFrame(
    {
        f'{name}_{k}': func(2 * np.pi * k * time_step / seasonal_period)
        for k in range(1, fourier_order + 1)
        for name, func in (('sin', np.sin), ('cos', np.cos))
    },
    index=df.index,
)

# Fit SARIMAX: ARIMA(1, 1, 1) errors around the Fourier regression
model = SARIMAX(train['value'],
                exog=fourier_terms.iloc[:train_size],
                order=(1, 1, 1))
results = model.fit(disp=False)  # disp=False silences optimizer output

# Make predictions; a model with regressors needs their future values
forecast = results.get_forecast(steps=len(test),
                                exog=fourier_terms.iloc[train_size:])
forecast_mean = forecast.predicted_mean
forecast_ci = forecast.conf_int()

In [None]:
# Overlay training data, held-out data and the forecast on one axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train.index, train['value'], label='Training Data')
ax.plot(test.index, test['value'], label='Actual Test Data')
ax.plot(test.index, forecast_mean, label='Forecast')

# Shade the forecast interval: first column is the lower bound,
# second column the upper bound
lower_bound = forecast_ci.iloc[:, 0]
upper_bound = forecast_ci.iloc[:, 1]
ax.fill_between(test.index, lower_bound, upper_bound, color='k', alpha=0.1)

ax.set_title('Time Series Forecast')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.legend()
plt.show()

# Score the forecast against the held-out observations
actual_values = test['value']
mae = mean_absolute_error(actual_values, forecast_mean)
mse = mean_squared_error(actual_values, forecast_mean)
rmse = np.sqrt(mse)  # same units as the series, unlike MSE

# Report each metric to two decimal places
print(f'Mean Squared Error: {mse:.2f}')
print(f'Mean Absolute Error: {mae:.2f}')
print(f'Root Mean Squared Error: {rmse:.2f}')