# 01 - Multivariate Time Series Data Exploration

This notebook focuses on:
1. Loading and exploring multivariate time series datasets
2. Understanding multiple features per time step
3. Feature relationships and correlations
4. Time series characteristics and patterns
5. Data quality assessment and preprocessing


In [None]:
# Import libraries (note: all Python warnings are silenced globally just below)
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from datetime import datetime, timedelta

# Set style
# The bare 'seaborn' style name was deprecated in matplotlib 3.6 and removed in
# later releases, where it raises OSError. Prefer the versioned name, fall back
# to the legacy name on older installs, and finally to matplotlib's built-in
# 'default' style so this cell never fails purely because of a style lookup.
style_name = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    'default',
)
plt.style.use(style_name)
sns.set_palette("husl")

print("✅ All imports completed successfully!")


In [None]:
# Dataset catalogue and selection
print("=== MULTIVARIATE TIME SERIES DATASET SELECTION ===")

# Catalogue of supported datasets, keyed by a short identifier. Every entry
# carries the same five fields: display name, CSV path, feature column names,
# target column name, and the name of the timestamp column.
datasets = dict(
    beijing_air=dict(
        name='Beijing Multi-Site Air-Quality Data',
        file='../data_multivariate/PRSA_Data_Aotizhongxin_20130301-20170228.csv',
        features=[
            'PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3',
            'TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM',
        ],
        target='PM2.5',
        time_col='datetime',
    ),
    appliances_energy=dict(
        name='Appliances Energy Prediction',
        file='../data_multivariate/energydata_complete.csv',
        features=[
            'T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9',
            'RH_1', 'RH_2', 'RH_3', 'RH_4', 'RH_5', 'RH_6', 'RH_7', 'RH_8', 'RH_9',
            'T_out', 'RH_out', 'Press_mm_hg', 'Windspeed', 'Visibility', 'Tdewpoint',
        ],
        target='Appliances',
        time_col='date',
    ),
    ett=dict(
        name='Electricity Transformer Temperature',
        file='../data_multivariate/ETT-small/ETTm1.csv',
        features=['HUFL', 'HULL', 'MUFL', 'MULL', 'LUFL', 'LULL'],
        target='OT',
        time_col='date',
    ),
)

# Numbered overview of every catalogue entry, each followed by a blank line.
print("Available datasets:")
for position, entry in enumerate(datasets.values(), start=1):
    summary_lines = [
        f"{position}. {entry['name']}",
        f"   Features: {len(entry['features'])} variables",
        f"   Target: {entry['target']}",
        f"   File: {entry['file']}",
        "",
    ]
    print("\n".join(summary_lines))

# Select dataset (default to Beijing Air Quality)
selected_dataset = 'beijing_air'  # Change this to 'appliances_energy' or 'ett' as needed
dataset_info = datasets[selected_dataset]

# Echo the chosen entry so the selection is visible in the cell output.
print(
    f"Selected dataset: {dataset_info['name']}",
    f"Target variable: {dataset_info['target']}",
    f"Number of features: {len(dataset_info['features'])}",
    sep="\n",
)
