<a href="https://github.com/timeseriesAI/tsai-rs" target="_parent"><img src="https://img.shields.io/badge/tsai--rs-Time%20Series%20AI%20in%20Rust-blue" alt="tsai-rs"/></a>

# Multi-Class and Multi-Label Time Series Classification

This notebook demonstrates binary, multi-class, and multi-label time series classification using **tsai-rs**.

## Classification Types

| Type | Description | Labels per Sample |
|------|-------------|-------------------|
| Binary | Two classes | 1 (0 or 1) |
| Multi-class | Multiple exclusive classes | 1 (one of N) |
| Multi-label | Multiple non-exclusive classes | 0 to N |

## Install tsai-rs

```bash
cd crates/tsai_python
maturin develop --release
```

## Import Libraries

In [None]:
import tsai_rs
import numpy as np
import matplotlib.pyplot as plt

# Show which tsai-rs build is installed before anything else runs.
installed_version = tsai_rs.version()
print(f"tsai-rs version: {installed_version}")
# NOTE(review): my_setup() is a tsai_rs helper whose output is not visible
# here -- presumably it reports environment details; confirm.
tsai_rs.my_setup()

## Multi-Class Classification

Multi-class classification is the standard setting in which each sample belongs to exactly one of several classes.

In [None]:
# Fetch the NATOPS dataset (6 classes) already divided into train/test parts.
dsid = 'NATOPS'
X_train, y_train, X_test, y_test = tsai_rs.get_UCR_data(dsid, return_split=True)

# Axes 1 and 2 of X_train give the variable count and the sequence length.
n_vars, seq_len = X_train.shape[1:3]
# Every distinct value found in y_train counts as one class.
n_classes = np.unique(y_train).size

print(f"Dataset: {dsid}")
print(f"X shape: {X_train.shape}")
print(f"Classes: {n_classes}")
# bincount needs integer input, hence the cast of the labels.
print(f"Class distribution: {np.bincount(y_train.astype(int))}")

In [None]:
# Cast each split to float32 and standardize it (by_sample=True), train first.
X_train_std, X_test_std = (
    tsai_rs.ts_standardize(split.astype(np.float32), by_sample=True)
    for split in (X_train, X_test)
)

# Pair every standardized split with its label array.
train_ds = tsai_rs.TSDataset(X_train_std, y_train)
test_ds = tsai_rs.TSDataset(X_test_std, y_test)

print(f"Train: {train_ds}")
print(f"Test: {test_ds}")

In [None]:
# Multi-class model configuration: n_classes is the number of classes.
config = tsai_rs.InceptionTimePlusConfig(n_vars=n_vars, seq_len=seq_len, n_classes=n_classes)

print(f"Multi-class config: {config}")

## Binary Classification

Binary classification is a special case of multi-class classification with exactly two classes.

In [None]:
# Fetch the GunPoint dataset (2 classes) already divided into train/test parts.
dsid = 'GunPoint'
X_train, y_train, X_test, y_test = tsai_rs.get_UCR_data(dsid, return_split=True)

# Axes 1 and 2 of X_train give the variable count and the sequence length.
n_vars, seq_len = X_train.shape[1:3]
# Every distinct value found in y_train counts as one class.
n_classes = np.unique(y_train).size

print(f"Dataset: {dsid}")
print(f"X shape: {X_train.shape}")
print(f"Classes: {n_classes}")
# bincount needs integer input, hence the cast of the labels.
print(f"Class distribution: {np.bincount(y_train.astype(int))}")

In [None]:
# Binary model configuration: the class count is written out as the literal 2.
config_binary = tsai_rs.InceptionTimePlusConfig(n_vars=n_vars, seq_len=seq_len, n_classes=2)

print(f"Binary config: {config_binary}")

## Multi-Label Classification

Each sample can belong to multiple classes simultaneously.

In [None]:
# Create synthetic multi-label data
n_samples = 200
n_vars = 10
seq_len = 100
n_labels = 5  # 5 possible labels

# A seeded generator makes the synthetic data -- and every statistic printed
# from it in this notebook -- identical on each run.
rng = np.random.default_rng(42)

# Generate random time series
# Shape: (n_samples, n_vars, seq_len), standard-normal values stored as float32
X = rng.standard_normal((n_samples, n_vars, seq_len)).astype(np.float32)

# Generate multi-hot encoded labels (each sample can have multiple labels)
# Shape: (n_samples, n_labels); every entry is drawn independently as 0 or 1,
# so a sample may end up with anywhere from zero to all n_labels labels.
y_multilabel = rng.integers(0, 2, size=(n_samples, n_labels)).astype(np.float32)

print(f"X shape: {X.shape}")
print(f"y shape: {y_multilabel.shape}")
print(f"\nFirst 5 samples' labels:")
print(y_multilabel[:5])

In [None]:
# Row-wise sum of the multi-hot matrix: how many labels each sample carries.
labels_per_sample = np.sum(y_multilabel, axis=1)

print(f"Labels per sample distribution:")
print(f"  Min: {labels_per_sample.min():.0f}")
print(f"  Max: {labels_per_sample.max():.0f}")
print(f"  Mean: {labels_per_sample.mean():.2f}")

print(f"\nLabel frequency:")
for i in range(n_labels):
    # Column i holds the 0/1 indicator of label i for every sample.
    count = np.sum(y_multilabel[:, i])
    # Share of samples carrying label i, as a percentage.
    pct = count / n_samples * 100
    print(f"  Label {i}: {count:.0f} ({pct:.1f}%)")

In [None]:
# Ordered 80/20 split: the first 80% of samples train, the remainder tests.
# No shuffling is applied.
split_idx = int(0.8 * n_samples)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y_multilabel[:split_idx], y_multilabel[split_idx:]

print(f"Train: X={X_train.shape}, y={y_train.shape}")
print(f"Test: X={X_test.shape}, y={y_test.shape}")

In [None]:
# Standardize both splits (by_sample=True), train first, with no dtype cast.
X_train_std, X_test_std = (
    tsai_rs.ts_standardize(split, by_sample=True)
    for split in (X_train, X_test)
)

# Multi-label datasets: each split is paired with its 2-D multi-hot label matrix.
train_ds = tsai_rs.TSDataset(X_train_std, y_train)
test_ds = tsai_rs.TSDataset(X_test_std, y_test)

print(f"Train dataset: {train_ds}")

In [None]:
# Multi-label model configuration.
# n_classes receives the number of possible labels; labels are not exclusive.
config_multilabel = tsai_rs.InceptionTimePlusConfig(n_vars=n_vars, seq_len=seq_len, n_classes=n_labels)

print(f"Multi-label config: {config_multilabel}")

## Comparing Different Architectures

In [None]:
# Reload NATOPS so every architecture is configured for the same problem.
dsid = 'NATOPS'
X_train, y_train, X_test, y_test = tsai_rs.get_UCR_data(dsid, return_split=True)

n_vars, seq_len = X_train.shape[1:3]
n_classes = np.unique(y_train).size

# All four configs receive exactly the same three keyword arguments.
model_dims = {'n_vars': n_vars, 'seq_len': seq_len, 'n_classes': n_classes}

# Display name -> config object, kept in the order the models are listed.
architectures = {
    'InceptionTimePlus': tsai_rs.InceptionTimePlusConfig(**model_dims),
    'ResNetPlus': tsai_rs.ResNetPlusConfig(**model_dims),
    'TST': tsai_rs.TSTConfig(**model_dims),
    'MiniRocket': tsai_rs.MiniRocketConfig(**model_dims),
}

print(f"Architectures for {n_classes}-class classification:")
print("-" * 60)
# The loop rebinds the notebook-level name `config` on every iteration.
for name, config in architectures.items():
    print(f"{name}: {config}")

## Handling Class Imbalance

In [None]:
# Calculate class weights for imbalanced data
def compute_class_weights(y):
    """Compute class weights inversely proportional to class frequency.

    Each class ``c`` is assigned ``n_samples / (n_classes * count_c)``, so a
    class that holds exactly ``1 / n_classes`` of the samples gets weight 1.0
    and rarer classes get larger weights.

    Args:
        y: Array-like of class labels, one entry per sample. Input with more
            than one dimension is flattened first, so the sample total always
            matches the elements that are counted.

    Returns:
        dict mapping each distinct label (as a NumPy scalar) to its weight.
        Empty input yields an empty dict.
    """
    labels = np.asarray(y).ravel()
    # A single pass yields the sorted distinct labels and their frequencies,
    # instead of rescanning the whole array once per class.
    classes, counts = np.unique(labels, return_counts=True)
    n_samples = labels.size
    n_classes = classes.size

    # Every class reported by np.unique occurs at least once, so the
    # denominator is never zero.
    return {c: n_samples / (n_classes * n_c) for c, n_c in zip(classes, counts)}

# Worked example: class weights for the NATOPS training labels.
dsid = 'NATOPS'
splits = tsai_rs.get_UCR_data(dsid, return_split=True)
X_train, y_train, X_test, y_test = splits

# Only the training labels determine the weights.
class_weights = compute_class_weights(y_train)
print(f"Class weights for {dsid}:")
for c, w in class_weights.items():
    print(f"  Class {c}: {w:.4f}")

## Complete Multi-Class Pipeline

In [None]:
def multiclass_pipeline(dsid):
    """Prepare everything needed to train a multi-class model on one dataset.

    Loads the pre-split dataset named ``dsid``, prints its dimensions,
    standardizes float32 casts of both splits, wraps them in ``TSDataset``
    objects, and builds an InceptionTimePlus model config together with a
    learner config. No training is performed here.

    Args:
        dsid: Dataset name forwarded to ``tsai_rs.get_UCR_data``.

    Returns:
        Tuple ``(config, train_ds, test_ds, learner_config)``.
    """

    def _standardized(split):
        # Cast to float32, then standardize with by_sample=True.
        return tsai_rs.ts_standardize(split.astype(np.float32), by_sample=True)

    # Step 1: banner, then load the pre-split data.
    print(f"\n{'='*50}")
    print(f"Dataset: {dsid}")
    print(f"{'='*50}")

    X_train, y_train, X_test, y_test = tsai_rs.get_UCR_data(dsid, return_split=True)

    # Axes 1 and 2 of the training array give variables and sequence length;
    # the class count is the number of distinct training labels.
    n_vars, seq_len = X_train.shape[1:3]
    n_classes = np.unique(y_train).size

    print(f"Shape: {X_train.shape}")
    print(f"Variables: {n_vars}")
    print(f"Length: {seq_len}")
    print(f"Classes: {n_classes}")

    # Step 2: preprocess both splits (train first, then test).
    train_std, test_std = _standardized(X_train), _standardized(X_test)

    # Step 3: pair each standardized split with its labels.
    train_ds = tsai_rs.TSDataset(train_std, y_train)
    test_ds = tsai_rs.TSDataset(test_std, y_test)

    # Step 4: model configuration sized from the loaded data.
    config = tsai_rs.InceptionTimePlusConfig(n_vars=n_vars, seq_len=seq_len, n_classes=n_classes)

    # Step 5: training hyper-parameters.
    learner_config = tsai_rs.LearnerConfig(lr=1e-3, weight_decay=0.01, grad_clip=1.0)

    print(f"\nModel: {config}")
    print(f"Ready for training!")

    return config, train_ds, test_ds, learner_config

# Exercise the pipeline on three datasets in turn. Each pass overwrites the
# four result names, so only the last dataset's objects remain afterwards.
benchmark_ids = ('ECG200', 'GunPoint', 'NATOPS')
for dsid in benchmark_ids:
    config, train_ds, test_ds, learner_config = multiclass_pipeline(dsid)

## Summary

### Classification Types

| Type | y Shape | Loss | Output |
|------|---------|------|--------|
| Binary | (n,) | CrossEntropy | 2 classes |
| Multi-class | (n,) | CrossEntropy | N classes |
| Multi-label | (n, K) | BCEWithLogits | K labels |

### Key Points
1. **Multi-class**: Each sample has exactly one label
2. **Multi-label**: Each sample can have 0 to K labels
3. **Class imbalance**: Use weighted loss or oversampling

In [None]:
# Quick reference: the cheat-sheet lines are collected first and emitted with
# a single print call; the text produced is the same as printing line by line.
quick_reference = (
    "Classification Quick Reference",
    "=" * 50,
    "\n# Multi-class (exclusive labels)",
    "y = np.array([0, 1, 2, 0, 1])  # One label per sample",
    "config = tsai_rs.InceptionTimePlusConfig(n_vars, seq_len, n_classes=3)",
    "\n# Multi-label (non-exclusive)",
    "y = np.array([[1,0,1], [0,1,0]])  # Multi-hot encoding",
    "config = tsai_rs.InceptionTimePlusConfig(n_vars, seq_len, n_classes=3)",
    "# Use BCEWithLogitsLoss for multi-label",
)
print("\n".join(quick_reference))