# cuTWED Demo Notebook

This notebook demonstrates how to use cuTWED with different backends in Google Colab.

## Setup

First, let's check if a GPU is available in this Colab environment.

In [None]:
# Ask the NVIDIA driver to list the attached GPU(s).
# NOTE(review): presumably an error here means the runtime has no GPU attached — confirm in Colab's runtime settings.
!nvidia-smi

Now, let's clone the repository and move into its `refactored` directory. The library itself is built a few cells below.

In [None]:
# Download the cuTWED sources into ./cuTWED.
!git clone https://github.com/garrettwrong/cuTWED.git
# `%cd` changes the kernel's own working directory (a `!cd` would only affect its one-off subshell),
# so the following cells run inside cuTWED/refactored.
# NOTE(review): assumes the cloned repository contains a `refactored` directory — confirm.
%cd cuTWED/refactored

Install the necessary dependencies.

In [None]:
# Build tools needed by the CMake build below.
!apt-get update && apt-get install -y cmake ninja-build
# `%pip` installs into the environment of the running kernel.
# scikit-learn is included because the classification example imports it.
%pip install numpy matplotlib pytest torch jax jaxlib scikit-learn
# Adjust the CuPy wheel to the CUDA version in your Colab instance.
%pip install cupy-cuda11x

Build and install the library.

In [None]:
# Configure an out-of-tree CMake build in ./build, compile it, and run the install step.
!mkdir -p build && cd build && cmake .. && make -j && make install
# Every `!` line runs in its own subshell, so `cd build` has to be repeated here.
# NOTE(review): assumes the CMake project defines `python_package` and `python_install` targets — confirm.
!cd build && make python_package && make python_install

Add `/usr/local/lib` to the shared-library search path.

In [None]:
import os

# Put /usr/local/lib at the front of LD_LIBRARY_PATH.
# Empty entries are dropped because an empty component (e.g. the trailing ':' produced when the
# variable was previously unset) is interpreted as the current directory. Existing occurrences of
# the directory are dropped too, so re-running this cell does not keep growing the variable.
# NOTE(review): the dynamic loader normally reads LD_LIBRARY_PATH at process start, so this mainly
# helps subprocesses; if `import cutwed` still cannot find the library, try `!ldconfig` — confirm.
lib_dir = '/usr/local/lib'
existing_dirs = [
    entry
    for entry in os.environ.get('LD_LIBRARY_PATH', '').split(':')
    if entry and entry != lib_dir
]
os.environ['LD_LIBRARY_PATH'] = ':'.join([lib_dir] + existing_dirs)

## Basic Usage

Let's import the library and check available backends.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import time
from cutwed import twed, twed_batch, set_backend, get_backend_name, get_available_backends

# Look up which backends this installation offers and which one is currently selected
available_backends = get_available_backends()
active_backend = get_backend_name()
print(f"Available backends: {available_backends}")
print(f"Current backend: {active_backend}")

### Simple Example

Let's create two simple time series and compute the TWED distance between them.

In [None]:
# Seeded generator so the noisy series, and the distances computed from them, are the same on every run
rng = np.random.default_rng(0)

# Create simple time series: one sine period sampled at two different resolutions, plus Gaussian noise
n_a, n_b = 100, 80
A = np.sin(np.linspace(0, 2*np.pi, n_a)) + rng.standard_normal(n_a) * 0.1
TA = np.arange(n_a)
B = np.sin(np.linspace(0, 2*np.pi, n_b)) + rng.standard_normal(n_b) * 0.1
TB = np.arange(n_b)

# Parameters for TWED (reused by the backend-comparison and batch cells)
nu = 1.0
lamb = 1.0
degree = 2

# Plot the time series
plt.figure(figsize=(10, 4))
plt.plot(TA, A, label='Time Series A')
plt.plot(TB, B, label='Time Series B')
plt.legend()
plt.title('Time Series')
plt.grid(True)
plt.show()

### Compare Backends

Let's compute the TWED distance using different backends and compare the results and performance.

In [None]:
backends = get_available_backends()
results = {}

for backend in backends:
    print(f"Using {backend} backend...")
    # perf_counter() is a monotonic, high-resolution clock intended for measuring short intervals;
    # time.time() follows the wall clock and can jump if the system clock is adjusted.
    # NOTE(review): a backend's first call may include one-time initialisation cost — confirm before
    # reading these numbers as steady-state performance.
    start_time = time.perf_counter()
    distance = twed(A, TA, B, TB, nu, lamb, degree, backend=backend)
    elapsed = time.perf_counter() - start_time
    results[backend] = {'distance': distance, 'time': elapsed}
    print(f"  Distance: {distance:.6f}")
    print(f"  Time: {elapsed:.6f} seconds")

# Compare results: every backend should report (nearly) the same distance
print("\nResults comparison:")
for backend, result in results.items():
    print(f"{backend}: {result['distance']:.6f} in {result['time']:.6f}s")

### Batch Processing

Now let's try batch processing with multiple time series.

In [None]:
# Seeded generator so the batch (and every distance derived from it) is reproducible
rng = np.random.default_rng(0)

# Create batch data: values are (batch, samples, 1), time stamps are (batch, samples)
batch_size = 10
len_a, len_b = 100, 80
AA = np.zeros((batch_size, len_a, 1))
TAA = np.tile(np.arange(len_a), (batch_size, 1))
BB = np.zeros((batch_size, len_b, 1))
TBB = np.tile(np.arange(len_b), (batch_size, 1))

# The sampling grids do not depend on the batch index, so build them once
grid_a = np.linspace(0, 2*np.pi, len_a)
grid_b = np.linspace(0, 2*np.pi, len_b)

# Fill with sine waves with different frequencies
for i in range(batch_size):
    freq = 1.0 + 0.2 * i
    AA[i, :, 0] = np.sin(freq * grid_a) + rng.standard_normal(len_a) * 0.1
    BB[i, :, 0] = np.sin(freq * grid_b) + rng.standard_normal(len_b) * 0.1

# Plot some examples
plt.figure(figsize=(12, 8))
for i in range(min(4, batch_size)):
    plt.subplot(2, 2, i+1)
    plt.plot(TAA[i], AA[i, :, 0], label=f'A[{i}]')
    plt.plot(TBB[i], BB[i, :, 0], label=f'B[{i}]')
    plt.legend()
    plt.title(f'Time Series Pair {i}')
    plt.grid(True)
plt.tight_layout()
plt.show()

Compute batch distances with different backends.

In [None]:
batch_results = {}

for backend in backends:
    print(f"Using {backend} backend for batch processing...")
    # perf_counter() is the monotonic, high-resolution clock intended for interval timing
    start_time = time.perf_counter()
    distances = twed_batch(AA, TAA, BB, TBB, nu, lamb, degree, backend=backend)
    elapsed = time.perf_counter() - start_time
    batch_results[backend] = {'distances': distances, 'time': elapsed}
    # The result is indexed with two indices; [0, 0] is shown as a quick spot check
    print(f"  First distance: {distances[0, 0]:.6f}")
    print(f"  Time: {elapsed:.6f} seconds")

# Compare results
print("\nBatch results comparison:")
for backend, result in batch_results.items():
    print(f"{backend}: {result['distances'][0, 0]:.6f} in {result['time']:.6f}s")

### Visualization of Distance Matrix

Let's visualize the distance matrix from batch processing.

In [None]:
# Pick the backend whose batch run took the least time and show its result
fastest_backend = min(batch_results, key=lambda name: batch_results[name]['time'])
distance_matrix = batch_results[fastest_backend]['distances']

fig, ax = plt.subplots(figsize=(8, 6))
image = ax.imshow(distance_matrix, cmap='viridis', interpolation='nearest')
fig.colorbar(image, ax=ax, label='TWED Distance')
ax.set_title(f'TWED Distance Matrix (using {fastest_backend} backend)')
ax.set_xlabel('Time Series B Index')
ax.set_ylabel('Time Series A Index')
plt.show()

## Performance Comparison

Let's compare the performance of different backends with varying time series lengths.

In [None]:
import numpy as np
import time
import matplotlib.pyplot as plt
from cutwed import twed, set_backend, get_available_backends

# Parameters
lengths = [10, 50, 100, 200, 500, 1000]
backends = get_available_backends()
# times[backend][i] is the runtime measured for lengths[i]
times = {backend: [] for backend in backends}
# Seeded so every run benchmarks the same inputs
rng = np.random.default_rng(0)

# Run benchmark
for length in lengths:
    print(f"\nTesting with length: {length}")

    # Create 3-dimensional float32 series. Dedicated names are used so the A/TA/B/TB
    # variables from the earlier example cells are not overwritten by benchmark data.
    bench_a = rng.standard_normal((length, 3)).astype(np.float32)
    bench_ta = np.arange(length, dtype=np.float32)
    bench_b = rng.standard_normal((length, 3)).astype(np.float32)
    bench_tb = np.arange(length, dtype=np.float32)

    # Test each backend
    for backend in backends:
        print(f"  Using {backend} backend...")
        # perf_counter() is the monotonic, high-resolution clock intended for interval timing
        start_time = time.perf_counter()
        distance = twed(bench_a, bench_ta, bench_b, bench_tb, 1.0, 1.0, 2, backend=backend)
        elapsed = time.perf_counter() - start_time
        times[backend].append(elapsed)
        print(f"    Time: {elapsed:.6f} seconds")

# Plot results on log-log axes so both small and large lengths stay readable
plt.figure(figsize=(10, 6))
for backend in backends:
    plt.plot(lengths, times[backend], marker='o', label=backend)
plt.xlabel('Time Series Length')
plt.ylabel('Computation Time (s)')
plt.title('TWED Performance Comparison')
plt.grid(True)
plt.legend()
plt.yscale('log')
plt.xscale('log')
plt.show()

## Real-world Example: Classifying Time Series

Let's demonstrate a practical application of TWED for time series classification using a nearest-neighbor approach.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from cutwed import twed, set_backend

# Generate synthetic data with 3 classes
np.random.seed(42)
n_samples = 150
n_per_class = n_samples // 3
length = 100
# Called `time_axis`, not `time`, so the `time` module used by the timing cells is not shadowed
time_axis = np.arange(length)

# Class 1: Sine wave (one period) plus Gaussian noise
class1 = np.sin(2 * np.pi * time_axis / length) + np.random.normal(0, 0.2, (n_per_class, length))

# Class 2: Square wave (1 on the middle half, 0 elsewhere) plus Gaussian noise
square = np.zeros(length)
square[length//4:3*length//4] = 1
class2 = square + np.random.normal(0, 0.2, (n_per_class, length))

# Class 3: Triangle wave (rises to the midpoint, then falls) plus Gaussian noise
triangle = np.where(time_axis < length / 2,
                    2 * time_axis / length,
                    2 - 2 * time_axis / length)
class3 = triangle + np.random.normal(0, 0.2, (n_per_class, length))

# Combine data: one series per row, labels 0/1/2 in the same order
X = np.vstack([class1, class2, class3])
y = np.repeat([0, 1, 2], n_per_class)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize each series on its own. StandardScaler normalizes columns, so the data is
# transposed to put one series per column. Each split needs its own fit: after the transpose
# the column count equals the number of series, which differs between train and test, so
# reusing the train-fitted scaler on the test split raises a feature-count error.
X_train_scaled = StandardScaler().fit_transform(X_train.T).T
X_test_scaled = StandardScaler().fit_transform(X_test.T).T

# Plot examples from each class
plt.figure(figsize=(12, 4))
for i, label in enumerate(['Sine', 'Square', 'Triangle']):
    plt.subplot(1, 3, i+1)
    plt.plot(time_axis, X[y == i][0])
    plt.title(f'Class {i}: {label}')
    plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Classify using 1-nearest neighbor with TWED distance
def classify_1nn(X_train, y_train, X_test, backend='numpy'):
    """Label each test series with the class of its TWED-nearest training series.

    Args:
        X_train: Array of training series, one series per row.
        y_train: Labels aligned with the rows of ``X_train``.
        X_test: Array of test series, one series per row.
        backend: Backend name forwarded to ``twed``.

    Returns:
        ``np.ndarray`` with one predicted label per row of ``X_test``.
    """
    # Prepare every training series once: a column vector plus integer time stamps.
    # The stamps are derived from the series itself, so the function no longer
    # depends on a notebook-level `length` variable.
    train_series = [
        (sample.reshape(-1, 1), np.arange(len(sample), dtype=np.float32))
        for sample in X_train
    ]
    y_pred = []

    for i, test_sample in enumerate(X_test):
        # '\r' keeps the progress message on a single, continuously updated line
        print(f"Classifying test sample {i+1}/{len(X_test)}\r", end="")
        B = test_sample.reshape(-1, 1)
        TB = np.arange(len(test_sample), dtype=np.float32)

        # Calculate distances to all training samples
        distances = [
            twed(A, TA, B, TB, nu=1.0, lamb=1.0, degree=2, backend=backend)
            for A, TA in train_series
        ]

        # Find nearest neighbor
        nearest_idx = np.argmin(distances)
        y_pred.append(y_train[nearest_idx])

    return np.array(y_pred)

# Choose the backend with the lowest mean runtime across the benchmark lengths
mean_runtime = {name: np.mean(samples) for name, samples in times.items()}
fastest_backend = min(mean_runtime, key=mean_runtime.get)
print(f"Using {fastest_backend} backend for classification")

# Predict a label for every (standardized) test series
y_pred = classify_1nn(X_train_scaled, y_train, X_test_scaled, backend=fastest_backend)

# Score the predictions against the held-out labels
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f"\nClassification accuracy: {accuracy:.2f}")
print("\nConfusion Matrix:")
print(conf_matrix)

## Conclusion

In this notebook, we've demonstrated:

1. How to use cuTWED with different backends (NumPy, PyTorch, JAX, CuPy, CUDA)
2. Performance comparison between backends
3. Batch processing of multiple time series
4. A practical application of TWED for time series classification

The refactored cuTWED library provides a unified interface to different implementations, allowing users to choose the best backend for their specific needs and hardware availability.