In [1]:
import os
# Sanity check: confirm the angle CSV is reachable from the notebook's working
# directory before any heavier cell tries to load it.
data_path = "../Data/data_angles/data6_angles.csv"
print("Attempting to access:", data_path)
print("Full path:", os.path.abspath(data_path))
# Report the outcome in a single statement; the message text is unchanged.
print("File found!" if os.path.exists(data_path) else "File not found!")

Attempting to access: ../Data/data_angles/data6_angles.csv
Full path: /home/exx/Desktop/quantum/Data/data_angles/data6_angles.csv
File found!


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torch.amp import GradScaler, autocast
import matplotlib.pyplot as plt
from typing import Tuple

# Verify GPU
# Print the PyTorch build and, when CUDA is usable, the CUDA version and the
# visible device(s). Without a GPU the cell must stop here.
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)
    print("GPU count:", torch.cuda.device_count())
    print("GPU name:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected, exiting!")
    # Inside a Jupyter kernel `exit(1)` is IPython's exit autocall: it only
    # schedules a kernel shutdown and returns, so the rest of the cell would
    # keep executing. Raising SystemExit halts the cell immediately and still
    # produces exit status 1 when the code is run as a plain script.
    raise SystemExit(1)

class AngleDataset(Dataset):
    """Sliding-window dataset of normalized angle pairs read from a CSV file.

    The CSV must contain the four columns listed in ``FEATURE_COLUMNS`` and
    ``TARGET_COLUMNS``. Every value is divided by 180, so angles expressed in
    [-180, 180] degrees map to [-1, 1]. Sample ``i`` pairs the feature rows
    ``i .. i + window_size - 1`` with the target of the window's last row.

    Args:
        data_path: Path of the CSV file to load.
        window_size: Number of consecutive rows in each input window (>= 1).

    Raises:
        ValueError: If ``window_size`` is smaller than 1 or larger than the
            number of usable rows left after cleaning.
    """

    FEATURE_COLUMNS = ['Feature Phi (degrees)', 'Feature Theta (degrees)']
    TARGET_COLUMNS = ['Target Phi (degrees)', 'Target Theta (degrees)']

    def __init__(self, data_path: str, window_size: int):
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")

        angle_columns = self.FEATURE_COLUMNS + self.TARGET_COLUMNS
        self.df = pd.read_csv(data_path)

        if self.df[angle_columns].isna().any().any():
            print("Warning: NaN values found in data!")
            # Only the angle columns are inspected, so only they decide which
            # rows are dropped (NaNs in unrelated columns are irrelevant here).
            self.df = self.df.dropna(subset=angle_columns)

        finite_rows = ~np.isinf(self.df[angle_columns].to_numpy(dtype=np.float64)).any(axis=1)
        if not finite_rows.all():
            print("Warning: Infinite values found in data!")
            self.df = self.df[finite_rows]

        # Convert to float64 and divide out-of-place: an in-place `/=` fails on
        # integer-typed columns and on read-only arrays returned by pandas.
        self.features = self.df[self.FEATURE_COLUMNS].to_numpy(dtype=np.float64) / 180.0
        self.targets = self.df[self.TARGET_COLUMNS].to_numpy(dtype=np.float64) / 180.0

        print("Normalized Feature Phi range:", self.features[:, 0].min(), "to", self.features[:, 0].max())
        print("Normalized Feature Theta range:", self.features[:, 1].min(), "to", self.features[:, 1].max())
        print("Normalized Target Phi range:", self.targets[:, 0].min(), "to", self.targets[:, 0].max())
        print("Normalized Target Theta range:", self.targets[:, 1].min(), "to", self.targets[:, 1].max())

        n_rows = len(self.df)
        if window_size > n_rows:
            raise ValueError(
                f"window_size ({window_size}) exceeds the number of usable rows ({n_rows})"
            )

        self.window_size = window_size
        self.length = n_rows - window_size + 1
        # Row position (after cleaning) of the target belonging to each sample.
        self.indices = np.arange(window_size - 1, n_rows)

    def __len__(self) -> int:
        """Return the number of complete windows available."""
        return self.length

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, int]:
        """Return ``(window, target, row_index)`` for sample ``idx``.

        ``window`` is a float32 tensor of shape ``(window_size, 2)``, ``target``
        a float32 tensor of shape ``(2,)`` and ``row_index`` the row position
        of the target within the cleaned data.
        """
        last_row = idx + self.window_size - 1
        window = torch.as_tensor(self.features[idx:last_row + 1], dtype=torch.float32)
        target = torch.as_tensor(self.targets[last_row], dtype=torch.float32)
        return window, target, int(self.indices[idx])

class FlashAttention(nn.Module):
    """Multi-head self-attention over a ``(batch, time, d_model)`` sequence.

    Queries, keys and values come from one fused linear projection. The
    attention itself is delegated to ``F.scaled_dot_product_attention`` so
    PyTorch can pick a fused kernel instead of materialising the full
    ``time x time`` attention matrix by hand.

    Args:
        d_model: Embedding width of the input and output.
        n_heads: Number of attention heads; must divide ``d_model``.
        dropout: Dropout probability applied to the attention weights while
            the module is in training mode.
    """

    def __init__(self, d_model: int, n_heads: int, dropout: float = 0.1):
        super().__init__()
        assert d_model % n_heads == 0, "d_model must be divisible by n_heads"
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_k = d_model // n_heads
        # Kept as a module so the configured probability stays inspectable;
        # its `p` is forwarded to the fused attention call in `forward`.
        self.dropout = nn.Dropout(dropout)
        
        self.qkv = nn.Linear(d_model, 3 * d_model)
        self.out = nn.Linear(d_model, d_model)
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply self-attention to ``x`` of shape ``(B, T, d_model)``; the output has the same shape."""
        B, T, C = x.shape
        # (B, T, 3*C) -> (3, B, heads, T, d_k)
        qkv = self.qkv(x).reshape(B, T, 3, self.n_heads, self.d_k).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)
        
        # The default scale is 1/sqrt(d_k), matching the manual formulation.
        # Attention dropout must be disabled explicitly outside training.
        attended = F.scaled_dot_product_attention(
            q, k, v, dropout_p=self.dropout.p if self.training else 0.0
        )
        
        # (B, heads, T, d_k) -> (B, T, C)
        attended = attended.transpose(1, 2).reshape(B, T, C)
        return self.out(attended)

class AnglePredictionModel(nn.Module):
    """Attention-based regressor mapping a window of angle pairs to one angle pair.

    The input window is projected to ``d_model``, a learned positional table is
    added, and ``n_layers`` post-norm residual self-attention layers are
    applied. The representation of the last time step is projected to two
    outputs.

    Args:
        input_dim: Number of features per time step.
        d_model: Hidden width used throughout the model.
        n_heads: Attention heads per layer.
        n_layers: Number of attention + LayerNorm layers.
        dropout: Dropout probability forwarded to each attention layer.
        max_len: Longest window the learned positional table can cover
            (default 128, the previously hard-coded size).

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """

    def __init__(self, input_dim: int, d_model: int, n_heads: int, n_layers: int, dropout: float = 0.1,
                 max_len: int = 128):
        super().__init__()
        if max_len < 1:
            raise ValueError(f"max_len must be >= 1, got {max_len}")
        self.input_dim = input_dim
        self.d_model = d_model
        self.max_len = max_len
        
        self.input_proj = nn.Linear(input_dim, d_model)
        # Learned positional embeddings, one row per position up to max_len.
        self.pos_encoding = nn.Parameter(torch.randn(1, max_len, d_model) * 0.1)
        self.attn_layers = nn.ModuleList([
            FlashAttention(d_model, n_heads, dropout) for _ in range(n_layers)
        ])
        self.norm_layers = nn.ModuleList([
            nn.LayerNorm(d_model) for _ in range(n_layers)
        ])
        self.output = nn.Linear(d_model, 2)
        
        self.apply(self._init_weights)
    
    def _init_weights(self, module):
        """Xavier-initialise every linear layer's weight and zero its bias."""
        if isinstance(module, nn.Linear):
            torch.nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Predict two values per sample from ``x`` of shape ``(B, T, input_dim)``.

        Raises:
            ValueError: If ``T`` exceeds ``max_len``.
        """
        seq_len = x.size(1)
        if seq_len > self.max_len:
            # Fail with a clear message instead of an opaque broadcast error.
            raise ValueError(
                f"Sequence length {seq_len} exceeds max_len={self.max_len}; "
                "construct the model with a larger max_len"
            )

        x = self.input_proj(x)
        x = x + self.pos_encoding[:, :seq_len, :]
        
        for attn, norm in zip(self.attn_layers, self.norm_layers):
            # Post-norm residual block: normalise the sum of input and attention output.
            x = norm(attn(x) + x)
        
        # Only the final time step feeds the regression head.
        return self.output(x[:, -1, :])

def train_model(model: nn.Module, train_loader: DataLoader, val_loader: DataLoader, 
                epochs: int, device: torch.device, lr: float = 1e-4):
    print("Model device:", next(model.parameters()).device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    scaler = torch.amp.GradScaler('cuda')
    criterion = nn.MSELoss()
    
    train_losses, val_losses = [], []
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for i, (batch_x, batch_y, _) in enumerate(train_loader):
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            if i == 0:
                print("Batch x device:", batch_x.device)
            
            optimizer.zero_grad()
            with torch.amp.autocast('cuda'):
                output = model(batch_x)
                if torch.isnan(output).any():
                    print(f"NaN in output at epoch {epoch+1}, batch {i}")
                    break
                loss = criterion(output, batch_y)
            
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            train_loss += loss.item()
        
        train_loss /= len(train_loader)
        train_losses.append(train_loss)
        
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y, _ in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                with torch.amp.autocast('cuda'):
                    output = model(batch_x)
                val_loss += criterion(output, batch_y).item()
        
        val_loss /= len(val_loader)
        val_losses.append(val_loss)
        
        scheduler.step()
        
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
    
    return train_losses, val_losses

def experiment_window_sizes(data_path: str, window_sizes: list, batch_size: int = 64, 
                          epochs: int = 50, seed: int = 42):
    """Train and evaluate one model per window size and save diagnostic plots.

    For every window size the CSV is loaded into an ``AngleDataset``, split
    80/20 at random, a fresh ``AnglePredictionModel`` is trained with
    ``train_model`` and the validation predictions are converted back to
    degrees. Files written to the working directory per window size:
    ``predictions_window_<w>.png``, ``errors_window_<w>.png`` and
    ``loss_window_<w>.png``, plus one ``window_size_comparison.png`` overall.

    Args:
        data_path: CSV file passed to ``AngleDataset``.
        window_sizes: Window lengths to evaluate.
        batch_size: Batch size for both data loaders.
        epochs: Training epochs per window size.
        seed: Seed for the train/validation split so the same rows are held
            out on every run.

    Returns:
        Dict keyed by window size. Each value holds ``train_losses``,
        ``val_losses``, ``model``, ``predictions``, ``actuals`` (degrees),
        ``time_indices``, ``rmse_phi`` and ``rmse_theta``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    use_amp = device.type == "cuda"
    angle_names = ('Phi', 'Theta')
    results = {}
    
    for window_size in window_sizes:
        print(f"\nExperimenting with window size: {window_size}")
        
        dataset = AngleDataset(data_path, window_size)
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        # NOTE(review): consecutive windows overlap, so a random split places
        # near-duplicate samples on both sides; a chronological split would
        # give a stricter validation estimate.
        split_generator = torch.Generator().manual_seed(seed)
        train_dataset, val_dataset = random_split(
            dataset, [train_size, val_size], generator=split_generator
        )
        
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        
        model = AnglePredictionModel(
            input_dim=2,
            d_model=32,  # Reduced for simpler mapping
            n_heads=4,
            n_layers=2,  # Reduced for stability
            dropout=0.1
        ).to(device)
        
        train_losses, val_losses = train_model(
            model, train_loader, val_loader, epochs, device
        )
        
        # Generate predictions on validation set
        model.eval()
        predictions = []
        actuals = []
        time_indices = []
        with torch.no_grad():
            for batch_x, batch_y, batch_indices in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                with torch.amp.autocast('cuda', enabled=use_amp):
                    output = model(batch_x)
                # Upcast first: autocast output can be float16, and scaling by
                # 180 in half precision loses resolution in degrees.
                predictions.append(output.float().cpu().numpy() * 180.0)
                actuals.append(batch_y.cpu().numpy() * 180.0)
                time_indices.append(batch_indices.numpy())
        
        predictions = np.concatenate(predictions, axis=0)
        actuals = np.concatenate(actuals, axis=0)
        time_indices = np.concatenate(time_indices, axis=0)
        
        # Sort by time indices to ensure time series order
        sort_idx = np.argsort(time_indices)
        time_indices = time_indices[sort_idx]
        predictions = predictions[sort_idx]
        actuals = actuals[sort_idx]
        
        # Compute RMSE
        rmse_phi = np.sqrt(np.mean((predictions[:, 0] - actuals[:, 0])**2))
        rmse_theta = np.sqrt(np.mean((predictions[:, 1] - actuals[:, 1])**2))
        rmse_values = (rmse_phi, rmse_theta)
        
        # Plot predictions vs actuals as line graphs (Phi on top, Theta below)
        plt.figure(figsize=(12, 8))
        for col, angle in enumerate(angle_names):
            plt.subplot(2, 1, col + 1)
            plt.plot(time_indices, actuals[:, col], label=f'Actual {angle} (1551 nm, °)', color='blue', linewidth=2)
            plt.plot(time_indices, predictions[:, col], label=f'Predicted {angle} (from 1550 nm, °)', color='red', linestyle='--', linewidth=2)
            plt.title(f'{angle} Time Series (Window Size: {window_size}, RMSE: {rmse_values[col]:.2f}°)')
            plt.xlabel('Time Index')
            plt.ylabel(f'{angle} (degrees)')
            plt.legend()
            plt.grid(True)
        plt.tight_layout()
        plt.savefig(f'predictions_window_{window_size}.png')
        plt.close()
        
        # Plot absolute errors
        errors = np.abs(predictions - actuals)
        plt.figure(figsize=(12, 8))
        for col, angle in enumerate(angle_names):
            plt.subplot(2, 1, col + 1)
            plt.plot(time_indices, errors[:, col], label=f'|Predicted - Actual| {angle}', color='purple', linewidth=2)
            plt.title(f'{angle} Absolute Error (Window Size: {window_size})')
            plt.xlabel('Time Index')
            plt.ylabel('Error (degrees)')
            plt.legend()
            plt.grid(True)
        plt.tight_layout()
        plt.savefig(f'errors_window_{window_size}.png')
        plt.close()
        
        results[window_size] = {
            'train_losses': train_losses,
            'val_losses': val_losses,
            'model': model,
            'predictions': predictions,
            'actuals': actuals,
            'time_indices': time_indices,
            'rmse_phi': rmse_phi,
            'rmse_theta': rmse_theta
        }
        
        # Per-window training curves
        plt.figure(figsize=(10, 5))
        plt.plot(train_losses, label='Train Loss')
        plt.plot(val_losses, label='Val Loss')
        plt.title(f'Loss Curves (Window Size: {window_size})')
        plt.xlabel('Epoch')
        plt.ylabel('MSE Loss')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'loss_window_{window_size}.png')
        plt.close()
    
    # Validation loss of every window size on one set of axes
    plt.figure(figsize=(12, 6))
    for window_size, result in results.items():
        plt.plot(result['val_losses'], label=f'Window {window_size}')
    plt.title('Validation Loss Comparison Across Window Sizes')
    plt.xlabel('Epoch')
    plt.ylabel('MSE Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig('window_size_comparison.png')
    plt.close()
    
    return results

if __name__ == "__main__":
    # Entry point: locate the dataset, then run the window-size sweep.
    data_path = os.path.join("..", "Data", "data_angles", "data6_angles.csv")
    
    print("Attempting to access:", data_path)
    print("Full path:", os.path.abspath(data_path))
    if os.path.exists(data_path):
        print("File found!")
    else:
        print("File not found!")
        # `exit(1)` does not stop a running notebook cell (IPython's exit
        # autocall only schedules a kernel shutdown), so the experiment would
        # still start. SystemExit halts here and keeps exit status 1 in scripts.
        raise SystemExit(1)
    
    window_sizes = [8, 16, 32, 64, 128]
    results = experiment_window_sizes(data_path, window_sizes)

PyTorch version: 2.5.1
CUDA available: True
CUDA version: 12.4
GPU count: 1
GPU name: NVIDIA H100 NVL
Attempting to access: ../Data/data_angles/data6_angles.csv
Full path: /home/exx/Desktop/quantum/Data/data_angles/data6_angles.csv
File found!

Experimenting with window size: 8
Normalized Feature Phi range: -0.9987468179464775 to 1.0
Normalized Feature Theta range: 0.004026122250999709 to 0.9932257217575585
Normalized Target Phi range: -0.9996426482636093 to 0.9999915158988666
Normalized Target Theta range: 0.008747641666239578 to 0.9978211921309839
Model device: cuda:0
Batch x device: cuda:0
Epoch 1/50, Train Loss: 0.2149, Val Loss: 0.1229
Batch x device: cuda:0
Epoch 2/50, Train Loss: 0.1171, Val Loss: 0.1010
Batch x device: cuda:0
Epoch 3/50, Train Loss: 0.0997, Val Loss: 0.0865
Batch x device: cuda:0
Epoch 4/50, Train Loss: 0.0856, Val Loss: 0.0740
Batch x device: cuda:0
Epoch 5/50, Train Loss: 0.0731, Val Loss: 0.0612
Batch x device: cuda:0
Epoch 6/50, Train Loss: 0.0643, Val Loss

In [5]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
# Show which device the previous cell selected.
print(str(device))

cuda
