In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Seed NumPy and PyTorch so repeated runs draw the same random numbers
np.random.seed(42)
torch.manual_seed(42)

# Run on CUDA when PyTorch reports it as available, otherwise on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Generate sample data (replace with your actual data)
def create_sample_data(seq_length=50, n_samples=10000, input_size=10):
    """Generate a synthetic multivariate time-series regression dataset.

    Each sample is a sine wave over ``[0, 4*pi]`` with its own random
    amplitude and phase, copied into every feature channel, plus a fixed
    per-feature cosine offset and Gaussian noise (std 0.1). The target is
    the mean of the last five time steps over all features, plus Gaussian
    noise (std 0.1).

    The per-sample amplitude and phase are what make the target learnable:
    if every sample shared one deterministic pattern, samples would differ
    only by noise and the target would be almost entirely irreducible
    noise, so a model could do no better than predicting the mean.

    Values are drawn from NumPy's global random state; seed it with
    ``np.random.seed`` beforehand for reproducible output.

    Args:
        seq_length: Number of time steps per sample.
        n_samples: Number of samples to generate.
        input_size: Number of feature channels per time step.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(n_samples, seq_length, input_size)`` and ``y`` has shape
        ``(n_samples, 1)``.
    """
    time_axis = np.linspace(0, 4 * np.pi, seq_length)

    # Per-sample amplitude and phase, so the signal (not just the noise)
    # differs from one sample to the next.
    amplitude = np.random.uniform(0.5, 1.5, size=(n_samples, 1))
    phase = np.random.uniform(0, 2 * np.pi, size=(n_samples, 1))
    base_pattern = amplitude * np.sin(time_axis + phase)  # (n_samples, seq_length)

    # Deterministic per-feature offset: feature j sweeps cos() from 0 to
    # 2*pi*j/input_size across the sequence and is scaled by 0.5*j.
    feature_idx = np.arange(input_size)
    sweep = np.linspace(0, 1, seq_length)[:, None] * (2 * np.pi * feature_idx / input_size)
    feature_offset = 0.5 * feature_idx * np.cos(sweep)  # (seq_length, input_size)

    noise = 0.1 * np.random.randn(n_samples, seq_length, input_size)
    X = base_pattern[:, :, None] + feature_offset + noise

    # Target is a function of the final state: mean of the last five steps
    # over all features, plus observation noise.
    y = X[:, -5:, :].mean(axis=(1, 2)).reshape(-1, 1) + 0.1 * np.random.randn(n_samples, 1)

    return X, y

# Build the synthetic dataset
X, y = create_sample_data()
print(f"Data shape: X {X.shape}, y {y.shape}")

# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# StandardScaler expects 2-D input, so the sample and time-step axes are
# flattened into rows and each feature column is standardised on its own.
X_train_reshaped = X_train.reshape(-1, X_train.shape[2])
X_test_reshaped = X_test.reshape(-1, X_test.shape[2])

# Scaling statistics come from the training split only and are then reused
# for the test split.
scaler_X = StandardScaler()
scaler_X.fit(X_train_reshaped)
X_train_scaled = scaler_X.transform(X_train_reshaped).reshape(X_train.shape)
X_test_scaled = scaler_X.transform(X_test_reshaped).reshape(X_test.shape)

scaler_y = StandardScaler()
scaler_y.fit(y_train)
y_train_scaled = scaler_y.transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# Move the scaled arrays onto the selected device as float32 tensors
X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = (
    torch.tensor(scaled, dtype=torch.float32).to(device)
    for scaled in (X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled)
)

# Wrap the tensors in datasets and mini-batch loaders
from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are reshuffled each epoch
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

# Define the GRU model
class GRUModel(nn.Module):
    """Stacked single-layer GRUs followed by a linear head on the last time step.

    The recurrent layers are registered as ``gru1`` .. ``gru<num_layers>``,
    so the default three-layer configuration keeps the attribute names (and
    therefore the ``state_dict`` keys) ``gru1``, ``gru2``, ``gru3`` and ``fc``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of every GRU layer's hidden state.
        output_size: Number of values predicted per sequence.
        num_layers: Number of stacked GRU layers; must be at least 1.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(self, input_size, hidden_size=64, output_size=1, num_layers=3):
        super().__init__()
        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Build exactly `num_layers` GRUs; only the first one consumes the
        # raw features, the rest consume the previous layer's hidden states.
        for idx in range(num_layers):
            in_features = input_size if idx == 0 else hidden_size
            self.add_module(
                f"gru{idx + 1}",
                nn.GRU(in_features, hidden_size, batch_first=True),
            )

        # Output layer
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Batch-first input tensor, indexed as (batch, time, feature).

        Returns:
            Tensor with one row of ``output_size`` values per batch element.
        """
        out = x
        for idx in range(self.num_layers):
            # No explicit initial state is passed: nn.GRU starts from zeros
            # on the input's device and dtype. Every layer returns its full
            # output sequence.
            out, _ = getattr(self, f"gru{idx + 1}")(out)

        # Keep only the final time step of the last layer's output sequence
        out = out[:, -1, :]

        # Fully connected layer
        return self.fc(out)

# Initialize model
input_size = X_train.shape[2]  # Number of features
model = GRUModel(input_size=input_size, hidden_size=64, output_size=1).to(device)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 100  # single source of truth for the loop and the progress message
train_losses = []
test_losses = []

for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    train_samples = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # criterion returns the mean over the batch; weight it by the batch
        # size so a short final batch does not count as much as a full one.
        train_loss += loss.item() * batch_x.size(0)
        train_samples += batch_x.size(0)

    # Per-sample average training loss for this epoch
    avg_train_loss = train_loss / train_samples
    train_losses.append(avg_train_loss)

    # Evaluation
    model.eval()
    test_loss = 0.0
    test_samples = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            test_loss += loss.item() * batch_x.size(0)
            test_samples += batch_x.size(0)

    # Per-sample average test loss for this epoch
    avg_test_loss = test_loss / test_samples
    test_losses.append(avg_test_loss)

    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.6f}, Test Loss: {avg_test_loss:.6f}')

# Loss curves recorded once per epoch
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_losses, label='Training Loss')
ax.plot(test_losses, label='Test Loss')
ax.set_title('Training and Test Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

# Predict the whole test set in one forward pass, without tracking gradients
model.eval()
with torch.no_grad():
    y_pred_scaled = model(X_test_tensor).cpu().numpy()

# Undo the target scaling so the error is reported in the original units
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_true = scaler_y.inverse_transform(y_test_scaled)

# Root-mean-squared error on the test set
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print(f'Test RMSE: {rmse:.4f}')

# Overlay predictions and true targets for the first 100 test samples
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y_true[:100], label='True Values', alpha=0.7)
ax.plot(y_pred[:100], label='Predictions', alpha=0.7)
ax.set_title('Predictions vs True Values (First 100 Samples)')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Target Value')
ax.legend()
ax.grid(True)
plt.show()

# Persist only the learned parameters (the state dict), not the whole module
torch.save(model.state_dict(), 'gru_model.pth')
print("Model saved as gru_model.pth")

Using device: cpu
Data shape: X (10000, 50, 10), y (10000, 1)
Epoch [10/100], Train Loss: 1.000842, Test Loss: 1.033788
Epoch [20/100], Train Loss: 1.005901, Test Loss: 1.033678
Epoch [30/100], Train Loss: 0.997812, Test Loss: 1.033854
Epoch [40/100], Train Loss: 0.997431, Test Loss: 1.033935
Epoch [50/100], Train Loss: 1.000877, Test Loss: 1.033458
Epoch [60/100], Train Loss: 0.999039, Test Loss: 1.036647
