
Commit 14dfbc3

feat: implement learnable gated pooling model
- Add LearnableGatedPooling model implementation
- Add training, evaluation, and preprocessing utilities
- Add configuration file
- Update requirements.txt with dependencies
1 parent f8c6eef commit 14dfbc3

File tree

7 files changed (+226, -2 lines)


config/model_config.py

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+"""
+Configuration for the Learnable Gated Pooling model
+"""
+
+class ModelConfig:
+    # Model parameters
+    INPUT_DIM = 768  # Dimension of input vectors
+    SEQ_LEN = 10  # Maximum sequence length
+
+    # Training parameters
+    BATCH_SIZE = 32
+    NUM_EPOCHS = 10
+    LEARNING_RATE = 0.001
+
+    # Data split
+    TRAIN_RATIO = 0.8
+
+    # Device configuration
+    USE_CUDA = True  # Will fall back to CPU if CUDA is not available
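
A minimal sketch of how these constants might be consumed (illustrative only; the import paths and the device-selection line are assumptions, not part of the commit):

    import torch

    from config.model_config import ModelConfig
    from src.models import LearnableGatedPooling  # assumes the repo root is on sys.path

    # Honor USE_CUDA, falling back to CPU when CUDA is unavailable
    device = torch.device('cuda' if ModelConfig.USE_CUDA and torch.cuda.is_available() else 'cpu')

    model = LearnableGatedPooling(ModelConfig.INPUT_DIM, ModelConfig.SEQ_LEN).to(device)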

requirements.txt

Lines changed: 3 additions & 1 deletion
@@ -1 +1,3 @@
-numpy
+torch>=2.0.0
+numpy>=1.21.0
+typing>=3.7.4

src/evaluate.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+import torch
+from torch.utils.data import DataLoader
+from typing import Dict, Any
+from models import LearnableGatedPooling
+
+def evaluate_model(
+    model: LearnableGatedPooling,
+    test_loader: DataLoader,
+    criterion: torch.nn.Module,
+    device: torch.device
+) -> Dict[str, float]:
+    """
+    Evaluate the LearnableGatedPooling model.
+
+    Args:
+        model: Trained LearnableGatedPooling model
+        test_loader: DataLoader for test data
+        criterion: Loss function
+        device: Device to evaluate on (CPU/GPU)
+
+    Returns:
+        Dictionary containing evaluation metrics
+    """
+    model.eval()
+    total_loss = 0.0
+
+    with torch.no_grad():
+        for data, target in test_loader:
+            data, target = data.to(device), target.to(device)
+            output = model(data)
+            loss = criterion(output, target)
+            total_loss += loss.item()
+
+    avg_loss = total_loss / len(test_loader)
+    return {'test_loss': avg_loss}

src/main.py

Lines changed: 67 additions & 1 deletion
@@ -1 +1,67 @@
-print('Hello, World!')
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from torch.optim import Adam
+
+from models import LearnableGatedPooling
+from preprocess import prepare_data
+from train import train_model
+from evaluate import evaluate_model
+
+def main():
+    # Configuration
+    input_dim = 768  # Example: BERT embedding dimension
+    batch_size = 32
+    seq_len = 10
+    num_epochs = 10
+    learning_rate = 0.001
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    # Initialize model
+    model = LearnableGatedPooling(input_dim=input_dim, seq_len=seq_len)
+
+    # Example data (replace with your actual data loading)
+    dummy_sequences = [torch.randn(seq_len, input_dim) for _ in range(100)]
+
+    # Preprocess data
+    processed_data, max_seq_len = prepare_data(dummy_sequences, batch_size)
+
+    # Create dummy targets (replace with your actual targets)
+    dummy_targets = torch.randn(100, input_dim)
+
+    # Create data loaders
+    dataset = torch.utils.data.TensorDataset(processed_data, dummy_targets)
+    train_size = int(0.8 * len(dataset))
+    test_size = len(dataset) - train_size
+    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
+
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size)
+
+    # Initialize optimizer and loss function
+    optimizer = Adam(model.parameters(), lr=learning_rate)
+    criterion = nn.MSELoss()
+
+    # Train model
+    training_history = train_model(
+        model=model,
+        train_loader=train_loader,
+        optimizer=optimizer,
+        criterion=criterion,
+        num_epochs=num_epochs,
+        device=device
+    )
+
+    # Evaluate model
+    evaluation_results = evaluate_model(
+        model=model,
+        test_loader=test_loader,
+        criterion=criterion,
+        device=device
+    )
+
+    print("\nTraining completed!")
+    print(f"Final test loss: {evaluation_results['test_loss']:.4f}")
+
+if __name__ == "__main__":
+    main()
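
Because main.py uses flat imports (from models import ..., from preprocess import ...), it is presumably meant to be run from inside src/ (e.g. python main.py); each epoch then prints its average training loss from train_model, followed by the final test loss.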

src/models.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class LearnableGatedPooling(nn.Module):
+    def __init__(self, input_dim, seq_len):
+        super(LearnableGatedPooling, self).__init__()
+        self.weights = nn.Parameter(torch.ones(input_dim))
+        self.gate_linear = nn.Linear(input_dim, 1)  # Linear layer for gating
+
+    def forward(self, x):
+        # x: (batch_size, seq_len, input_dim)
+        weighted_x = x * self.weights
+        gate_values = torch.sigmoid(self.gate_linear(x)).squeeze(2)  # (batch_size, seq_len)
+        gated_x = weighted_x * gate_values.unsqueeze(2)
+        pooled_vector = torch.mean(gated_x, dim=1)  # Average pooling
+        return pooled_vector
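
A quick shape check of the forward pass (illustrative usage, not part of the commit):

    import torch

    pool = LearnableGatedPooling(input_dim=768, seq_len=10)
    x = torch.randn(32, 10, 768)  # (batch_size, seq_len, input_dim)
    pooled = pool(x)              # per-dimension weights, per-token sigmoid gates, mean over seq_len
    print(pooled.shape)           # torch.Size([32, 768])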

src/preprocess.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+import torch
+from typing import List, Tuple
+
+def prepare_data(sequences: List[torch.Tensor], batch_size: int) -> Tuple[torch.Tensor, int]:
+    """
+    Prepare input sequences for the LearnableGatedPooling model.
+
+    Args:
+        sequences: List of input sequences
+        batch_size: Size of each training batch
+
+    Returns:
+        Tuple containing:
+            - Padded and batched sequences
+            - Maximum sequence length
+    """
+    # Get maximum sequence length
+    max_seq_len = max(seq.size(0) for seq in sequences)
+
+    # Pad sequences to max_seq_len
+    padded_sequences = []
+    for seq in sequences:
+        if seq.size(0) < max_seq_len:
+            padding = torch.zeros(max_seq_len - seq.size(0), seq.size(1))
+            padded_seq = torch.cat([seq, padding], dim=0)
+            padded_sequences.append(padded_seq)
+        else:
+            padded_sequences.append(seq)
+
+    # Stack sequences into a single tensor
+    batched_sequences = torch.stack(padded_sequences)
+
+    return batched_sequences, max_seq_len
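
A small illustration of the zero-padding behavior (hypothetical tensors; note that batch_size is accepted but unused here, so batching itself is left to the DataLoader):

    import torch

    seqs = [torch.randn(3, 4), torch.randn(5, 4)]  # two sequences of different lengths
    batched, max_len = prepare_data(seqs, batch_size=2)
    print(batched.shape, max_len)                  # torch.Size([2, 5, 4]) 5
    # The 3-step sequence is zero-padded to 5 steps before stacking.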

src/train.py

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from typing import Dict, Any
+from models import LearnableGatedPooling
+
+def train_model(
+    model: LearnableGatedPooling,
+    train_loader: DataLoader,
+    optimizer: torch.optim.Optimizer,
+    criterion: nn.Module,
+    num_epochs: int,
+    device: torch.device
+) -> Dict[str, Any]:
+    """
+    Train the LearnableGatedPooling model.
+
+    Args:
+        model: LearnableGatedPooling model instance
+        train_loader: DataLoader for training data
+        optimizer: Optimizer for training
+        criterion: Loss function
+        num_epochs: Number of training epochs
+        device: Device to train on (CPU/GPU)
+
+    Returns:
+        Dictionary containing training history
+    """
+    model.to(device)
+    history = {'loss': []}
+
+    for epoch in range(num_epochs):
+        model.train()
+        epoch_loss = 0.0
+
+        for batch_idx, (data, target) in enumerate(train_loader):
+            data, target = data.to(device), target.to(device)
+
+            optimizer.zero_grad()
+            output = model(data)
+            loss = criterion(output, target)
+
+            loss.backward()
+            optimizer.step()
+
+            epoch_loss += loss.item()
+
+        avg_epoch_loss = epoch_loss / len(train_loader)
+        history['loss'].append(avg_epoch_loss)
+        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_epoch_loss:.4f}')
+
+    return history
