From 2e1b9f533199105f25788b83414794c78fa219f5 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sun, 26 Jan 2025 17:44:51 +0000
Subject: [PATCH 1/2] feat: Implement Learnable Gated Pooling model with
 training and evaluation scripts

- Add LearnableGatedPooling model implementation
- Implement training loop with validation
- Add evaluation metrics calculation
- Create data preprocessing utilities
- Update requirements.txt with dependencies
---
 requirements.txt  |  4 ++-
 src/evaluate.py   | 33 ++++++++++++++++++++++++
 src/main.py       | 66 ++++++++++++++++++++++++++++++++++++++++++++++-
 src/models.py     | 17 ++++++++++++
 src/preprocess.py | 47 +++++++++++++++++++++++++++++++++
 src/train.py      | 45 ++++++++++++++++++++++++++++++++
 6 files changed, 210 insertions(+), 2 deletions(-)
 create mode 100644 src/models.py

diff --git a/requirements.txt b/requirements.txt
index 24ce15a..b8a46ac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
-numpy
+numpy>=1.21.0
+torch>=2.0.0
+scikit-learn>=1.0.0
diff --git a/src/evaluate.py b/src/evaluate.py
index e69de29..c15db36 100644
--- a/src/evaluate.py
+++ b/src/evaluate.py
@@ -0,0 +1,33 @@
+import torch
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+
+def evaluate_model(model, test_loader, device):
+    """
+    Evaluate the trained model on test data
+    """
+    model.eval()
+    all_preds = []
+    all_targets = []
+
+    with torch.no_grad():
+        for data, target in test_loader:
+            data, target = data.to(device), target.to(device)
+            output = model(data)
+            pred = output.argmax(dim=1).cpu().numpy()
+            all_preds.extend(pred)
+            all_targets.extend(target.cpu().numpy())
+
+    # Calculate metrics
+    accuracy = accuracy_score(all_targets, all_preds)
+    precision, recall, f1, _ = precision_recall_fscore_support(
+        all_targets, all_preds, average='weighted'
+    )
+
+    metrics = {
+        'accuracy': accuracy,
+        'precision': precision,
+        'recall': recall,
+        'f1': f1
+    }
+
+    return metrics
diff --git a/src/main.py b/src/main.py
index 0990720..13f642e 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1 +1,65 @@
-print('Hello, World!')
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import argparse
+from models import LearnableGatedPooling
+from train import train_model
+from evaluate import evaluate_model
+from preprocess import prepare_data
+
+def main(args):
+    # Set device
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    # Prepare data
+    train_loader, val_loader, test_loader = prepare_data(
+        args.data_path,
+        batch_size=args.batch_size
+    )
+
+    # Initialize model
+    model = LearnableGatedPooling(
+        input_dim=args.input_dim,
+        seq_len=args.seq_len
+    )
+
+    # Define loss function and optimizer
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
+
+    # Train model
+    model, history = train_model(
+        model=model,
+        train_loader=train_loader,
+        val_loader=val_loader,
+        criterion=criterion,
+        optimizer=optimizer,
+        num_epochs=args.num_epochs,
+        device=device
+    )
+
+    # Evaluate model
+    metrics = evaluate_model(model, test_loader, device)
+
+    # Print results
+    print("\nEvaluation Results:")
+    for metric_name, value in metrics.items():
+        print(f"{metric_name}: {value:.4f}")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Train and evaluate LearnableGatedPooling model')
+    parser.add_argument('--data_path', type=str, default='data',
+                        help='path to data directory')
+    parser.add_argument('--input_dim', type=int, default=768,
+                        help='input dimension size')
+    parser.add_argument('--seq_len', type=int, default=10,
+                        help='sequence length')
+    parser.add_argument('--batch_size', type=int, default=32,
+                        help='batch size for training')
+    parser.add_argument('--learning_rate', type=float, default=0.001,
+                        help='learning rate')
+    parser.add_argument('--num_epochs', type=int, default=10,
+                        help='number of training epochs')
+
+    args = parser.parse_args()
+    main(args)
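[Usage note, not part of the patch: given the argparse defaults above, a full
train-and-evaluate run might be invoked as follows; every flag is optional
since each has a default, and the data_path value is ignored by the dummy
loader that prepare_data currently builds.]

    python src/main.py --data_path data --batch_size 32 --num_epochs 10
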
diff --git a/src/models.py b/src/models.py
new file mode 100644
index 0000000..b904df1
--- /dev/null
+++ b/src/models.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class LearnableGatedPooling(nn.Module):
+    def __init__(self, input_dim, seq_len):
+        super(LearnableGatedPooling, self).__init__()
+        self.weights = nn.Parameter(torch.ones(input_dim))
+        self.gate_linear = nn.Linear(input_dim, 1)  # Linear layer for gating
+
+    def forward(self, x):
+        # x: (batch_size, seq_len, input_dim)
+        weighted_x = x * self.weights
+        gate_values = torch.sigmoid(self.gate_linear(x)).squeeze(2)  # (batch_size, seq_len)
+        gated_x = weighted_x * gate_values.unsqueeze(2)
+        pooled_vector = torch.mean(gated_x, dim=1)  # Average pooling
+        return pooled_vector
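[Shape sketch, not part of the patch: a minimal forward-pass check for the
module above, using the repo defaults (input_dim=768, seq_len=10) and a
hypothetical batch of 4.]

    import torch
    from models import LearnableGatedPooling

    model = LearnableGatedPooling(input_dim=768, seq_len=10)
    x = torch.randn(4, 10, 768)  # (batch_size, seq_len, input_dim)
    out = model(x)               # (4, 768): the pooled vector itself
    # Note: seq_len is accepted by __init__ but not used by the forward pass,
    # and at this stage the module returns a representation, not class scores.
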
diff --git a/src/preprocess.py b/src/preprocess.py
index e69de29..77e5642 100644
--- a/src/preprocess.py
+++ b/src/preprocess.py
@@ -0,0 +1,47 @@
+import torch
+from torch.utils.data import Dataset, DataLoader
+import numpy as np
+
+class SequenceDataset(Dataset):
+    """
+    Dataset class for handling sequence data
+    """
+    def __init__(self, sequences, labels):
+        self.sequences = torch.FloatTensor(sequences)
+        self.labels = torch.LongTensor(labels)
+
+    def __len__(self):
+        return len(self.sequences)
+
+    def __getitem__(self, idx):
+        return self.sequences[idx], self.labels[idx]
+
+def prepare_data(data_path, batch_size=32):
+    """
+    Prepare data loaders for training, validation, and testing
+    """
+    # Load and preprocess data
+    # Note: Implement actual data loading logic based on your data format
+
+    # Example data creation (replace with actual data loading)
+    def create_dummy_data(num_samples, seq_len, input_dim):
+        sequences = np.random.randn(num_samples, seq_len, input_dim)
+        labels = np.random.randint(0, 2, num_samples)
+        return sequences, labels
+
+    # Create datasets
+    train_sequences, train_labels = create_dummy_data(1000, 10, 768)
+    val_sequences, val_labels = create_dummy_data(200, 10, 768)
+    test_sequences, test_labels = create_dummy_data(200, 10, 768)
+
+    # Create dataset objects
+    train_dataset = SequenceDataset(train_sequences, train_labels)
+    val_dataset = SequenceDataset(val_sequences, val_labels)
+    test_dataset = SequenceDataset(test_sequences, test_labels)
+
+    # Create data loaders
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size)
+
+    return train_loader, val_loader, test_loader
diff --git a/src/train.py b/src/train.py
index e69de29..097f7a4 100644
--- a/src/train.py
+++ b/src/train.py
@@ -0,0 +1,45 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from models import LearnableGatedPooling
+
+def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
+    """
+    Train the LearnableGatedPooling model
+    """
+    model = model.to(device)
+    history = {'train_loss': [], 'val_loss': []}
+
+    for epoch in range(num_epochs):
+        model.train()
+        train_loss = 0.0
+        for batch_idx, (data, target) in enumerate(train_loader):
+            data, target = data.to(device), target.to(device)
+
+            optimizer.zero_grad()
+            output = model(data)
+            loss = criterion(output, target)
+            loss.backward()
+            optimizer.step()
+
+            train_loss += loss.item()
+
+        # Validation phase
+        model.eval()
+        val_loss = 0.0
+        with torch.no_grad():
+            for data, target in val_loader:
+                data, target = data.to(device), target.to(device)
+                output = model(data)
+                val_loss += criterion(output, target).item()
+
+        # Record losses
+        train_loss /= len(train_loader)
+        val_loss /= len(val_loader)
+        history['train_loss'].append(train_loss)
+        history['val_loss'].append(val_loss)
+
+        print(f'Epoch {epoch+1}/{num_epochs}:')
+        print(f'Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
+
+    return model, history

From 51816023e8d7c1c0c6250d90ee84379fca02c367 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sun, 26 Jan 2025 18:10:07 +0000
Subject: [PATCH 2/2] fix: Add classification head and balance data
 distribution to improve model performance

---
 src/evaluate.py   |  5 ++++-
 src/main.py       |  3 ++-
 src/models.py     |  6 ++++--
 src/preprocess.py | 10 ++++++++--
 4 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/evaluate.py b/src/evaluate.py
index c15db36..f625ed3 100644
--- a/src/evaluate.py
+++ b/src/evaluate.py
@@ -20,7 +20,10 @@ def evaluate_model(model, test_loader, device):
     # Calculate metrics
     accuracy = accuracy_score(all_targets, all_preds)
     precision, recall, f1, _ = precision_recall_fscore_support(
-        all_targets, all_preds, average='weighted'
+        all_targets,
+        all_preds,
+        average='weighted',
+        zero_division=0  # Handle undefined recall
     )

     metrics = {
diff --git a/src/main.py b/src/main.py
index 13f642e..fbb8fd0 100644
--- a/src/main.py
+++ b/src/main.py
@@ -20,7 +20,8 @@ def main(args):
     # Initialize model
     model = LearnableGatedPooling(
         input_dim=args.input_dim,
-        seq_len=args.seq_len
+        seq_len=args.seq_len,
+        num_classes=2  # Binary classification
     )

     # Define loss function and optimizer
diff --git a/src/models.py b/src/models.py
index b904df1..826bf3a 100644
--- a/src/models.py
+++ b/src/models.py
@@ -3,10 +3,11 @@
 import torch.nn.functional as F

 class LearnableGatedPooling(nn.Module):
-    def __init__(self, input_dim, seq_len):
+    def __init__(self, input_dim, seq_len, num_classes=2):
         super(LearnableGatedPooling, self).__init__()
         self.weights = nn.Parameter(torch.ones(input_dim))
         self.gate_linear = nn.Linear(input_dim, 1)  # Linear layer for gating
+        self.classifier = nn.Linear(input_dim, num_classes)  # Classification head

     def forward(self, x):
         # x: (batch_size, seq_len, input_dim)
@@ -14,4 +15,5 @@ def forward(self, x):
         gate_values = torch.sigmoid(self.gate_linear(x)).squeeze(2)  # (batch_size, seq_len)
         gated_x = weighted_x * gate_values.unsqueeze(2)
         pooled_vector = torch.mean(gated_x, dim=1)  # Average pooling
-        return pooled_vector
+        logits = self.classifier(pooled_vector)  # (batch_size, num_classes)
+        return logits
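[Shape sketch, not part of the patch: with the classification head in place,
the same hypothetical forward pass now yields per-class logits, which is what
nn.CrossEntropyLoss in main.py and the argmax in evaluate.py expect.]

    import torch
    from models import LearnableGatedPooling

    model = LearnableGatedPooling(input_dim=768, seq_len=10, num_classes=2)
    x = torch.randn(4, 10, 768)
    logits = model(x)             # (4, 2) instead of (4, 768)
    preds = logits.argmax(dim=1)  # class indices, as computed in evaluate.py
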
diff --git a/src/preprocess.py b/src/preprocess.py
index 77e5642..d6ee91a 100644
--- a/src/preprocess.py
+++ b/src/preprocess.py
@@ -25,8 +25,14 @@ def prepare_data(data_path, batch_size=32):

     # Example data creation (replace with actual data loading)
     def create_dummy_data(num_samples, seq_len, input_dim):
-        sequences = np.random.randn(num_samples, seq_len, input_dim)
-        labels = np.random.randint(0, 2, num_samples)
+        half_samples = num_samples // 2
+        sequences_0 = np.random.randn(half_samples, seq_len, input_dim)
+        labels_0 = np.zeros(half_samples, dtype=int)
+        sequences_1 = np.random.randn(num_samples - half_samples, seq_len, input_dim)
+        labels_1 = np.ones(num_samples - half_samples, dtype=int)
+
+        sequences = np.concatenate([sequences_0, sequences_1], axis=0)
+        labels = np.concatenate([labels_0, labels_1], axis=0)
         return sequences, labels

     # Create datasets
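[Sanity-check sketch, not part of the patch: the balanced dummy data can be
verified through the loaders; shapes follow the hard-coded seq_len=10 and
input_dim=768 in prepare_data.]

    from preprocess import prepare_data

    train_loader, _, _ = prepare_data('data', batch_size=32)
    data, target = next(iter(train_loader))
    print(data.shape)    # torch.Size([32, 10, 768])
    print(target.shape)  # torch.Size([32]); labels are now an exact
                         # 500/500 split in train rather than Bernoulli
                         # draws, so both classes are guaranteed to occur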