diff --git a/.gitignore b/.gitignore
index 2e0bd24..6f7d5e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,4 +172,5 @@
 cython_debug/
 !.gitkeep
 data/
-models/
+models/*
+!models/.gitkeep
diff --git a/config/config.yaml b/config/config.yaml
new file mode 100644
index 0000000..87275e7
--- /dev/null
+++ b/config/config.yaml
@@ -0,0 +1,9 @@
+# Model parameters
+input_dim: 768
+seq_len: 10
+batch_size: 32
+learning_rate: 0.001
+num_epochs: 10
+
+# Data parameters
+data_path: "data/"
diff --git a/models/.gitkeep b/models/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
index 24ce15a..28fd334 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
 numpy
+torch>=2.0.0
+pyyaml>=6.0.0
diff --git a/src/evaluate.py b/src/evaluate.py
index e69de29..0306947 100644
--- a/src/evaluate.py
+++ b/src/evaluate.py
@@ -0,0 +1,26 @@
+import torch
+from models import LearnableGatedPooling
+
+def evaluate_model(model, test_loader, criterion, device='cuda'):
+    """
+    Evaluation function for the LearnableGatedPooling model
+
+    Args:
+        model: Trained LearnableGatedPooling model instance
+        test_loader: DataLoader for test data
+        criterion: Loss function
+        device: Device to evaluate on ('cuda' or 'cpu')
+    """
+    model = model.to(device)
+    model.eval()
+    test_loss = 0.0
+
+    with torch.no_grad():
+        for data, target in test_loader:
+            data, target = data.to(device), target.to(device)
+            output = model(data)
+            test_loss += criterion(output, target).item()
+
+    avg_test_loss = test_loss / len(test_loader)
+    print(f'Test Loss: {avg_test_loss:.4f}')
+    return avg_test_loss
diff --git a/src/main.py b/src/main.py
index 0990720..350aa5c 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1 +1,55 @@
-print('Hello, World!')
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from models import LearnableGatedPooling
+from train import train_model
+from evaluate import evaluate_model
+from preprocess import prepare_data
+import yaml
+import os
+
+def main():
+    # Load configuration
+    with open('config/config.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+
+    # Set device
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    # Prepare data
+    train_loader, val_loader, test_loader = prepare_data(
+        data_path=config['data_path'],
+        batch_size=config['batch_size']
+    )
+
+    # Initialize model
+    model = LearnableGatedPooling(
+        input_dim=config['input_dim'],
+        seq_len=config['seq_len']
+    )
+
+    # Define loss function and optimizer
+    criterion = nn.MSELoss()
+    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
+
+    # Train model
+    train_model(
+        model=model,
+        train_loader=train_loader,
+        val_loader=val_loader,
+        criterion=criterion,
+        optimizer=optimizer,
+        num_epochs=config['num_epochs'],
+        device=device
+    )
+
+    # Evaluate model
+    evaluate_model(
+        model=model,
+        test_loader=test_loader,
+        criterion=criterion,
+        device=device
+    )
+
+if __name__ == '__main__':
+    main()
diff --git a/src/models.py b/src/models.py
new file mode 100644
index 0000000..847ca58
--- /dev/null
+++ b/src/models.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class LearnableGatedPooling(nn.Module):
+    def __init__(self, input_dim, seq_len):
+        super(LearnableGatedPooling, self).__init__()
+        self.weights = nn.Parameter(torch.ones(input_dim))
+        self.gate_linear = nn.Linear(input_dim, 1)  # Linear layer for gating
+
+    def forward(self, x):
+        # x: (batch_size, seq_len, input_dim)
+        weighted_x = x * self.weights
+        gate_values = torch.sigmoid(self.gate_linear(x)).squeeze(2)  # (batch_size, seq_len)
+        gated_x = weighted_x * gate_values.unsqueeze(2)
+
+        pooled_vector = torch.mean(gated_x, dim=1)  # Average pooling
+        return pooled_vector
diff --git a/src/preprocess.py b/src/preprocess.py
index e69de29..39ba897 100644
--- a/src/preprocess.py
+++ b/src/preprocess.py
@@ -0,0 +1,48 @@
+import torch
+from torch.utils.data import Dataset, DataLoader
+
+class SequenceDataset(Dataset):
+    """
+    Dataset class for handling sequence data
+    """
+    def __init__(self, sequences, labels):
+        self.sequences = sequences
+        self.labels = labels
+
+    def __len__(self):
+        return len(self.sequences)
+
+    def __getitem__(self, idx):
+        return self.sequences[idx], self.labels[idx]
+
+def prepare_data(data_path, batch_size=32):
+    """
+    Prepare data loaders for training, validation, and testing
+
+    Args:
+        data_path: Path to the data directory
+        batch_size: Batch size for DataLoader
+
+    Returns:
+        train_loader, val_loader, test_loader
+    """
+    # Load and preprocess data
+    # This is a placeholder - implement actual data loading logic based on your data format
+    train_sequences = torch.randn(1000, 10, 768)  # Example dimensions
+    train_labels = torch.randn(1000, 768)
+    val_sequences = torch.randn(200, 10, 768)
+    val_labels = torch.randn(200, 768)
+    test_sequences = torch.randn(200, 10, 768)
+    test_labels = torch.randn(200, 768)
+
+    # Create datasets
+    train_dataset = SequenceDataset(train_sequences, train_labels)
+    val_dataset = SequenceDataset(val_sequences, val_labels)
+    test_dataset = SequenceDataset(test_sequences, test_labels)
+
+    # Create data loaders
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size)
+
+    return train_loader, val_loader, test_loader
diff --git a/src/train.py b/src/train.py
index e69de29..6696ded 100644
--- a/src/train.py
+++ b/src/train.py
@@ -0,0 +1,54 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from models import LearnableGatedPooling
+
+def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device='cuda'):
+    """
+    Training function for the LearnableGatedPooling model
+
+    Args:
+        model: LearnableGatedPooling model instance
+        train_loader: DataLoader for training data
+        val_loader: DataLoader for validation data
+        criterion: Loss function
+        optimizer: Optimizer instance
+        num_epochs: Number of training epochs
+        device: Device to train on ('cuda' or 'cpu')
+    """
+    model = model.to(device)
+    best_val_loss = float('inf')
+
+    for epoch in range(num_epochs):
+        # Training phase
+        model.train()
+        train_loss = 0.0
+        for batch_idx, (data, target) in enumerate(train_loader):
+            data, target = data.to(device), target.to(device)
+
+            optimizer.zero_grad()
+            output = model(data)
+            loss = criterion(output, target)
+            loss.backward()
+            optimizer.step()
+
+            train_loss += loss.item()
+
+        # Validation phase
+        model.eval()
+        val_loss = 0.0
+        with torch.no_grad():
+            for data, target in val_loader:
+                data, target = data.to(device), target.to(device)
+                output = model(data)
+                val_loss += criterion(output, target).item()
+
+        # Print epoch statistics
+        print(f'Epoch {epoch+1}/{num_epochs}:')
+        print(f'Training Loss: {train_loss/len(train_loader):.4f}')
+        print(f'Validation Loss: {val_loss/len(val_loader):.4f}')
+
+        # Save best model
+        if val_loss < best_val_loss:
+            best_val_loss = val_loss
+            torch.save(model.state_dict(), 'models/best_model.pth')
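
As a quick sanity check of the new LearnableGatedPooling module, the following minimal sketch (not part of the patch; it assumes it is run from src/ so that "from models import ..." resolves, and reuses the shapes from config/config.yaml) exercises a single forward pass:

import torch
from models import LearnableGatedPooling

# Shapes mirror config/config.yaml: batch_size=32, seq_len=10, input_dim=768.
model = LearnableGatedPooling(input_dim=768, seq_len=10)
x = torch.randn(32, 10, 768)   # (batch_size, seq_len, input_dim)
pooled = model(x)              # gate and weight each timestep, then mean over seq_len
print(pooled.shape)            # torch.Size([32, 768]): one pooled vector per sequence

The full pipeline should likewise run from the repository root as "python src/main.py": the script directory puts src/ on sys.path for the intra-src imports, while the relative paths config/config.yaml and models/best_model.pth resolve against the root.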