# In [4]:
from dataProcessing import *
from conv_class import *
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from sklearn.model_selection import train_test_split


class VectorDataset(Dataset):
    """Map-style dataset that pairs each sample with its target by position.

    The two sequences are stored exactly as passed in (no copy, no length
    check). Item ``i`` is the tuple ``(data_list[i], target_list[i])``.
    """

    def __init__(self, data_list, target_list):
        """Keep references to the sample sequence and the target sequence."""
        self.data_list, self.target_list = data_list, target_list

    def __len__(self):
        """Return the number of samples, as reported by ``data_list``."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Return the ``(sample, target)`` pair stored at position ``idx``."""
        sample = self.data_list[idx]
        target = self.target_list[idx]
        return sample, target

class EarlyStopping:
    """Signal when training should stop because validation loss stalled.

    Call the instance once per epoch with that epoch's validation loss and
    the model. Whenever the loss improves, the model's ``state_dict`` is
    written to ``path``; after ``patience`` consecutive calls without an
    improvement, ``early_stop`` becomes ``True``.

    Args:
        patience: Number of consecutive non-improving calls tolerated before
            ``early_stop`` is set.
        delta: Margin added to the best score; a new score below
            ``best_score + delta`` does not count as an improvement.
        path: File the best model's ``state_dict`` is saved to. Defaults to
            ``'checkpoint.pt'``, the name that used to be hard-coded.
    """

    def __init__(self, patience=7, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best (negated) validation loss seen so far
        self.delta = delta
        self.early_stop = False
        self.path = path
        # Loss stored in the most recent checkpoint. Kept separately from
        # best_score so the log line can show the real previous value.
        self.val_loss_min = float('inf')

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and update the stop flag.

        Args:
            val_loss: Validation loss for the epoch (lower is better).
            model: Object exposing ``state_dict()``; checkpointed when the
                loss improves.
        """
        # Negate so that "higher score" means "better".
        score = -val_loss
        if self.best_score is None:
            # First call: anything is an improvement over nothing.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Save ``model.state_dict()`` to ``self.path`` and log the improvement."""
        torch.save(model.state_dict(), self.path)
        # Report the previously checkpointed loss, not -best_score: the caller
        # has already overwritten best_score with the new value at this point.
        print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        self.val_loss_min = val_loss

# Load data: raw texts and labels for the train and test splits
train_texts, train_labels = load_data('../aclImdb_data/train')
test_texts, test_labels = load_data('../aclImdb_data/test')

# Preprocess texts
train_texts = preprocess_text(train_texts)
test_texts = preprocess_text(test_texts)

# Train a Word2Vec model on the training texts only
w2v_model = w2v_train(train_texts)

# Convert texts to vectors
# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'text_to_vec_gen' is not defined", so neither star import
# at the top of the file provides a helper under that name. Confirm the
# actual function name in dataProcessing before running.
# max_length is passed through to the helper; presumably the padded/truncated
# sequence length -- TODO confirm.
max_length = 1519
train_data = [text_to_vec_gen(text, w2v_model, max_length) for text in train_texts]
test_data = [text_to_vec_gen(text, w2v_model, max_length) for text in test_texts]

# Divide the training set and validation set (80/20, fixed seed)
train_data, val_data, train_labels, val_labels = train_test_split(
    train_data, train_labels, test_size=0.2, random_state=42)

# Convert lists to tensors; unsqueeze(0) prepends a size-1 dimension to each
# sample (presumably a channel axis for the model -- TODO confirm).
train_data = [torch.tensor(vec).unsqueeze(0) for vec in train_data]
val_data = [torch.tensor(vec).unsqueeze(0) for vec in val_data]
test_data = [torch.tensor(vec).unsqueeze(0) for vec in test_data]

# Create DataLoader
# Reshuffle the training samples every epoch so batches are not presented in
# the same fixed order each time; evaluation loaders keep a stable order.
train_loader = DataLoader(VectorDataset(train_data, train_labels), batch_size=32, shuffle=True)
val_loader = DataLoader(VectorDataset(val_data, val_labels), batch_size=32)
test_loader = DataLoader(VectorDataset(test_data, test_labels), batch_size=32)

# Run on the GPU when CUDA is available, otherwise on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Build the network and move its parameters to the chosen device.
model = ConvLSTM(
    input_dim=100,
    hidden_dim=128,
    kernel_size=(5, 5),
    num_layers=2,
).to(device)

# Loss, optimizer and the early-stopping monitor used by the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
early_stopping = EarlyStopping(patience=10)

# Upper bound on epochs; early stopping may end training sooner.
n_epochs = 100

for epoch in range(1, n_epochs + 1):

    # ---- Training pass: one optimizer step per batch ----
    model.train()
    train_loss = 0.0
    for batch, labels in train_loader:
        batch = batch.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # The model returns a pair; only the first element is used here.
        predictions, _ = model(batch)
        batch_loss = criterion(predictions, labels)

        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch figure is a
        # per-sample average even when the last batch is smaller.
        train_loss += batch_loss.item() * batch.size(0)

    train_loss = train_loss / len(train_loader.dataset)

    # ---- Validation pass: no gradient tracking, no weight updates ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch, labels in val_loader:
            batch = batch.to(device)
            labels = labels.to(device)

            predictions, _ = model(batch)
            batch_loss = criterion(predictions, labels)

            val_loss += batch_loss.item() * batch.size(0)

    val_loss = val_loss / len(val_loader.dataset)

    # Report both per-sample average losses for this epoch.
    print(f'Epoch: {epoch} \tTraining Loss: {train_loss:.6f} \tValidation Loss: {val_loss:.6f}')

    # Let the monitor checkpoint the model or count a stalled epoch.
    early_stopping(val_loss, model)

    if early_stopping.early_stop:
        print("Early stopping")
        break

# Restore the weights from the checkpoint file written by early stopping.
best_state = torch.load('checkpoint.pt')
model.load_state_dict(best_state)

# Recorded cell output: NameError: name 'text_to_vec_gen' is not defined