In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from warnings import filterwarnings

filterwarnings('ignore')

In [2]:
from util.helpers import *

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_device_memory_report(device):
    print(f'Device: {device} [{torch.cuda.get_device_name(device)}]')
    free_memory, total_memory = torch.cuda.mem_get_info(device)
    
    free_memory_gb = free_memory / (1024 ** 3)
    total_memory_gb = total_memory / (1024 ** 3)
    
    print(f"Free Memory: {free_memory_gb:.2f}/{total_memory_gb:.2f} GB [{free_memory / total_memory * 100:.2f}%]")

get_device_memory_report(device)

Device: cuda [NVIDIA RTX 6000 Ada Generation]
Free Memory: 44.73/47.50 GB [94.17%]


In [4]:
positive_encodings, negative_encodings = load_complete_dataset(n_files=10)

100%|██████████| 10/10 [00:01<00:00,  8.75it/s]
100%|██████████| 10/10 [00:00<00:00, 16.05it/s]
100%|██████████| 10/10 [00:00<00:00, 17.22it/s]
100%|██████████| 10/10 [00:00<00:00, 11.20it/s]
100%|██████████| 10/10 [00:00<00:00, 12.94it/s]
100%|██████████| 10/10 [00:00<00:00, 16.70it/s]
100%|██████████| 10/10 [00:00<00:00, 12.10it/s]
100%|██████████| 10/10 [00:00<00:00, 12.22it/s]
100%|██████████| 10/10 [00:01<00:00,  7.92it/s]
100%|██████████| 10/10 [00:00<00:00, 12.22it/s]


In [11]:
class MLPClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLPClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.01)
        self.sinu = nn.SiLU()
        self.batchnorm = nn.BatchNorm1d(hidden_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.batchnorm(x)
        x = self.sinu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.batchnorm(x)
        x = self.sinu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x

In [12]:
batch_size = 16

dataset = torch.concat([positive_encodings, negative_encodings], dim=0)
labels = torch.tensor([1] * len(positive_encodings) + [0] * len(negative_encodings))

dataset = torch.utils.data.TensorDataset(dataset, labels)

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8 * len(dataset))])

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [15]:
learning_rate = 2 * 1e-6
num_epochs = 10
input_size = 2048
hidden_size = 1024
n_classes = 2

model = MLPClassifier(input_size, hidden_size, n_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for i, (data, labels) in enumerate(train_loader):

        data = data.to(device).float()
        labels = labels.to(device).long()

        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'[Epoch {epoch + 1}] | Loss: {round(loss.item(), 3)}')

[Epoch 1] | Loss: 0.644
[Epoch 2] | Loss: 0.65
[Epoch 3] | Loss: 0.554
[Epoch 4] | Loss: 0.47
[Epoch 5] | Loss: 0.5
[Epoch 6] | Loss: 0.484
[Epoch 7] | Loss: 0.517
[Epoch 8] | Loss: 0.478
[Epoch 9] | Loss: 0.509
[Epoch 10] | Loss: 0.47


In [16]:
train_acc = 0

with torch.no_grad():
    for data, labels in test_loader:
        data = data.to(device).float()
        labels = labels.to(device).long()

        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        train_acc += (predicted == labels).sum().item()

print(f'Test Accuracy: {round(train_acc / len(test_dataset), 3)}')

Test Accuracy: 0.81


In [17]:
test_acc = 0
with torch.no_grad():
    for data, labels in test_loader:
        data = data.to(device).float()
        labels = labels.to(device).long()

        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        test_acc += (predicted == labels).sum().item()

print(f'Test Accuracy: {round(test_acc / len(test_dataset), 3)}')

Test Accuracy: 0.81


In [18]:
positive_encodings_prompt, negative_encodings_prompt = load_complete_dataset(n_files=10, add_grammar_prompt=True)

100%|██████████| 10/10 [00:01<00:00,  6.07it/s]
100%|██████████| 10/10 [00:01<00:00,  6.81it/s]
100%|██████████| 10/10 [00:01<00:00,  7.09it/s]
100%|██████████| 10/10 [00:01<00:00,  5.53it/s]
100%|██████████| 10/10 [00:01<00:00,  6.06it/s]
100%|██████████| 10/10 [00:01<00:00,  6.96it/s]
100%|██████████| 10/10 [00:01<00:00,  5.73it/s]
100%|██████████| 10/10 [00:01<00:00,  5.82it/s]
100%|██████████| 10/10 [00:02<00:00,  4.73it/s]
100%|██████████| 10/10 [00:01<00:00,  5.75it/s]


In [19]:
batch_size = 16

dataset_prompt = torch.concat([positive_encodings_prompt, negative_encodings_prompt], dim=0)
labels_prompt = torch.tensor([1] * len(positive_encodings_prompt) + [0] * len(negative_encodings_prompt))

dataset_prompt = torch.utils.data.TensorDataset(dataset_prompt, labels_prompt)

train_dataset, test_dataset = torch.utils.data.random_split(dataset_prompt, [int(0.8 * len(dataset_prompt)), len(dataset_prompt) - int(0.8 * len(dataset_prompt))])

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [21]:
model_prompt = MLPClassifier(input_size, hidden_size, n_classes)
model_prompt = model_prompt.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_prompt.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for i, (data, labels) in enumerate(train_loader):

        data = data.to(device).float()
        labels = labels.to(device).long()

        optimizer.zero_grad()
        outputs = model_prompt(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'[Epoch {epoch + 1}] | Loss: {round(loss.item(), 3)}')

[Epoch 1] | Loss: 0.66
[Epoch 2] | Loss: 0.544
[Epoch 3] | Loss: 0.577
[Epoch 4] | Loss: 0.479
[Epoch 5] | Loss: 0.457
[Epoch 6] | Loss: 0.588
[Epoch 7] | Loss: 0.507
[Epoch 8] | Loss: 0.426
[Epoch 9] | Loss: 0.612
[Epoch 10] | Loss: 0.516


In [22]:
train_acc = 0

with torch.no_grad():
    for data, labels in test_loader:
        data = data.to(device).float()
        labels = labels.to(device).long()

        outputs = model_prompt(data)
        _, predicted = torch.max(outputs.data, 1)
        train_acc += (predicted == labels).sum().item()

print(f'Test Accuracy: {round(train_acc / len(test_dataset), 3)}')

Test Accuracy: 0.809


In [23]:
test_acc = 0
with torch.no_grad():
    for data, labels in test_loader:
        data = data.to(device).float()
        labels = labels.to(device).long()

        outputs = model_prompt(data)
        _, predicted = torch.max(outputs.data, 1)
        test_acc += (predicted == labels).sum().item()

print(f'Test Accuracy: {round(test_acc / len(test_dataset), 3)}')

Test Accuracy: 0.809


In [24]:
positive_sentences, negative_sentences = get_blimp_dataset()

In [None]:
def get_labels(sentences, model, tokenizer):
    pass