In [1]:
import os
import pickle
import re
import numpy as np 
import pandas as pd 
import warnings
import itertools

import statsmodels.api as sm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

from sentence_transformers import SentenceTransformer
from datasets import Dataset, load_dataset
from transformers import BertTokenizer
import torch
from utils.adapter import BERTAdapter
from utils.feed_forward import FeedForward
# Switch off the tokenizers library's own parallelism through its environment flag.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Device selection ####################################################
# Use the first CUDA GPU when PyTorch reports one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
#######################################################################

## Data

In [2]:
# Fetch the GLUE/CoLA dataset and the all-mpnet-base-v2 sentence encoder,
# placing the encoder on the device selected in the setup cell.
data = load_dataset("glue", "cola")
mpnetv2 = SentenceTransformer("all-mpnet-base-v2").to(device)

# Embed the sentences of each split, in train -> validation -> test order.
X_train, X_val, X_test = (
    mpnetv2.encode(data[split_name]["sentence"])
    for split_name in ("train", "validation", "test")
)

# Matching label vectors as NumPy arrays.
# NOTE(review): GLUE appears to ship placeholder labels for the test split —
# confirm before using Y_test for any scoring.
Y_train, Y_val, Y_test = (
    np.array(data[split_name]["label"])
    for split_name in ("train", "validation", "test")
)

# Last expression of the cell: show the split summary.
data



DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 8551
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1043
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1063
    })
})

## Sentence Transformer + FFN

In [None]:
# Exhaustive grid search over FeedForward hyperparameters, scored by the
# validation accuracy returned from `fit`.
# Single-element lists are fixed settings; the four multi-element lists
# (batch_size, learning_rate, num_layers, weight_decay) give 3 * 4 * 3 * 4 = 144 runs.
param_grid = {
    'num_epochs': [100],
    'batch_size': [32, 128, 512],
    'learning_rate': [1e-2, 1e-3, 1e-4, 1e-5],
    'category': ['C'],
    'norm': [False],
    'size': [768],
    'num_layers': [1, 2, 3],
    'weight_decay':[1e-2, 1e-3, 1e-4, 1e-5],
    'patience': [3],
    'min_delta': [0],
    # Same CUDA-or-CPU rule as the notebook's device setup, instead of a
    # hard-coded 'cuda:0', so the search is not pinned to a GPU machine.
    # Assumes FeedForward accepts any torch device string — TODO confirm.
    'device': ['cuda:0' if torch.cuda.is_available() else 'cpu']
}

# Create a list of all combinations of hyperparameters (one dict per combination)
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]

# Running best; stays None / 0 if no run scores above 0.
best_params = None
highest_val_accuracy = 0

# Iterate over all combinations of hyperparameters
for params in all_params:
    print("Training with parameters:", params)
    # Initialize a fresh model with the current set of hyperparameters
    feed_forward = FeedForward(**params)

    # Only the third value returned by fit is used here; it is treated as the
    # validation accuracy for (X_val, Y_val).
    _, _, val_accuracy = feed_forward.fit(X_train, Y_train, X_val, Y_val)
    print("Validation accuracy:", val_accuracy)

    # Keep the parameters only on a strict improvement, so ties keep the earlier combination
    if val_accuracy > highest_val_accuracy:
        highest_val_accuracy = val_accuracy
        best_params = params

# Print the best parameters
print("Best Parameters:", best_params)
print("Highest Validation Accuracy:", highest_val_accuracy)

In [4]:
# Final model: hard-coded hyperparameters, refit on train + validation, then
# used to label the test split for a submission file.
# NOTE(review): learning_rate=5e-5 is not one of the values in the grid-search
# cell, and patience equals num_epochs (presumably to disable early stopping,
# since no validation data is passed to fit below) — confirm both are intended.
best_feed_forward = FeedForward(num_epochs=100,
                                batch_size=128,
                                learning_rate=5e-5,
                                category='C',
                                norm=False,
                                size=768,
                                num_layers=1,
                                weight_decay=0.0001,
                                patience=100,
                                min_delta=0,
                                # Same CUDA-or-CPU rule as the notebook's device
                                # setup rather than a hard-coded "cuda:0".
                                # Assumes FeedForward accepts any torch device
                                # string — TODO confirm.
                                device="cuda:0" if torch.cuda.is_available() else "cpu")

# Stack the training and validation sets row-wise so the final fit sees both.
X = np.concatenate((X_train, X_val), axis=0)
Y = np.concatenate((Y_train, Y_val), axis=0)

best_feed_forward.fit(X, Y)

# Predicted class = index of the largest value along axis 1 of predict_proba's output.
preds = np.argmax(best_feed_forward.predict_proba(X_test), axis=1)
print(preds.shape)


# One row per test example; 'index' is simply the row position 0..len(preds)-1.
df = pd.DataFrame({
    'index': range(len(preds)),
    'prediction': preds
})

# Write the DataFrame to a tab-separated file: column header row included,
# pandas row index omitted.
df.to_csv('CoLA.tsv', sep='\t', index=False, header=True)

Epoch : 1/100 | Training Loss : 50.235904812812805
Epoch : 2/100 | Training Loss : 48.14344102144241
Epoch : 3/100 | Training Loss : 46.77004659175873
Epoch : 4/100 | Training Loss : 46.19079315662384
Epoch : 5/100 | Training Loss : 45.90345078706741
Epoch : 6/100 | Training Loss : 45.66457623243332
Epoch : 7/100 | Training Loss : 45.43268531560898
Epoch : 8/100 | Training Loss : 45.205299615859985
Epoch : 9/100 | Training Loss : 44.977967262268066
Epoch : 10/100 | Training Loss : 44.739410281181335
Epoch : 11/100 | Training Loss : 44.50588530302048
Epoch : 12/100 | Training Loss : 44.26710081100464
Epoch : 13/100 | Training Loss : 44.01945984363556
Epoch : 14/100 | Training Loss : 43.77765375375748
Epoch : 15/100 | Training Loss : 43.54137274622917
Epoch : 16/100 | Training Loss : 43.30214911699295
Epoch : 17/100 | Training Loss : 43.07091397047043
Epoch : 18/100 | Training Loss : 42.857427567243576
Epoch : 19/100 | Training Loss : 42.629251569509506
Epoch : 20/100 | Training Loss : 4