In [18]:
import torch
import torch.nn as nn
from torch.nn import Linear
import torch.nn.functional as F
import pickle
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from pytorch_metric_learning.losses import NTXentLoss
import pandas as pd
import numpy as np

In [16]:
with open('cls_emb.pkl', 'rb') as f:
    cls = pickle.load(f)
with open('feature_vectors.pkl', 'rb')as f:
    feature_vectors= pickle.load(f)

In [30]:
cls[0].size()

torch.Size([1, 768])

In [3]:
response_df = pd.read_csv('final_data.csv')
map_dict = {'llama3.1-70b':0, 'mistral':1, 'gpt-4o-2024-05-13':2}
response_df['model_nums'] = response_df['model'].map(map_dict)

In [45]:
embeddings = [torch.cat((cls[i].float(), torch.from_numpy(feature_vectors[i]).unsqueeze(0).float()), dim=1) for i in range(len(cls))]

In [46]:
def extract_and_split(response_df, embeddings, temperature):
    temp_idx = response_df[response_df['temperature'] == temperature].index
    temp_embs = [embeddings[idx] for idx in temp_idx]
    temp_targs = [response_df['model_nums'][idx] for idx in temp_idx]
    
    return train_test_split(temp_embs, temp_targs, test_size=0.1, random_state=42)
    
temp_0_train, temp_0_test, temp_0_targs_train, temp_0_targs_test = extract_and_split(response_df, embeddings, 0)
temp_7_train, temp_7_test, temp_7_targs_train, temp_7_targs_test = extract_and_split(response_df, embeddings, 0.7)
temp_14_train, temp_14_test, temp_14_targs_train, temp_14_targs_test = extract_and_split(response_df, embeddings, 1.4)
temp_all_train, temp_all_test, temp_all_targs_train, temp_all_targs_test = train_test_split(embeddings, response_df['model_nums'], 
                                                                                            test_size=0.1, random_state=42)

In [47]:
class FAM(nn.Module):
    def __init__(self, embed_size, hidden_size, hidden_dropout_prob):
        super().__init__()
        self.dropout = nn.Dropout(hidden_dropout_prob)
        self.fc = nn.Linear(embed_size, hidden_size)
        
    def init_weights(self):
        initrange = 0.2
        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()


    def forward(self, text):
        batch,  dim = text.size()
        feat = self.fc(torch.tanh(self.dropout(text.view(batch, dim))))
        feat = F.normalize(feat, dim=1)
        return feat

In [48]:
class Projection(nn.Module):
    def __init__(self, hidden_size, projection_size):
        super().__init__()
        self.fc = nn.Linear(hidden_size, projection_size)
        self.ln = nn.LayerNorm(projection_size)
        self.bn = nn.BatchNorm1d(projection_size)
        self.init_weights()
    def init_weights(self):
        initrange = 0.01
        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()


    def forward(self, text):
        batch,  dim = text.size()
        return self.ln(self.fc(torch.tanh(text.view(batch, dim))))

In [49]:
class SupConHead(nn.Module):
    """backbone + projection head"""
    def __init__(self, head='mlp', dim_in=128, feat_dim=64):
        super(SupConHead, self).__init__()
        
        if head == 'linear':
            self.head = nn.Linear(dim_in, feat_dim)
        elif head == 'mlp':
            self.head = nn.Sequential(
                nn.Linear(dim_in, dim_in),
                nn.Tanh(),
                nn.Linear(dim_in, feat_dim)
            )
        else:
            raise NotImplementedError(
                'head not supported: {}'.format(head))
    def forward(self, x):
        feat = F.normalize(self.head(x), dim=1)
        return feat

In [50]:
class Classifier(nn.Module):
    def __init__(self, hidden_size, num_class, hidden_dropout_prob):
        super().__init__()
        self.dropout = nn.Dropout(hidden_dropout_prob)
        self.fc = nn.Linear(hidden_size, num_class)
        self.init_weights()

    def init_weights(self):
        initrange = 0.02
        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()

    def forward(self, feature):
        return self.fc(torch.tanh(feature))

In [51]:
class WordEmbeddingDataset(Dataset):
    def __init__(self, cls_embs, targs):
        self.cls_embs = cls_embs
        self.targs = targs 

    def __len__(self):
        return len(self.cls_embs)

    def __getitem__(self, idx):
        return self.cls_embs[idx], self.targs[idx]

In [52]:
BATCH_SIZE = 100
dataset_0 = WordEmbeddingDataset(temp_0_train, temp_0_targs_train)
dataset_0_test = WordEmbeddingDataset(temp_0_test, temp_0_targs_test)

dataset_7 =  WordEmbeddingDataset(temp_7_train, temp_7_targs_train)
dataset_7_test = WordEmbeddingDataset(temp_7_test, temp_7_targs_test)

dataset_14 = WordEmbeddingDataset(temp_14_train, temp_14_targs_train)
dataset_14_test = WordEmbeddingDataset(temp_14_test, temp_14_targs_test)

dataset_all = WordEmbeddingDataset(temp_all_train, temp_all_targs_train)
dataset_all_test = WordEmbeddingDataset(temp_all_test, temp_all_targs_test)

data_loader_0 = DataLoader(dataset_0, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
data_loader_0_test = DataLoader(dataset_0_test, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

data_loader_7 = DataLoader(dataset_7, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
data_loader_7_test = DataLoader(dataset_7_test, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

data_loader_14 = DataLoader(dataset_14, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
data_loader_14_test = DataLoader(dataset_14_test, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

data_loader_all = DataLoader(dataset_all, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
data_loader_all_test = DataLoader(dataset_all_test, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

In [53]:
def train(fa_module, proj_module, supconhead_module, classifier, data_loader):
    fa_module.train()
    proj_module.train()
    supconhead_module.train()
    classifier.train()
    contrastive_loss = nn.CrossEntropyLoss()
    batch_acc_cumulative = 0
    n_batches = 0
    train_loss=0
    fa_module.train()
    for _, data in tqdm(enumerate(data_loader)):
        n_batches += 1
        optimizer.zero_grad()
        cls_embs = data[0].squeeze(1) 
        targets = data[1]
        fam_output = fa_module(cls_embs)
        #proj_output = proj_module(fam_output)
        #supcon_output = supconhead_module(proj_output)
        #final_output = classifier(supcon_output)
        #final_output = classifier(proj_output)
        final_output = classifier(fam_output)
        loss = contrastive_loss(final_output, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        
        batch_predictions = final_output.argmax(1)
        batch_acc = (batch_predictions == targets).sum().item() / BATCH_SIZE
        batch_acc_cumulative += batch_acc
    average_acc = batch_acc_cumulative / n_batches
    print('Average Accuracy: ' + str("{:.2f}".format(batch_acc_cumulative*100 / n_batches)+'%'))
    return train_loss / n_batches, average_acc

In [54]:
def evaluate(fa_module, proj_module, supconhead_module, classifier, data_loader):
    fa_module.eval()  
    proj_module.eval()
    supconhead_module.eval()
    classifier.eval()
    
    correct = 0
    total = 0
    with torch.no_grad():  
        for data in data_loader:
            cls_embs = data[0].squeeze(1)  
            targets = data[1].tolist()
            fam_output = fa_module(cls_embs)
            #proj_output = proj_module(fam_output)
            #supcon_output = supconhead_module(proj_output)
            #final_output = classifier(supcon_output)
            final_output = classifier(fam_output)
            preds = final_output.argmax(1).tolist()
            
            total += len(preds) 
            for i in range(len(preds)):
                if preds[i] == targets[i]:
                    correct += 1

    accuracy = correct / total 
    print(f'Test Accuracy: {accuracy:.4f}')
    return accuracy

In [61]:
fam_0 = FAM(797, 256, 0.3)
proj_0 = Projection(256, 128)
supcon_0 = SupConHead()
classifier_0 = Classifier(256, 3, 0.3)
optimizer = torch.optim.Adam(list(fam_0.parameters()) + 
                             list(proj_0.parameters()) + 
                             list(supcon_0.parameters()) + 
                             list(classifier_0.parameters()), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
for epoch in range(1, 50):
    loss, acc = train(fam_0, proj_0, supcon_0, classifier_0, data_loader_0)  
    print(f'Epoch {epoch}, Loss: {loss:.4f}')
    scheduler.step()
test_accuracy = evaluate(fam_0, proj_0, supcon_0, classifier_0, data_loader_0_test)

43it [00:00, 261.80it/s]


Average Accuracy: 35.81%
Epoch 1, Loss: 1.0961


43it [00:00, 307.07it/s]


Average Accuracy: 39.51%
Epoch 2, Loss: 1.0877


43it [00:00, 314.99it/s]


Average Accuracy: 42.14%
Epoch 3, Loss: 1.0668


43it [00:00, 268.67it/s]


Average Accuracy: 45.81%
Epoch 4, Loss: 1.0424


43it [00:00, 283.80it/s]


Average Accuracy: 47.19%
Epoch 5, Loss: 1.0221


43it [00:00, 282.82it/s]


Average Accuracy: 47.14%
Epoch 6, Loss: 1.0204


43it [00:00, 283.79it/s]


Average Accuracy: 48.67%
Epoch 7, Loss: 1.0142


43it [00:00, 287.15it/s]


Average Accuracy: 46.26%
Epoch 8, Loss: 1.0255


43it [00:00, 314.92it/s]


Average Accuracy: 49.53%
Epoch 9, Loss: 1.0025


43it [00:00, 302.73it/s]


Average Accuracy: 49.35%
Epoch 10, Loss: 1.0046


43it [00:00, 262.90it/s]


Average Accuracy: 49.86%
Epoch 11, Loss: 0.9961


43it [00:00, 255.88it/s]


Average Accuracy: 49.19%
Epoch 12, Loss: 0.9995


43it [00:00, 251.35it/s]


Average Accuracy: 50.07%
Epoch 13, Loss: 0.9979


43it [00:00, 248.62it/s]


Average Accuracy: 49.28%
Epoch 14, Loss: 0.9961


43it [00:00, 303.87it/s]


Average Accuracy: 50.70%
Epoch 15, Loss: 0.9921


43it [00:00, 310.44it/s]


Average Accuracy: 49.91%
Epoch 16, Loss: 0.9956


43it [00:00, 307.06it/s]


Average Accuracy: 50.44%
Epoch 17, Loss: 0.9890


43it [00:00, 310.45it/s]


Average Accuracy: 50.53%
Epoch 18, Loss: 0.9843


43it [00:00, 290.43it/s]


Average Accuracy: 51.72%
Epoch 19, Loss: 0.9845


43it [00:00, 249.34it/s]


Average Accuracy: 50.00%
Epoch 20, Loss: 0.9925


43it [00:00, 219.69it/s]


Average Accuracy: 51.58%
Epoch 21, Loss: 0.9810


43it [00:00, 237.50it/s]


Average Accuracy: 52.19%
Epoch 22, Loss: 0.9742


43it [00:00, 252.81it/s]


Average Accuracy: 52.14%
Epoch 23, Loss: 0.9722


43it [00:00, 245.67it/s]


Average Accuracy: 50.30%
Epoch 24, Loss: 0.9793


43it [00:00, 272.97it/s]


Average Accuracy: 51.98%
Epoch 25, Loss: 0.9787


43it [00:00, 313.71it/s]


Average Accuracy: 50.86%
Epoch 26, Loss: 0.9816


43it [00:00, 297.43it/s]


Average Accuracy: 51.77%
Epoch 27, Loss: 0.9761


43it [00:00, 273.82it/s]


Average Accuracy: 51.63%
Epoch 28, Loss: 0.9759


43it [00:00, 244.23it/s]


Average Accuracy: 51.91%
Epoch 29, Loss: 0.9722


43it [00:00, 252.11it/s]


Average Accuracy: 52.60%
Epoch 30, Loss: 0.9683


43it [00:00, 300.62it/s]


Average Accuracy: 52.47%
Epoch 31, Loss: 0.9642


43it [00:00, 319.68it/s]


Average Accuracy: 51.33%
Epoch 32, Loss: 0.9779


43it [00:00, 262.57it/s]


Average Accuracy: 52.00%
Epoch 33, Loss: 0.9723


43it [00:00, 248.46it/s]


Average Accuracy: 51.79%
Epoch 34, Loss: 0.9759


43it [00:00, 291.49it/s]


Average Accuracy: 51.37%
Epoch 35, Loss: 0.9707


43it [00:00, 326.92it/s]


Average Accuracy: 51.09%
Epoch 36, Loss: 0.9782


43it [00:00, 338.48it/s]


Average Accuracy: 52.56%
Epoch 37, Loss: 0.9727


43it [00:00, 339.89it/s]


Average Accuracy: 52.07%
Epoch 38, Loss: 0.9749


43it [00:00, 329.43it/s]


Average Accuracy: 52.07%
Epoch 39, Loss: 0.9690


43it [00:00, 323.10it/s]


Average Accuracy: 52.28%
Epoch 40, Loss: 0.9758


43it [00:00, 334.57it/s]


Average Accuracy: 51.60%
Epoch 41, Loss: 0.9704


43it [00:00, 329.44it/s]


Average Accuracy: 51.79%
Epoch 42, Loss: 0.9697


43it [00:00, 320.81it/s]


Average Accuracy: 52.21%
Epoch 43, Loss: 0.9646


43it [00:00, 350.91it/s]


Average Accuracy: 53.35%
Epoch 44, Loss: 0.9595


43it [00:00, 342.04it/s]


Average Accuracy: 51.91%
Epoch 45, Loss: 0.9658


43it [00:00, 345.30it/s]


Average Accuracy: 52.30%
Epoch 46, Loss: 0.9653


43it [00:00, 335.88it/s]


Average Accuracy: 52.02%
Epoch 47, Loss: 0.9699


43it [00:00, 320.11it/s]


Average Accuracy: 51.65%
Epoch 48, Loss: 0.9728


43it [00:00, 307.08it/s]

Average Accuracy: 52.42%
Epoch 49, Loss: 0.9659
Test Accuracy: 0.5900





In [62]:
fam_7 = FAM(797, 256, 0.3)
proj_7 = Projection(256, 128)
supcon_7 = SupConHead()
classifier_7 = Classifier(256, 3, 0.3)
optimizer = torch.optim.Adam(list(fam_7.parameters()) + 
                             list(proj_7.parameters()) + 
                             list(supcon_7.parameters()) + 
                             list(classifier_7.parameters()), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
for epoch in range(1, 15):
    loss, acc = train(fam_7, proj_7, supcon_7, classifier_7, data_loader_7)  
    print(f'Epoch {epoch}, Loss: {loss:.4f}')
    scheduler.step()
test_accuracy = evaluate(fam_7, proj_7, supcon_7, classifier_7, data_loader_7_test)

43it [00:00, 262.13it/s]


Average Accuracy: 34.23%
Epoch 1, Loss: 1.0959


43it [00:00, 300.63it/s]


Average Accuracy: 40.14%
Epoch 2, Loss: 1.0798


43it [00:00, 306.01it/s]


Average Accuracy: 43.77%
Epoch 3, Loss: 1.0582


43it [00:00, 299.60it/s]


Average Accuracy: 44.67%
Epoch 4, Loss: 1.0455


43it [00:00, 285.41it/s]


Average Accuracy: 46.16%
Epoch 5, Loss: 1.0319


43it [00:00, 281.91it/s]


Average Accuracy: 47.58%
Epoch 6, Loss: 1.0256


43it [00:00, 286.62it/s]


Average Accuracy: 46.65%
Epoch 7, Loss: 1.0337


43it [00:00, 312.58it/s]


Average Accuracy: 46.98%
Epoch 8, Loss: 1.0304


43it [00:00, 249.94it/s]


Average Accuracy: 46.84%
Epoch 9, Loss: 1.0226


43it [00:00, 242.86it/s]


Average Accuracy: 47.95%
Epoch 10, Loss: 1.0198


43it [00:00, 285.68it/s]


Average Accuracy: 48.40%
Epoch 11, Loss: 1.0109


43it [00:00, 310.99it/s]


Average Accuracy: 48.16%
Epoch 12, Loss: 1.0158


43it [00:00, 310.45it/s]


Average Accuracy: 48.30%
Epoch 13, Loss: 1.0140


43it [00:00, 313.75it/s]

Average Accuracy: 49.42%
Epoch 14, Loss: 1.0063
Test Accuracy: 0.5300





In [64]:
fam_14 = FAM(797, 256, 0.3)
proj_14 = Projection(256, 128)
supcon_14 = SupConHead()
classifier_14 = Classifier(256, 3, 0.3)
optimizer = torch.optim.Adam(list(fam_14.parameters()) + 
                             list(proj_14.parameters()) + 
                             list(supcon_14.parameters()) + 
                             list(classifier_14.parameters()), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
for epoch in range(1, 20):
    loss, acc = train(fam_14, proj_14, supcon_14, classifier_14, data_loader_14)  
    print(f'Epoch {epoch}, Loss: {loss:.4f}')
    scheduler.step()
test_accuracy = evaluate(fam_14, proj_14, supcon_14, classifier_14, data_loader_14_test)

44it [00:00, 276.02it/s]


Average Accuracy: 35.50%
Epoch 1, Loss: 1.0977


44it [00:00, 307.62it/s]


Average Accuracy: 37.80%
Epoch 2, Loss: 1.0895


44it [00:00, 298.25it/s]


Average Accuracy: 43.20%
Epoch 3, Loss: 1.0662


44it [00:00, 299.26it/s]


Average Accuracy: 45.80%
Epoch 4, Loss: 1.0394


44it [00:00, 306.59it/s]


Average Accuracy: 46.43%
Epoch 5, Loss: 1.0219


44it [00:00, 299.24it/s]


Average Accuracy: 48.14%
Epoch 6, Loss: 1.0172


44it [00:00, 292.32it/s]


Average Accuracy: 47.91%
Epoch 7, Loss: 1.0040


44it [00:00, 305.39it/s]


Average Accuracy: 47.39%
Epoch 8, Loss: 0.9982


44it [00:00, 306.60it/s]


Average Accuracy: 47.34%
Epoch 9, Loss: 1.0068


44it [00:00, 266.55it/s]


Average Accuracy: 48.73%
Epoch 10, Loss: 0.9893


44it [00:00, 296.13it/s]


Average Accuracy: 47.91%
Epoch 11, Loss: 0.9923


44it [00:00, 267.30it/s]


Average Accuracy: 48.89%
Epoch 12, Loss: 0.9892


44it [00:00, 287.36it/s]


Average Accuracy: 48.36%
Epoch 13, Loss: 0.9965


44it [00:00, 308.75it/s]


Average Accuracy: 48.30%
Epoch 14, Loss: 0.9910


44it [00:00, 307.64it/s]


Average Accuracy: 49.73%
Epoch 15, Loss: 0.9781


44it [00:00, 298.27it/s]


Average Accuracy: 48.00%
Epoch 16, Loss: 0.9914


44it [00:00, 307.63it/s]


Average Accuracy: 49.43%
Epoch 17, Loss: 0.9834


44it [00:00, 313.14it/s]


Average Accuracy: 49.41%
Epoch 18, Loss: 0.9835


44it [00:00, 241.65it/s]

Average Accuracy: 49.55%
Epoch 19, Loss: 0.9791
Test Accuracy: 0.4975





In [237]:
fam_all = FAM(768, 256, 0.3)
proj_all = Projection(256, 128)
supcon_all = SupConHead()
classifier_all = Classifier(256, 3, 0.3)
optimizer = torch.optim.Adam(list(fam_all.parameters()) + 
                             list(proj_all.parameters()) + 
                             list(supcon_all.parameters()) + 
                             list(classifier_all.parameters()), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
for epoch in range(1, 20):
    loss, acc = train(fam_all, proj_all, supcon_all, classifier_all, data_loader_all)  
    print(f'Epoch {epoch}, Loss: {loss:.4f}')
    scheduler.step()
test_accuracy = evaluate(fam_all, proj_all, supcon_all, classifier_all, data_loader_all_test)

0it [00:00, ?it/s]


KeyError: 6884