In [1]:
import torch 
import torch.nn as nn
import pandas as pd 
import numpy as np

In [2]:
class label_smooth_loss(torch.nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The target class receives weight ``(1 - smoothing) + smoothing / num_classes``
    and every other class receives ``smoothing / num_classes``.

    Args:
        num_classes: Number of classes used to spread the smoothing mass.
        smoothing: Total probability mass moved away from the one-hot target.
    """

    def __init__(self, num_classes, smoothing=0.1):
        super().__init__()
        off_target_weight = smoothing / num_classes
        self.negative = off_target_weight
        self.positive = (1 - smoothing) + off_target_weight

    def forward(self, pred, target):
        """Return the mean smoothed loss over the batch.

        Args:
            pred: Scores with the class axis at ``dim=1``; ``log_softmax`` is
                applied here, so they are treated as unnormalized logits.
            target: Integer class indices, one per row of ``pred``.
        """
        log_probs = pred.log_softmax(dim=1)
        # Start from the off-target weight everywhere, then overwrite the
        # target column of each row with the on-target weight.
        true_dist = torch.full_like(log_probs, self.negative)
        true_dist.scatter_(1, target.data.unsqueeze(1), self.positive)
        per_sample = -(true_dist * log_probs).sum(dim=1)
        return per_sample.mean()

In [48]:
# Build the 10-class smoothed criterion and load the "ties" merged-model
# test table: the `true_label` column becomes the targets and the
# remaining columns become the prediction matrix.
smooth_loss = label_smooth_loss(num_classes=10)
data = pd.read_csv(
    "mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_m_experiment_drug_data_ties_test_probabilities.csv"
)
data_true_label = torch.from_numpy(data["true_label"].values)
data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)

In [6]:
# Load the test table of fine-tuning experiment 1: `true_label` holds the
# targets and every other column is part of the prediction matrix.
data = pd.read_csv(
    "mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_f_experiment_1_drug_data_test_probabilities.csv"
)
data_true_label = torch.from_numpy(data["true_label"].values)
data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)

In [11]:
# Negative log-likelihood of the loaded predictions: NLLLoss consumes
# log-probabilities, so the log is taken first.
loss = nn.NLLLoss()
log_probs = data_pred.log()
value = loss(input=log_probs, target=data_true_label)
print(value)

tensor(2.1543, dtype=torch.float64)


In [10]:
# Re-create the 10-class smoothed criterion and reload the "ties"
# merged-model test table (targets from `true_label`, predictions from
# the remaining columns).
smooth_loss = label_smooth_loss(num_classes=10)
data = pd.read_csv(
    "mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_m_experiment_drug_data_ties_test_probabilities.csv"
)
data_true_label = torch.from_numpy(data["true_label"].values)
data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)

In [58]:
# Manual label-smoothed NLL over the currently loaded predictions.
# NOTE: despite its name, `log_probs` holds the NEGATIVE log-probabilities.
log_probs = -torch.log(data_pred)

# Bind `labels` unconditionally so that labels which already carry the
# trailing index axis do not hit a NameError further down.
labels = data_true_label
if labels.dim() == log_probs.dim() - 1:
    labels = labels.unsqueeze(-1)

# -100 is the ignore index; gather cannot index with it, so those labels are
# clamped to 0 and their contributions are zeroed through the mask.
padding_mask = labels.eq(-100)
labels = torch.clamp(labels, min=0)
nll_loss = log_probs.gather(dim=-1, index=labels)
# Sum over the class axis, accumulated in float32.
smoothed_loss = log_probs.sum(dim=-1, keepdim=True, dtype=torch.float32)
nll_loss.masked_fill_(padding_mask, 0.0)
smoothed_loss.masked_fill_(padding_mask, 0.0)

# Take the mean over the label dimensions, then divide by the number of active elements (i.e. not-padded):
num_active_elements = padding_mask.numel() - padding_mask.long().sum()
nll_loss = nll_loss.sum() / num_active_elements
smoothed_loss = smoothed_loss.sum() / (num_active_elements * log_probs.shape[-1])
print(nll_loss)
print(smoothed_loss)
# Blend with smoothing factor epsilon = 0.1.
total_loss = (1 - 0.1) * nll_loss + 0.1 * smoothed_loss
print(total_loss)
# The statements above are the inline form of a method that took
# `self.ignore_index` (here -100) and `self.epsilon` (here 0.1) and returned
# `(1 - epsilon) * nll_loss + epsilon * smoothed_loss`.

tensor(2.1543, dtype=torch.float64)
tensor(7.1735)
tensor(2.6562, dtype=torch.float64)


In [35]:
# Instantiate the 10-class smoothed criterion and load the "ties"
# merged-model test table; `true_label` supplies the targets and the other
# columns form the prediction matrix.
smooth_loss = label_smooth_loss(num_classes=10)
data = pd.read_csv(
    "mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_m_experiment_drug_data_ties_test_probabilities.csv"
)
data_true_label = torch.from_numpy(data["true_label"].values)
data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)

        

Shape of data_pred : torch.Size([43013, 10])


tensor(1.0000, dtype=torch.float64)

In [29]:
# NOTE(review): `loss` is whatever an earlier-executed cell bound to that name
# (hidden kernel state) — confirm which criterion produced this value.
value = loss(data_pred, data_true_label)

In [30]:
# Display the loss value assigned in the preceding cell.
value

tensor(2.0189, dtype=torch.float64)

In [6]:
def calculate_accuracy(predictions, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        predictions: 2-D tensor of per-class scores; the class axis is ``dim=1``.
        labels: Tensor of class indices, one per row of ``predictions``.

    Returns:
        A Python float: number of matches divided by ``len(labels)``.
    """
    predicted_classes = torch.argmax(predictions, dim=1)
    num_correct = predicted_classes.eq(labels).sum().item()
    return num_correct / len(labels)

In [7]:
def calculate_loss(predictions, labels, from_probabilities=False):
    """Return the mean classification loss of ``predictions`` against ``labels``.

    ``nn.CrossEntropyLoss`` applies ``log_softmax`` internally, so it expects
    unnormalized logits. Feeding it values that are already probabilities
    applies softmax twice and yields a misleading loss. Pass
    ``from_probabilities=True`` in that case to get the true negative
    log-likelihood instead.

    Args:
        predictions: 2-D tensor with the class axis at ``dim=1``. Interpreted
            as logits by default, or as probabilities when
            ``from_probabilities`` is True (must then be a floating tensor).
        labels: Tensor of integer class indices, one per row.
        from_probabilities: When True, compute ``NLLLoss(log(predictions))``.
            Defaults to False, which keeps the CrossEntropyLoss behaviour.

    Returns:
        A 0-dim tensor holding the mean loss.
    """
    if from_probabilities:
        # Clamp to the smallest positive normal value so an exact-zero
        # probability gives a large finite loss instead of inf.
        floor = torch.finfo(predictions.dtype).tiny
        log_probs = torch.log(predictions.clamp_min(floor))
        return nn.NLLLoss()(log_probs, labels)
    loss = nn.CrossEntropyLoss()
    return loss(predictions, labels)

In [8]:
# Write the test accuracy of Yelp fine-tuning experiments 0-9, one line each.
with open("accuracy_manual_results_yelp_test_finetune_accuracy.txt", "w") as out_file:
    for run_idx in range(10):
        data = pd.read_csv(
            f"mllm/new_test_loss/test_loss_exp/yelp_experiment_probs/gpt2_f_experiment_{run_idx}_test_probabilities.csv"
        )
        data_true_label = torch.from_numpy(data["true_label"].values)
        data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)
        accuracy = calculate_accuracy(data_pred, data_true_label)
        out_file.write(f'Finetune test accuracy {run_idx}: {accuracy}\n')

In [9]:
# Write the validation accuracy of Yelp fine-tuning experiments 0-9, one line
# each. The written label spells "validation" correctly.
with open("accuracy_manual_results_yelp_val_finetune_accuracy.txt","w") as f:
    for i in range(10):
        data = pd.read_csv(f"mllm/new_test_loss/test_loss_exp/yelp_experiment_probs/gpt2_f_experiment_{i}_validation_probabilities.csv")
        # `true_label` holds the targets; all other columns are predictions.
        data_true_label = torch.from_numpy(data['true_label'].values)
        data_pred = torch.from_numpy(data.drop(columns=['true_label']).values)
        accuracy = calculate_accuracy(data_pred,data_true_label)
        f.write(f'Finetune validation accuracy {i}: {accuracy}\n')

In [10]:
# Write the test loss of Yelp fine-tuning experiments 0-9, one line each.
# NOTE(review): calculate_loss uses nn.CrossEntropyLoss, which expects logits;
# these files are named *_probabilities — confirm the inputs are not already
# softmax outputs.
with open("accuracy_manual_results_yelp_test_finetune_loss.txt", "w") as out_file:
    for run_idx in range(10):
        data = pd.read_csv(
            f"mllm/new_test_loss/test_loss_exp/yelp_experiment_probs/gpt2_f_experiment_{run_idx}_test_probabilities.csv"
        )
        data_true_label = torch.from_numpy(data["true_label"].values)
        data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)
        loss = calculate_loss(data_pred, data_true_label)
        out_file.write(f'Finetune test loss {run_idx}: {loss}\n')

In [11]:
# Write the validation loss of Yelp fine-tuning experiments 0-9, one line each.
# The written label carries no stray double quote after "loss".
# NOTE(review): calculate_loss uses nn.CrossEntropyLoss, which expects logits;
# these files are named *_probabilities — confirm the inputs are not already
# softmax outputs.
with open("accuracy_manual_results_yelp_val_finetune_loss.txt","w") as f:
    for i in range(10):
        data = pd.read_csv(f"mllm/new_test_loss/test_loss_exp/yelp_experiment_probs/gpt2_f_experiment_{i}_validation_probabilities.csv")
        # `true_label` holds the targets; all other columns are predictions.
        data_true_label = torch.from_numpy(data['true_label'].values)
        data_pred = torch.from_numpy(data.drop(columns=['true_label']).values)
        loss = calculate_loss(data_pred,data_true_label)
        f.write(f'Finetune validation loss {i}: {loss}\n')

In [12]:
# Write the test accuracy of drug-data fine-tuning experiments 0-4, one line each.
with open("accuracy_manual_results_drug_test_finetune_accuracy.txt", "w") as out_file:
    for run_idx in range(5):
        data = pd.read_csv(
            f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_f_experiment_{run_idx}_drug_data_test_probabilities.csv"
        )
        data_true_label = torch.from_numpy(data["true_label"].values)
        data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)
        accuracy = calculate_accuracy(data_pred, data_true_label)
        out_file.write(f'Finetune test accuracy {run_idx}: {accuracy}\n')

In [13]:
# Write the validation accuracy of drug-data fine-tuning experiments 0-4,
# one line each.
with open("accuracy_manual_results_drug_val_finetune_accuracy.txt", "w") as out_file:
    for run_idx in range(5):
        data = pd.read_csv(
            f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_f_experiment_{run_idx}_drug_data_validation_probabilities.csv"
        )
        data_true_label = torch.from_numpy(data["true_label"].values)
        data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)
        accuracy = calculate_accuracy(data_pred, data_true_label)
        out_file.write(f'Finetune validation accuracy {run_idx}: {accuracy}\n')

In [14]:
# Write the test loss of drug-data fine-tuning experiments 0-4, one line each.
# NOTE(review): calculate_loss uses nn.CrossEntropyLoss, which expects logits;
# these files are named *_probabilities — confirm the inputs are not already
# softmax outputs.
with open("accuracy_manual_results_drug_test_finetune_loss.txt", "w") as out_file:
    for run_idx in range(5):
        data = pd.read_csv(
            f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_f_experiment_{run_idx}_drug_data_test_probabilities.csv"
        )
        data_true_label = torch.from_numpy(data["true_label"].values)
        data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)
        loss = calculate_loss(data_pred, data_true_label)
        out_file.write(f'Finetune test loss {run_idx}: {loss}\n')

In [15]:
# Write the validation loss of drug-data fine-tuning experiments 0-4, one line
# each. The label says "loss" because the value written comes from
# calculate_loss, not calculate_accuracy.
# NOTE(review): calculate_loss uses nn.CrossEntropyLoss, which expects logits;
# these files are named *_probabilities — confirm the inputs are not already
# softmax outputs.
with open("accuracy_manual_results_drug_val_finetune_loss.txt","w") as f:
    for i in range(5):
        data = pd.read_csv(f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_f_experiment_{i}_drug_data_validation_probabilities.csv")
        # `true_label` holds the targets; all other columns are predictions.
        data_true_label = torch.from_numpy(data['true_label'].values)
        data_pred = torch.from_numpy(data.drop(columns=['true_label']).values)
        loss = calculate_loss(data_pred,data_true_label)
        f.write(f'Finetune validation loss {i}: {loss}\n')

In [16]:
# Write the test accuracy computed from the "large" drug-data table as a
# single line.
with open("accuracy_manual_results_drug_xl_test_accuracy.txt", "w") as out_file:
    data = pd.read_csv(
        "mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_f_experiment_drug_data_large_test_probabilities.csv"
    )
    data_true_label = torch.from_numpy(data["true_label"].values)
    data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)
    accuracy = calculate_accuracy(data_pred, data_true_label)
    out_file.write(f'Finetune test accuracy big : {accuracy}\n')

In [17]:
# Write the validation accuracy computed from the gpt2_xl validation table as
# a single line. The label says "validation" to match the file that is read
# and the file that is written.
with open("accuracy_manual_results_drug_xl_val_accuracy.txt","w") as f:
    data = pd.read_csv("mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_xl_validation_probabilities.csv")
    # `true_label` holds the targets; all other columns are predictions.
    data_true_label = torch.from_numpy(data['true_label'].values)
    data_pred = torch.from_numpy(data.drop(columns=['true_label']).values)
    accuracy = calculate_accuracy(data_pred,data_true_label)
    f.write(f'Finetune validation accuracy big : {accuracy}\n')

In [18]:
# Merge-method tags; each one is substituted into the merged-model CSV file
# names read by the cells below.
merge = ["dare_linear","dare_ties","linear","ties"]

In [19]:
# Write the test accuracy of every merge method listed in `merge`, one line each.
with open("accuracy_manual_results_drug_test_merge_method_accuracy.txt", "w") as out_file:
    for merge_type in merge:
        data = pd.read_csv(
            f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_m_experiment_drug_data_{merge_type}_test_probabilities.csv"
        )
        data_true_label = torch.from_numpy(data["true_label"].values)
        data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)
        accuracy = calculate_accuracy(data_pred, data_true_label)
        out_file.write(f'Merge method test accuracy {merge_type}: {accuracy}\n')

In [20]:
# Write the test loss of every merge method listed in `merge`, one line each.
# The label says "loss" because the value written comes from calculate_loss.
# NOTE(review): calculate_loss uses nn.CrossEntropyLoss, which expects logits;
# these files are named *_probabilities — confirm the inputs are not already
# softmax outputs.
with open("accuracy_manual_results_drug_test_merge_method_loss.txt","w") as f:
    for merge_type in merge:
        data = pd.read_csv(f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_m_experiment_drug_data_{merge_type}_test_probabilities.csv")
        # `true_label` holds the targets; all other columns are predictions.
        data_true_label = torch.from_numpy(data['true_label'].values)
        data_pred = torch.from_numpy(data.drop(columns=['true_label']).values)
        loss = calculate_loss(data_pred,data_true_label)
        f.write(f'Merge method test loss {merge_type}: {loss}\n')

In [21]:
# Write the validation accuracy of every merge method listed in `merge`, one
# line each. The label says "validation" to match the validation files read.
with open("accuracy_manual_results_drug_val_merge_method_accuracy.txt","w") as f:
    for merge_type in merge:
        data = pd.read_csv(f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_m_experiment_drug_data_{merge_type}_validation_probabilities.csv")
        # `true_label` holds the targets; all other columns are predictions.
        data_true_label = torch.from_numpy(data['true_label'].values)
        data_pred = torch.from_numpy(data.drop(columns=['true_label']).values)
        accuracy = calculate_accuracy(data_pred,data_true_label)
        f.write(f'Merge method validation accuracy {merge_type}: {accuracy}\n')

In [202]:
# Write the validation loss of every merge method listed in `merge`, one line
# each. The label says "validation loss": the files read are the validation
# tables and the value written comes from calculate_loss.
# NOTE(review): calculate_loss uses nn.CrossEntropyLoss, which expects logits;
# these files are named *_probabilities — confirm the inputs are not already
# softmax outputs.
with open("accuracy_manual_results_drug_val_merge_method_loss.txt","w") as f:
    for merge_type in merge:
        data = pd.read_csv(f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_m_experiment_drug_data_{merge_type}_validation_probabilities.csv")
        # `true_label` holds the targets; all other columns are predictions.
        data_true_label = torch.from_numpy(data['true_label'].values)
        data_pred = torch.from_numpy(data.drop(columns=['true_label']).values)
        loss = calculate_loss(data_pred,data_true_label)
        f.write(f'Merge method validation loss {merge_type}: {loss}\n')

## Get output tensor

In [203]:
# Convert the test prediction table of drug-data fine-tuning experiments 0-4
# into tensors and save each one next to its CSV with a .pt extension.
for run_idx in range(5):
    data = pd.read_csv(
        f"mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_f_experiment_{run_idx}_drug_data_test_probabilities.csv"
    )
    # The labels are extracted as well, but only the predictions are saved.
    data_true_label = torch.from_numpy(data["true_label"].values)
    data_pred = torch.from_numpy(data.drop("true_label", axis=1).values)
    torch.save(
        data_pred,
        f'mllm/new_test_loss/test_loss_exp/drug_experiment_probs/gpt2_f_experiment_{run_idx}_drug_data_test_probabilities.pt',
    )