In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import Trainer, TrainingArguments
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os

In [19]:
dataset_name = "gab"
dataset_home = "/home/kokil/ahmad/benchmarking_llms/projects/qlora_training/create_dataset_jsons_aadish"
model_name = "llama"
base_or_finetuned = "finetuned"
model_adapter_output_home = "/home/kokil/ahmad/benchmarking_llms/projects/qlora_training/model_adapter_output_home"
classifier_training_dataset_output_home = "/home/kokil/ahmad/benchmarking_llms/projects/qlora_training/classifier_training_dataset_output_home"


OUTPUT_DIR = os.path.join(classifier_training_dataset_output_home, model_name, base_or_finetuned)
# os.makedirs(OUTPUT_DIR, exist_ok=True)


In [20]:
classifier_training_dataset_dict_path = os.path.join(OUTPUT_DIR, f"{model_name}={base_or_finetuned}={dataset_name}.dataset_dict")

In [21]:
classifier_training_dataset_dict_path

'/home/kokil/ahmad/benchmarking_llms/projects/qlora_training/classifier_training_dataset_output_home/llama/finetuned/llama=finetuned=gab.dataset_dict'

In [22]:
classifier_training_dataset_split_path_train = os.path.join(OUTPUT_DIR, f"{model_name}={base_or_finetuned}={dataset_name}=train.dataset_split")

In [23]:
classifier_training_dataset_split_path_train

'/home/kokil/ahmad/benchmarking_llms/projects/qlora_training/classifier_training_dataset_output_home/llama/finetuned/llama=finetuned=gab=train.dataset_split'

In [24]:
device_no = 14

if torch.cuda.is_available():
    device = f"cuda:{device_no}"
else:
    device = "cpu"



In [25]:
def load_custom_dataset_dict_for_classifier(classifier_training_dataset_dict_path):
    dataset_dict_classifier = torch.load(classifier_training_dataset_dict_path)
    return dataset_dict_classifier

In [26]:


from torch.utils.data import Dataset


from tqdm import tqdm


class CustomDataset(Dataset):
    def __init__(self, data):
        self.last_hidden_state_of_last_token = data['last_hidden_state_of_last_token']
        self.label = data['label']

    def __len__(self):
        return len(self.last_hidden_state_of_last_token)

    def __getitem__(self, idx):
        return {
            'last_hidden_state_of_last_token': self.last_hidden_state_of_last_token[idx],
            'label': self.label[idx]
        }

    def __repr__(self):
        return f"Dataset(features={list(self[0].keys())}, num_rows={len(self)})"




In [27]:

def load_custom_dataset_split_for_classifier(classifier_training_dataset_split_path):
    return CustomDataset(torch.load(classifier_training_dataset_split_path))



In [28]:
os.path.exists(classifier_training_dataset_split_path_train)

True

In [29]:
path2 = "/home/kokil/ahmad/benchmarking_llms/projects/qlora_training/classifier_training_dataset_output_home/llama/finetuned/llama=finetuned=gab=train.dataset_split"

In [30]:
classifier_training_dataset_split_path_train
'/home/kokil/ahmad/benchmarking_llms/projects/qlora_training/model_adapter_output_home/classifier_training_dataset_output_home/llama/finetuned/llama=finetuned=gab=train.dataset_split'

'/home/kokil/ahmad/benchmarking_llms/projects/qlora_training/model_adapter_output_home/classifier_training_dataset_output_home/llama/finetuned/llama=finetuned=gab=train.dataset_split'

In [31]:
classifier_training_dataset_split_path_train == path2

True

In [32]:
train_dataset_classifier = load_custom_dataset_split_for_classifier(classifier_training_dataset_split_path_train)

In [33]:
train_dataset_classifier

Dataset(features=['last_hidden_state_of_last_token', 'label'], num_rows=900)

In [41]:
num_tokens_sum = 0
for i in range(len(train_dataset_classifier)):
    num_tokens_sum += train_dataset_classifier[i]['last_hidden_state_of_last_token'].shape[0]

In [46]:
(15*1024)/(num_tokens_sum/len(train_dataset_classifier))

142.2032032752821

In [135]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(4096, num_classes)  # Assuming input dimension is 100
        # self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        # x = torch.relu(self.fc1(x))
        # x = self.fc2(x)
        # x = x.half() if x.dtype == torch.float32 else x
        x = x.float() if x.dtype == torch.float16 else x
        # print(x.dtype)
        x = torch.softmax(self.fc1(x), dim=1)
        return x
        # return torch.softmax(x, dim=1)


In [136]:
net = Net(2).to(device)

In [137]:
train_dataset_classifier[0]['last_hidden_state_of_last_token'][-1].dtype

torch.float16

In [138]:
net(train_dataset_classifier[0]['last_hidden_state_of_last_token'][-1].unsqueeze(0))

tensor([[0.1005, 0.8995]], device='cuda:14', grad_fn=<SoftmaxBackward0>)

In [139]:
train_dataset_classifier[0]['last_hidden_state_of_last_token'][-1].unsqueeze(0).shape

torch.Size([1, 4096])

In [140]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


In [141]:
train_dataset_classifier[0]['last_hidden_state_of_last_token'][-1].unsqueeze(0).shape

torch.Size([1, 4096])

In [142]:
last_hidden_states = [entry['last_hidden_state_of_last_token'][-1, :] for entry in train_dataset_classifier]
labels = [entry['label'] for entry in train_dataset_classifier]

In [143]:
# Dataset({'tensors': last_hidden_states, 'labels': labels})

In [144]:
last_hidden_states[0]

tensor([ 2.2383, -1.1963,  3.7461,  ..., -0.3518, -2.0215, -0.8984],
       device='cuda:14', dtype=torch.float16)

In [145]:
classifier_training_tensors = torch.stack(last_hidden_states)

In [146]:
net(classifier_training_tensors[:2]).shape

torch.Size([2, 2])

In [147]:
classifier_training_tensors[:2].shape

torch.Size([2, 4096])

In [148]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


In [149]:
net.train()
net.training

True

In [150]:
len(last_hidden_states)

900

In [151]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Convert lists to tensors
inputs_tensor = torch.stack(last_hidden_states)
labels_tensor = torch.tensor(labels)

# Create TensorDataset
dataset = TensorDataset(inputs_tensor, labels_tensor)

# Create DataLoader
train_dataloader = DataLoader(dataset, batch_size=8, shuffle=True)


In [152]:
inputs_tensor.shape

torch.Size([900, 4096])

In [153]:
labels_tensor.shape

torch.Size([900])

In [154]:
train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x7fa6a1545f10>

In [163]:
losses = []
# for epoch in range(training_args.num_train_epochs):
for epoch in range(10):    
    running_loss = 0.0
    for batch in tqdm(train_dataloader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    losses.append(running_loss)
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_dataloader)}")


100%|██████████| 113/113 [00:00<00:00, 1291.77it/s]


Epoch 1, Loss: 0.7590621936637744


100%|██████████| 113/113 [00:00<00:00, 1184.36it/s]


Epoch 2, Loss: 0.7568495455568871


100%|██████████| 113/113 [00:00<00:00, 1265.44it/s]


Epoch 3, Loss: 0.7601678703738525


100%|██████████| 113/113 [00:00<00:00, 1290.77it/s]


Epoch 4, Loss: 0.7601676427685053


100%|██████████| 113/113 [00:00<00:00, 1100.91it/s]


Epoch 5, Loss: 0.7590612057036004


100%|██████████| 113/113 [00:00<00:00, 1030.57it/s]


Epoch 6, Loss: 0.7601671630302361


100%|██████████| 113/113 [00:00<00:00, 1164.65it/s]


Epoch 7, Loss: 0.7590607386247247


100%|██████████| 113/113 [00:00<00:00, 1218.03it/s]


Epoch 8, Loss: 0.759060503370994


100%|██████████| 113/113 [00:00<00:00, 1231.90it/s]


Epoch 9, Loss: 0.7601664287854085


100%|██████████| 113/113 [00:00<00:00, 1265.20it/s]

Epoch 10, Loss: 0.7568475714827005





In [164]:
def calculate_accuracy(model, dataloader, device):
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [165]:
# Calculate accuracy on training dataset
train_accuracy = calculate_accuracy(net, train_dataloader, device)
print(f"Accuracy on training dataset: {train_accuracy}")

# # Assuming you have a dataloader for the validation dataset named val_dataloader
# # Calculate accuracy on validation dataset
# val_accuracy = calculate_accuracy(net, val_dataloader, device)
# print(f"Accuracy on validation dataset: {val_accuracy}")


Accuracy on training dataset: 0.5544444444444444


In [34]:
os.path.exists(classifier_training_dataset_dict_path)

False

In [35]:

from torch.utils.data import Dataset


from tqdm import tqdm


class CustomDataset(Dataset):
    def __init__(self, data):
        self.last_hidden_state_of_last_token = data['last_hidden_state_of_last_token']
        self.label = data['label']

    def __len__(self):
        return len(self.last_hidden_state_of_last_token)

    def __getitem__(self, idx):
        return {
            'last_hidden_state_of_last_token': self.last_hidden_state_of_last_token[idx],
            'label': self.label[idx]
        }

    def __repr__(self):
        return f"Dataset(features={list(self[0].keys())}, num_rows={len(self)})"




In [16]:
CustomDataset(torch.load(classifier_training_dataset_dict_path))

RuntimeError: PytorchStreamReader failed locating file data.pkl: file not found

In [9]:
dataset_dict_classifier = load_custom_dataset_dict_for_classifier(classifier_training_dataset_dict_path)

RuntimeError: PytorchStreamReader failed locating file data.pkl: file not found

In [None]:
# Define your neural network architecture (net) and dataset (train_set) here
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(100, 50)
        self.fc2 = nn.Linear(50, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x


class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(100, 50)  # Assuming input dimension is 100
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.softmax(x, dim=1)


In [None]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(100, 50)  # Assuming input dimension is 100
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.softmax(x, dim=1)
