In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2").to(device)

# Set pad token if not set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load test dataset with labels
test_dataset = load_dataset(
    'csv', 
    data_files='test_samples.csv', 
    column_names=['premise', 'hypothesis', 'label'],  # Include the label column
    split='train'
)

# Map string labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(test_dataset['label']))}
encoded_labels = [label_mapping[label] for label in test_dataset['label']]

# Tokenize the test dataset
def tokenize_function(examples):
    return tokenizer(
        examples['premise'],
        examples['hypothesis'],
        padding="max_length",
        truncation=True,
        max_length=512
    )

tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, tokenized_data, labels):
        self.input_ids = tokenized_data['input_ids']
        self.attention_mask = tokenized_data['attention_mask']
        self.labels = labels  # Add this to capture the numeric labels

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            'input_ids': torch.tensor(self.input_ids[idx]),
            'attention_mask': torch.tensor(self.attention_mask[idx]),
            'labels': torch.tensor(self.labels[idx]),  # Store numeric labels
        }

# Create test DataLoader
custom_test_dataset = CustomDataset(tokenized_test, encoded_labels)
test_dataloader = DataLoader(custom_test_dataset, batch_size=1)

# Function to calculate accuracy
def calculate_accuracy(predictions, labels):
    return (predictions == labels).sum().item()

# Evaluate pre-trained model
total_correct = 0
total_samples = 0

model.eval()
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Get logits from the model output
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Get the most likely class predictions
        predictions = torch.argmax(logits, dim=-1)

        # Compare predictions with actual labels
        total_correct += calculate_accuracy(predictions, labels)
        total_samples += labels.size(0)  # Count total samples

# Calculate accuracy
accuracy = total_correct / total_samples
print(f"Token-level accuracy of the pre-trained model: {accuracy:.4f}")


  from .autonotebook import tqdm as notebook_tqdm


cuda


2024-11-04 16:59:35.682105: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1730739575.695371  554627 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1730739575.699332  554627 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-04 16:59:35.715480: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00,  1.76s/it]


Token-level accuracy of the pre-trained model: 0.6733


Fine Tuned Models Accuracy

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Load fine-tuned tokenizer and model from the specified directory
model_directory = "phi2-finetuned-epoch-1"  # Update this path to your model's directory
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = AutoModelForCausalLM.from_pretrained(model_directory).to(device)

# Set pad token if not set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load test dataset with labels
test_dataset = load_dataset(
    'csv', 
    data_files='test_samples.csv', 
    column_names=['premise', 'hypothesis', 'label'],  # Include the label column
    split='train'
)

# Map string labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(test_dataset['label']))}
encoded_labels = [label_mapping[label] for label in test_dataset['label']]

# Tokenize the test dataset
def tokenize_function(examples):
    return tokenizer(
        examples['premise'],
        examples['hypothesis'],
        padding="max_length",
        truncation=True,
        max_length=512
    )

tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, tokenized_data, labels):
        self.input_ids = tokenized_data['input_ids']
        self.attention_mask = tokenized_data['attention_mask']
        self.labels = labels  # Add this to capture the numeric labels

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            'input_ids': torch.tensor(self.input_ids[idx]),
            'attention_mask': torch.tensor(self.attention_mask[idx]),
            'labels': torch.tensor(self.labels[idx]),  # Store numeric labels
        }

# Create test DataLoader
custom_test_dataset = CustomDataset(tokenized_test, encoded_labels)
test_dataloader = DataLoader(custom_test_dataset, batch_size=1)

# Function to calculate accuracy
def calculate_accuracy(predictions, labels):
    return (predictions == labels).sum().item()

# Evaluate pre-trained model
total_correct = 0
total_samples = 0

model.eval()
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        # Get logits from the model output
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Get the most likely class predictions
        predictions = torch.argmax(logits, dim=-1)

        # Compare predictions with actual labels
        total_correct += calculate_accuracy(predictions, labels)
        total_samples += labels.size(0)  # Count total samples

# Calculate accuracy
accuracy = total_correct / total_samples
print(f"Token-level accuracy of the pre-trained model: {accuracy:.4f}")

cuda


Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00,  1.79s/it]


Token-level accuracy of the pre-trained model: 0.0000


In [6]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Load fine-tuned tokenizer and model from the specified directory
model_directory = "phi2-finetuned-epoch-2"  # Update this path to your model's directory
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = AutoModelForCausalLM.from_pretrained(model_directory).to(device)

# Set pad token if not set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load test dataset with labels
test_dataset = load_dataset(
    'csv', 
    data_files='test_samples.csv', 
    column_names=['premise', 'hypothesis', 'label'],  # Include the label column
    split='train'
)

# Map string labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(test_dataset['label']))}
encoded_labels = [label_mapping[label] for label in test_dataset['label']]

# Tokenize the test dataset
def tokenize_function(examples):
    return tokenizer(
        examples['premise'],
        examples['hypothesis'],
        padding="max_length",
        truncation=True,
        max_length=512
    )

tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, tokenized_data, labels):
        self.input_ids = tokenized_data['input_ids']
        self.attention_mask = tokenized_data['attention_mask']
        self.labels = labels  # Add this to capture the numeric labels

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            'input_ids': torch.tensor(self.input_ids[idx]),
            'attention_mask': torch.tensor(self.attention_mask[idx]),
            'labels': torch.tensor(self.labels[idx]),  # Store numeric labels
        }

# Create test DataLoader
custom_test_dataset = CustomDataset(tokenized_test, encoded_labels)
test_dataloader = DataLoader(custom_test_dataset, batch_size=1)

# Function to calculate accuracy
def calculate_accuracy(predictions, labels):
    return (predictions == labels).sum().item()

# Evaluate fine-tuned model
total_correct = 0
total_samples = 0

model.eval()
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Get logits from the model output
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Get the most likely class predictions
        predictions = torch.argmax(logits, dim=-1)

        # Compare predictions with actual labels
        total_correct += calculate_accuracy(predictions, labels)
        total_samples += labels.size(0)  # Count total samples

# Calculate accuracy
accuracy = total_correct / total_samples
print(f"Token-level accuracy of the fine-tuned model: {accuracy:.4f}")


cuda


Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00,  1.88s/it]
Map: 100%|███████████████████████████| 101/101 [00:00<00:00, 3858.82 examples/s]


Token-level accuracy of the fine-tuned model: 0.0000


In [7]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Load fine-tuned tokenizer and model from the specified directory
model_directory = "phi2-finetuned-epoch-3"  # Update this path to your model's directory
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = AutoModelForCausalLM.from_pretrained(model_directory).to(device)

# Set pad token if not set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load test dataset with labels
test_dataset = load_dataset(
    'csv', 
    data_files='test_samples.csv', 
    column_names=['premise', 'hypothesis', 'label'],  # Include the label column
    split='train'
)

# Map string labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(test_dataset['label']))}
encoded_labels = [label_mapping[label] for label in test_dataset['label']]

# Tokenize the test dataset
def tokenize_function(examples):
    return tokenizer(
        examples['premise'],
        examples['hypothesis'],
        padding="max_length",
        truncation=True,
        max_length=512
    )

tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, tokenized_data, labels):
        self.input_ids = tokenized_data['input_ids']
        self.attention_mask = tokenized_data['attention_mask']
        self.labels = labels  # Add this to capture the numeric labels

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            'input_ids': torch.tensor(self.input_ids[idx]),
            'attention_mask': torch.tensor(self.attention_mask[idx]),
            'labels': torch.tensor(self.labels[idx]),  # Store numeric labels
        }

# Create test DataLoader
custom_test_dataset = CustomDataset(tokenized_test, encoded_labels)
test_dataloader = DataLoader(custom_test_dataset, batch_size=1)

# Function to calculate accuracy
def calculate_accuracy(predictions, labels):
    return (predictions == labels).sum().item()

# Evaluate fine-tuned model
total_correct = 0
total_samples = 0

model.eval()
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Get logits from the model output
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Get the most likely class predictions
        predictions = torch.argmax(logits, dim=-1)

        # Compare predictions with actual labels
        total_correct += calculate_accuracy(predictions, labels)
        total_samples += labels.size(0)  # Count total samples

# Calculate accuracy
accuracy = total_correct / total_samples
print(f"Token-level accuracy of the fine-tuned model: {accuracy:.4f}")


cuda


Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00,  1.83s/it]
Map: 100%|███████████████████████████| 101/101 [00:00<00:00, 3169.06 examples/s]


Token-level accuracy of the fine-tuned model: 0.0000


In [8]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Load fine-tuned tokenizer and model from the specified directory
model_directory = "phi2-finetuned-epoch-4"  # Update this path to your model's directory
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = AutoModelForCausalLM.from_pretrained(model_directory).to(device)

# Set pad token if not set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load test dataset with labels
test_dataset = load_dataset(
    'csv', 
    data_files='test_samples.csv', 
    column_names=['premise', 'hypothesis', 'label'],  # Include the label column
    split='train'
)

# Map string labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(test_dataset['label']))}
encoded_labels = [label_mapping[label] for label in test_dataset['label']]

# Tokenize the test dataset
def tokenize_function(examples):
    return tokenizer(
        examples['premise'],
        examples['hypothesis'],
        padding="max_length",
        truncation=True,
        max_length=512
    )

tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, tokenized_data, labels):
        self.input_ids = tokenized_data['input_ids']
        self.attention_mask = tokenized_data['attention_mask']
        self.labels = labels  # Add this to capture the numeric labels

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            'input_ids': torch.tensor(self.input_ids[idx]),
            'attention_mask': torch.tensor(self.attention_mask[idx]),
            'labels': torch.tensor(self.labels[idx]),  # Store numeric labels
        }

# Create test DataLoader
custom_test_dataset = CustomDataset(tokenized_test, encoded_labels)
test_dataloader = DataLoader(custom_test_dataset, batch_size=1)

# Function to calculate accuracy
def calculate_accuracy(predictions, labels):
    return (predictions == labels).sum().item()

# Evaluate fine-tuned model
total_correct = 0
total_samples = 0

model.eval()
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Get logits from the model output
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Get the most likely class predictions
        predictions = torch.argmax(logits, dim=-1)

        # Compare predictions with actual labels
        total_correct += calculate_accuracy(predictions, labels)
        total_samples += labels.size(0)  # Count total samples

# Calculate accuracy
accuracy = total_correct / total_samples
print(f"Token-level accuracy of the fine-tuned model: {accuracy:.4f}")


cuda


Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00,  1.83s/it]
Map: 100%|███████████████████████████| 101/101 [00:00<00:00, 3492.60 examples/s]


Token-level accuracy of the fine-tuned model: 0.0000


In [15]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Load the tokenizer and model from the same directory
model_directory = "phi2-finetuned-epoch-5"  # Update this to your model folder path
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = AutoModelForCausalLM.from_pretrained(model_directory).to(device)

# Set pad token if not set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load test dataset with labels
test_dataset = load_dataset(
    'csv', 
    data_files='test_samples.csv', 
    column_names=['premise', 'hypothesis', 'label'],  # Include the label column
    split='train'
)

# Map string labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(test_dataset['label']))}
encoded_labels = [label_mapping[label] for label in test_dataset['label']]

# Tokenize the test dataset
def tokenize_function(examples):
    return tokenizer(
        examples['premise'],
        examples['hypothesis'],
        padding="max_length",
        truncation=True,
        max_length=512
    )

tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, tokenized_data, labels):
        self.input_ids = tokenized_data['input_ids']
        self.attention_mask = tokenized_data['attention_mask']
        self.labels = labels  # Add this to capture the numeric labels

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            'input_ids': torch.tensor(self.input_ids[idx]),
            'attention_mask': torch.tensor(self.attention_mask[idx]),
            'labels': torch.tensor(self.labels[idx]),  # Store numeric labels
        }

# Create test DataLoader
custom_test_dataset = CustomDataset(tokenized_test, encoded_labels)
test_dataloader = DataLoader(custom_test_dataset, batch_size=1)

# Function to calculate accuracy
def calculate_accuracy(predictions, labels):
    return (predictions == labels).sum().item()

# Evaluate pre-trained model
total_correct = 0
total_samples = 0

model.eval()
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Get logits from the model output
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Get the most likely class predictions for the last token
        predictions = torch.argmax(logits[:, -1, :], dim=-1)

        # Convert predicted token IDs to class labels
        predicted_classes = predictions.cpu().numpy()
        actual_classes = labels.cpu().numpy()

        # Compare predictions with actual labels
        total_correct += (predicted_classes == actual_classes).sum()
        total_samples += actual_classes.size  # Count total samples

# Calculate accuracy
accuracy = total_correct / total_samples
print(f"Token-level accuracy of the pre-trained model: {accuracy:.4f}")


cuda


Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00,  1.82s/it]


Token-level accuracy of the pre-trained model: 0.0000


In [16]:
print("Predictions:", predictions.cpu().numpy())
print("Labels:", labels.cpu().numpy())
print("Logits:", logits.cpu().numpy())

Predictions: [50256]
Labels: [2]
Logits: [[[ 6.014112    2.7832603  -3.4612164  ... -1.1376925  -1.1369531
   -1.1378222 ]
  [ 3.6340315   2.84673    -0.25915742 ... -1.5786233  -1.5770816
   -1.5782006 ]
  [ 8.899673    5.6947994   4.1239567  ... -1.1998765  -1.1994023
   -1.2003493 ]
  ...
  [10.216738   10.579777   11.074715   ... -4.5852213  -4.586058
   -4.586841  ]
  [10.189975   10.670112   11.233218   ... -4.567991   -4.5688133
   -4.5695624 ]
  [10.1188     10.721686   11.641347   ... -4.5617695  -4.562585
   -4.563337  ]]]


In [17]:
# Print the vocabulary and label mapping
print("Vocabulary Size:", len(tokenizer.get_vocab()))
print("Label Mapping:", label_mapping)

# Check the token id of the label
print("Token ID for Label 2:", tokenizer.convert_ids_to_tokens(2))
print("Predicted Token ID:", predictions.item())
print("Predicted Token:", tokenizer.convert_ids_to_tokens(predictions.item()))


Vocabulary Size: 50295
Label Mapping: {'0': 0, '2': 1, '1': 2, 'label': 3}
Token ID for Label 2: #
Predicted Token ID: 50256
Predicted Token: <|endoftext|>


In [3]:
from peft import get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

model = get_peft_model(model, lora_config)

def count_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params, trainable_params

# Calculate total and fine-tunable parameters
total_params, trainable_params = count_parameters(model)
print(f"Total parameters in the model: {total_params}")
print(f"Number of parameters being fine-tuned: {trainable_params}")
print(f"Percentage of parameters fine-tuned: {100 * trainable_params / total_params:.2f}%")


Loading checkpoint shards: 100%|██████████████████| 2/2 [00:01<00:00,  1.46it/s]


Total parameters in the model: 2798033920
Number of parameters being fine-tuned: 18350080
Percentage of parameters fine-tuned: 0.66%
