In [None]:
!pip install bert-score 

In [5]:
!pip install rouge_score 

Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [6]:
import numpy as np 
import pandas as pd
import csv
import nltk
import random
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import string
import torch
import json
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
from transformers import GPT2LMHeadModel, GPT2Tokenizer,GPT2Model, GPT2Config, AdamW
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu
from rouge_score import rouge_scorer
from bert_score import BERTScorer
from transformers import BertTokenizer, BertForMaskedLM, BertModel

In [7]:
def squad_json_to_dataframe(file_path, record_path=['data','paragraphs','qas','answers']):

    file = json.loads(open(file_path).read())
    # parsing different level's in the json file
    js = pd.json_normalize(file, record_path)
    m = pd.json_normalize(file, record_path[:-1])
    r = pd.json_normalize(file,record_path[:-2])
    # combining it into single dataframe
    idx = np.repeat(r['context'].values, r.qas.str.len())
    m['context'] = idx
    data = m[['id','question','context','answers']].set_index('id').reset_index()
    data['c_id'] = data['context'].factorize()[0]
    return data

def preprocess_text_en(data):
    # Convert to lowercase
    data2 = []
    for text in data:
        # Skip None values
        if text is None:
            continue
        text = text.lower()

        # Tokenization
        tokens = word_tokenize(text)

        # Remove stop words
        stop_words = set(stopwords.words('english'))
        tokens = [word for word in tokens if word not in stop_words]

        # Join the tokens back into a string
        preprocessed_text = ' '.join(tokens)
        data2.append(preprocessed_text)

    return data2
    

def calculate_bleu_score(machine_results, reference_texts):
    bleu_score = corpus_bleu([[ref.split()] for ref in reference_texts], [gen.split() for gen in machine_results])
    return bleu_score

def calculate_rouge_scores(generated_answers, ground_truth):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    total_rouge1, total_rouge2, total_rougeL = 0, 0, 0
    for gen, ref in zip(generated_answers, ground_truth):
        scores = scorer.score(gen, ref)
        total_rouge1 += scores['rouge1'].fmeasure
        total_rouge2 += scores['rouge2'].fmeasure
        total_rougeL += scores['rougeL'].fmeasure
    average_rouge1 = total_rouge1 / len(generated_answers)
    average_rouge2 = total_rouge2 / len(generated_answers)
    average_rougeL = total_rougeL / len(generated_answers)
    return average_rouge1, average_rouge2, average_rougeL

def calculate_bert_score(generated_answers, ground_truth):
    scorer = BERTScorer(model_type='bert-base-uncased')
    P, R, F1 = scorer.score(generated_answers, ground_truth)
    avg_precision = sum(p.mean() for p in P) / len(P)
    avg_recall = sum(r.mean() for r in R) / len(R)
    avg_f1 = sum(f1.mean() for f1 in F1) / len(F1)
    return avg_precision, avg_recall, avg_f1


# Define a function to tokenize, convert text to indices, and pad sequences
def tokenize_and_pad(data_list, max_question_length=1018, max_answer_length=1024):
    tokenized_data_list = []
    for question, answer in data_list:
        # Tokenize and convert to indices
        question_tokens = tokenizer.encode(question, add_special_tokens=True)
        answer_tokens = tokenizer.encode(answer, add_special_tokens=True)

        # Pad sequences to specified lengths
        padded_question_tokens = torch.tensor(question_tokens + [tokenizer.convert_tokens_to_ids(pad_token)] * (max_question_length - len(question_tokens)))
        padded_answer_tokens = torch.tensor(answer_tokens + [tokenizer.convert_tokens_to_ids(pad_token)] * (max_answer_length - len(answer_tokens)))

        # Append to the tokenized_data_list only if both token lists are not empty
        if len(question_tokens) > 0 and len(answer_tokens) > 0:
            tokenized_data_list.append((padded_question_tokens, padded_answer_tokens))

    return tokenized_data_list

In [8]:
# Lists to store scores
train_bleu_scores = []
train_bert_scores = []
train_rouge1_scores = []
train_rouge2_scores = []
train_rougeL_scores = []

val_bleu_scores = []
val_bert_scores = []
val_rouge1_scores = []
val_rouge2_scores = []
val_rougeL_scores = []

In [9]:
train = '/kaggle/input/squad-20/train-v2.0.json'
dev = '/kaggle/input/squad-20/dev-v2.0.json'
train_df = squad_json_to_dataframe(train)
dev_df = squad_json_to_dataframe(dev)

train_df['question_context'] = 'Question: ' + train_df['question'] + ' Context: ' + train_df['context']
dev_df['question_context'] = 'Question: ' + dev_df['question'] + ' Context: ' + dev_df['context']
# Extract 'text' value from 'answers' column for each row
train_df['answers'] = train_df['answers'].apply(lambda x: x[0]['text'] if len(x) > 0 else None)
dev_df['answers'] = dev_df['answers'].apply(lambda x: x[0]['text'] if len(x) > 0 else None)


# Extract 'question_context' column into a list
question_context_list_train = train_df['question_context'].tolist()
# Extract 'answers' column into a list
answers_list_train = train_df['answers'].tolist()


# Extract 'question_context' column into a list
question_context_list_dev = dev_df['question_context'].tolist()
# Extract 'answers' column into a list
answers_list_dev = dev_df['answers'].tolist()

In [10]:
question_context_list_train = question_context_list_train[:100]
answers_list_train = answers_list_train[:100]

question_context_list_dev = question_context_list_dev[:30]
answers_list_dev = answers_list_dev[:30]

question_context_list_train = preprocess_text_en(question_context_list_train)
answers_list_train = preprocess_text_en(answers_list_train)

question_context_list_dev = preprocess_text_en(question_context_list_dev)
answers_list_dev = preprocess_text_en(answers_list_dev)


mapped_list_1 = list(zip(question_context_list_train, answers_list_train))
mapped_list_2 = list(zip(question_context_list_dev, answers_list_dev))

In [11]:
print(np.shape(mapped_list_1))
print(mapped_list_1[1][1])
print("\n\n",mapped_list_1[0][1])

(100, 2)
singing dancing


 late 1990s


In [12]:
# Load GPT-2 tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Define a pad token and add it to the tokenizer
pad_token = tokenizer.eos_token
tokenizer.add_tokens([pad_token])


tokenized_data_train_list = tokenize_and_pad(mapped_list_1, 1018, 1024)
tokenized_data_val_list = tokenize_and_pad(mapped_list_2, 1018, 1024)
# question_context_list_train, answers_list_train = zip(*tokenized_data_train_list)
# question_context_list_val, answers_list_val = zip(*tokenized_data_val_list)
# print(len(question_context_list_train),len(answers_list_train))

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [13]:
print(len(tokenized_data_train_list))
print(tokenized_data_train_list[0][0])
print("\n\n",tokenized_data_train_list[0][1])
##print("\n\n",tokenized_data_train_list)

100
tensor([25652,  1058,   307,  ..., 50256, 50256, 50256])


 tensor([17660,  6303,    82,  ..., 50256, 50256, 50256])


In [14]:
# Initialize lists to store input and target ids
input_ids_train = []
target_ids_train = []

# Specify maximum lengths
max_question_length = 1018
max_answer_length = 1024

# Iterate through the tokenized data
for padded_question_tokens, padded_answer_tokens in tokenized_data_train_list:
    # Truncate question tokens if greater than max_question_length
    truncated_question_tokens = padded_question_tokens[:max_question_length]

    # Truncate answer tokens if greater than max_answer_length
    truncated_answer_tokens = padded_answer_tokens[:max_answer_length]

    # Append truncated question tokens to input_ids_train
    input_ids_train.append(truncated_question_tokens)

    # Append truncated answer tokens to target_ids_train
    target_ids_train.append(truncated_answer_tokens)

# Convert the lists to PyTorch tensors
input_ids_train = torch.stack(input_ids_train)
target_ids_train = torch.stack(target_ids_train)

In [15]:
print(input_ids_train.shape)
print(target_ids_train.shape)

torch.Size([100, 1018])
torch.Size([100, 1024])


In [16]:
# Initialize lists to store input and target ids
input_ids_val = []
target_ids_val = []

# Specify maximum lengths
max_question_length = 1018
max_answer_length = 1024

# Iterate through the tokenized data
for padded_question_tokens, padded_answer_tokens in tokenized_data_val_list:
    # Truncate question tokens if greater than max_question_length
    truncated_question_tokens = padded_question_tokens[:max_question_length]

    # Truncate answer tokens if greater than max_answer_length
    truncated_answer_tokens = padded_answer_tokens[:max_answer_length]

    # Append truncated question tokens to input_ids_train
    input_ids_val.append(truncated_question_tokens)

    # Append truncated answer tokens to target_ids_train
    target_ids_val.append(truncated_answer_tokens)

# Convert the lists to PyTorch tensors
input_ids_val = torch.stack(input_ids_val)
target_ids_val = torch.stack(target_ids_val)

In [18]:
# Load GPT-2 model and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
gpt2_model = GPT2LMHeadModel.from_pretrained(model_name)

# Define the number of prompts and embedding size
num_prompts = 6  # "summarize the following text"
embedding_size = 768

# Define a specific sentence
sentence = "Answer the question using given context"

# Tokenize the sentence
input_ids = tokenizer.encode(sentence, return_tensors='pt')

# Get the embeddings for the input_ids from the GPT-2 model
gpt2_embeddings = gpt2_model.transformer.wte(input_ids)

# Create an embedding layer for soft prompts and initialize with the sentence embeddings
soft_prompt_embeddings = nn.Embedding(num_prompts, embedding_size)
soft_prompt_embeddings.weight.data.copy_(gpt2_embeddings.squeeze(0))

# Concatenate soft prompt embeddings at the beginning of the input sequence
class GPT2WithPromptTuning(nn.Module):
    def __init__(self, gpt2_model, soft_prompt_embeddings):
        super(GPT2WithPromptTuning, self).__init__()
        self.gpt2_model = gpt2_model
        self.soft_prompt_embeddings = soft_prompt_embeddings
    
    def forward(self, input_ids, soft_prompt_ids):
        # Get the embeddings for the input_ids from the GPT-2 model
        gpt2_embeddings = self.gpt2_model.transformer.wte(input_ids)
        # Get the embeddings for the soft prompts
        soft_prompt_embeds = self.soft_prompt_embeddings(soft_prompt_ids)
        # print(gpt2_embeddings.shape,soft_prompt_embeds.shape )
#         print("gpt2_embeddings",gpt2_embeddings.shape,"soft_prompt_embeds",soft_prompt_embeds.shape)
        # Concatenate the embeddings
        embeddings = torch.cat([soft_prompt_embeds, gpt2_embeddings], dim=0)
        
        # Pass the concatenated embeddings through the GPT-2 model
        outputs = self.gpt2_model(inputs_embeds=embeddings)
        
        return outputs

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [19]:
# Initialize the model
model = GPT2WithPromptTuning(gpt2_model, soft_prompt_embeddings)

# Freeze GPT-2 model weights
for param in model.gpt2_model.parameters():
    param.requires_grad = False

# Define hyperparameters
batch_size = 4
epochs = 2
learning_rate = 2e-3
gradient_clip_value = 1.0

# Define optimizer and criterion
optimizer = torch.optim.AdamW(model.soft_prompt_embeddings.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss(ignore_index=-100)

soft_prompt_ids = torch.tensor([0, 1, 2, 3 ,4 ,5]).to("cuda")

In [20]:
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu
from rouge_score import rouge_scorer
from transformers import BertTokenizer, BertForMaskedLM, BertModel
from bert_score import BERTScorer

device = "cuda"
# Move model to GPU
model.to(device)


# Training loop
for epoch in range(epochs):
    # Create a tqdm progress bar for the training data
    data_iterator = tqdm(zip(input_ids_train, target_ids_train), desc=f'Epoch {epoch + 1}', total=len(input_ids_train))
    
    for input_ids, target_ids in data_iterator:
        optimizer.zero_grad()

        # Move input and target tensors to GPU
        input_ids, target_ids = input_ids.to(device), target_ids.to(device)
        
        # Assuming you have a soft_prompt_ids for each training instance
        # If not, you might need to modify this part accordingly
        outputs = model(input_ids, soft_prompt_ids.to(device))
        logits = outputs.logits if hasattr(outputs, "logits") else outputs.last_hidden_state

        loss = criterion(logits, target_ids)
        loss.backward()

        # Gradient clipping to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip_value)

        optimizer.step()

        # Update the progress bar description with the current loss
        data_iterator.set_postfix(loss=loss.item())

        # Convert tensor predictions and references to lists
        predictions = logits.argmax(dim=-1).squeeze(0).tolist()
        references = target_ids.squeeze(0).tolist()

        # Calculate BLEU Score for training
        bleu_score = calculate_bleu_score([tokenizer.decode(predictions)], [tokenizer.decode(references)])
        train_bleu_scores.append(bleu_score)

        # Calculate BERTScore for training
        bert_precision, bert_recall, bert_f1 = calculate_bert_score([tokenizer.decode(predictions)], [tokenizer.decode(references)])
        train_bert_scores.append(bert_f1)

        # Calculate ROUGE Scores for training
        rouge1, rouge2, rougeL = calculate_rouge_scores([tokenizer.decode(predictions)], [tokenizer.decode(references)])
        train_rouge1_scores.append(rouge1)
        train_rouge2_scores.append(rouge2)
        train_rougeL_scores.append(rougeL)

    # Validation loop
    model.eval()
    val_losses = []
    val_bleu_scores_epoch = []
    val_bert_scores_epoch = []
    val_rouge1_scores_epoch = []
    val_rouge2_scores_epoch = []
    val_rougeL_scores_epoch = []
    with torch.no_grad():
        for input_ids_val, target_ids_val in zip(input_ids_val, target_ids_val):
            input_ids_val, target_ids_val = input_ids_val.to(device), target_ids_val.to(device)
            outputs_val = model(input_ids_val, soft_prompt_ids.to(device))
            logits_val = outputs_val.logits if hasattr(outputs_val, "logits") else outputs_val.last_hidden_state
            loss_val = criterion(logits_val, target_ids_val)
            val_losses.append(loss_val.item())

            # Convert tensor predictions and references to lists
            predictions_val = logits_val.argmax(dim=-1).squeeze(0).tolist()
            references_val = target_ids_val.squeeze(0).tolist()

            # Calculate BLEU Score for validation
            bleu_score_val = calculate_bleu_score([tokenizer.decode(predictions_val)], [tokenizer.decode(references_val)])
            
            val_bleu_scores_epoch.append(bleu_score_val)

            # Calculate BERTScore for validation
            bert_precision_val, bert_recall_val, bert_f1_val = calculate_bert_score([tokenizer.decode(predictions_val)], [tokenizer.decode(references_val)])
            val_bert_scores_epoch.append(bert_f1_val)

            # Calculate ROUGE Scores for validation
            rouge1_val, rouge2_val, rougeL_val = calculate_rouge_scores([tokenizer.decode(predictions_val)], [tokenizer.decode(references_val)])
            val_rouge1_scores_epoch.append(rouge1_val)
            val_rouge2_scores_epoch.append(rouge2_val)
            val_rougeL_scores_epoch.append(rougeL_val)

    # Calculate average validation loss
    avg_val_loss = sum(val_losses) / len(val_losses)
    print("epoch :", epoch + 1,"train_loss :", loss.item(),"val_loss :", avg_val_loss)

    # Calculate average validation scores
    avg_bleu_score_val = sum(val_bleu_scores_epoch) / len(val_bleu_scores_epoch)
    avg_bert_score_val = sum(val_bert_scores_epoch) / len(val_bert_scores_epoch)
    avg_rouge1_score_val = sum(val_rouge1_scores_epoch) / len(val_rouge1_scores_epoch)
    avg_rouge2_score_val = sum(val_rouge2_scores_epoch) / len(val_rouge2_scores_epoch)
    avg_rougeL_score_val = sum(val_rougeL_scores_epoch) / len(val_rougeL_scores_epoch)

    print("Validation BLEU Score:", avg_bleu_score_val)
    print("Validation BERTScore:", avg_bert_score_val)
    print("Validation ROUGE-1 Score:", avg_rouge1_score_val)
    print("Validation ROUGE-2 Score:", avg_rouge2_score_val)
    print("Validation ROUGE-L Score:", avg_rougeL_score_val)

    # Append validation scores
    val_bleu_scores.append(avg_bleu_score_val)
    val_bert_scores.append(avg_bert_score_val)
    val_rouge1_scores.append(avg_rouge1_score_val)
    val_rouge2_scores.append(avg_rouge2_score_val)
    val_rougeL_scores.append(avg_rougeL_score_val)

    # Set the model back to training mode
    model.train()

# Close the tqdm progress bar
data_iterator.close()

Epoch 1:   0%|          | 0/100 [00:01<?, ?it/s, loss=12.2]2024-06-11 13:01:00.601320: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-11 13:01:00.601445: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-11 13:01:00.726488: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Epoch 1: 100%|██████████| 100/100 [02:25<00:00,  1.46s/it, loss=0.0877]


epoch : 1 train_loss : 0.08770941942930222 val_loss : 0.08353108167648315
Validation BLEU Score: 0.0
Validation BERTScore: tensor(0.9858)
Validation ROUGE-1 Score: 0.9959803621532478
Validation ROUGE-2 Score: 0.9958364711772846
Validation ROUGE-L Score: 0.9959803621532478


Epoch 2: 100%|██████████| 100/100 [02:24<00:00,  1.45s/it, loss=0.455]


RuntimeError: Tensors must have same number of dimensions: got 2 and 1

In [21]:
# Calculate average scores for training
avg_train_bleu_score = sum(train_bleu_scores) / len(train_bleu_scores)
avg_train_bert_score = sum(train_bert_scores) / len(train_bert_scores)
avg_train_rouge1_score = sum(train_rouge1_scores) / len(train_rouge1_scores)
avg_train_rouge2_score = sum(train_rouge2_scores) / len(train_rouge2_scores)
avg_train_rougeL_score = sum(train_rougeL_scores) / len(train_rougeL_scores)

print("Average Training BLEU Score:", avg_train_bleu_score)
print("Average Training BERTScore:", avg_train_bert_score)
print("Average Training ROUGE-1 Score:", avg_train_rouge1_score)
print("Average Training ROUGE-2 Score:", avg_train_rouge2_score)
print("Average Training ROUGE-L Score:", avg_train_rougeL_score)

# Calculate average scores for validation
avg_val_bleu_score = sum(val_bleu_scores) / len(val_bleu_scores)
avg_val_bert_score = sum(val_bert_scores) / len(val_bert_scores)
avg_val_rouge1_score = sum(val_rouge1_scores) / len(val_rouge1_scores)
avg_val_rouge2_score = sum(val_rouge2_scores) / len(val_rouge2_scores)
avg_val_rougeL_score = sum(val_rougeL_scores) / len(val_rougeL_scores)

print("Average Validation BLEU Score:", avg_val_bleu_score)
print("Average Validation BERTScore:", avg_val_bert_score)
print("Average Validation ROUGE-1 Score:", avg_val_rouge1_score)
print("Average Validation ROUGE-2 Score:", avg_val_rouge2_score)
print("Average Validation ROUGE-L Score:", avg_val_rougeL_score)

Average Training BLEU Score: 0.0017047553984649772
Average Training BERTScore: tensor(0.7555)
Average Training ROUGE-1 Score: 0.4488093882145418
Average Training ROUGE-2 Score: 0.43119072490326354
Average Training ROUGE-L Score: 0.4487952588043352
Average Validation BLEU Score: 0.0
Average Validation BERTScore: tensor(0.9858)
Average Validation ROUGE-1 Score: 0.9959803621532478
Average Validation ROUGE-2 Score: 0.9958364711772846
Average Validation ROUGE-L Score: 0.9959803621532478


In [22]:
 # Save model weights
torch.save(model.state_dict(), 'QNA.pth')

In [23]:
# Load tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Load model architecture
model = GPT2WithPromptTuning(gpt2_model, soft_prompt_embeddings)
print("Before ",model)
# Load the saved model weights
model.load_state_dict(torch.load('QNA.pth'))
print("After ",model)

# Move model to the desired device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Before  GPT2WithPromptTuning(
  (gpt2_model): GPT2LMHeadModel(
    (transformer): GPT2Model(
      (wte): Embedding(50257, 768)
      (wpe): Embedding(1024, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-11): 12 x GPT2Block(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2Attention(
            (c_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (lm_head): Linear(in_features=768, out_features=50257, bias=False)
  )
  (soft_prompt

GPT2WithPromptTuning(
  (gpt2_model): GPT2LMHeadModel(
    (transformer): GPT2Model(
      (wte): Embedding(50257, 768)
      (wpe): Embedding(1024, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-11): 12 x GPT2Block(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2Attention(
            (c_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (lm_head): Linear(in_features=768, out_features=50257, bias=False)
  )
  (soft_prompt_embeddi

In [25]:
input_text = "Question: [Which NFL team represented the NFC at Super Bowl 50?] Context: [Carolina Panthers]"
input_ids = tokenizer.encode(input_text, return_tensors='tf')


In [26]:
input_ids.shape

TensorShape([1, 21])

In [31]:
# Move soft_prompt_embeddings tensor to the same device as gpt2_embeddings
soft_prompt_embeds = soft_prompt_embeddings(soft_prompt_ids)
# Unsqueeze to make it 3D

print(soft_prompt_embeds.shape)
# Concatenate the embeddings



torch.Size([6, 768])


In [30]:
soft_prompt_ids = torch.tensor([0, 1, 2, 3, 4, 5]).to(device)
soft_prompt_ids.shape

torch.Size([6])

In [47]:
import tensorflow as tf

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

AttributeError: 'GPT2WithPromptTuning' object has no attribute 'transformer'

In [32]:

# Forward pass
with torch.no_grad():
    outputs = model(input_ids,soft_prompt_ids)
    logits = outputs.logits if hasattr(outputs, "logits") else outputs.last_hidden_state

# Get predicted token IDs
predicted_token_ids = torch.argmax(logits, dim=None).squeeze()


TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not tensorflow.python.framework.ops.EagerTensor

In [55]:
gpt2_embeddings.shape

torch.Size([1, 6, 768])

In [56]:
soft_prompt_embeds.shape


torch.Size([1, 6, 768])

In [44]:
model

GPT2WithPromptTuning(
  (gpt2_model): GPT2LMHeadModel(
    (transformer): GPT2Model(
      (wte): Embedding(50257, 768)
      (wpe): Embedding(1024, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-11): 12 x GPT2Block(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2Attention(
            (c_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (lm_head): Linear(in_features=768, out_features=50257, bias=False)
  )
  (soft_prompt_embeddi

In [91]:
foundational_outputs_prompt = get_outputs(model, input_ids, max_new_tokens=100)

print(tokenizer.batch_decode(foundational_outputs_prompt, skip_special_tokens=True))

NameError: name 'get_outputs' is not defined

In [43]:
from transformers import pipeline

In [76]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

AttributeError: 'GPT2WithPromptTuning' object has no attribute 'config'

In [78]:
q= "question:Which NFL team won Super Bowl 50?"
a = "answer : Denver Broncos"
prompt = f"Based on the answer to this question, respond with a complete phrase and no explanation {q} {a}"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200,temperature = 0)
result = pipe(prompt)
print(result)

AttributeError: 'GPT2WithPromptTuning' object has no attribute 'config'

In [None]:
from transformers import GPT2Tokenizer, TFGPT2Model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2Model.from_pretrained('gpt2')
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='tf')
output = model(encoded_input)
print(output.decode)