In [2]:
pip install datasets



In [3]:
pip install rouge_score



Getting the Libraries and the Data

In [4]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

data = pd.read_json("hf://datasets/toughdata/quora-question-answer-dataset/Quora-QuAD.jsonl", lines=True)

In [5]:
data.head(2)

Unnamed: 0,question,answer
0,Why whenever I get in the shower my girlfriend...,Isn’t it awful? You would swear that there was...
1,"What is a proxy, and how can I use one?",A proxy server is a system or router that prov...


In [6]:
# df.sample(30).to_csv("quora_data.csv")

In [7]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download NLTK data
nltk.download('stopwords')
nltk.download('wordnet')


# Display basic information about the dataset
data.info()
# print(data.head())




<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56402 entries, 0 to 56401
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   question  56402 non-null  object
 1   answer    56402 non-null  object
dtypes: object(2)
memory usage: 881.4+ KB


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Data Exploration, Cleaning, and Preprocessing:

In [8]:
# Remove any irrelevant information, e.g., unnamed columns
data = data.loc[:, ~data.columns.str.contains('^Unnamed')]

# Drop rows with missing values
data.dropna(inplace=True)

# Display basic statistics of the dataset


In [9]:
data.describe()


Unnamed: 0,question,answer
count,56402,56402
unique,3234,54726
top,Would Hillary Clinton have made a better Presi...,No\n
freq,106,89


In [10]:
#missing value check
data.isnull().sum()

question    0
answer      0
dtype: int64

In [11]:
# Initialize stop words and lemmatizer
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Function for text preprocessing
def preprocess_text(text):
    # Remove special characters and digits
    text = re.sub(r'\W', ' ', str(text))
    text = re.sub(r'\d', ' ', text)

    # Tokenize the text
    tokens = text.split()

    # Remove stop words and apply lemmatization
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.lower() not in stop_words]

    # Join tokens back to string
    text = ' '.join(tokens)
    return text




In [12]:
# Apply preprocessing to questions and answers
data['question'] = data['question'].apply(preprocess_text)
data['answer'] = data['answer'].apply(preprocess_text)



In [13]:
# Display the cleaned data
data.head(4)

Unnamed: 0,question,answer
0,whenever get shower girlfriend want join,awful would swear enough hot water go around
1,proxy use one,proxy server system router provides gateway us...
2,song lyric someone left cake rain,MacArthur Park
3,owner adult website called http matureanallove...,let apps liers put add site Like one say free ...


Model Selection and Evaluation:

T-5small model

In [14]:
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_metric

# Define a function to test models
def test_model(model_name, test_data):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    nlp_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

    predictions = []
    references = []
    for index, row in test_data.iterrows():
        question = row['question']
        reference = row['answer']
        generated_answer = nlp_pipeline(question)[0]['generated_text']
        predictions.append(generated_answer)
        references.append(reference)

    return predictions, references



In [15]:

# Load a sample of the test data
test_data = data.sample(n=1200)

# Test a specific model
model_name = "t5-small"
predictions, references = test_model(model_name, test_data)

In [16]:
# # Assuming predictions and references are lists of strings that represent integers
# predictions = [int(p) for p in predictions]
# references = [int(r) for r in references]
# print(type(predictions))
# print(type(references))

In [17]:
# Tokenize predictions and references for BLEU score
predictions_tokenized = [pred.split() for pred in predictions]
references_tokenized = [[ref.split()] for ref in references]

In [18]:
from datasets import load_metric

# Initialize metrics
rouge = load_metric("rouge",trust_remote_code=True)
bleu = load_metric("bleu",trust_remote_code=True)
f1_metric = load_metric("f1",trust_remote_code=True)

# Calculate ROUGE score
rouge_score = rouge.compute(predictions=predictions, references=references)

# Calculate BLEU score
bleu_score = bleu.compute(predictions=predictions_tokenized, references=references_tokenized)

def calculate_f1(predictions, references):
    f1_scores = []
    for pred, ref in zip(predictions, references):
        pred_tokens = set(pred.split())
        ref_tokens = set(ref.split())
        common_tokens = pred_tokens.intersection(ref_tokens)

        if len(common_tokens) == 0:
            f1_scores.append(0)
        else:
            precision = len(common_tokens) / len(pred_tokens)
            recall = len(common_tokens) / len(ref_tokens)
            f1_score = 2 * (precision * recall) / (precision * recall)
            f1_scores.append(f1_score)

    return sum(f1_scores) / len(f1_scores)

# Calculate F1 score
f1_score = calculate_f1(predictions, references)


# Display evaluation results
print(f"ROUGE score: {rouge_score}")
print(f"BLEU score: {bleu_score}")
print(f"F1 score: {f1_score}")


ROUGE score: {'rouge1': AggregateScore(low=Score(precision=0.2686726850673228, recall=0.0463463715089995, fmeasure=0.0616426823957022), mid=Score(precision=0.2869123015626015, recall=0.051771838388229637, fmeasure=0.06654352526576389), high=Score(precision=0.304836324292502, recall=0.05750448630737588, fmeasure=0.07200787525091597)), 'rouge2': AggregateScore(low=Score(precision=0.07712094299695411, recall=0.01044635785631799, fmeasure=0.013873384546633739), mid=Score(precision=0.088681345071051, recall=0.013037337420531835, fmeasure=0.01655862589297207), high=Score(precision=0.10087119369600989, recall=0.016660726309139447, fmeasure=0.01972586816916301)), 'rougeL': AggregateScore(low=Score(precision=0.2480564759602034, recall=0.0426275641315048, fmeasure=0.05623833005613463), mid=Score(precision=0.2649587618944239, recall=0.04762451861707248, fmeasure=0.06101576829730182), high=Score(precision=0.28242001282753265, recall=0.05401746231028957, fmeasure=0.06650081062578458)), 'rougeLsum':

In [19]:
import plotly.express as px
# Create a dataframe for the model performance metrics
performance_data = {
    'metric': ['ROUGE', 'BLEU', 'F1'],
    'score': [rouge_score['rouge1'].mid.fmeasure, bleu_score['bleu'], f1_score]
}

performance_df = pd.DataFrame(performance_data)

# Plot the model performance metrics
fig1 = px.bar(performance_df, x='metric', y='score', title='Model Performance Metrics')
fig1.show()


BERT model

In [20]:

# Function to test models and get predictions and references
def test_model_bert(model_name, test_data):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    nlp_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

    predictions = []
    references = []
    for index, row in test_data.iterrows():
        question = row['question']
        reference = row['answer']
        generated_answer = nlp_pipeline(question)[0]['generated_text']
        predictions.append(generated_answer)
        references.append(reference)

    return predictions, references



In [34]:
test_data = data.sample(n=1200)

# Test a specific model-
model_name = "google/bert2bert_L-24_wmt_en_de"
predictions_b, references_b = test_model_bert(model_name, test_data)


In [35]:
# Tokenize predictions and references for BLEU score
predictions_tokenized_b = [pred.split() for pred in predictions_b]
references_tokenized_b = [[ref.split()] for ref in references_b]

# Calculate ROUGE score
rouge_score_b = rouge.compute(predictions=predictions_b, references=references_b)

# Calculate BLEU score
bleu_score_b = bleu.compute(predictions=predictions_tokenized_b, references=references_tokenized_b)

In [36]:
def calculate_f1_b(predictions, references):
    f1_scores = []
    for pred, ref in zip(predictions, references):
        pred_tokens = set(pred.split())
        ref_tokens = set(ref.split())
        common_tokens = pred_tokens.intersection(ref_tokens)

        if len(common_tokens) == 0:
            f1_scores.append(0)
        else:
            precision = len(common_tokens) / len(pred_tokens)
            recall = len(common_tokens) / len(ref_tokens)
            f1_score = 2 * (precision * recall) / (precision + recall)
            f1_scores.append(f1_score)

    return sum(f1_scores) / len(f1_scores)

In [37]:
# Calculate F1 score
f1_score_b = calculate_f1_b(predictions_b, references_b)

# Create a dataframe for the model performance metrics
performance_data_b = {
    'metric': ['ROUGE', 'BLEU', 'F1'],
    'score': [rouge_score_b['rouge1'].mid.fmeasure, bleu_score_b['bleu'], f1_score_b]
}
performance_df_b = pd.DataFrame(performance_data_b)

# Plot the model performance metrics
fig5b = px.bar(performance_df_b, x='metric', y='score', title='Model Performance Metrics')
fig5b.show()

In [26]:
# Load the GPT-2 model and tokenizer
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Hyperparameters to tune
# max_lengths = [50, 100]
# temperatures = [0.7, 1.0]
# top_ks = [30, 50]
# top_ps = [0.8, 0.9]

# Ensure truncation and padding are explicitly set
tokenizer.padding_side = "left"  # To pad on the left
tokenizer.pad_token = tokenizer.eos_token  # Set padding token to eos token

# Function to test models and get predictions and references
def test_model_g(test_data):
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=50,
        temperature=0.7,
        top_k=50,
        top_p=0.9

    )

    predictions = []
    references = []
    for index, row in test_data.iterrows():
        question = row['question']
        reference = row['answer']
        # Generate answer using GPT-2
        generated_answer = generator(question, num_return_sequences=1)[0]['generated_text']
        predictions.append(generated_answer)
        references.append(reference)

    return predictions, references



# # Test the GPT-2 model
# predictions_g, references_g = test_model_g(test_data)



In [27]:
# Load a sample of the test data
test_data = data.sample(n=1200)
predictions_g, references_g = test_model_g(test_data)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [28]:
# Tokenize predictions and references for BLEU score
predictions_tokenized_g = [pred.split() for pred in predictions_g]
references_tokenized_g = [[ref.split()] for ref in references_g]

# Calculate ROUGE score
rouge_score_g = rouge.compute(predictions=predictions_g, references=references_g)

# Calculate BLEU score
bleu_score_g = bleu.compute(predictions=predictions_tokenized_g, references=references_tokenized_g)

# Function to calculate F1 score based on word overlap
def calculate_f1_g(predictions, references):
    f1_scores = []
    for pred, ref in zip(predictions, references):
        pred_tokens = set(pred.split())
        ref_tokens = set(ref.split())
        common_tokens = pred_tokens.intersection(ref_tokens)

        if len(common_tokens) == 0:
            f1_scores.append(0)
        else:
            precision = len(common_tokens) / len(pred_tokens)
            recall = len(common_tokens) / len(ref_tokens)
            f1_score = 2 * (precision * recall) / (precision + recall)
            f1_scores.append(f1_score)

    return sum(f1_scores) / len(f1_scores)


In [29]:
# Calculate F1 score
f1_score_g = calculate_f1_g(predictions_g, references_g)

# Create a dataframe for the model performance metrics
performance_data_g = {
    'metric': ['ROUGE', 'BLEU', 'F1'],
    'score': [rouge_score_g['rouge1'].mid.fmeasure, bleu_score_g['bleu'], f1_score_g]
}
performance_df_g = pd.DataFrame(performance_data_g)

# Plot the model performance metrics
fig5_g = px.bar(performance_df_g, x='metric', y='score', title='Model Performance Metrics')
fig5_g.show()


Data Visualization

In [30]:
import plotly.express as px
import plotly.graph_objects as go

# Calculate the length of questions and answers
data['question_length'] = data['question'].apply(lambda x: len(x.split()))
data['answer_length'] = data['answer'].apply(lambda x: len(x.split()))

# Visualize the distribution of question lengths
fig1 = px.histogram(data, x='question_length', nbins=50, title='Distribution of Question Lengths')
fig1.show()

# Visualize the distribution of answer lengths
fig2 = px.histogram(data, x='answer_length', nbins=50, title='Distribution of Answer Lengths')
fig2.show()


In [31]:
from collections import Counter

# Get the most common words in questions
question_words = ' '.join(data['question']).split()
question_word_freq = Counter(question_words)
most_common_question_words = question_word_freq.most_common(20)

# Get the most common words in answers
answer_words = ' '.join(data['answer']).split()
answer_word_freq = Counter(answer_words)
most_common_answer_words = answer_word_freq.most_common(20)

# Convert to dataframes for plotting
question_word_df = pd.DataFrame(most_common_question_words, columns=['word', 'count'])
answer_word_df = pd.DataFrame(most_common_answer_words, columns=['word', 'count'])

# Plot most common words in questions
fig3 = px.bar(question_word_df, x='word', y='count', title='Most Common Words in Questions')
fig3.show()

# Plot most common words in answers
fig4 = px.bar(answer_word_df, x='word', y='count', title='Most Common Words in Answers')
fig4.show()


Getting Insights for t5 model

In [32]:
# Insights extraction
def extract_insights(data, performance_data):
    insights = {}

    # 1. Data Distribution Insights
    insights['data_distribution'] = {}
    insights['data_distribution']['question_length'] = {
        'mean': data['question_length'].mean(),
        'median': data['question_length'].median(),
        'mode': data['question_length'].mode()[0]
    }
    insights['data_distribution']['answer_length'] = {
        'mean': data['answer_length'].mean(),
        'median': data['answer_length'].median(),
        'mode': data['answer_length'].mode()[0]
    }

    # 2. Feature Importance Insights
    insights['feature_importance'] = {}
    insights['feature_importance']['top_question_words'] = question_word_df.head(10).to_dict('records')
    insights['feature_importance']['top_answer_words'] = answer_word_df.head(10).to_dict('records')

    # 3. Model Performance Insights
    insights['model_performance'] = {}
    insights['model_performance']['metrics'] = performance_data.to_dict('records')

    return insights

# Generate insights from the analysis
insights = extract_insights(data, performance_df)

# Display insights
import pprint
pprint.pprint(insights)



{'data_distribution': {'answer_length': {'mean': 88.60994290982589,
                                         'median': 39.0,
                                         'mode': 2},
                       'question_length': {'mean': 7.046718201482217,
                                           'median': 5.0,
                                           'mode': 4}},
 'feature_importance': {'top_answer_words': [{'count': 27124, 'word': 'http'},
                                             {'count': 25152, 'word': 'URL'},
                                             {'count': 25126, 'word': 'one'},
                                             {'count': 24983,
                                              'word': 'LINKED_TEXT'},
                                             {'count': 23062, 'word': 'time'},
                                             {'count': 21475, 'word': 'like'},
                                             {'count': 19223, 'word': 'com'},
                                   

Setting up suggestions

In [33]:
# Suggesting Improvements
def suggest_improvements(insights):
    suggestions = []

    # Based on Data Distribution Insights
    q_len_mean = insights['data_distribution']['question_length']['mean']
    q_len_median = insights['data_distribution']['question_length']['median']
    a_len_mean = insights['data_distribution']['answer_length']['mean']
    a_len_median = insights['data_distribution']['answer_length']['median']

    if q_len_mean > 20:
        suggestions.append("Consider simplifying questions for better model understanding.")
    if a_len_mean > 30:
        suggestions.append("Consider shortening answers to improve response time and relevance.")

    # Based on Feature Importance Insights
    top_question_words = [word['word'] for word in insights['feature_importance']['top_question_words']]
    top_answer_words = [word['word'] for word in insights['feature_importance']['top_answer_words']]

    common_words = set(top_question_words).intersection(set(top_answer_words))
    if common_words:
        suggestions.append(f"Common words found in both questions and answers: {', '.join(common_words)}. Consider focusing on these words for better context understanding.")

    # Based on Model Performance Insights
    rouge_score = next(item for item in insights['model_performance']['metrics'] if item["metric"] == "ROUGE")['score']
    bleu_score = next(item for item in insights['model_performance']['metrics'] if item["metric"] == "BLEU")['score']
    f1_score = next(item for item in insights['model_performance']['metrics'] if item["metric"] == "F1")['score']

    if rouge_score < 0.5:
        suggestions.append("Improve ROUGE score by fine-tuning the model with more contextually relevant data.")
    if bleu_score < 0.5:
        suggestions.append("Improve BLEU score by augmenting the training data with varied phrasing.")
    if f1_score < 0.5:
        suggestions.append("Improve F1 score by balancing the dataset and enhancing training examples.")

    return suggestions

# Generate suggestions for improvements
suggestions = suggest_improvements(insights)

# Display suggestions
pprint.pprint(suggestions)


['Consider shortening answers to improve response time and relevance.',
 'Common words found in both questions and answers: people, like, get, one, '
 'would. Consider focusing on these words for better context understanding.',
 'Improve ROUGE score by fine-tuning the model with more contextually relevant '
 'data.',
 'Improve BLEU score by augmenting the training data with varied phrasing.']


Using BERT model

GPT-2 Hyperpameter tuning model
**bold text**

In [None]:
# Load the GPT-2 model and tokenizer
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Hyperparameters to tune
# max_lengths = [50, 100]
# temperatures = [0.7, 1.0]
# top_ks = [30, 50]
# top_ps = [0.8, 0.9]


# Function to test models and get predictions and references
def test_model_g(test_data, max_length, temperature, top_k, top_p):
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=50,
        temperature=0.7,
        top_k=50,
        top_p=0.9

    )

    predictions = []
    references = []
    for index, row in test_data.iterrows():
        question = row['question']
        reference = row['answer']
        # Generate answer using GPT-2
        generated_answer = generator(question, num_return_sequences=1)[0]['generated_text']
        predictions.append(generated_answer)
        references.append(reference)

    return predictions, references

# Load a sample of the test data
test_data = data.sample(n=100)

# # Test the GPT-2 model
# predictions_g, references_g = test_model_g(test_data)



Using hyperparameter Tuning

In [None]:
def calculate_f1_g(predictions, references):
    f1_scores = []
    for pred, ref in zip(predictions, references):
        pred_tokens = set(pred.split())
        ref_tokens = set(ref.split())
        common_tokens = pred_tokens.intersection(ref_tokens)

        if len(common_tokens) == 0:
            f1_scores.append(0)
        else:
            precision = len(common_tokens) / len(pred_tokens)
            recall = len(common_tokens) / len(ref_tokens)
            f1_score = 2 * (precision * recall) / (precision + recall)
            f1_scores.append(f1_score)

    return sum(f1_scores) / len(f1_scores)
# Perform hyperparameter tuning
best_scores = {"rouge": 0, "bleu": 0, "f1": 0}
best_params = {}
for max_length in max_lengths:
    for temperature in temperatures:
        for top_k in top_ks:
            for top_p in top_ps:
                print(f"Testing max_length={max_length}, temperature={temperature}, top_k={top_k}, top_p={top_p}")
                predictions_g, references_g = test_model_g(test_data, max_length, temperature, top_k, top_p)

                # Tokenize predictions and references for BLEU score
                predictions_tokenized_g = [pred.split() for pred in predictions_g]
                references_tokenized_g = [[ref.split()] for ref in references_g]

                # Calculate ROUGE score
                rouge_score = rouge.compute(predictions=predictions_g, references=references_g)

                # Calculate BLEU score
                bleu_score = bleu.compute(predictions=predictions_tokenized_g, references=references_tokenized_g)

                # Calculate F1 score
                f1_score = calculate_f1_g(predictions_g, references_g)

                # Update best scores and params if improved
                if rouge_score['rouge1'].mid.fmeasure > best_scores["rouge"]:
                    best_scores["rouge"] = rouge_score['rouge1'].mid.fmeasure
                    best_params["rouge"] = (max_length, temperature, top_k, top_p)

                if bleu_score['bleu'] > best_scores["bleu"]:
                    best_scores["bleu"] = bleu_score['bleu']
                    best_params["bleu"] = (max_length, temperature, top_k, top_p)

                if f1_score > best_scores["f1"]:
                    best_scores["f1"] = f1_score
                    best_params["f1"] = (max_length, temperature, top_k, top_p)

# Print the best hyperparameters and corresponding scores
print("Best ROUGE score:", best_scores["rouge"], "with params:", best_params["rouge"])
print("Best BLEU score:", best_scores["bleu"], "with params:", best_params["bleu"])
print("Best F1 score:", best_scores["f1"], "with params:", best_params["f1"])




Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Testing max_length=50, temperature=0.7, top_k=30, top_p=0.8


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=50, temperature=0.7, top_k=30, top_p=0.9


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=50, temperature=0.7, top_k=50, top_p=0.8


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=50, temperature=0.7, top_k=50, top_p=0.9


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=50, temperature=1.0, top_k=30, top_p=0.8


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=50, temperature=1.0, top_k=30, top_p=0.9


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=50, temperature=1.0, top_k=50, top_p=0.8


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=50, temperature=1.0, top_k=50, top_p=0.9


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=100, temperature=0.7, top_k=30, top_p=0.8


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=100, temperature=0.7, top_k=30, top_p=0.9


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=100, temperature=0.7, top_k=50, top_p=0.8


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=100, temperature=0.7, top_k=50, top_p=0.9


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=100, temperature=1.0, top_k=30, top_p=0.8


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=100, temperature=1.0, top_k=30, top_p=0.9


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=100, temperature=1.0, top_k=50, top_p=0.8


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Testing max_length=100, temperature=1.0, top_k=50, top_p=0.9


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Best ROUGE score: 0.0666785246180705 with params: (50, 0.7, 50, 0.9)
Best BLEU score: 0.007893788067917563 with params: (100, 0.7, 30, 0.9)
Best F1 score: 0.06595812534733315 with params: (50, 0.7, 50, 0.9)
