## Original Generative

In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from tqdm.notebook import tqdm

# Load pre-trained model and tokenizer
model_name = "distilgpt2"
print(f"Loading the {model_name} model and tokenizer...")
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Ensure the model is in evaluation mode
model.eval()

# Function to generate title based on tags
def generate_title(tags):
    # Prepare a prompt for the model using the aggregated tags
    prompt = f"Write a title for recipes that mainly involve {', '.join(tags[:-1])}, and {tags[-1]}."
    
    # Encode the prompt to tensor
    print("Encoding the prompt...")
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    
    # Generate text from the model using beam search
    print("Generating title from the model...")
    with torch.no_grad():
        output = model.generate(
            input_ids, 
            max_length=100, 
            num_return_sequences=1, 
            pad_token_id=tokenizer.eos_token_id,
            num_beams=5,
            early_stopping=True
        )
    
    # Decode the generated text
    print("Decoding the generated title...")
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    # Extract only the generated portion (excluding the prompt)
    generated_title = generated_text.replace(prompt, "").strip()
    return generated_title

# Example with tqdm progress bar
tags_list = [["chocolate", "cake", "easy"], ["chicken", "spicy", "grilled"], ["vegetarian", "healthy", "salad"]]
for tags in tqdm(tags_list, desc="Processing tags"):
    print(f"\nGenerating title for tags: {tags}")
    title = generate_title(tags)
    print(f"Generated Title: {title}")
    print("---------------------------------------------------")


Loading the distilgpt2 model and tokenizer...


Processing tags:   0%|          | 0/3 [00:00<?, ?it/s]


Generating title for tags: ['chocolate', 'cake', 'easy']
Encoding the prompt...
Generating title from the model...
Decoding the generated title...
Generated Title: 
---------------------------------------------------

Generating title for tags: ['chicken', 'spicy', 'grilled']
Encoding the prompt...
Generating title from the model...
Decoding the generated title...
Generated Title: 
---------------------------------------------------

Generating title for tags: ['vegetarian', 'healthy', 'salad']
Encoding the prompt...
Generating title from the model...
Decoding the generated title...
Generated Title: 
---------------------------------------------------


## Summarization

In [2]:
from transformers import BartForConditionalGeneration, BartTokenizer
from tqdm.notebook import tqdm

# Load pre-trained model and tokenizer
model_name = "sshleifer/distilbart-cnn-12-6"
print(f"Loading the {model_name} model and tokenizer...")
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Ensure the model is in evaluation mode
model.eval()

# Function to generate title based on tags
def generate_title(tags):
    # Prepare a prompt for the model using the aggregated tags
    prompt = f"Write a title for recipes that mainly involve {', '.join(tags[:-1])}, and {tags[-1]}."
    
    # Generate summary from the model
    print("Generating title from the model...")
    with torch.no_grad():
        input_ids = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).input_ids
        summary_ids = model.generate(input_ids, max_length=100, num_beams=5, length_penalty=2.0, early_stopping=True)

    
    # Decode the generated text
    print("Decoding the generated title...")
    generated_title = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    return generated_title

# Example with tqdm progress bar
tags_list = [["chocolate", "cake", "easy"], ["chicken", "spicy", "grilled"], ["vegetarian", "healthy", "salad"]]
for tags in tqdm(tags_list, desc="Processing tags"):
    print(f"\nGenerating title for tags: {tags}")
    title = generate_title(tags)
    print(f"Generated Title: {title}")
    print("---------------------------------------------------")


Loading the sshleifer/distilbart-cnn-12-6 model and tokenizer...


Processing tags:   0%|          | 0/3 [00:00<?, ?it/s]


Generating title for tags: ['chocolate', 'cake', 'easy']
Generating title from the model...
Decoding the generated title...
Generated Title:  Write a title for recipes that mainly involve chocolate, cake, and easy. Write one title for a recipe that mainly involves chocolate and cake. Do you know a recipe for easy chocolate recipes? Share it with us on Facebook and Twitter @mailonline.com for more information.
---------------------------------------------------

Generating title for tags: ['chicken', 'spicy', 'grilled']
Generating title from the model...
Decoding the generated title...
Generated Title:  Write a title for recipes that mainly involve chicken, spicy, and grilled. Include recipes that involve chicken and spicy, such as grilled chicken or spicy chicken. Use this title to help you create a new recipe for a new cookbook called "Chilli Grilled" The title is based on recipes that include chicken and grilled chicken.
---------------------------------------------------

Generatin

## Summarization Improved 

In [3]:
from transformers import BartForConditionalGeneration, BartTokenizer
from tqdm.notebook import tqdm
from collections import defaultdict

# Load pre-trained model and tokenizer
model_name = "sshleifer/distilbart-cnn-12-6"
print(f"Loading the {model_name} model and tokenizer...")
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Ensure the model is in evaluation mode
model.eval()

# Rank tags based on their scores
def rank_tags(tags_data):
    tag_scores = defaultdict(float)
    tag_counts = defaultdict(int)
    
    for tag, score in tags_data:
        tag_scores[tag] += score
        tag_counts[tag] += 1
        
    # Average the scores
    for tag in tag_scores:
        tag_scores[tag] /= tag_counts[tag]
        
    # Sort tags based on scores
    sorted_tags = sorted(tag_scores.keys(), key=lambda x: tag_scores[x], reverse=True)
    
    return sorted_tags[:3]  # Take top 3 tags

# Function to generate title based on tags
def generate_title(tags):
    top_tags = rank_tags(tags)
    
    # Refined prompt
    prompt = f"Summarize recipes with the following attributes in three words: {', '.join(top_tags[:-1])}, and {top_tags[-1]}."
    
    print("Generating title from the model...")
    with torch.no_grad():
        input_ids = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).input_ids
        summary_ids = model.generate(input_ids, 
                                     max_length=50,
                                     min_length=5,  # Explicitly set the minimum length
                                     num_beams=5, 
                                     length_penalty=2.0, 
                                     early_stopping=True,
                                     temperature=0.8,  # Adjust temperature
                                     top_k=20)  # Adjust top_k
    
    # Decode the generated text
    print("Decoding the generated title...")
    generated_title = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    return generated_title

# Example with tqdm progress bar
tags_data = [[('chocolate', 0.9), ('cake', 0.8), ('easy', 0.7)],
             [('chicken', 0.9), ('spicy', 0.8), ('grilled', 0.7)],
             [('vegetarian', 0.9), ('healthy', 0.8), ('salad', 0.7)]]

for tags in tqdm(tags_data, desc="Processing tags"):
    print(f"\nGenerating title for tags: {tags}")
    title = generate_title(tags)
    print(f"Generated Title: {title}")
    print("---------------------------------------------------")


Loading the sshleifer/distilbart-cnn-12-6 model and tokenizer...


Processing tags:   0%|          | 0/3 [00:00<?, ?it/s]


Generating title for tags: [('chocolate', 0.9), ('cake', 0.8), ('easy', 0.7)]
Generating title from the model...
Decoding the generated title...
Generated Title:  Summarize recipes with the following attributes in three words: chocolate, cake, and easy.
---------------------------------------------------

Generating title for tags: [('chicken', 0.9), ('spicy', 0.8), ('grilled', 0.7)]
Generating title from the model...
Decoding the generated title...
Generated Title:  Summarize recipes with the following attributes in three words: chicken, spicy, and grilled.
---------------------------------------------------

Generating title for tags: [('vegetarian', 0.9), ('healthy', 0.8), ('salad', 0.7)]
Generating title from the model...
Decoding the generated title...
Generated Title:  Summarize recipes with the following attributes in three words: vegetarian, healthy, and salad.
---------------------------------------------------


## 2

In [4]:
import pandas as pd
from transformers import BartForConditionalGeneration, BartTokenizer
from tqdm.notebook import tqdm
from collections import defaultdict

# Load pre-trained model and tokenizer
model_name = "sshleifer/distilbart-cnn-12-6"
print(f"Loading the {model_name} model and tokenizer...")
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Ensure the model is in evaluation mode
model.eval()

# Rank tags based on their scores
def rank_tags(tags_data):
    tag_scores = defaultdict(float)
    tag_counts = defaultdict(int)
    
    for tag, score in tags_data:
        tag_scores[tag] += score
        tag_counts[tag] += 1
        
    # Average the scores
    for tag in tag_scores:
        tag_scores[tag] /= tag_counts[tag]
        
    # Sort tags based on scores
    sorted_tags = sorted(tag_scores.keys(), key=lambda x: tag_scores[x], reverse=True)
    
    return sorted_tags[:3]  # Take top 3 tags

# Function to generate title based on tags
def generate_title(tags):
    top_tags = rank_tags(tags)
    
    print(f"Top Tags = {top_tags}")
    
    # Refined prompt
    #prompt = f"Summarize recipes with the following attributes in three words: {', '.join(top_tags[:-1])}, and {top_tags[-1]}."
    
    #1
    #prompt = f"Provide a three-word essence of recipes featuring: {', '.join(top_tags[:-1])}, and {top_tags[-1]}."
    #2
    #prompt = f"In just three words, capture the spirit of recipes that incorporate: {', '.join(top_tags[:-1])}, and {top_tags[-1]}."  
    #3
    #prompt = f"Give a three-word snapshot of dishes primarily using: {', '.join(top_tags[:-1])}, and {top_tags[-1]}."    
    #4
    #prompt = f"Encapsulate in three words the core of recipes centered around: {', '.join(top_tags[:-1])}, and {top_tags[-1]}."
    #5
    #prompt = f"Boil down the essence of recipes highlighting: {', '.join(top_tags[:-1])}, and {top_tags[-1]} into three words."
    #6
    #prompt = f"How would you describe in three words the dishes that are rich in: {', '.join(top_tags[:-1])}, and {top_tags[-1]}?"
    #7
    #prompt = f"If you had to pick three words to represent recipes with: {', '.join(top_tags[:-1])}, and {top_tags[-1]}, what would they be?"
    #8
    #prompt = f"Capture the main theme of recipes that include: {', '.join(top_tags[:-1])}, and {top_tags[-1]} in just three words."
    #9
    #prompt = f"Give a brief three-word overview of dishes that are all about: {', '.join(top_tags[:-1])}, and {top_tags[-1]}."  
    #10
    prompt = f"Distill the culinary essence of dishes starring: {', '.join(top_tags[:-1])}, and {top_tags[-1]} into a three-word snapshot."    
    
    
    print(prompt)
    
    
    
    
    
    print("Generating title from the model...")
    with torch.no_grad():
        input_ids = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).input_ids
        summary_ids = model.generate(input_ids, 
                                     max_length=50,
                                     min_length=5,
                                     num_beams=5, 
                                     length_penalty=2.0, 
                                     early_stopping=True,
                                     temperature=1.2,
                                     top_k=30,
                                     top_p=0.95)
    
    # Decode the generated text
    print("Decoding the generated title...")
    generated_title = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    return generated_title

# Load the dataset
df = pd.read_csv("examples.csv")

def process_row(row):
    # Extract classes and scores
    classes = row["Predicted Classes"].split(", ")
    scores = list(map(float, row["Raw Similarity Scores"].split(", ")))
    
    # Pair classes with scores
    tags_scores = list(zip(classes, scores))
    
    return tags_scores

# Process the DataFrame to get paired classes and scores
df["Tags"] = df.apply(process_row, axis=1)

# Randomly select 10 recipes
selected_recipes = df.sample(10)

# Aggregate tags from selected recipes
all_tags = []
for _, row in selected_recipes.iterrows():
    all_tags.extend(row["Tags"])

# Generate title using aggregated tags
print("\nGenerating title for aggregated tags from 10 recipes:")
print(f"All tags =  {all_tags}")
title = generate_title(all_tags)
print(f"Generated Title: {title}")
print("---------------------------------------------------")


Loading the sshleifer/distilbart-cnn-12-6 model and tokenizer...

Generating title for aggregated tags from 10 recipes:
All tags =  [('Pasta Primavera', 0.65137124), ('Homemade Pasta', 0.6164523), ('Pasta Salads', 0.60546464), ('Ravioli', 0.5996085), ('Chicken Salads', 0.5977807), ('Flan', 0.59315884), ('Chocolate Fudge', 0.53663886), ('Spanish Rice', 0.50429004), ('Cheese Fondue', 0.47348017), ('Chicken Cacciatore', 0.47016305), ('Pulled Pork', 0.6835002), ('Pork Tenderloin', 0.5930016), ('Pork Shoulder', 0.5721208), ('Ground Pork', 0.57033366), ('Pork Ribs', 0.5457189), ("Shepherd's Pie", 0.75314134), ('Sweet Potato Pie', 0.59521466), ('Mincemeat Pie', 0.59072185), ('Apple Pie', 0.5759548), ('Rhubarb Pie', 0.5733078), ('Rhubarb Pie', 0.6407865), ('Creme Brulee', 0.5217254), ("Shepherd's Pie", 0.5121667), ('Breakfast Casseroles', 0.50978476), ('Blueberry Pie', 0.5065649), ('Panini', 0.50231516), ('Casseroles', 0.4743449), ('Rice Casseroles', 0.4580714), ('Breakfast Casseroles', 0.4566

## 3

In [5]:
import pandas as pd
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from tqdm.notebook import tqdm
from collections import defaultdict

# Load GPT-2 model and tokenizer
model_name = "gpt2-medium"
print(f"Loading the {model_name} model and tokenizer...")
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Ensure the model is in evaluation mode
model.eval()

# Rank tags based on their scores
def rank_tags(tags_data):
    tag_scores = defaultdict(float)
    tag_counts = defaultdict(int)
    
    for tag, score in tags_data:
        tag_scores[tag] += score
        tag_counts[tag] += 1
        
    # Average the scores
    for tag in tag_scores:
        tag_scores[tag] /= tag_counts[tag]
        
    # Sort tags based on scores
    sorted_tags = sorted(tag_scores.keys(), key=lambda x: tag_scores[x], reverse=True)
    
    return sorted_tags[:3]  # Take top 3 tags

# Function to generate title based on tags
def generate_title(tags):
    top_tags = rank_tags(tags)
    
    print(f"Top Tags = {top_tags}")
    
    # Refined prompt
    prompt = f"Provide a three-word essence of recipes featuring: {', '.join(top_tags[:-1])}, and {top_tags[-1]}."
    print(prompt)
    
    print("Generating title from the model...")
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    with torch.no_grad():
        output = model.generate(input_ids, 
                                max_length=100, 
                                temperature=1.2, 
                                top_k=30, 
                                top_p=0.95, 
                                num_return_sequences=1)
    
    # Decode the generated text
    print("Decoding the generated title...")
    generated_title_full = tokenizer.decode(output[0], skip_special_tokens=True)
    generated_title = generated_title_full[len(prompt):].strip()
    
    return generated_title

# Load the dataset
df = pd.read_csv("examples.csv")

def process_row(row):
    # Extract classes and scores
    classes = row["Predicted Classes"].split(", ")
    scores = list(map(float, row["Raw Similarity Scores"].split(", ")))
    
    # Pair classes with scores
    tags_scores = list(zip(classes, scores))
    
    return tags_scores

# Process the DataFrame to get paired classes and scores
df["Tags"] = df.apply(process_row, axis=1)

# Randomly select 10 recipes
selected_recipes = df.sample(10)

# Aggregate tags from selected recipes
all_tags = []
for _, row in selected_recipes.iterrows():
    all_tags.extend(row["Tags"])

# Generate title using aggregated tags
print("\nGenerating title for aggregated tags from 10 recipes:")
print(f"All tags =  {all_tags}")
title = generate_title(all_tags)
print(f"Generated Title: {title}")
print("---------------------------------------------------")

Loading the gpt2-medium model and tokenizer...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Generating title for aggregated tags from 10 recipes:
All tags =  [('Turkey Brines', 0.47434875), ('Beef Tenderloin', 0.4702204), ('Filet Mignon', 0.46177846), ('Pork Tenderloin', 0.44696778), ('Beef Stews', 0.41616905), ('Brownies', 0.65157694), ('Peanut Butter Cookies', 0.5190474), ('Chocolate Fudge', 0.51230067), ('Chocolate Cakes', 0.50989646), ('Breakfast Casseroles', 0.48654664), ('Quinoa', 0.52076924), ('Chilaquiles', 0.51118153), ('Fruit Salads', 0.49696106), ('Cranberry Sauces', 0.45759338), ('Key Lime Pie', 0.4526069), ('Chicken Salads', 0.5600661), ('Chicken and Dumplings', 0.5522183), ('Buffalo Chicken Dips', 0.5382873), ('Chicken Noodle Soups', 0.5198916), ('Ground Chicken', 0.5193598), ('Spanish Rice', 0.6158781), ('Rice Casseroles', 0.5451129), ('Shrimp and Grits', 0.5381528), ('Fried Rice', 0.533735), ('Rice Pilaf', 0.53004956), ('Apple Pie', 0.6685284), ('Slab Pie', 0.6171397), ('Pecan Pie', 0.59832394), ('Pie Crusts', 0.59374917), ('Rhubarb Pie', 0.5916555), ('Chicke

## 4

In [6]:
import pandas as pd
from transformers import BartForConditionalGeneration, BartTokenizer
from tqdm.notebook import tqdm
from collections import defaultdict

# Load pre-trained model and tokenizer
model_name = "facebook/bart-large-cnn"
print(f"Loading the {model_name} model and tokenizer...")
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Ensure the model is in evaluation mode
model.eval()

def rank_tags(tags_data):
    tag_scores = defaultdict(float)
    tag_counts = defaultdict(int)
    
    for tag, score in tags_data:
        tag_scores[tag] += score
        tag_counts[tag] += 1
        
    for tag in tag_scores:
        tag_scores[tag] /= tag_counts[tag]
        
    sorted_tags = sorted(tag_scores.keys(), key=lambda x: tag_scores[x], reverse=True)
    
    return sorted_tags[:3]

def generate_title(tags):
    top_tags = rank_tags(tags)
    print(f"Top Tags = {top_tags}")
    prompt = f"Recipes highlighting: {', '.join(top_tags[:-1])}, and {top_tags[-1]}. Describe in a few words."
    print(prompt)
    
    with torch.no_grad():
        input_ids = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).input_ids
        summary_ids = model.generate(input_ids, 
                                     max_length=100,
                                     min_length=10,
                                     num_beams=7, 
                                     length_penalty=1.5, 
                                     early_stopping=True,
                                     temperature=0.9,
                                     top_k=30,
                                     top_p=0.95)
    
    generated_title = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return generated_title

df = pd.read_csv("examples.csv")

def process_row(row):
    classes = row["Predicted Classes"].split(", ")
    scores = list(map(float, row["Raw Similarity Scores"].split(", ")))
    tags_scores = list(zip(classes, scores))
    return tags_scores

df["Tags"] = df.apply(process_row, axis=1)
selected_recipes = df.sample(10)

all_tags = []
for _, row in selected_recipes.iterrows():
    all_tags.extend(row["Tags"])

print("\nGenerating title for aggregated tags from 10 recipes:")
print(f"All tags =  {all_tags}")
title = generate_title(all_tags)
print(f"Generated Title: {title}")
print("---------------------------------------------------")


Loading the facebook/bart-large-cnn model and tokenizer...

Generating title for aggregated tags from 10 recipes:
All tags =  [('Pot Roast', 0.5545994), ('Beef Stews', 0.50860476), ('Beef Recipes', 0.5063336), ('Ground Turkey', 0.4858606), ('Stuffed Peppers', 0.484233), ('Truffles', 0.6374452), ('Chocolate Cakes', 0.44036984), ('Chocolate Fudge', 0.43471563), ('Coffee Cakes', 0.40213692), ('Baked Beans', 0.39306107), ('Chocolate Fudge', 0.5347718), ('Creme Brulee', 0.5260148), ('Cheese Fondue', 0.5215686), ('Key Lime Pie', 0.50871575), ('Truffles', 0.5070548), ('Cheesecakes', 0.5782757), ('Strawberry Shortcakes', 0.51190835), ('Cheese Fondue', 0.5086514), ('Strawberry Pie', 0.48128277), ('Cherry Pie', 0.4651184), ('Pancit', 0.5637951), ('Lasagna', 0.55923486), ('Pasta Carbonara', 0.5531048), ('Fettuccini', 0.54990935), ('Panini', 0.5405712), ('Pasta Salads', 0.62125194), ('Chicken Salads', 0.587828), ('Broccoli Salads', 0.5715915), ('Fruit Salads', 0.5669699), ('Potato Salads', 0.55193