In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict
from transformers import BartTokenizer, BartForConditionalGeneration, DataCollatorForSeq2Seq, Trainer, TrainingArguments

# Load your data
df = pd.read_csv("kaggle.csv")
df.dropna(subset=['company', 'description', 'slogan'], inplace=True)

# Prepare the dataset by adding a combined text column and labels
df['text'] = df['company'] + " described as: " + df['description']
df['labels'] = df['slogan']

# Split data into training and evaluation sets
train_df, eval_df = train_test_split(df, test_size=0.1)

# Create datasets
train_dataset = Dataset.from_pandas(train_df[['text', 'labels']])
eval_dataset = Dataset.from_pandas(eval_df[['text', 'labels']])

In [2]:
# Initialize tokenizer and model
model_name = "facebook/bart-large"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Define tokenization and encoding function for datasets
def tokenize_function(examples):
    model_inputs = tokenizer(examples['text'], max_length=512, truncation=True, padding="max_length")
    # Tokenize the labels as well
    labels = tokenizer(examples['labels'], max_length=512, truncation=True, padding="max_length").input_ids
    model_inputs['labels'] = labels
    return model_inputs

# Apply function to datasets
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_eval_dataset = eval_dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/4509 [00:00<?, ? examples/s]

Map:   0%|          | 0/501 [00:00<?, ? examples/s]

In [3]:
# Training arguments
training_args = TrainingArguments(
    output_dir='./bartresults',
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=3,
    load_best_model_at_end=True,
    logging_dir='./logs',
    logging_steps=50,
    eval_steps=50,
    save_steps=50,
    warmup_steps=500,
    prediction_loss_only=True,
    fp16=True  # Assuming CUDA and compatible GPU
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model)
)

# Train the model
trainer.train()

# Save the model and tokenizer
model.save_pretrained('./final_bart_model')
tokenizer.save_pretrained('./final_bart_model')

  0%|          | 0/1692 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


{'loss': 15.1772, 'grad_norm': 15.295199394226074, 'learning_rate': 4.5e-06, 'epoch': 0.09}
{'loss': 9.6389, 'grad_norm': 76.83898162841797, 'learning_rate': 9.5e-06, 'epoch': 0.18}
{'loss': 6.1698, 'grad_norm': 40.09737014770508, 'learning_rate': 1.45e-05, 'epoch': 0.27}
{'loss': 4.3322, 'grad_norm': 42.46813201904297, 'learning_rate': 1.9500000000000003e-05, 'epoch': 0.35}
{'loss': 1.8672, 'grad_norm': 21.56826400756836, 'learning_rate': 2.45e-05, 'epoch': 0.44}
{'loss': 0.2469, 'grad_norm': 1.3921730518341064, 'learning_rate': 2.95e-05, 'epoch': 0.53}
{'loss': 0.108, 'grad_norm': 0.9650602340698242, 'learning_rate': 3.45e-05, 'epoch': 0.62}
{'loss': 0.0817, 'grad_norm': 0.5644066333770752, 'learning_rate': 3.9500000000000005e-05, 'epoch': 0.71}
{'loss': 0.0749, 'grad_norm': 0.4798884093761444, 'learning_rate': 4.4500000000000004e-05, 'epoch': 0.8}
{'loss': 0.0756, 'grad_norm': 0.7262111306190491, 'learning_rate': 4.9500000000000004e-05, 'epoch': 0.89}
{'loss': 0.0713, 'grad_norm': 0

  0%|          | 0/63 [00:00<?, ?it/s]

Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'eval_loss': 0.06451698392629623, 'eval_runtime': 99.8792, 'eval_samples_per_second': 5.016, 'eval_steps_per_second': 0.631, 'epoch': 1.0}
{'loss': 0.0666, 'grad_norm': 0.4116945266723633, 'learning_rate': 4.6015100671140944e-05, 'epoch': 1.06}
{'loss': 0.0615, 'grad_norm': 0.3676556348800659, 'learning_rate': 4.391778523489933e-05, 'epoch': 1.15}
{'loss': 0.062, 'grad_norm': 0.4194311499595642, 'learning_rate': 4.1820469798657716e-05, 'epoch': 1.24}
{'loss': 0.0612, 'grad_norm': 0.3710213303565979, 'learning_rate': 3.972315436241611e-05, 'epoch': 1.33}
{'loss': 0.0631, 'grad_norm': 0.407609760761261, 'learning_rate': 3.76258389261745e-05, 'epoch': 1.42}
{'loss': 0.0604, 'grad_norm': 0.37391895055770874, 'learning_rate': 3.552852348993288e-05, 'epoch': 1.51}
{'loss': 0.0616, 'grad_norm': 0.7479476928710938, 'learning_rate': 3.343120805369128e-05, 'epoch': 1.6}
{'loss': 0.0591, 'grad_norm': 0.3569755554199219, 'learning_rate': 3.133389261744967e-05, 'epoch': 1.68}
{'loss': 0.0641, 'gra

  0%|          | 0/63 [00:00<?, ?it/s]

Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'eval_loss': 0.060410358011722565, 'eval_runtime': 99.8441, 'eval_samples_per_second': 5.018, 'eval_steps_per_second': 0.631, 'epoch': 2.0}
{'loss': 0.0485, 'grad_norm': 0.4170510470867157, 'learning_rate': 2.2944630872483224e-05, 'epoch': 2.04}
{'loss': 0.0439, 'grad_norm': 0.6888560652732849, 'learning_rate': 2.0847315436241613e-05, 'epoch': 2.13}
{'loss': 0.0447, 'grad_norm': 0.4819971024990082, 'learning_rate': 1.8750000000000002e-05, 'epoch': 2.22}
{'loss': 0.045, 'grad_norm': 0.48874902725219727, 'learning_rate': 1.6652684563758387e-05, 'epoch': 2.3}
{'loss': 0.0454, 'grad_norm': 0.28788232803344727, 'learning_rate': 1.455536912751678e-05, 'epoch': 2.39}
{'loss': 0.0444, 'grad_norm': 0.2739470899105072, 'learning_rate': 1.2458053691275167e-05, 'epoch': 2.48}
{'loss': 0.0443, 'grad_norm': 0.3478606939315796, 'learning_rate': 1.0360738255033558e-05, 'epoch': 2.57}
{'loss': 0.0568, 'grad_norm': 0.2987494468688965, 'learning_rate': 8.263422818791947e-06, 'epoch': 2.66}
{'loss': 0.04

  0%|          | 0/63 [00:00<?, ?it/s]

Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'eval_loss': 0.06017742678523064, 'eval_runtime': 109.1397, 'eval_samples_per_second': 4.59, 'eval_steps_per_second': 0.577, 'epoch': 3.0}


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


{'train_runtime': 19500.1913, 'train_samples_per_second': 0.694, 'train_steps_per_second': 0.087, 'train_loss': 1.1543080033844526, 'epoch': 3.0}


('./final_bart_model\\tokenizer_config.json',
 './final_bart_model\\special_tokens_map.json',
 './final_bart_model\\vocab.json',
 './final_bart_model\\merges.txt',
 './final_bart_model\\added_tokens.json')

In [7]:
from transformers import BartTokenizer, BartForConditionalGeneration
import torch

# Function to load the model and tokenizer
def load_model(model_path):
    tokenizer = BartTokenizer.from_pretrained(model_path)
    model = BartForConditionalGeneration.from_pretrained(model_path)
    return model, tokenizer

# Function to generate a slogan
def generate_slogan(model, tokenizer, company_name, description, device='cuda'):
    # Combine company name and description into a single input text
    input_text = f"{company_name} described as: {description}"
    
    # Encode the input text
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    
    # Generate output ids, correctly using the temperature parameter
    output_ids = model.generate(
        input_ids,
        max_length=80,
        num_beams=5,
        early_stopping=True,
        do_sample=True,
        temperature=0.7,  # Ensures some creativity
        top_k=50,
        top_p=0.95
    )
    
    # Decode the output ids to a string
    slogan = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return slogan

# Example usage
if __name__ == "__main__":
    # Path to your trained model
    model_path = './final_bart_model'
    
    # Load model and tokenizer
    model, tokenizer = load_model(model_path)
    
    # Ensure the model is on the appropriate device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    
    # Input your own company name and description
    company_name = "SkyCompute Technologies"
    description = "SkyCompute Technologies pioneers advanced cloud solutions to streamline business operations and enhance data management. Our flagship product, the 'SkyNode', integrates AI capabilities seamlessly into your infrastructure, supported by our commitment to security, performance, and eco-friendly practices. We empower organizations to innovate and scale efficiently, ensuring a robust digital transformation journey."
    
    # Generate slogan
    slogan = generate_slogan(model, tokenizer, company_name, description, device)
    print("Generated Slogan:", slogan)


Generated Slogan: SkyCompute Technologies | AI enabled cloud solutions for your business
