In [None]:
import pandas as pd
import re 
from sklearn.model_selection import train_test_split
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from nltk.translate.bleu_score import sentence_bleu
from datasets import load_dataset
import torch



# Hub identifiers for the training data and the base checkpoint, named once
# so the tokenizer and the model are guaranteed to load the same checkpoint.
DATASET_ID = 'Kaludi/Customer-Support-Responses'
BASE_CHECKPOINT = 'gpt2'

dataset = load_dataset(DATASET_ID)

tokenizer = GPT2Tokenizer.from_pretrained(BASE_CHECKPOINT)
# Reuse the end-of-sequence token for padding so the padded tokenizer calls
# further down have a token to pad with.
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(BASE_CHECKPOINT)

def tokenize_function(examples):
    """Tokenize a batch of query/response pairs for causal-LM fine-tuning.

    GPT-2 is a decoder-only (causal) language model: it is trained to predict
    each token of a sequence from the tokens before it, so ``labels`` must be
    aligned position-by-position with ``input_ids``. Each example is therefore
    encoded as one sequence, ``"<query> <response><eos>"``, rather than as a
    separate input (query) and target (response), which is the
    encoder-decoder convention.

    Args:
        examples: Batch mapping with parallel ``'query'`` and ``'response'``
            lists of strings (as passed by ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output (``input_ids`` and ``attention_mask``, padded or
        truncated to 512 tokens) with an added ``'labels'`` entry: a copy of
        ``input_ids`` in which padding positions are replaced by -100.
    """
    eos = tokenizer.eos_token
    texts = [
        f"{query} {response}{eos}"
        for query, response in zip(examples['query'], examples['response'])
    ]
    model_inputs = tokenizer(texts, padding='max_length', truncation=True, max_length=512)

    # -100 is the index the loss ignores. The attention mask (not the token
    # id) identifies padding, because the pad token is the same id as the
    # genuine end-of-sequence token that the model should still learn.
    model_inputs["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Hold out part of the 'train' split for validation. Previously both names
# pointed at the same rows, so evaluation (and the BLEU example below) ran on
# data the model had been trained on. The fixed seed keeps the split
# reproducible between runs.
_split = tokenized_dataset['train'].train_test_split(test_size=0.2, seed=42)
train_encoding = _split['train']
val_encoding = _split['test']


# Hyper-parameters and output locations for the fine-tuning run.
# NOTE(review): warmup_steps=500 is only reached if the run has more than 500
# optimizer steps (one epoch at batch size 8) -- confirm against dataset size.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir='./results',
    logging_dir='./logs',
    # Schedule and batch sizes.
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Optimizer settings.
    warmup_steps=500,
    weight_decay=0.01,
)

# Bind the model, the training configuration and both dataset splits to a
# Trainer, then run the fine-tuning loop.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_encoding,
    eval_dataset=val_encoding,
)

trainer.train()


def evaluate_bleu(reference, generated):
    """Return the sentence-level BLEU score of ``generated`` against ``reference``.

    Both strings are tokenized by whitespace. Smoothing is applied because
    unsmoothed sentence BLEU collapses to (almost) zero whenever a short
    sentence has no matching higher-order n-gram, which makes the score
    uninformative for single responses.

    Args:
        reference: The expected response text.
        generated: The model-produced response text.

    Returns:
        The BLEU score as a float; 0.0 when either text has no tokens.
    """
    reference_tokens = reference.split()
    generated_tokens = generated.split()

    # Nothing to compare: report zero overlap explicitly instead of relying
    # on the scorer's handling of empty input.
    if not reference_tokens or not generated_tokens:
        return 0.0

    # Local import: only sentence_bleu is imported at file level.
    from nltk.translate.bleu_score import SmoothingFunction

    return sentence_bleu(
        [reference_tokens],
        generated_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

# Example evaluation: helper that generates a response, used below to score one sample with BLEU
def generate_response(query):
    """Generate the model's reply to ``query``.

    Args:
        query: The customer query text used as the generation prompt.

    Returns:
        The decoded continuation produced by the model, without the prompt
        and without special tokens. An empty string is returned for an empty
        or whitespace-only query.
    """
    # An empty prompt gives the model no tokens to condition on.
    if not query or not query.strip():
        return ''

    inputs = tokenizer(query, return_tensors='pt', padding=True, truncation=True, max_length=512)
    # After training the model may live on an accelerator; the prompt tensors
    # must be on the same device as the model.
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}
    prompt_length = inputs['input_ids'].shape[1]

    outputs = model.generate(
        **inputs,
        # Bound the reply length explicitly instead of relying on the default
        # total-length limit, which counts the prompt tokens as well.
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,
    )

    # A causal LM returns prompt + continuation; keep only the new tokens so
    # the result is the reply itself rather than the query echoed back.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response

# Score a single validation example: generate a reply for its query and
# compare it with the stored reference response.
first_example = val_encoding[0]
query = first_example['query']
reference = first_example['response']
generated = generate_response(query)

bleu_score = evaluate_bleu(reference, generated)
print(f"BLEU score: {bleu_score}")

In [None]:
import ipywidgets as widgets
from IPython.display import display

def generate_response(query):
    """Generate the model's reply to ``query`` for the interactive widget.

    Args:
        query: The customer query text used as the generation prompt.

    Returns:
        The decoded continuation produced by the model, without the prompt
        and without special tokens. An empty string is returned for an empty
        or whitespace-only query.
    """
    # An empty prompt gives the model no tokens to condition on.
    if not query or not query.strip():
        return ''

    inputs = tokenizer(query, return_tensors='pt', padding=True, truncation=True, max_length=512)
    # After training the model may live on an accelerator; the prompt tensors
    # must be on the same device as the model.
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}
    prompt_length = inputs['input_ids'].shape[1]

    outputs = model.generate(
        **inputs,
        # Bound the reply length explicitly instead of relying on the default
        # total-length limit, which counts the prompt tokens as well.
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,
    )

    # A causal LM returns prompt + continuation; keep only the new tokens so
    # the result is the reply itself rather than the query echoed back.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response

# Single-line, editable text box where the user types a query.
query_input = widgets.Text(
    description='Query:',
    placeholder='Type your query here...',
    value='',
    disabled=False,
)

# Multi-line box that shows the generated reply; disabled so the user cannot
# edit it.
response_output = widgets.Textarea(
    description='Response:',
    placeholder='Response will appear here',
    value='',
    disabled=True,
)

def on_button_click(b):
    """Button callback: generate a reply for the typed query and display it.

    Args:
        b: The object passed by the button's click handler; unused.
    """
    response_output.value = generate_response(query_input.value)

# Create the trigger button and register the click callback on it.
button = widgets.Button(description='Generate Response')
button.on_click(on_button_click)

# Render the widgets in order: query box, button, response box.
display(query_input, button, response_output)
