# In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from fuzzywuzzy import fuzz
import torch
import json
import random

# Load the fine-tuned model and its tokenizer from the local checkpoint directory
model = GPT2LMHeadModel.from_pretrained("./fine-tuned-gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("./fine-tuned-gpt2")

# Add padding token if not already present; the embedding matrix is resized
# so that it covers the newly added vocabulary entry
if tokenizer.pad_token_id is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

# The branch above ensures the tokenizer has a pad token by this point, so the
# id can be read directly without an EOS fallback
pad_token_id = tokenizer.pad_token_id

# Load intents from JSON file. The encoding is given explicitly so that
# non-ASCII text is decoded the same way on every platform instead of
# depending on the locale's default encoding
with open('intents.json', 'r', encoding='utf-8') as file:
    intents = json.load(file)

# Function to match user input with patterns and return an appropriate response using fuzzy matching
def get_intent_response(message, *, threshold=70):
    """Return a canned response for the intent that best matches *message*.

    Every pattern of every intent in the module-level ``intents`` data is
    compared case-insensitively to the message with ``fuzz.ratio``. The intent
    owning the highest-scoring pattern wins, provided that score is strictly
    greater than *threshold*. On equal scores the intent seen first is kept.

    Args:
        message: The raw user message.
        threshold: Score a pattern must strictly exceed to count as a match.
            Defaults to 70.

    Returns:
        A randomly chosen response from the winning intent, or ``None`` when
        no pattern scores above the threshold or the winning intent has no
        responses to choose from.
    """
    # Lower-case the message once instead of once per pattern
    normalized_message = message.lower()

    best_match = None
    best_score = 0

    for intent in intents['intents']:
        for pattern in intent['patterns']:
            # Use fuzzy matching to find the best match
            score = fuzz.ratio(pattern.lower(), normalized_message)
            if score > best_score and score > threshold:
                best_score = score
                best_match = intent

    if best_match is None:
        return None

    # random.choice raises IndexError on an empty sequence; report "no answer"
    # instead so the caller can fall back to the generative model
    responses = best_match.get('responses')
    if not responses:
        return None
    return random.choice(responses)

# Function to generate a response using the fine-tuned model with enhanced logic
def generate_response(input_text):
    """Generate an answer to *input_text* with the fine-tuned GPT-2 model.

    The message is wrapped in a ``Question: ... Answer:`` prompt, a
    continuation of up to 50 new tokens is sampled, and only that continuation
    (not the prompt) is decoded. If the continuation has fewer than five words
    or still contains a prompt marker, a fixed clarification request is
    returned instead.

    Args:
        input_text: The raw user message.

    Returns:
        The cleaned-up generated answer, or the clarification fallback string.
    """
    # Adding explicit guidance for the model
    prompt = f"Question: {input_text} Answer:"

    # A single prompt needs no padding. Padding it on the right up to
    # max_length would leave a pad token in the last position, and a
    # decoder-only model predicts its first new token from that position.
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=150)
    prompt_length = inputs['input_ids'].shape[-1]

    # Generate response with adjusted settings to reduce repetition
    outputs = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,  # More focused response generation
        top_p=0.9,        # Narrower range for better relevance
        top_k=50,         # Lower k for better control
        repetition_penalty=1.15,  # To avoid repetition in responses
        pad_token_id=pad_token_id
    )

    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them; otherwise the text always contains "Question:" and the
    # relevance check below rejects every single answer.
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Post-process to filter out irrelevant or off-topic responses
    response = response.replace("User:", "").replace("Bot:", "").strip()
    if len(response.split()) < 5 or "Question:" in response or "Answer:" in response:
        response = "I'm not sure how to answer that. Could you please clarify your question?"

    return response


print("GO! Bot is running...")

# Interactive chat loop: answer from the intents JSON when a pattern matches,
# otherwise fall back to the model-generated response.
while True:
    try:
        message = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupt is the only way out of this loop;
        # finish the prompt line and stop instead of ending in a traceback
        print()
        break

    # Nothing to match or generate from; ask again
    if not message:
        continue

    # First try the intents JSON, then the model if no intent matches
    res = get_intent_response(message) or generate_response(message)

    print(f"Bot: {res}")
