In [None]:
# Mount Google Drive at /content/drive; the data and output paths used by
# later cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

# Working GPT2 model

## Classifier

### Installs

In [None]:
!pip install transformers Levenshtein symspellpy
!pip install sentence-transformers
!pip install rapidfuzz

In [None]:
import json
import numpy as np
import re
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from symspellpy import SymSpell, Verbosity
import pkg_resources
import nltk
from nltk.corpus import words as nltk_words
from Levenshtein import distance as levenshtein_distance
from sklearn.feature_extraction.text import TfidfVectorizer
# Download NLTK words
nltk.download('words')

### Initializing the Classifier & Loading Intents

In [None]:
# Initialize SymSpell: suggestions may be up to 3 edits away from the input,
# and a prefix length of 7 is used for its internal index.
sym_spell = SymSpell(max_dictionary_edit_distance=3, prefix_length=7)
# Locate the English frequency dictionary that ships inside the symspellpy package.
# NOTE(review): pkg_resources is deprecated by setuptools; consider
# importlib.resources once the minimum Python version is confirmed.
dictionary_path = pkg_resources.resource_filename("symspellpy", "frequency_dictionary_en_82_765.txt")
# Each dictionary line holds the term in column 0 and its count in column 1.
sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)

# Load intents.json.
# JSON is UTF-8 by specification, so the encoding is given explicitly instead
# of relying on the platform default (which would garble or reject any
# non-ASCII text in the file on a non-UTF-8 locale).
with open('/content/drive/MyDrive/shared_folder/data/ft_intents.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Flatten the intents into two parallel lists: tags[i] is the intent tag that
# patterns[i] belongs to. Later cells index both lists with the same position.
patterns = []
tags = []
for intent in data['intents']:
    for pattern in intent['patterns']:
        patterns.append(pattern)
        tags.append(intent['tag'])

print(f"Loaded {len(patterns)} patterns across {len(set(tags))} unique tags.")

### Generating a Custom Dictionary

In [None]:
# Clean text
def clean_text(text):
    """Normalise ``text`` for matching.

    The text is lower-cased and every character that is neither a word
    character (letters, digits, underscore) nor whitespace is removed.
    Whitespace itself is left untouched.
    """
    lowered = text.lower()
    return re.sub(r'[^\w\s]', '', lowered)

# Generate dynamic dictionary with automatic typo generation
def _generate_typos(word):
    """Return the set of simple misspellings of ``word``.

    Four kinds of variants are produced: a vowel replaced by another vowel,
    one letter dropped, two adjacent letters swapped, and the fixed
    look-alike substitutions r<->e and l<->i.
    """
    vowels = 'aeiou'
    substitutions = {'r': 'e', 'e': 'r', 'l': 'i', 'i': 'l'}
    typos = set()
    for i, char in enumerate(word):
        head, tail = word[:i], word[i + 1:]
        # Vowel swaps
        if char in vowels:
            typos.update(head + other + tail for other in vowels if other != char)
        # Letter drops (never emit the empty string)
        if head + tail:
            typos.add(head + tail)
        # Transpositions with the following letter
        if i + 1 < len(word):
            typos.add(head + word[i + 1] + char + word[i + 2:])
        # Common substitutions (e.g., r→e for prescription)
        if char in substitutions:
            typos.add(head + substitutions[char] + tail)
    return typos


def generate_custom_dictionary():
    """Extend the shared ``sym_spell`` dictionary with domain vocabulary.

    Terms come from two places: words longer than two characters found in the
    intent ``patterns``, and the NLTK English word list. The 50 terms with the
    highest summed TF-IDF weight over ``patterns`` count as "frequent" and are
    registered with a higher count so SymSpell prefers them as corrections.

    Misspellings are deliberately NOT added to the dictionary. SymSpell
    returns an exact dictionary hit as-is (edit distance 0), so registering a
    typo as a word would stop that typo from ever being corrected. The
    generated variants are only used to size the frequency boost of the
    canonical term (200 per variant longer than two characters), mirroring
    the weighting the canonical terms received before.

    Reads the module-level ``patterns`` and mutates the module-level
    ``sym_spell`` in place; returns ``None``.
    """
    # Extract key terms from patterns
    key_terms = set()
    for pattern in patterns:
        words = re.findall(r'\b\w+\b', clean_text(pattern))
        key_terms.update(w for w in words if len(w) > 2)

    # Add NLTK English words
    key_terms.update(w.lower() for w in nltk_words.words())

    # TF-IDF to identify frequent terms
    temp_vectorizer = TfidfVectorizer(stop_words='english')
    temp_matrix = temp_vectorizer.fit_transform(patterns)
    feature_names = temp_vectorizer.get_feature_names_out()
    term_scores = temp_matrix.sum(axis=0).A1
    frequent_terms = {feature_names[i] for i in np.argsort(term_scores)[-50:]}  # Top 50 terms

    # Load into SymSpell. create_dictionary_entry adds to the count of a term
    # that is already present, so one call per term is enough.
    for term in key_terms:
        if term in frequent_terms:
            variant_count = sum(1 for typo in _generate_typos(term) if len(typo) > 2)
            count = 1000 + 200 * variant_count
        else:
            count = 500
        sym_spell.create_dictionary_entry(term, count)

# Populate the shared SymSpell instance once, when this cell runs.
generate_custom_dictionary()
print("Generated dynamic dictionary with automatic typos for SymSpell.")

### Text Cleaning and Spell Correction

In [None]:
# Initialize SBERT (sentence-embedding model used for semantic similarity)
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')

# TF-IDF for keyword boosting and contextual scoring.
# The vectorizer is fitted on the raw intent patterns, drops English stop
# words and keeps at most 1000 features; tfidf_matrix has one row per pattern.
vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
tfidf_matrix = vectorizer.fit_transform(patterns)
tfidf_features = vectorizer.get_feature_names_out()

# Context-aware spelling correction
def correct_spelling(text):
    """Return a spell-corrected version of ``text``.

    Short greetings (at most two whitespace-separated tokens whose lower-cased
    form is one of a fixed list) are returned untouched. Otherwise SymSpell's
    compound lookup proposes corrections; when it proposes more than one, the
    candidate whose TF-IDF vector is most similar to any intent pattern wins.
    If SymSpell returns nothing, the input is returned unchanged.
    """
    greetings = ['hello', 'hi', 'goodbye', 'thanks', 'bye']
    is_short = len(text.split()) <= 2
    if is_short and text.lower() in greetings:
        return text

    # Get SymSpell suggestions
    suggestions = sym_spell.lookup_compound(text, max_edit_distance=3)
    if not suggestions:
        return text

    # Keep at most the first three suggestions as candidates
    candidates = [item.term for item in suggestions[:3]]
    if len(candidates) == 1:
        return candidates[0]

    # Score each candidate by its best TF-IDF cosine similarity to any pattern
    candidate_vectors = vectorizer.transform(candidates).toarray()
    best_per_candidate = cosine_similarity(candidate_vectors, tfidf_matrix).max(axis=1)
    return candidates[np.argmax(best_per_candidate)]

# Relaxed gibberish detection
# Lazily-built cache of the lower-cased NLTK word list. Building this set is
# expensive, so it is created on first use and reused by every later call.
_ENGLISH_VOCAB = None


def _english_vocab():
    """Return the cached set of lower-cased NLTK English words."""
    global _ENGLISH_VOCAB
    if _ENGLISH_VOCAB is None:
        _ENGLISH_VOCAB = frozenset(w.lower() for w in nltk_words.words())
    return _ENGLISH_VOCAB


def is_gibberish(text):
    """Return ``True`` when ``text`` contains nothing recognisable.

    Everything except ASCII letters and whitespace is stripped first. The
    text is *not* gibberish as soon as one token is either a known English
    word or within Levenshtein distance 3 of a (cleaned) intent pattern.
    Text with no alphabetic tokens at all is always gibberish.
    """
    cleaned = re.sub(r'[^a-zA-Z\s]', '', text).lower()
    tokens = cleaned.split()
    if not tokens:
        return True

    # Cheap check first: a single dictionary word is enough.
    vocab = _english_vocab()
    if any(token in vocab for token in tokens):
        return False

    # Only now pay for the edit-distance scan; clean each pattern once per
    # call instead of once per token.
    pattern_texts = [clean_text(p) for p in patterns]
    return not any(
        levenshtein_distance(token, pattern_text) <= 3
        for token in tokens
        for pattern_text in pattern_texts
    )

# Get SBERT embeddings
def get_sbert_embeddings(texts, batch_size=32):
    """Encode ``texts`` with the shared SBERT model.

    The texts are encoded in consecutive slices of ``batch_size`` items and
    the per-slice results are stacked vertically, giving one row per text in
    the original order.
    """
    slice_starts = range(0, len(texts), batch_size)
    encoded_slices = [
        sbert_model.encode(
            texts[start:start + batch_size],
            convert_to_numpy=True,
            show_progress_bar=False,
        )
        for start in slice_starts
    ]
    return np.vstack(encoded_slices)

### Precompute Pattern Embeddings

In [None]:
# Precompute pattern embeddings once so that each query only has to embed the
# user input. Row i of pattern_embeddings corresponds to patterns[i].
cleaned_patterns = [clean_text(p) for p in patterns]
pattern_embeddings = get_sbert_embeddings(cleaned_patterns)
print(f"Computed SBERT embeddings for {len(patterns)} patterns.")

### Predict Best Matching Pattern

In [None]:
# Keyword-boosted matching
def find_best_pattern(user_input, bert_threshold=0.65, levenshtein_weight=0.2, tfidf_weight=0.3):
    """Match ``user_input`` against the intent patterns.

    The input is spell-corrected, then scored against every pattern with a
    weighted blend of three similarities: SBERT cosine similarity (weight
    ``1 - levenshtein_weight - tfidf_weight``), normalised Levenshtein
    similarity and TF-IDF cosine similarity.

    Args:
        user_input: Raw text typed by the user.
        bert_threshold: Minimum blended score required to accept a match.
        levenshtein_weight: Weight of the edit-distance similarity.
        tfidf_weight: Weight of the TF-IDF similarity.

    Returns:
        A 5-tuple ``(text, pattern, tag, score, status)``. ``text`` is the
        corrected input (or the raw input when it was rejected before
        correction). ``pattern`` and ``tag`` are ``None`` when the input is
        gibberish or the best score is below ``bert_threshold``.
    """
    if is_gibberish(user_input):
        return user_input, None, None, 0.0, "Gibberish detected"

    corrected_input = correct_spelling(user_input)
    if is_gibberish(corrected_input):
        return corrected_input, None, None, 0.0, "Corrected input is gibberish"

    cleaned_input = clean_text(corrected_input)
    input_embedding = get_sbert_embeddings([cleaned_input])

    # SBERT similarity
    sbert_similarities = cosine_similarity(input_embedding, pattern_embeddings)[0]

    # Levenshtein similarity: 1 - distance / longer length. Each pattern is
    # cleaned once and reused for both the distance and the length.
    levenshtein_scores = []
    for p in patterns:
        cleaned_pattern = clean_text(p)
        longest = max(len(cleaned_input), len(cleaned_pattern))
        if longest == 0:
            # Two empty strings are identical; avoid dividing by zero.
            levenshtein_scores.append(1.0)
        else:
            levenshtein_scores.append(1 - (levenshtein_distance(cleaned_input, cleaned_pattern) / longest))

    # TF-IDF similarity
    input_tfidf = vectorizer.transform([corrected_input]).toarray()
    tfidf_similarities = cosine_similarity(input_tfidf, tfidf_matrix)[0]

    # Combine scores
    sbert_weight = 1 - levenshtein_weight - tfidf_weight
    combined_scores = [(sbert * sbert_weight + lev * levenshtein_weight + tfidf * tfidf_weight)
                      for sbert, lev, tfidf in zip(sbert_similarities, levenshtein_scores, tfidf_similarities)]
    best_idx = np.argmax(combined_scores)
    best_score = combined_scores[best_idx]

    if best_score < bert_threshold:
        return corrected_input, None, None, best_score, "Below similarity threshold"
    return corrected_input, patterns[best_idx], tags[best_idx], best_score, "Match found"

### Testing

In [None]:
def _report_predictions(title, labelled_sentences):
    """Classify every ``(sentence, expected_intent)`` pair and print a report.

    Returns ``(correct, total)``: how many predictions matched the expected
    intent and how many pairs were evaluated. An input with no predicted tag
    counts as incorrect.
    """
    correct = 0
    total = 0
    print(f"\n{title}")
    for sentence, expected_intent in labelled_sentences:
        corrected_input, matched_pattern, matched_tag, similarity, status = find_best_pattern(sentence)
        is_correct = matched_tag == expected_intent if matched_tag else False
        total += 1
        if is_correct:
            correct += 1
        print(f"User Input: {sentence}")
        print(f"Corrected Input: {corrected_input}")
        if matched_pattern:
            print(f"Matched Pattern: {matched_pattern}")
            print(f"Predicted Tag: {matched_tag} (Expected: {expected_intent}, {'Correct' if is_correct else 'Incorrect'})")
        else:
            print(f"⚠️ No match: {status} (Expected: {expected_intent})")
        print(f"Similarity Score: {similarity:.4f}")
        print("-" * 50)
    return correct, total


# Run tests
# NOTE(review): `test_sentences` and `typo_sentences` must be iterables of
# (sentence, expected_intent) pairs defined before this cell runs; their
# definition is not visible in this part of the notebook.
standard_correct, standard_total = _report_predictions("Predictions for Standard Sentences:", test_sentences)
typo_correct, typo_total = _report_predictions("Predictions for Typo-Heavy Sentences:", typo_sentences)

correct_matches = standard_correct + typo_correct
evaluated_total = standard_total + typo_total
# Report the tally that was previously computed but never shown.
if evaluated_total:
    print(f"\nOverall accuracy: {correct_matches}/{evaluated_total} ({correct_matches / evaluated_total:.1%})")

## Preprocessing

### Defining the List of Possible Emojis

In [None]:
# Mapping emojis to placeholders.
# Each emoji that may appear in the intent data is mapped to a bracketed ASCII
# token, which is what the text is rewritten to before tokenisation.
# NOTE(review): keys are matched as exact substrings; confirm whether the data
# uses the emoji-presentation variants (with a trailing U+FE0F) of the map,
# heart and meal symbols, since those would leave the selector behind.
emoji_map = {
    "😊": "[smile]",
    "😄": "[big_smile]",
    "🌟": "[star]",
    "📋": "[clipboard]",
    "💸": "[money]",
    "📅": "[calendar]",
    "💊": "[pill]",
    "📍": "[location_pin]",
    "🗺": "[map]",
    "📆": "[calendar_alt]",
    "🚑": "[ambulance]",
    "📞": "[phone]",
    "❤":  "[heart]",
    "🎁": "[gift]",
    "📶": "[signal]",
    "🍽": "[meal]",
    "🚗": "[car]",
    "🦽": "[wheelchair]",
    "🤠": "[cowboy]",
    "🚀": "[rocket]",
    "🎉": "[celebration]",
    "🕒": "[clock]",
    "🎒": "[backpack]",
    "💖": "[sparkling_heart]",
    "🏥": "[hospital]",
}

### Making all of the training data

In [None]:
FINE_TUNING = False

!pip install datasets

# Imports
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from datasets import Dataset
import json
import random
random.seed(75)

# Function to replace emojis with placeholders
def replace_emojis(text):
    """Return ``text`` with every emoji listed in ``emoji_map`` replaced by
    its bracketed placeholder. Text without any mapped emoji is returned
    unchanged."""
    converted = text
    for symbol in emoji_map:
        converted = converted.replace(symbol, emoji_map[symbol])
    return converted

# Loading the data file
def load_intents(file_path):
    """Read the intents JSON file at ``file_path`` and return the parsed object.

    The file is opened as UTF-8 explicitly: JSON is UTF-8 by specification,
    and relying on the platform default encoding would corrupt or reject
    non-ASCII content (such as emoji) on a non-UTF-8 locale.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Pick the corpus to build conversations from: the fine-tuning intents when
# FINE_TUNING is set, otherwise the expanded general-purpose intents.
intents_data = load_intents(
    '/content/drive/MyDrive/shared_folder/data/ft_intents.json'
    if FINE_TUNING
    else '/content/drive/MyDrive/shared_folder/data/intents_expanded.json'
)

# Gets all messages from a tag from the data file
def get_messages_by_tags(tags, intents=None):
    """Return every intent whose ``tag`` is listed in ``tags``.

    Args:
        tags: Tags to look up. Results are grouped in the order of ``tags``;
            within one tag, intents keep their order in the catalogue.
        intents: Optional catalogue dict with an ``'intents'`` list. When
            omitted, the module-level ``intents_data`` is searched.

    Returns:
        A list of intent dicts (empty when nothing matches).
    """
    catalogue = intents_data if intents is None else intents
    return [
        intent
        for tag in tags
        for intent in catalogue.get('intents', [])
        if intent['tag'] == tag
    ]

# Creates all possible dialogue pairs in LangChain messages, including multi-turn context chains
def build_conversation(intent, all_intents, max_depth=2, current_depth=0):
    """
    Recursively builds conversations based on context chains.
    Limits depth to avoid infinite loops or overly long dialogues.

    Args:
        intent: Intent dict with ``patterns``, ``responses`` and an optional
            ``context`` list naming the intents that may precede it.
        all_intents: Catalogue the context tags are resolved against. It is
            now actually used for the lookup, so a catalogue other than the
            module-level ``intents_data`` works as expected.
        max_depth: Deepest recursion level that still produces output.
        current_depth: Recursion level of this call (0 for the outer call).

    Returns:
        A list of conversations, each a list of alternating ``HumanMessage`` /
        ``AIMessage`` objects. An intent whose context cannot be resolved to
        any conversation (unknown tag, or chain deeper than ``max_depth``)
        yields an empty list.
    """
    # Has a max depth for conversations so the model has "memory"
    if current_depth > max_depth:
        return []

    # Every (pattern, response) combination of this intent is one final turn.
    closing_turns = [
        [HumanMessage(content=pattern), AIMessage(content=response)]
        for pattern in intent['patterns']
        for response in intent['responses']
    ]
    context_tags = intent.get('context', [])

    # If there's no context, it's only one user and AI message
    if not context_tags or context_tags == [""]:
        return closing_turns

    # If there's context, prepend every conversation that can lead up to it
    conversations = []
    for tag in context_tags:
        for ctx_intent in get_messages_by_tags([tag], all_intents):
            earlier_convos = build_conversation(ctx_intent, all_intents, max_depth, current_depth + 1)
            for convo in earlier_convos:
                for turn in closing_turns:
                    conversations.append(convo + turn)

    return conversations

# Generate all dialogue pairs
def create_dialogue_pairs(intents):
    """Build the training conversations for a random subset of intents.

    Each intent in ``intents['intents']`` is kept with probability 0.1 (one
    draw from the module-level ``random`` generator per intent), because
    expanding every intent produces too much data. Note that the sampling is
    per intent, not per generated conversation. All conversations of the kept
    intents are returned in one flat list.
    """
    training_data = []
    # Lazy filter: the draw for an intent happens right before it is expanded.
    sampled_intents = (
        entry for entry in intents.get('intents', []) if random.random() < 0.1
    )
    for entry in sampled_intents:
        training_data.extend(build_conversation(entry, intents))
    return training_data

# Converting LangChain messages to a long string (what GPT2 expects)
def flatten_message_chain(messages):
    """Render a LangChain message list as the plain-text transcript GPT2 is
    trained on.

    Human messages become ``User: ...`` lines; system and AI messages both
    become ``System: ...`` lines. Emojis are replaced by their placeholders,
    messages of any other type are skipped, and lines are joined with
    newlines.
    """
    def render(msg):
        # Same precedence as an if/elif chain: system, then human, then AI.
        if isinstance(msg, SystemMessage):
            speaker = "System"
        elif isinstance(msg, HumanMessage):
            speaker = "User"
        elif isinstance(msg, AIMessage):
            speaker = "System"
        else:
            return None
        return f"{speaker}: {replace_emojis(msg.content)}"

    rendered = (render(msg) for msg in messages)
    return "\n".join(line for line in rendered if line is not None)

# Creating the dataset from the strings: one flattened transcript per sampled
# conversation, stored in a single "text" column.
dialogues = [flatten_message_chain(convo) for convo in create_dialogue_pairs(intents_data)]
# NOTE(review): `data` is also the name the classifier section uses for the
# parsed intents JSON; this assignment rebinds it to the dataset dict.
data = {"text": dialogues}
dataset = Dataset.from_dict(data).shuffle(seed=75)  # Shuffle (fixed seed) so that we don't train only on the beginning

# Verify the dataset is correct
print(f"There are {len(dataset['text'])} generated conversations")
print(f"Here are the first 4 sample conversations: {dataset['text'][0:4]}")

### Tokenizing the dataset

In [None]:
from transformers import GPT2Tokenizer

# Load the pretrained GPT2 tokenizer (vocabulary only; no model weights here)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# GPT2 doesn't define a pad token, so the end-of-sequence token doubles as
# padding. Pad and EOS therefore share the same token id.
tokenizer.pad_token = tokenizer.eos_token

# We need to tokenize the data because we cant put raw text into the model
def tokenize_function(examples):
    """Tokenise ``examples["text"]`` and attach language-modelling labels.

    Every text is truncated or padded to exactly 128 tokens. Labels are a
    copy of the input ids, except that padding positions are set to -100 (the
    index the loss ignores) so the loss is not dominated by padding. The
    first padding position is kept as a real target: the pad token is the
    EOS token, so this teaches the model to end the sequence once.

    Works for a batch (list of texts, as used with ``batched=True``) and for
    a single text. Returns the tokenizer output with a ``labels`` entry added.
    """
    outputs = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

    def build_labels(ids, attention):
        # Fresh list per sequence so labels never alias input_ids.
        labels = list(ids)
        kept_stop_token = False
        for position, is_real_token in enumerate(attention):
            if is_real_token:
                continue
            if kept_stop_token:
                labels[position] = -100
            else:
                kept_stop_token = True  # first pad == EOS target, keep it
        return labels

    input_ids = outputs["input_ids"]
    attention_mask = outputs["attention_mask"]
    if input_ids and isinstance(input_ids[0], int):
        # Single, un-batched example: ids are a flat list.
        outputs["labels"] = build_labels(input_ids, attention_mask)
    else:
        outputs["labels"] = [
            build_labels(ids, attention)
            for ids, attention in zip(input_ids, attention_mask)
        ]
    return outputs

# Tokenise the whole dataset in batches, then persist it so later sessions
# can reload it instead of rebuilding. The target file depends on whether
# this run produced fine-tuning or general data.
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset.to_parquet(
    "/content/drive/MyDrive/shared_folder/data/tokenized_ft_dataset.parquet"
    if FINE_TUNING
    else "/content/drive/MyDrive/shared_folder/data/tokenized_intents_expanded_dataset.parquet"
)

## Training the Model

### Training the General Model

#### Loading the Generalized Tokenized Data

In [None]:
# Reload the tokenized general dataset from parquet. This saves time because
# the training data does not have to be rebuilt and tokenized again.
from datasets import Dataset
tokenized_dataset = Dataset.from_parquet("/content/drive/MyDrive/shared_folder/data/tokenized_intents_expanded_dataset.parquet")

#### Training the Model with Generalized Data

In [None]:
# Set to True to continue from the latest checkpoint in checkpoint_dir, or to
# a checkpoint folder path to continue from that specific checkpoint.
RESUME_FROM_CHECKPOINT = False

# Importing all needed libraries
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments

# Load the pretrained GPT2 Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# We can use the eos token as the pad token since GPT2 doesn't have a pad token
tokenizer.pad_token = tokenizer.eos_token

# This uses the GPT2 architecture but allows us to adjust the hyperparameters to what we want
config = GPT2Config(
    vocab_size=tokenizer.vocab_size, # Number of unique tokens in the tokenizer
    n_positions=512, # Max sequence length the model can handle
    n_ctx=512, # Max context window of the model
    n_embd=768, # Dimensionality of embeddings and hidden layers
    n_layer=12, # Number of transformer blocks
    n_head=12, # Number of attention heads per layer
    pad_token_id=tokenizer.eos_token_id, # Token to pad the sequences
)

checkpoint_dir = "/content/drive/MyDrive/shared_folder/outputs/expanded_intents_checkpoints"

# Always build the model from the config (randomly initialised weights, i.e.
# training from scratch). When resuming, Trainer.train() restores the weights
# together with optimizer and scheduler state from the checkpoint. Trainer
# writes its checkpoints into `checkpoint-<step>` subfolders of output_dir, so
# calling from_pretrained() on the output root would not find a model there.
model = GPT2LMHeadModel(config)

# These are the hyperparameters that are used for training
training_args = TrainingArguments(
  num_train_epochs=1, # Not enough time to train more than one epoch
  per_device_train_batch_size=32, # How many sentences will be loaded into the model in parallel during training
  learning_rate=5e-4, # Small LR with large samples should be good
  warmup_steps=50, # The learning rate will scale up to the full amount over this amount of steps
  save_steps=1000, # Checkpoint the model every 1000 steps
  output_dir = checkpoint_dir, # Where to save the checkpoints
  report_to=[] # This is needed to disable WandB
)

# Initializing a trainer with the model and training args
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# A falsy flag becomes None, which means "start fresh"; True resumes from the
# most recent checkpoint in output_dir; a string resumes from that folder.
# NOTE(review): checkpoints are only written every 1000 steps — confirm the
# final weights are saved elsewhere if the last partial interval matters.
trainer.train(resume_from_checkpoint=RESUME_FROM_CHECKPOINT or None)

### Loading Model and Fine Tuning

#### Loading the Tokenized Dataset for Fine Tuning

In [None]:
from datasets import Dataset

# Reload the tokenized fine-tuning dataset from parquet; this rebinds
# `tokenized_dataset`, which the Trainer in the next cell consumes.
tokenized_dataset = Dataset.from_parquet("/content/drive/MyDrive/shared_folder/data/tokenized_ft_dataset.parquet")

#### Initializing the Generalized Model for Fine Tuning

In [None]:
# When True, the weights of the general model are loaded from the .pth file
# below before fine-tuning; when False, fine-tuning starts from random weights.
RESUME_FROM_CHECKPOINT = True
pth_checkpoint_path = "/content/drive/MyDrive/shared_folder/outputs/expanded_intents_checkpoints/model path file/gpt2_checkpoint_weights.pth"

# Importing all needed libraries
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
import torch

# Load the pretrained GPT2 Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT2 doesn't have a pad token

# GPT2 configuration; the values match the general-training cell so that the
# saved state dict fits this architecture.
config = GPT2Config(
    vocab_size=tokenizer.vocab_size,
    n_positions=512,
    n_ctx=512,
    n_embd=768,
    n_layer=12,
    n_head=12,
    pad_token_id=tokenizer.eos_token_id,
)

# Initialize model (random weights until the state dict is loaded)
model = GPT2LMHeadModel(config)

if RESUME_FROM_CHECKPOINT:
    # Load the model weights from a .pth file
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source (or pass weights_only=True if the installed torch
    # version supports it — confirm).
    state_dict = torch.load(pth_checkpoint_path, map_location="cpu")
    model.load_state_dict(state_dict)

# Directory to store new checkpoints
checkpoint_dir = '/content/drive/MyDrive/shared_folder/outputs/ft_outputs'

# Training arguments
training_args = TrainingArguments(
    num_train_epochs=1,
    per_device_train_batch_size=32,
    # Evaluates to 1.25e-3.
    # NOTE(review): the general-training cell uses learning_rate=5e-4 with a
    # batch size of 32, so both the 5e-3 base and the "old batch size" of 128
    # should be confirmed — this fine-tuning rate is higher than pre-training.
    learning_rate=5e-3 * (32/128), # Tweaking the learning rate by the ratio of new batch size / old batch size
    warmup_steps=50,
    save_steps=1000,
    output_dir=checkpoint_dir,
    report_to=[]
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

trainer.train()

## Inference on Model

In [None]:
import torch
import json
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config

# When True, user input is first run through find_best_pattern (defined in the
# classifier section) for spell correction and a confidence check.
enable_classifier = False
# Path of the state-dict file the chatbot model is loaded from.
PTH_FILE = '/content/gpt2_checkpoint.pth'

# Mapping emojis to placeholders.
# NOTE(review): this repeats the emoji_map defined in the preprocessing
# section; the two copies must be kept in sync by hand.
emoji_map = {
    "😊": "[smile]",
    "😄": "[big_smile]",
    "🌟": "[star]",
    "📋": "[clipboard]",
    "💸": "[money]",
    "📅": "[calendar]",
    "💊": "[pill]",
    "📍": "[location_pin]",
    "🗺": "[map]",
    "📆": "[calendar_alt]",
    "🚑": "[ambulance]",
    "📞": "[phone]",
    "❤":  "[heart]",
    "🎁": "[gift]",
    "📶": "[signal]",
    "🍽": "[meal]",
    "🚗": "[car]",
    "🦽": "[wheelchair]",
    "🤠": "[cowboy]",
    "🚀": "[rocket]",
    "🎉": "[celebration]",
    "🕒": "[clock]",
    "🎒": "[backpack]",
    "💖": "[sparkling_heart]",
    "🏥": "[hospital]",
}

# Invert map for placeholder to emoji, used to turn generated placeholders
# back into emojis before they are printed.
placeholder_to_emoji = {value:key for key,value in emoji_map.items()}

# Function to take in a path and load the model and tokenizer
def load_model(path_file):
    """Build the chatbot's tokenizer and model and load saved weights.

    The tokenizer is the stock GPT2 tokenizer with EOS reused as the pad
    token. The model is a GPT2 language-model head with a fixed architecture
    (512 positions, 768-dim embeddings, 12 layers, 12 heads) whose weights are
    read from the state-dict file at ``path_file`` onto the CPU.

    Returns:
        ``(tokenizer, model)``.
    """
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token  # GPT2 doesn't have a pad token

    # Architecture settings; they have to match the saved state dict.
    architecture = dict(
        vocab_size=gpt2_tokenizer.vocab_size,
        n_positions=512,
        n_ctx=512,
        n_embd=768,
        n_layer=12,
        n_head=12,
        pad_token_id=gpt2_tokenizer.eos_token_id,
    )
    network = GPT2LMHeadModel(GPT2Config(**architecture))

    # Replace the random initial weights with the ones stored in the .pth file
    saved_weights = torch.load(path_file, map_location="cpu")
    network.load_state_dict(saved_weights)

    return gpt2_tokenizer, network

def stream_generate_response(input_text, tokenizer,  model, max_length=512, top_k=10, temperature=0.5):
    """Generate one chatbot reply token by token and print it as it arrives.

    The prompt is ``"User: <input_text>\\nSystem:"``. Tokens are sampled with
    temperature scaling and top-k filtering until the EOS token, a newline
    (end of the chatbot's turn) or the token budget is reached. Bracketed
    placeholders such as ``[smile]`` are held back until complete and printed
    as their emoji.

    Args:
        input_text: The user's message.
        tokenizer: Tokenizer providing ``encode``, ``decode`` and ``eos_token_id``.
        model: Causal language model supporting ``past_key_values`` caching.
        max_length: Maximum number of new tokens. It is reduced when prompt
            plus new tokens would exceed the model's ``config.n_positions``,
            which would otherwise index past the position embeddings.
        top_k: Number of highest-scoring tokens to sample from (0 disables
            the filter); clamped to the vocabulary size.
        temperature: Softmax temperature; must be positive.

    Returns:
        ``None``; the reply is written to standard output.
    """
    # Formatting the input sequence for the GPT2 tokenizer
    prompt = f"User: {input_text}\nSystem:"
    # Encoding the data
    generated = tokenizer.encode(prompt, return_tensors="pt")

    # Keep prompt + generated tokens inside the model's position table.
    step_budget = max_length
    position_limit = getattr(getattr(model, "config", None), "n_positions", None)
    if position_limit is not None:
        if generated.shape[1] >= position_limit:
            # Over-long prompt: keep its most recent tokens (the part ending
            # in "\nSystem:") and leave room for at least one new token.
            generated = generated[:, -max(position_limit - 1, 1):]
        step_budget = min(max_length, position_limit - generated.shape[1])

    # This is the cache for the past responses
    past = None

    # This will hold tokens so that we can see them and replace placeholders
    buffer = ""
    print("CareCompanion:", end="", flush=True)

    def flush_safe(flush_text):
        # Go through each placeholder and replace it with the right emoji
        for placeholder, emoji in placeholder_to_emoji.items():
            flush_text = flush_text.replace(placeholder, emoji)
        print(flush_text, end="", flush=True)

    # Turning off gradient tracking
    with torch.no_grad():
        # Goes until eos token or the token budget is reached
        for _ in range(step_budget):
            # If it is the first step, feed in the whole prompt
            if past is None:
                out = model(generated, use_cache=True)
            else:
                # Otherwise, feed in only the last token
                out = model(generated[:, -1:], past_key_values=past, use_cache=True)

            # Update the cached values
            past = out.past_key_values

            # Only the predictions for the very next token are needed
            logits = out.logits[:, -1, :]
            # Taking temp into account
            logits = logits / temperature

            # This is only keeping the top k tokens
            if top_k > 0:
                # torch.topk raises when k exceeds the vocabulary size
                k = min(top_k, logits.size(-1))
                # Built-in torch func to get the top k tokens and their indices
                topk_vals, topk_idx = torch.topk(logits, k, dim=-1)
                # Creating new tensor full of -inf
                mask = torch.full_like(logits, float("-inf"))
                # Only update the top k token scores and keep everything else at -inf
                mask.scatter_(1, topk_idx, topk_vals)
                # Replace with the newly updated logits
                logits = mask

            # Converting the logits to a probability dist where all probs sum to 1
            probs = torch.softmax(logits, dim=-1)
            # Sample just one token
            next_token = torch.multinomial(probs, num_samples=1)

            # Stop on EOS token
            if next_token.item() == tokenizer.eos_token_id:
                break

            # Decode the generated token
            token_text = tokenizer.decode(next_token[0])
            # Add the new token to the generated sequence
            generated = torch.cat((generated, next_token), dim=1)

            # Add decoded text to the buffer
            buffer += token_text

            # If newline is detected, it means the chatbot turn is over and to stop
            if "\n" in buffer:
                # Find index of newline
                idx = buffer.index("\n")
                # Flush up to newline
                flush_safe(buffer[:idx])
                # Reset buffer
                buffer = ""
                # Break out of generation
                break

            # Finding the last '[' and the last ']'
            last_open = buffer.rfind('[')
            last_close = buffer.rfind(']')

            # An unclosed '[' may be the start of a placeholder: hold back
            # everything from it onwards, flush only what precedes it
            if last_open > last_close:
                flush_idx = last_open
            else:
                flush_idx = len(buffer)

            # Checks if there is some text that needs to be flushed
            if flush_idx > 0:
                # Only flush what we know is safe
                flush_safe(buffer[:flush_idx])
                # Keep everything else in buffer
                buffer = buffer[flush_idx:]

    # Flushing everything left in buffer at the end
    if buffer:
        flush_safe(buffer)
    # Makes it prettier
    print()

if __name__ == "__main__":
    # Loading tokenizer and model from a .pth file
    tokenizer, model = load_model(PTH_FILE)
    # Putting model in evaluation mode
    model.eval()
    print('Now speaking with CareCompanion, type \'Bye\' to quit')

    # Until user quits
    while True:
        try:
            user_input = input("User: ")
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or interrupted kernel: leave the chat
            # cleanly instead of surfacing a traceback.
            print()
            break

        # Decide on quitting from what the user typed, ignoring surrounding
        # whitespace, so that "Bye " also ends the chat.
        wants_to_quit = user_input.strip().lower() == 'bye'

        # If we want the classifier
        if enable_classifier:
            corrected_input, matched_pattern, matched_tag, similarity, status = find_best_pattern(user_input)
            user_input = corrected_input
            # A misspelled farewell that was corrected to "bye" also quits.
            wants_to_quit = wants_to_quit or user_input.strip().lower() == 'bye'
            if similarity < 0.3:
                print("CareCompanion: Oops, I didn't quite catch that, champ! 😊 Can you rephrase? 🌟")
                # Never trap the user in the loop: honour the quit request
                # even when the classifier was not confident.
                if wants_to_quit:
                    break
                continue

        # Stream the response to the user
        stream_generate_response(user_input, tokenizer, model, temperature=0.1)

        # If the user wants to leave, exit the chatbot (after the bot replied)
        if wants_to_quit:
            break
