In [1]:
!pip install bitsandbytes datasets accelerate>0.26.0 loralib
# !pip install git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git
!pip install transformers==4.38.2 peft==0.9.0
# !pip install datasets transformers
# !pip install transformers datasets evaluate



In [2]:
from huggingface_hub import login

import os
hugging_face_token = os.environ.get("HF_TOKEN")
# !python -c "from huggingface_hub.hf_api import HfFolder; HfFolder.save_token('7dfdc3694ead1a038bfa1cbe7bbe7946c722635d')"
login(token=hugging_face_token)

  from .autonotebook import tqdm as notebook_tqdm


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/faisal/.cache/huggingface/token
Login successful


In [3]:
# Used for multi-gpu
local_rank = -1
per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 1
learning_rate = 2e-4
max_grad_norm = 0.3
weight_decay = 0.001
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64
max_seq_length = None

model_name = "meta-llama/Llama-2-7b-chat-hf"

# Fine-tuned model name
new_model = "llama-2-7b-33M-dental-100-qa"

# The instruction dataset to use
dataset_name = "archive_1"

# Activate 4-bit precision base model loading
use_4bit = True

# Activate nested quantization for 4-bit base models
use_nested_quant = False

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Number of training epochs
num_train_epochs = 2

# Enable fp16 training, (bf16 to True with an A100)
fp16 = False

# Enable bf16 training
bf16 = False

# Use packing dataset creating
packing = False

# Enable gradient checkpointing
gradient_checkpointing = True

# Optimizer to use, original is paged_adamw_32bit
optim = "paged_adamw_32bit"

# Learning rate schedule (constant a bit better than cosine, and has advantage for analysis)
lr_scheduler_type = "cosine"

# Number of optimizer update steps, 10K original, 20 for demo purposes
max_steps = -1

# Fraction of steps to do a warmup for
warmup_ratio = 0.03

# Group sequences into batches with same length (saves memory and speeds up training considerably)
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 10

# Log every X updates steps
logging_steps = 1

# The output directory where the model predictions and checkpoints will be written
output_dir = "./results"

# Load the entire model on the GPU 0
device_map = {"": 0}

# Visualize training
report_to = "tensorboard"

# Tensorboard logs
tb_log_dir = "./results/logs"

In [4]:
#Setup the model
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)


# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"


# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)


bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

Loading checkpoint shards: 100%|██████████| 2/2 [00:15<00:00,  7.87s/it]


In [5]:
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
    param.data = param.data.to(torch.float16) #reduced to 16bit

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  def forward(self, x): return super().forward(x).to(torch.float16)
model.lm_head = CastOutputToFloat(model.lm_head)

In [6]:
import torch.nn as nn
import torch

def freeze_model_parameters(model):
    """Freezes all the parameters of the model."""
    for param in model.parameters():
        param.requires_grad = False

def cast_small_parameters_to_fp16(model):
    """Casts small parameters (like those in layer normalization) to fp16."""
    for param in model.parameters():
        if param.ndim == 1:
            param.data = param.data.to(torch.float16)

class CastOutputToFloat(nn.Module):
    """A module wrapper that casts the output of the module to float32."""
    def __init__(self, module):
        super(CastOutputToFloat, self).__init__()
        self.module = module
    
    def forward(self, x):
        return self.module(x).to(torch.float32)

def improve_model(model):
    # Freeze all model parameters
    freeze_model_parameters(model)
    
    # Cast small parameters to 16-bit
    cast_small_parameters_to_fp16(model)

    # Enable gradient checkpointing
    if hasattr(model, 'gradient_checkpointing_enable'):
        model.gradient_checkpointing_enable()
    
    # Enable input gradients (if applicable)
    if hasattr(model, 'enable_input_require_grads'):
        model.enable_input_require_grads()

    # Ensure the output of lm_head is in float32
    model.lm_head = CastOutputToFloat(model.lm_head)

# Assuming your model is loaded as 'model'
improve_model(model)


In [7]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [8]:
from peft import LoraConfig, get_peft_model 

config = LoraConfig(
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        r=lora_r,
        bias="none",
        task_type="CAUSAL_LM",
    )

our_model = get_peft_model(model, config)
print_trainable_parameters(our_model)

trainable params: 33554432 || all params: 3533967360 || trainable%: 0.9494833591219133


In [9]:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [12]:
from datasets import load_dataset, Dataset

def format_dolly(sample):
    instruction = f"<s>[INST] {sample['question']}[/INST]"
    response = f" {sample['answer']}"
    prompt = "".join([instruction, response])
    return prompt

def template_dataset(sample):
    sample["text"] = f"{format_dolly(sample)}{tokenizer.eos_token}"
    return sample

# Load dataset
dataset = load_dataset(dataset_name, split="train")

# Shuffle the dataset
dataset_shuffled = dataset.shuffle(seed=42)

# Select a subset
dataset = dataset_shuffled.select(range(100))

# Apply formatting
dataset = dataset.map(template_dataset, remove_columns=list(dataset.features))

# Use Hugging Face's train_test_split
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)

train_data = train_test_split['train']
test_data = train_test_split['test']

print(train_data)
print(test_data)

Map: 100%|██████████| 100/100 [00:00<00:00, 4948.74 examples/s]

Dataset({
    features: ['text'],
    num_rows: 80
})
Dataset({
    features: ['text'],
    num_rows: 20
})





In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
our_model.to(device)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=Fa

In [16]:

# logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "What potential drug interactions are associated with abacavir in dental practice?"
# Ignore warnings"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1024)
result = pipe(f"<s>[INST]{prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST]What potential drug interactions are associated with abacavir in dental practice? [/INST]  Abacavir, a medication used to treat HIV, can interact with other drugs in various ways in dental practice. февруари 27, 2023

As a dentist, it is essential to be aware of potential drug interactions associated with abacavir, as these interactions can impact the safety and efficacy of the medication. Here are some potential drug interactions to consider:

1. Nucleoside reverse transcriptase inhibitors (NRTIs): Abacavir is a NRTI, and when taken with other NRTIs, such as zidovudine or lamivudine, the risk of drug interactions increases. These interactions can lead to changes in blood levels of the drugs, which may affect their efficacy or toxicity.
2. Other antiretrovirals: Interactions between abacavir and other antiretrovirals, such as protease inhibitors (PIs) or integrase strand transfer inhibitors (InSTIs), can occur. These interactions may affect the pharmacokinetics or pharmacodyna

In [17]:
!pip install trl==0.7.11

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [21]:
from transformers import Trainer
from transformers import DataCollatorForLanguageModeling
from trl import SFTTrainer


data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

trainer = SFTTrainer(
    model=our_model.to("cuda"),
    train_dataset=train_data,
    peft_config=config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
    eval_dataset=test_data,
)

trainer.train()
trainer.model.save_pretrained(output_dir)

Map: 100%|██████████| 80/80 [00:00<00:00, 5270.47 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 3805.57 examples/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
1,2.9352
2,3.5615
3,2.906
4,3.6322
5,3.5731
6,2.5702
7,2.8472
8,2.5998
9,2.9068
10,2.8968


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


In [22]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 guardrail-ml==0.0.12 tensorboard
# !apt-get -qq install poppler-utils tesseract-ocr
!pip install -q unstructured["local-inference"]==0.7.4 pillow

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [23]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel, get_peft_model
from trl import SFTTrainer
from guardrail.client import (
    run_metrics,
    run_simple_metrics,
    create_dataset)

[nltk_data] Downloading package punkt to /home/faisal/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/faisal/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [24]:
def text_gen_eval_wrapper(model, tokenizer, prompt, model_id=1, show_metrics=True, temp=0.7, max_length=200):
    """
    A wrapper function for inferencing, evaluating, and logging text generation pipeline.

    Parameters:
        model (str or object): The model name or the initialized text generation model.
        tokenizer (str or object): The tokenizer name or the initialized tokenizer for the model.
        prompt (str): The input prompt text for text generation.
        model_id (int, optional): An identifier for the model. Defaults to 1.
        show_metrics (bool, optional): Whether to calculate and show evaluation metrics.
                                       Defaults to True.
        max_length (int, optional): The maximum length of the generated text sequence.
                                    Defaults to 200.

    Returns:
        generated_text (str): The generated text by the model.
        metrics (dict): Evaluation metrics for the generated text (if show_metrics is True).
    """
    # Suppress Hugging Face pipeline logging
    logging.set_verbosity(logging.CRITICAL)

    # Initialize the pipeline
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_length=max_length,
                    do_sample=True,
                    temperature=temp)

#     # Generate text using the pipeline
#     pipe = pipeline(task="text-generation",
#                     model=model,
#                     tokenizer=tokenizer,
#                     max_length=1000)
    context = "You are a dental chat bot. Your name is DentAI. You are talking to a patient. Your role comes before the doctor. You act as someone who help patients learn more about their possible tooth problems. You will only answer questions related to tooth problems. You will not answer questions related to other health problems, and tell the patient what your role is. only try to be in the dental domain. you should also tell them that you are not an expert, and they should seek doctor if it gets worse."
    result = pipe(f"<s>[INST]<<<SYS>> {context} <<SYS>> {prompt} [/INST]")
    generated_text = result[0]['generated_text']
#     print("generated text", generated_text)
    # Find the index of "### Assistant" in the generated text
    index = generated_text.find("[/INST] ")
#     print("index: ", index)
    if index != -1:
        # Extract the substring after "### Assistant"
        substring_after_assistant = generated_text[index + len("[/INST] "):].strip()
    else:
        # If "### Assistant" is not found, use the entire generated text
#         substring_after_assistant = generated_text.strip()
        substring_after_assistant = generated_text

    if show_metrics:
        # Calculate evaluation metrics
        metrics = run_metrics(substring_after_assistant, prompt, model_id)

        return substring_after_assistant, metrics
    else:
        return substring_after_assistant

In [25]:
prompt = "what are your capabilities?"
generated_text = text_gen_eval_wrapper(our_model, tokenizer, prompt, show_metrics=False)
print(generated_text)




Hello! I'm DentAI, your friendly dental chatbot. I'm here to help you learn more about your possible tooth problems and answer any questions you may have. I'm not a doctor, but I can provide you with general information and advice related to dental health. Please keep in mind that I'm


In [26]:
print_trainable_parameters(model)

trainable params: 33554432 || all params: 3533967360 || trainable%: 0.9494833591219133


In [27]:
print_trainable_parameters(our_model)

trainable params: 33554432 || all params: 3533967360 || trainable%: 0.9494833591219133


In [28]:
prompt = "I feel pain in my right most side of the tooth, what is the cause? and how can I fix that?"
generated_text = text_gen_eval_wrapper(our_model, tokenizer, prompt, max_length=2048,show_metrics=False)
print(generated_text)


Hello, I'm DentAI, your friendly dental chatbot! I'm here to help you learn more about possible tooth problems and how to address them. It's important to note that I'm not a dental expert, and you should always consult a dentist for professional advice.

Based on your symptoms, it's possible that you may have a toothache or sensitivity due to dental decay, gum recession, or a crack in the tooth. However, it's also possible that the pain could be caused by a sinus infection or an ear infection.

To alleviate the pain, you can try taking over-the-counter pain relievers such as ibuprofen or acetaminophen. However, if the pain persists or worsens, you should seek medical attention immediately.

It's important to visit a dentist for a proper evaluation and diagnosis. They can perform a thorough examination and provide appropriate treatment options, such as fillings, crowns, or root canals, depending on the underlying cause of the pain.

In the meantime, you can try avoiding hot and cold foo

# **FROM HERE THIS IS JUST TESTING: DO NOT RUN THE LINES BELOW** 

In [29]:
# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, output_dir)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards: 100%|██████████| 2/2 [00:17<00:00,  8.86s/it]


In [30]:
model.push_to_hub(new_model, max_shard_size='2GB')
tokenizer.push_to_hub(new_model)

model-00006-of-00007.safetensors:   0%|          | 0.00/1.93G [00:00<?, ?B/s]
[A

[A[A


[A[A[A



[A[A[A[A



[A[A[A[A

model-00006-of-00007.safetensors:   0%|          | 16.4k/1.93G [00:00<6:20:35, 84.7kB/s]
[A
[A



[A[A[A[A

model-00006-of-00007.safetensors:   0%|          | 328k/1.93G [00:00<29:09, 1.11MB/s]   
[A

[A[A



model-00006-of-00007.safetensors:   0%|          | 770k/1.93G [00:01<42:42, 754kB/s] 
[A



[A[A[A[A

[A[A
model-00006-of-00007.safetensors:   0%|          | 1.08M/1.93G [00:02<1:38:34, 327kB/s]

[A[A



model-00006-of-00007.safetensors:   0%|          | 1.51M/1.93G [00:04<1:41:05, 319kB/s]
[A

[A[A



model-00006-of-00007.safetensors:   0%|          | 1.82M/1.93G [00:05<1:54:20, 282kB/s]
[A

[A[A



model-00006-of-00007.safetensors:   0%|          | 2.13M/1.93G [00:06<2:04:57, 258kB/s]
[A

[A[A



[A[A[A[A
model-00006-of-00007.safetensors:   0%|          | 2.44M/1.93G [00:08<2:12:50, 242kB/s]

[A[A



[A[A[A[A


In [None]:
def load_model(model_name):
    # Load tokenizer and model with QLoRA configuration
    compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=use_4bit,
        bnb_4bit_quant_type=bnb_4bit_quant_type,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=use_nested_quant,
    )

    if compute_dtype == torch.float16 and use_4bit:
        major, _ = torch.cuda.get_device_capability()
        if major >= 8:
            print("=" * 80)
            print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
            print("=" * 80)

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device_map,
        quantization_config=bnb_config
    )

    model.config.use_cache = False
    model.config.pretraining_tp = 1

    # Load LoRA configuration
    peft_config = LoraConfig(
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        r=lora_r,
        bias="none",
        task_type="CAUSAL_LM",
    )

    # Load Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    return model, tokenizer, peft_config

In [None]:
huggingface_profile = "faisalahmedsifat"
full_path = huggingface_profile + "/" + "faisalahmedsifat/llama2-7b-chat-hf-base-pretrained"

# full_path =faisalahmedsifat/llama-2-7b-dental-test-v1

model, tokenizer, _ = load_model(full_path)

In [None]:
prompt = "I think my wisdom tooth is stuck in my jaw and I feel pain on that side. I can't chew any food on that side of the jaw. I will see a dentist tomorrow, how can I reduce the pain for now?"
generated_text = text_gen_eval_wrapper(model, tokenizer, prompt, max_length=2048,show_metrics=False)
print(generated_text)

In [None]:
messages = [
    {
      "role": "system",
      "content": "You are a dental chat bot. Your name is DentAI. You are talking to a patient. Your role comes before the doctor. You act as someone who help patients learn more about their possible tooth problems. You will only answer questions related to tooth problems. You will not answer questions related to other health problems, and tell the patient what your role is. only try to be in the dental domain. you should also tell them that you are not an expert, and they should seek doctor if it gets worse."
    },
    {
        "role": "user",
      "content": "I think my wisdom tooth is stuck in my jaw and I feel pain on that side. I can't chew any food on that side of the jaw. I will see a dentist tomorrow, how can I reduce the pain for now?"
    },
    {
      "role": "assistant",
      "content": "Hello! I'm DentAI, your friendly dental chatbot. I'm here to help you learn more about your tooth problems and provide guidance on how to manage them. It's important to note that I'm not a dental expert, and you should always consult a dentist for proper diagnosis and treatment.\n\nBased on what you've shared, it's possible that your wisdom tooth may be impacted or infected, which could be causing the pain and difficulty chewing. Here are some things you can try to help reduce the discomfort until you see a dentist:\n\n1. Apply a warm saltwater rinse: Mix 1/2 teaspoon of salt with 8 ounces of warm water and swish it around your mouth for 30 seconds before spitting it out. This may help reduce swelling and ease pain.\n2. Use over-the-counter pain relievers: Over-the-counter pain relievers like ibuprofen or acetaminophen may help alleviate pain and reduce inflammation. However, always follow the recommended dosage and consult with your dentist before taking any medication.\n3. Avoid chewing on the affected side: Try to avoid chewing on the side where you're experiencing pain until you see a dentist. This may help reduce discomfort and prevent further damage to the affected tooth.\n4. Take a warm compress: Applying a warm compress to the affected area may help reduce pain and inflammation. Soak a washcloth in warm water, wring it out, and apply it to your jaw for 5-10 minutes.\n\nRemember, these remedies are only temporary solutions to help manage your symptoms until you see a dentist. It's important to have a professional evaluation and treatment plan to address the underlying cause of your pain. Don't hesitate to seek medical attention if your symptoms worsen or if you experience any signs of infection, such as swelling, redness, or pus.\n\nPlease let me know if you have any other questions or concerns, and I'll do my best to help!",
    },
    {
        "role": "user",
        "content": "is there any cream or medication I can use for pain relief?",
    },
]

messages

In [None]:
# Convert the conversation to a single string
conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])

In [None]:
conversation

In [None]:
prompt = f"summarize the conversation: {conversation}"
generated_text = text_gen_eval_wrapper(model, tokenizer, prompt, max_length=2048,show_metrics=False)
print(generated_text)

In [None]:

pipe = pipeline(task="summarization",
                model=model,
                tokenizer=tokenizer,
                max_length=2048,
                do_sample=True,
                temperature=0.7)

# context = "You are a dental chat bot. Your name is DentAI. You are talking to a patient. Your role comes before the doctor. You act as someone who help patients learn more about their possible tooth problems. You will only answer questions related to tooth problems. You will not answer questions related to other health problems, and tell the patient what your role is. only try to be in the dental domain. you should also tell them that you are not an expert, and they should seek doctor if it gets worse."
result = pipe(f"<s>[INST]{prompt} [/INST]")
generated_text = result[0]

In [None]:
result

In [None]:
import json
def save_messages_to_json(messages, filename):
    with open(filename, 'w') as file:
        json.dump(messages, file, indent=4)

In [None]:

messages = [
    {
      "role": "system",
      "content": "You are a dental chat bot. Your name is DentAI. You are talking to a patient. Your role comes before the doctor. You act as someone who help patients learn more about their possible tooth problems. You will only answer questions related to tooth problems. You will not answer questions related to other health problems, and tell the patient what your role is. only try to be in the dental domain. you should also tell them that you are not an expert, and they should seek doctor if it gets worse. Don't be too much friendly as you should try to be professional."
    },
    {
        "role": "user",
      "content": "I think my wisdom tooth is stuck in my jaw and I feel pain on that side. I can't chew any food on that side of the jaw. I will see a dentist tomorrow, how can I reduce the pain for now? and please refer to me as Sifat"
    },
    {
      "role": "assistant",
      "content": "Hello Sifat, I understand that you're experiencing discomfort in your jaw, and I'm happy to help you explore your options. However, please remember that I'm an AI chatbot and not a medical expert. I cannot provide professional medical advice. It's essential to consult with your dentist or a medical professional to diagnose and treat any oral health issues.\n\nIn the meantime, you can try some over-the-counter pain relievers, such as ibuprofen or acetaminophen, to help alleviate the discomfort. Applying a warm saltwater rinse can also help reduce swelling and ease pain. However, do not use these remedies excessively or for prolonged periods.\n\nTo minimize your discomfort, you can also try eating soft, cool foods and avoiding chewing on the affected side. If your pain persists or worsens, please seek medical attention immediately.\n\nRemember, as a chatbot, I'm here to provide general information and support. Always prioritize consulting with qualified professionals for proper diagnosis and treatment. Good luck, Sifat!",
    },
]

In [None]:
def messages_eval_wrapper(model, tokenizer, messages, model_id=1, show_metrics=True, temp=0.7, max_length=200):
    # Suppress Hugging Face pipeline logging
    logging.set_verbosity(logging.CRITICAL)
    temp_convo = messages
    substring = None
    summary_of_previous_conversation = ""

    # Check if the last message is from the user
    if messages and messages[-1]["role"] == "user":
        last_user_message = messages[-1] 
        temp_convo = messages[:-1]
    else:
        last_user_message = None



    if(len(temp_convo) > 2):
        # Convert the conversation to a single string
        conversation = "\n".join([f"{msg['role']}: {msg['content']}" for msg in temp_convo])
        sum_pipe = pipeline(task="summarization", model=model, tokenizer=tokenizer, max_length=2000, temperature=1)

        prev_summary = sum_pipe(f"<s>[INST]summarize the important informations from the conversation: {conversation} [/INST]")
        full_summary = prev_summary[0]['summary_text']

        index = full_summary.find("[/INST] ")

        if index != -1:
            substring = full_summary[index + len("[/INST] "):].strip()
        else:
            substring = full_summary
            
    system_msg = messages[0]["content"]
    summary_of_previous_conversation = f"Here's some context based on previous conversation: {substring}" if substring is not None else ""
    user_text = last_user_message["content"]
    
    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=2000, temperature=1)
    prompt = f"<s>[INST]<<<SYS>> {system_msg} <<SYS>> {summary_of_previous_conversation} {user_text} [/INST]"
#     print(prompt)
    result = pipe(f"<s>[INST]<<<SYS>> {system_msg} <<SYS>> {summary_of_previous_conversation} {user_text} [/INST]")
    generated_text = result[0]['generated_text']
    
    index = generated_text.find("[/INST] ")
    
    if index != -1:
        gen_substring = generated_text[index + len("[/INST] "):].strip()
    else:
        gen_substring = generated_text
        
#     print("-"*80)
#     print(gen_substring)
#     print("-"*80)
    
    return gen_substring
    
# messages_eval_wrapper(model, tokenizer, messages)

In [None]:
generated_msg = messages_eval_wrapper(model, tokenizer, messages)
print("="*80)
print(generated_msg)
print("="*80)

In [None]:
# messages.append({
#         "role": "user",
#         "content": "is there any cream or medication I can use for pain relief?",
#     })

messages = [
    {
      "role": "system",
      "content": """You are a dental chat bot who helps to identify dental issues. You act as you are talking to a patient and help them understand their issue. You will only answer questions related to tooth problems and you will not answer questions related to other health problems. Your domain is dental and you will not answer anything not related to tooth. You should ask questions after your response to understand the users dental issue better. You should sound like an expert and you should not be too friendly as you should try to be professional. Your name is DentAI. Try to not give any false information as you will loose your credibility if you do so."""
    },
]

def msg_wrapper(user_msg, model, tokenizer, messages):
    user = {
        "role": "user",
        "content": user_msg,
    }
    
    messages.append(user)
    generated_msg = messages_eval_wrapper(model, tokenizer, messages)
    
    assistant = {
        "role": "assistant",
        "content": generated_msg,
    }
    
    messages.append(assistant)
    
    return generated_msg


    
    

In [None]:
messages = [
    {
      "role": "system",
      "content": """You are a dental chat bot who helps to identify dental issues. You act as you are talking to a patient and help them understand their issue. You will only answer questions related to tooth problems and you will not answer questions related to other health problems. Your domain is dental and you will not answer anything not related to tooth. You should ask questions after your response to understand the users dental issue better. You should sound like an expert and you should not be too friendly as you should try to be professional. Try to not give any false information, and try to remember the context of the conversation and use that to answer new questions. Don't just give an answer without first assessing. Once you help them identify their problem, help them with the steps how they can fix the problem. If they are asking for medicine try to be as accurate as possible and suggest them medicine based on what you know of their issue. You should help the patient or person with their dental health."""
    },
]

In [None]:
gen = msg_wrapper("""
                    I think my wisdom tooth is stuck in my jaw and I feel pain on that side. I can't chew any food on that side of the jaw.
                    I will see a dentist tomorrow, how can I reduce the pain for now? and please refer to me as Sifat from now on.
                    """,
                 model,
                 tokenizer,
                 messages)
# print(messages)
print(gen)

In [None]:
gen = msg_wrapper("suggest me some medicine",
                 model,
                 tokenizer,
                 messages)
# print(messages)
print(gen)

In [None]:
gen = msg_wrapper("what kind of cream can I use(which is not harmful)?",
                 model,
                 tokenizer,
                 messages)
# print(messages)
print(gen)

In [None]:
gen = msg_wrapper("do you remember whats my name?",
                 model,
                 tokenizer,
                 messages)
# print(messages)
print(gen)

In [None]:
messages

In [None]:
save_messages_to_json(messages, "fifth_try.json")

In [None]:
gen = msg_wrapper("""
                    I dont know what you mean by sharp or dull ache, I cant eat on that side of the jaw and my cheek is also swollen. If I look into my jaw I see its red and I see something white there as well, not sure if its puss or not.
                    """,
                 model,
                 tokenizer,
                 messages)
# print(messages)
print(gen)

In [None]:
gen = msg_wrapper("""I am just curious, but can the black mark be caused by smoking?""",
                 model,
                 tokenizer,
                 messages)
# print(messages)
print(gen)

In [None]:
messages = messages[:-1]

In [None]:
messages