In [2]:
import pandas as pd

# Installing relevant packages

In [3]:
%%capture
# Environment setup; %%capture suppresses the pip output of this cell.
# Unsloth is installed from its GitHub repository with no tag or commit pinned,
# so the installed version depends on when the cell is run.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps installs only the packages named here; xformers and trl are capped
# below the stated versions, the remaining packages are unpinned.
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
# pyarrow is pinned to one exact version.
!pip install pyarrow==11.0.0

# Defining model and tokenizer

In [4]:
from unsloth import FastLanguageModel
import torch
# Loader settings. max_seq_length is reused when the SFTTrainer is built later on.
max_seq_length = 2048
dtype = None  # presumably lets Unsloth choose the dtype for the detected GPU — TODO confirm
load_in_4bit = True  # presumably loads 4-bit quantised weights (bitsandbytes is installed above) — TODO confirm

# Fetch the base Llama 3.1 8B checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.8: Fast Llama patching. Transformers = 4.43.4.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


# Adding LoRA adapters so that only 1 to 10% of all parameters are updated

In [5]:
# Wrap the loaded base model with LoRA adapters; `model` is rebound to the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    # Adapters are attached to the attention projections (q/k/v/o) and the
    # MLP projections (gate/up/down).
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    # NOTE(review): the log printed by this cell states that only dropout = 0 is
    # supported for fast patching and that 0.05 causes a performance hit (it reports
    # "0 QKV layers, 0 O layers and 0 MLP layers" patched). Consider lora_dropout = 0
    # unless the extra regularisation is wanted.
    lora_dropout = 0.05,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,  # same seed value is passed to TrainingArguments later
    use_rslora = False,
    loftq_config = None,
)

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2024.8 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


# Using the Alpaca dataset

In [6]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Render a column-oriented batch into Alpaca-style training strings.

    `examples` must provide equally long "instruction", "input" and "output"
    sequences. Each triple is substituted into `alpaca_prompt` and terminated
    with `EOS_TOKEN`.

    Returns a dict with a single key, "text", holding one rendered prompt per record.
    """
    triples = zip(examples["instruction"], examples["input"], examples["output"])
    # EOS_TOKEN must be appended, otherwise generation will go on forever.
    rendered = [
        alpaca_prompt.format(instruction, context, answer) + EOS_TOKEN
        for instruction, context, answer in triples
    ]
    return {"text": rendered}

from datasets import load_dataset
# NOTE(review): this downloads the yahma/alpaca-cleaned train split, but `dataset` is
# rebound to the nutrition-coach data (Dataset.from_pandas(data)) before it is ever
# read. This looks like leftover template code — confirm and remove.
dataset = load_dataset("yahma/alpaca-cleaned", split = "train")


Generating train split:   0%|          | 0/51760 [00:00<?, ? examples/s]

# Loading dataset

In [7]:
df = pd.read_json("hf://datasets/asbjoernrubek/clinical_nutritional_coach_formatted/transcripts_formatted.jsonl", lines=True)

In [None]:
df.head()

# Data Preprocessing

In [8]:
# Read the coaching transcripts (JSON Lines, one record per line) from the Hugging Face Hub.
# Re-reading here keeps the cell re-runnable even though `df` is sliced in place below.
df = pd.read_json("hf://datasets/asbjoernrubek/clinical_nutritional_coach_formatted/transcripts_formatted.jsonl", lines=True)
# Keep the first 1001 rows (positions 0 through 1000) and every column.
df = df.iloc[:1001,:]

# Split each conversation into the patient's opening message (input) and the
# nutritionist's first reply (label)
def processed_data(df):
  """Extract (inputs, labels) from the 'conversation' column of `df`.

  The '<s>[INST]' and '[/INST]' tags are replaced by 'patient:' and 'Nutritionist:'.
  Everything before the first 'Nutritionist:' becomes the input; the text between the
  first and second 'Nutritionist:' becomes the label, re-prefixed with 'Nutritionist:'.
  A conversation without any 'Nutritionist:' marker yields an empty-string label.

  Returns two lists with one entry per row, in row order.
  """
  speaker_tag = 'Nutritionist:'
  patient_turns = []
  coach_turns = []

  for _, record in df.iterrows():
    relabelled = (
      record['conversation']
      .replace('<s>[INST]', 'patient:')
      .replace('[/INST]', speaker_tag)
    )
    # One split serves both outputs: piece 0 is the patient text, piece 1 the first reply.
    pieces = relabelled.split(speaker_tag)

    patient_turns.append(pieces[0].strip())
    if len(pieces) > 1:
      coach_turns.append(speaker_tag + pieces[1].strip())
    else:
      coach_turns.append('')
  return patient_turns, coach_turns

inputs, labels = processed_data(df)
print(inputs[:5])
print(labels[:5])

# Reduce every label to the bare reply text: cut at the first '<' (any markup left
# after the reply) and drop the leading 'Nutritionist:' prefix.
# partition() replaces split('Nutritionist:')[1] because processed_data() returns ''
# for a conversation without a reply, and ''.split('Nutritionist:')[1] raises
# IndexError. With partition() such a row simply gets an empty output.
f_labels = [label.split('<')[0].partition('Nutritionist:')[2] for label in labels]

# Alpaca column layout; the single instruction string is broadcast to every row.
data = pd.DataFrame({'output': f_labels, 'input': inputs, 'instruction': '["Act like a friendly dietician", "A very intelligent patient dietician","Never give dangerous advice"]'})

["patient: I'm facing a lot of issues with managing my daily calories. I’m not sure what my goals should be.", "patient: I've been trying to keep a steady workout routine for a while now, but it feels like I'm just resisting change instead of embracing it. Every time I make a little progress, something comes up, and I fall right back into old habits. It's really frustrating because it's starting to feel like a cycle of failure that I can't seem to break out of. I could use some help to figure out how to make exercise a more consistent part of my life.", "patient: I'm struggling with overcoming my habits of large portions at meals. Can you give me some suggestions?", "patient: I'm really focused on maintaining my current weight, but I find sticking to my goals is tough when it comes to portion sizes. I need some help with that.", "patient: Hi, I'm really focusing on managing my weight and I'm hoping for success, but it's been overwhelming. Any guidance?"]
["Nutritionist:It's really grea

In [None]:
data.head()

# Converting the pandas DataFrame into a Hugging Face Dataset

In [9]:
from datasets import Dataset
dataset = Dataset.from_pandas(data)

In [None]:
dataset

In [None]:
dataset[:3]

# Mapping

In [10]:
dataset = dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/1001 [00:00<?, ? examples/s]

In [None]:
dataset[:3]


### Train the model


In [11]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer over the formatted "text" column.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",  # column produced by formatting_prompts_func
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        # 2 per device x 4 accumulation steps = total batch size 8 (see the training log).
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # NOTE(review): num_train_epochs has no effect here — the log printed by this cell
        # says "max_steps is given, it will override any value given in num_train_epochs".
        # With 12 steps of 8 samples only 96 of the 1,001 examples are seen; remove
        # max_steps for a full epoch.
        num_train_epochs = 1,
        max_steps = 12,
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Map (num_proc=2):   0%|          | 0/1001 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
#@title Show current memory stats
# Baseline snapshot taken before training; start_gpu_memory and max_memory are
# reused by the final-stats cell to report the training delta.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

# Train model

In [12]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,001 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 12
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,2.6636
2,2.6324
3,2.5546
4,2.3493
5,2.2175
6,2.022
7,1.8685
8,1.5989
9,1.3392
10,1.2662


In [None]:
#@title Show final memory and time stats
# Peak reserved memory after training, in GB, and the share attributable to training
# (difference to the baseline recorded before training started).
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
# Both figures expressed as a percentage of the device's total memory.
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
used_percentage = round(used_memory / max_memory * 100, 3)

# Timing report
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
# Memory report
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

# Installing streamlit

In [13]:
%%capture
!pip install streamlit

In [14]:
import streamlit as st

# User chatbot

In [16]:
# def chatbot_response(user_input):
#     # Process the user input and generate a response
#     response = f"Thank you for sharing. You mentioned: {user_input}"
#     return response


# Generate response

In [68]:
# The trailing ": " separates the prompt label from the typed text; without it the
# two run together on screen ("Enter your promptI am ...").
user_input = input("Enter your prompt: ")

Enter your promptI am a 30 years old woman who finds it difficult to eat a healthy diet. help me with a meal plan that will keep me healthy and strong


In [73]:
def generate_response(user_input, instruction="", max_new_tokens=128):
  """Generate the fine-tuned model's reply to a single user message.

  Args:
    user_input: Text placed in the "### Input:" slot of `alpaca_prompt`.
    instruction: Text placed in the "### Instruction:" slot. Defaults to "", which
      is what this function always used before. NOTE(review): the training rows
      were built with a fixed instruction string and a "patient:" prefix on the
      input; passing the same values here may match the fine-tuning data better —
      confirm.
    max_new_tokens: Upper bound on the number of generated tokens (default 128,
      the previously hard-coded value).

  Returns:
    The generated completion with surrounding whitespace stripped.
  """
  FastLanguageModel.for_inference(model) # Enable native 2x faster inference
  prompt = alpaca_prompt.format(
      instruction, # instruction
      user_input, # input
      "", # output - leave this blank for generation!
  )
  inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")

  outputs = model.generate(**inputs, max_new_tokens = max_new_tokens, use_cache = True)

  # The generated sequence starts with the prompt tokens. Decode only what comes
  # after them rather than splitting the decoded text on "Response:": that split
  # kept just the tail after the LAST "Response:", truncating any reply that itself
  # contained the word.
  prompt_length = inputs["input_ids"].shape[1]
  completion = tokenizer.batch_decode(outputs[:, prompt_length:], skip_special_tokens=True)
  return completion[0].strip()

In [70]:
generate_response(user_input)

"Sure! I'm happy to help you with a meal plan that will keep you healthy and strong. Let's start by setting some goals. What are your goals for your meal plan?"

# Saving, loading finetuned models


In [74]:
model.save_pretrained("nutritionist_model") # Local saving
tokenizer.save_pretrained("nutritionist_model")

('nutritionist_model/tokenizer_config.json',
 'nutritionist_model/special_tokens_map.json',
 'nutritionist_model/tokenizer.json')

In [24]:
# NOTE(review): streamlit was already installed by an earlier `!pip install streamlit`
# cell, so only pyngrok is new here. Neither package is version-pinned.
!pip install streamlit
!pip install pyngrok




# GUI

In [90]:
# %%writefile app.py

# import streamlit as st
# import torch

# # Mock FastLanguageModel for demonstration
# class FastLanguageModel:
    #@staticmethod
#     def from_pretrained(model_name, max_seq_length):
#         return "mock_model", "mock_tokenizer"

    #@staticmethod
#     def for_inference(model):
#         pass

# # Initialize the model and tokenizer
# model_name = "nutritionist_model"
# #model_name = saved_model
# max_seq_length = 2048

# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name=model_name,
#     max_seq_length=max_seq_length,
# )


# # Set the device to GPU if available, otherwise CPU
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model  # Skipping .to(device) as model is mocked

# alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

# ### Instruction:
# {}

# ### Input:
# {}

# ### Response:
# {}"""

# EOS_TOKEN = "<|endoftext|>"

# def generate_response(user_input):
#   # alpaca_prompt = Copied from above
#   FastLanguageModel.for_inference(model) # Enable native 2x faster inference
#   inputs = tokenizer(
#   [
#       alpaca_prompt.format(
#           "", # instruction
#           user_input, # input
#           "", # output - leave this blank for generation!
#       )
#   ], return_tensors = "pt").to("cuda")

#   outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
#   response = tokenizer.batch_decode(outputs,skip_special_tokens=True)
#   response_text = response[0].split("Response:")[-1].strip()
#   return response_text

# # def generate_response(user_input):
# #     FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
# #     inputs = {
# #         "input_ids": torch.tensor([[0]]).to(device)  # Mock input tensor for demonstration
# #     }

# #     # Mock model output
# #     outputs = ["Response: Here is the generated response for your input."]
# #     response = outputs[0].split("Response:")[-1].strip()
# #     return response

# # Streamlit app configuration
# st.set_page_config(
#     page_title="Nutritional Coach Chatbot",
#     page_icon="🥗",
#     layout="centered",
#     initial_sidebar_state="auto",
# )

# # App title and description
# st.title("Nutritional Coach Chatbot")
# st.markdown("""
# Welcome to the Nutritional Coach Chatbot!
# Here you can ask any questions regarding your dietary needs and receive expert advice.
# Just type in your query below and get personalized responses!
# """)

# # Input form for user query
# st.header("Ask your question")
# user_input = st.text_area("Type your question here:", placeholder="I am having challenges controlling my weight")

# # Button to submit query
# if st.button("Get Response"):
#     if user_input:
#         with st.spinner("Generating response..."):
#             response = generate_response(user_input)
#         st.success("Here's the advice from your nutritional coach:")
#         st.write(response)
#     else:
#         st.warning("Please enter a question before submitting.")

# # Footer
# st.markdown("""
# ---
# **Disclaimer:** The advice provided by this chatbot is not a substitute for professional medical advice, diagnosis, or treatment. Always seek the advice of your physician or other qualified health provider with any questions you may have regarding a medical condition.
# """)



%%writefile app.py

import streamlit as st
import torch

# Demonstration stand-in for the real FastLanguageModel loader
class FastLanguageModel:
    """Mock loader exposing the two static entry points this app calls.

    NOTE(review): from_pretrained ignores its arguments and returns mock objects, so
    the app answers with a canned reply and never loads the saved model directory.
    """

    @staticmethod
    def from_pretrained(model_name, max_seq_length):
        """Return a fresh (MockModel, MockTokenizer) pair; both arguments are unused."""
        mock_pair = (MockModel(), MockTokenizer())
        return mock_pair

    @staticmethod
    def for_inference(model):
        """Placeholder that does nothing and returns None."""
        return None

class MockModel:
    """Placeholder model whose generate() returns a constant tensor."""

    def generate(self, **kwargs):
        """Accept any keyword arguments, ignore them, and return a 1x1 int64 tensor holding 0."""
        return torch.zeros((1, 1), dtype=torch.long)

class MockTokenizer:
    """Placeholder tokenizer with fixed encode and decode results."""

    # Fixed text handed back by batch_decode regardless of its arguments.
    _CANNED_REPLY = "Response: Here is the generated response for your input."

    def __call__(self, texts, return_tensors):
        """Ignore the arguments; return a dict with a 1x1 int64 'input_ids' tensor holding 0."""
        return dict(input_ids=torch.zeros((1, 1), dtype=torch.long))

    def batch_decode(self, outputs, skip_special_tokens):
        """Ignore the arguments; return a one-element list with the canned reply."""
        return [self._CANNED_REPLY]

# Initialize the model and tokenizer
# NOTE(review): FastLanguageModel here is the mock class defined earlier in this file,
# so model_name and max_seq_length are passed but not used to load real weights.
model_name = "nutritionist_model"
max_seq_length = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
)

# Set the device to GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Only objects that expose .to() are moved; MockModel defines none, so it is kept as is.
model = model.to(device) if hasattr(model, 'to') else model

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

def generate_response(user_input):
    """Return the model's reply to `user_input`.

    The text goes into the input slot of `alpaca_prompt` (instruction and response
    slots stay empty), is tokenized, moved to `device`, and passed to
    `model.generate`. The first decoded sequence is cut after its last "Response:"
    marker and stripped of surrounding whitespace.
    """
    FastLanguageModel.for_inference(model)  # switch the model to inference mode
    prompt = alpaca_prompt.format("", user_input, "")
    encoded = tokenizer([prompt], return_tensors="pt")

    # Relocate every encoded tensor to the selected device, in place.
    for field, tensor in list(encoded.items()):
        encoded[field] = tensor.to(device)

    generated = model.generate(**encoded, max_new_tokens=128, use_cache=True)
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
    # rpartition yields the text after the last marker, or the whole string if absent.
    _, _, answer = decoded[0].rpartition("Response:")
    return answer.strip()

# Streamlit app configuration
st.set_page_config(
    page_title="Nutritional Coach Chatbot",
    page_icon="🥗",
    layout="centered",
    initial_sidebar_state="auto",
)

# App title and description
st.title("Nutritional Coach Chatbot")
st.markdown("""
Welcome to the Nutritional Coach Chatbot!
Here you can ask any questions regarding your dietary needs and receive expert advice.
Just type in your query below and get personalized responses!
""")

# Input form for user query
st.header("Ask your question")
user_input = st.text_area("Type your question here:", placeholder="I am having challenges controlling my weight")

# Submit button: warn on an empty question, otherwise generate and show the reply
submitted = st.button("Get Response")
if submitted and not user_input:
    st.warning("Please enter a question before submitting.")
elif submitted:
    # The spinner is shown only while the response is being generated.
    with st.spinner("Generating response..."):
        response = generate_response(user_input)
    st.success("Here's the advice from your nutritional coach:")
    st.write(response)

# Footer
st.markdown("""
---
**Disclaimer:** The advice provided by this chatbot is not a substitute for professional medical advice, diagnosis, or treatment. Always seek the advice of your physician or other qualified health provider with any questions you may have regarding a medical condition.
""")


Overwriting app.py


# Generates the url

In [91]:
from pyngrok import ngrok

ngrok.set_auth_token("2k27N1Eim5F58064MC5kWNlgm5x_7vVfdEvByVnTzoaFU8h5B")  # Replace with your actual authtoken

# Start ngrok, specifying the port as a keyword argument
public_url = ngrok.connect(8501, bind_tls=True) # The bind_tls ensures the connection is secure
print(f'Public URL: {public_url}')

# Run the Streamlit app
!streamlit run app.py

Public URL: NgrokTunnel: "https://5c5c-35-198-211-78.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.198.211.78:8501[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m
