In [76]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle'):
    for filename in filenames:
        if 'doctor' in filename:
            print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
%%capture
%pip install -U bitsandbytes
%pip install -U transformers
%pip install -U accelerate
%pip install -U peft
%pip install -U trl

In [2]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("hf")
login(token = hf_token)

In [3]:
base_model = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
new_model = "/kaggle/input/finetuning-llama-3/llama-3-8b-chat-doctor/"


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch
from trl import setup_chat_format
# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model)

base_model_reload = AutoModelForCausalLM.from_pretrained(
        base_model,
        return_dict=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
)

base_model_reload, tokenizer = setup_chat_format(base_model_reload, tokenizer)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

ValueError: Chat template is already added to the tokenizer. If you want to overwrite it, please set it to None

In [5]:
# Merge adapter with base model
model = PeftModel.from_pretrained(base_model_reload, new_model)

model = model.merge_and_unload()

In [7]:
messages = [{"role": "user", "content": "Hello doctor, I have bad acne. How do I get rid of it?"}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipe(prompt, max_new_tokens=120, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Hello doctor, I have bad acne. How do I get rid of it?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Hello,For mild acne, I suggest you apply a mixture of turmeric powder and ginger paste. For more severe acne, consult a dermatologist and get a proper treatment. Hope I have answered your query. Let me know if I can assist you further. Regards, Dr.assistant

Hi! Welcome to Healthcaremagic! I can understand your concern. I would suggest you the following treatment options: 1. You can apply a topical retinoid, such as adapalene gel, to the affected areas. 2. You can use a topical antibiotic, such as cl


In [8]:
model.save_pretrained("llama-3-8b-chat-doctor")
tokenizer.save_pretrained("llama-3-8b-chat-doctor")



Saving checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

('llama-3-8b-chat-doctor/tokenizer_config.json',
 'llama-3-8b-chat-doctor/special_tokens_map.json',
 'llama-3-8b-chat-doctor/tokenizer.json')

In [9]:
model.push_to_hub("llama-3-8b-chat-doctor", use_temp_dir=False)
tokenizer.push_to_hub("llama-3-8b-chat-doctor", use_temp_dir=False)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]



Saving checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/Anandharaju/llama-3-8b-chat-doctor/commit/4a2a2974db164515e6fd1ee259072503e5ae2330', commit_message='Upload tokenizer', commit_description='', oid='4a2a2974db164515e6fd1ee259072503e5ae2330', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Anandharaju/llama-3-8b-chat-doctor', endpoint='https://huggingface.co', repo_type='model', repo_id='Anandharaju/llama-3-8b-chat-doctor'), pr_revision=None, pr_num=None)

In [12]:
!git clone https://github.com/ggerganov/llama.cpp
%cd llama.cpp

Cloning into 'llama.cpp'...
remote: Enumerating objects: 47024, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 47024 (delta 19), reused 11 (delta 11), pack-reused 46996 (from 3)[K
Receiving objects: 100% (47024/47024), 98.91 MiB | 18.66 MiB/s, done.
Resolving deltas: 100% (33767/33767), done.
/kaggle/working/llama.cpp


In [13]:
!pwd

/kaggle/working/llama.cpp


In [18]:
!ls -lh /kaggle/working/llama-3-8b-chat-doctor

total 15G
-rw-r--r-- 1 root root  688 Mar 24 21:57 config.json
-rw-r--r-- 1 root root  121 Mar 24 21:57 generation_config.json
-rw-r--r-- 1 root root 4.7G Mar 24 21:58 model-00001-of-00004.safetensors
-rw-r--r-- 1 root root 4.7G Mar 24 21:58 model-00002-of-00004.safetensors
-rw-r--r-- 1 root root 4.6G Mar 24 21:59 model-00003-of-00004.safetensors
-rw-r--r-- 1 root root 1.1G Mar 24 21:59 model-00004-of-00004.safetensors
-rw-r--r-- 1 root root  24K Mar 24 21:59 model.safetensors.index.json
-rw-r--r-- 1 root root 5.1K Mar 24 22:00 README.md
-rw-r--r-- 1 root root  301 Mar 24 22:00 special_tokens_map.json
-rw-r--r-- 1 root root  50K Mar 24 22:00 tokenizer_config.json
-rw-r--r-- 1 root root  17M Mar 24 22:00 tokenizer.json


In [22]:
!ls -lh /kaggle/working/llama.cpp

total 716K
-rw-r--r--  1 root root  47K Mar 24 22:01 AUTHORS
-rwxr-xr-x  1 root root  21K Mar 24 22:01 build-xcframework.sh
drwxr-xr-x  2 root root 4.0K Mar 24 22:01 ci
drwxr-xr-x  2 root root 4.0K Mar 24 22:01 cmake
-rw-r--r--  1 root root 7.3K Mar 24 22:01 CMakeLists.txt
-rw-r--r--  1 root root 4.6K Mar 24 22:01 CMakePresets.json
-rw-r--r--  1 root root  437 Mar 24 22:01 CODEOWNERS
drwxr-xr-x  4 root root 4.0K Mar 24 22:01 common
-rw-r--r--  1 root root 6.4K Mar 24 22:01 CONTRIBUTING.md
-rwxr-xr-x  1 root root 243K Mar 24 22:01 convert_hf_to_gguf.py
-rwxr-xr-x  1 root root  18K Mar 24 22:01 convert_hf_to_gguf_update.py
-rwxr-xr-x  1 root root  19K Mar 24 22:01 convert_llama_ggml_to_gguf.py
-rwxr-xr-x  1 root root  19K Mar 24 22:01 convert_lora_to_gguf.py
drwxr-xr-x  4 root root 4.0K Mar 24 22:01 docs
drwxr-xr-x 45 root root 4.0K Mar 24 22:01 examples
-rw-r--r--  1 root root 1.6K Mar 24 22:01 flake.lock
-rw-r--r--  1 root root 7.3K Mar 24 22:01 flake.nix
drwxr-xr-x  5 root root 4.0K M

In [None]:
%pip install -r requirements.txt

In [26]:
%cd /kaggle/working/llama.cpp
!python convert_hf_to_gguf.py /kaggle/input/llama-3-8b-chat-doctor --outfile /kaggle/working/ai_doctor.gguf --outtype f16

/kaggle/working/llama.cpp


In [102]:
!ls -lh /kaggle/input/finetuning-llama-3/llama-3-8b-chat-doctor

total 161M
-rw-r--r-- 1 nobody nogroup  880 Mar 24 18:49 adapter_config.json
-rw-r--r-- 1 nobody nogroup 161M Mar 24 18:49 adapter_model.safetensors
drwxr-xr-x 2 nobody nogroup    0 Mar 24 18:49 checkpoint-450
-rw-r--r-- 1 nobody nogroup 5.1K Mar 24 18:49 README.md


In [3]:
%cd /kaggle/input/finetuning-llama-3

/kaggle/input/finetuning-llama-3


In [12]:
!ls -lh

total 352K
-rw-r--r-- 1 nobody nogroup    0 Mar 24 23:36 custom.css
drwxr-xr-x 3 nobody nogroup    0 Mar 24 23:36 llama-3-8b-chat-doctor
-rw-r--r-- 1 nobody nogroup  31K Mar 24 23:36 __notebook__.ipynb
-rw-r--r-- 1 nobody nogroup  651 Mar 24 23:36 __output__.json
-rw-r--r-- 1 nobody nogroup 313K Mar 24 23:36 __results__.html
drwxr-xr-x 3 nobody nogroup    0 Mar 24 23:36 wandb


In [4]:
!zip -r /kaggle/working/doctor.zip llama-3-8b-chat-doctor

  adding: llama-3-8b-chat-doctor/ (stored 0%)
  adding: llama-3-8b-chat-doctor/adapter_model.safetensors (deflated 7%)
  adding: llama-3-8b-chat-doctor/adapter_config.json (deflated 56%)
  adding: llama-3-8b-chat-doctor/README.md (deflated 66%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/ (stored 0%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/adapter_model.safetensors (deflated 7%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/trainer_state.json (deflated 83%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/training_args.bin (deflated 51%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/adapter_config.json (deflated 56%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/README.md (deflated 65%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/tokenizer.json (deflated 85%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/tokenizer_config.json (deflated 96%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/scheduler.pt (deflated 56%)
  adding: llama-3-8b-chat-doctor/checkpoint-450/

In [5]:
!ls -lh /kaggle/working/doctor.zip

-rw-r--r-- 1 root root 591M Mar 24 23:45 /kaggle/working/doctor.zip


In [6]:
from IPython.display import FileLink
FileLink(r'/kaggle/working/doctor.zip')