In [1]:
!pip install -qqq transformers --progress-bar off
!pip install -qqq bitsandbytes --progress-bar off  # If you used 4-bit quantization
!pip install -qqq peft torch --progress-bar off
!pip install -qqq gradio --progress-bar off

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
distributed 2023.7.1 requires dask==2023.7.1, but you have dask 2024.4.1 which is incompatible.
kfp 2.5.0 requires google-cloud-storage<3,>=2.2.1, but you have google-cloud-storage 1.44.0 which is incompatible.
kfp 2.5.0 requires urllib3<2.0.0, but you have urllib3 2.2.1 which is incompatible.
raft-dask 23.8.0 requires dask==2023.7.1, but you have dask 2024.4.1 which is incompatible.
spacy 3.7.3 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.
tensorflow 2.15.0 requires keras<2.16,>=2.15.0, but you have keras 3.2.1 which is incompatible.
weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.
ydata-profiling 4.6.4 requires numpy<1.26,>=1.16.0, but you have numpy 1.2

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
from huggingface_hub import login

In [3]:
# Pick the compute device: CUDA when torch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [4]:
import os
from getpass import getpass

# Never hardcode access tokens in a notebook: anything committed or shared with
# the file (including its outputs) is exposed. Read the token from the HF_TOKEN
# environment variable, and fall back to an interactive prompt whose input is
# not echoed or stored in the notebook.
# NOTE(review): on Kaggle, the platform's secrets store is the usual place to
# keep this value; export it as HF_TOKEN before running this cell.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

login(hf_token)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
# --- Paths to the saved model and tokenizer ---

# Hub identifier of the base model
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# Directory holding the fine-tuning artefacts (adapter and tokenizer)
source_dir = "/kaggle/input/llama3-ig-ad-generation-task/experiments/"

# Specific training checkpoint inside `source_dir`
checkpoint_dir = f"{source_dir}checkpoint-483/"

In [6]:
# 4-bit NF4 quantization for the base model, computing in float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized base model. `trust_remote_code` is deliberately NOT set:
# it allows code shipped in the model repository to run locally, and it is not
# needed for an architecture that transformers supports natively.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    use_safetensors=True,
    quantization_config=bnb_config,
    device_map="auto",
)

# Load the tokenizer saved with the training checkpoint
# NOTE(review): the tokenizer comes from `checkpoint_dir` while the adapter is
# read from `source_dir` — confirm both point at the same training run/step.
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)

# Load the PEFT configuration
# NOTE(review): `peft_config` is not used by any cell visible in this notebook;
# kept so the name stays available for inspection.
peft_config = PeftConfig.from_pretrained(source_dir)

# Wrap the base model with the fine-tuned adapter weights
model = PeftModel.from_pretrained(base_model, source_dir)

# Reuse the EOS token for padding and pad on the right
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


The model and tokenizer are now loaded and ready for inference.

In [7]:
# System instruction placed at the top of every prompt.
# NOTE(review): "heashtags" looks like a typo for "hashtags". It is left as-is
# because the adapter may have been fine-tuned with this exact wording —
# confirm against the training data before changing it.
default_system_prompt = (
    "Write an engaging Instagram post caption about the given input. "
    "You can generate a few heashtags."
)

In [8]:
def generate_prompt(conversation: str, system_prompt: str = default_system_prompt) -> str:
    """Build the instruction / input / response prompt for the model.

    Args:
        conversation: User request; surrounding whitespace is stripped.
        system_prompt: Instruction text placed after ``### Instruction:``.

    Returns:
        The assembled prompt, ending with the ``### Response:`` header so the
        model continues from there.
    """
    sections = [
        f"### Instruction: {system_prompt}",
        "",
        "### Input:",
        conversation.strip(),
        "",
        "### Response:",
    ]
    return "\n".join(sections)

def clean_generated_text(text: str) -> str:
    """Remove repeated hashtags from generated text.

    Hashtags are compared case-insensitively and without trailing punctuation,
    so ``#Hike``, ``#hike`` and ``#hike,`` count as the same tag; only the
    first occurrence is kept. Runs of spaces inside a line are collapsed to a
    single space, while line breaks are preserved so multi-line captions keep
    their layout.

    Args:
        text: Raw decoded model output.

    Returns:
        The cleaned text with leading/trailing whitespace removed.
    """
    seen_hashtags = set()
    cleaned_lines = []
    for line in text.splitlines():
        words = line.split()
        kept = []
        for word in words:
            if word.startswith("#"):
                # Normalise so "#Fun," and "#fun" are recognised as duplicates
                key = word.lower().rstrip(".,!?;:")
                if key in seen_hashtags:
                    continue
                seen_hashtags.add(key)
            kept.append(word)
        # Keep lines that were blank to begin with (paragraph breaks), but drop
        # lines that only became empty because their hashtags were duplicates.
        if kept or not words:
            cleaned_lines.append(" ".join(kept))

    # Further clean-up steps may be added here later; for now only duplicate
    # hashtags are handled.
    return "\n".join(cleaned_lines).strip()

def generate_post(model, text: str) -> str:
    """Generate a caption for an already-formatted prompt.

    Args:
        model: Model exposing ``generate`` (in this notebook, the PEFT-wrapped
            base model).
        text: Full prompt string, e.g. the output of ``generate_prompt``.

    Returns:
        Only the newly generated continuation (prompt tokens are sliced off),
        decoded without special tokens and passed through
        ``clean_generated_text``.
    """
    # Place the inputs on the model's own device when it reports one; otherwise
    # fall back to the notebook-level ``device``.
    target_device = getattr(model, "device", device)
    inputs = tokenizer(text, return_tensors="pt").to(target_device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            # temperature / top_p only take effect when sampling is enabled, so
            # request it explicitly instead of relying on the checkpoint's
            # default generation config.
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            # Pass the pad token explicitly to avoid the per-call
            # "Setting `pad_token_id` to `eos_token_id`" warning.
            pad_token_id=tokenizer.pad_token_id,
        )

    # Drop the echoed prompt tokens and decode just the continuation
    generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return clean_generated_text(generated_text)


In [9]:
# Smoke test: run the full prompt -> generation pipeline on one sample request
sample_instruction = (
    "Create a new post about the 'Adventure' model backpack "
    "with 25 liters capacity for $200, perfect for climbers."
)
prompt = generate_prompt(sample_instruction)
generated_post = generate_post(model, prompt)
print(f"Generated Post Content:\n {generated_post}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
2024-05-24 11:48:10.105413: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-24 11:48:10.105523: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-24 11:48:10.232639: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Generated Post Content:
 🌄 Who's ready to conquer new peaks? The 'Adventure' model is here for those who love to take the road less traveled - a comfortable, weather-resistant backpack with a trekking main compartment and trekking shoulder straps, ready for your next 3-day peak climb or week-long trek. With a 25-liter capacity and a 200 dollar price tag, the 'Adventure' is the perfect companion for anyone looking for a reliable daypack that has plenty of space for your gear.


In [10]:
# Gradio interface
import gradio as gr


def gradio_interface(instruction):
    """Gradio callback: turn a user instruction into a generated caption.

    Args:
        instruction: Text entered in the UI textbox.

    Returns:
        The generated caption, or a short hint when the instruction is empty
        (so a blank submission does not trigger a model generation).
    """
    if not instruction or not instruction.strip():
        return "Please enter an instruction."

    prompt = generate_prompt(instruction)
    return generate_post(model, prompt)

# Simple text-in / text-out UI wired to the caption generator
interface = gr.Interface(
    outputs="text",
    inputs=gr.Textbox(placeholder="Enter instruction here...", lines=3),
    fn=gradio_interface,
)



In [11]:
# Launch the Gradio interface defined above.
# NOTE(review): per this cell's output, Gradio on Kaggle switches on
# `share=True` by itself, which exposes the model through a public URL —
# pass `share=False` explicitly if that is not intended.
interface.launch()

Running on local URL:  http://127.0.0.1:7860
Kaggle notebooks require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Running on public URL: https://61aa336793b716414d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
