In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
# The %%capture magic on the first line hides the (long) pip output of this cell.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# We have to check which Torch version for Xformers (2.3 -> 0.0.27)
# Torch versions below 2.4.0 get xformers pinned to 0.0.27; otherwise xformers is installed unpinned.
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
# --no-deps installs only the packages listed here, without pulling in their dependencies.
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton

In [None]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Sequence length handed to from_pretrained below. Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.


# **READY THE MODEL AND TOKENIZER**

In [None]:
# Load the model and its tokenizer through Unsloth, using the max_seq_length,
# dtype and load_in_4bit settings defined in the configuration cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    # Alternative checkpoints are kept commented out; exactly one model_name must be active.
    #model_name = "unsloth/Llama-3.2-3B-Instruct",
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    #model_name = "hyadess/UAP-EEE-llama-3.1-8b-16_bit_merged",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
# Called after the model has been loaded and ahead of the cells that call model.generate.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

In [None]:
from unsloth.chat_templates import get_chat_template

# Attach the Llama-3 chat template to the tokenizer. The mapping below lets messages be
# written ShareGPT-style, i.e. {"from": "human" or "gpt", "value": "<text>"}, instead of
# using "role"/"content" keys with "user"/"assistant" roles.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
)

# **DATA PREP**

In [None]:
import pandas as pd

In [None]:
# Load the image metadata table that drives the inference loop.
image_df = pd.read_csv("/content/with_urls.csv")

# Fail fast: the inference loop reads these columns from every row, so a wrong or
# outdated CSV should be reported here rather than once generation has started.
_required_columns = {"name", "explanation"}
_missing_columns = _required_columns - set(image_df.columns)
if _missing_columns:
    raise ValueError(
        f"with_urls.csv is missing required column(s): {sorted(_missing_columns)}"
    )


In [None]:
# Display the loaded table for a quick visual check of its rows and columns.
image_df

# **INFERENCE**

In [None]:
import re

In [None]:
def extract_last_section(text):
  """Return the body of the last chat turn found in a decoded Llama-3 transcript.

  A turn body is the text that follows an ``<|end_header_id|>`` marker and runs
  up to the next ``<|eot_id|>`` marker. If the final turn was never closed
  (for example, generation stopped at the token limit before ``<|eot_id|>``
  was produced), the body runs to the end of ``text`` instead, so the partial
  reply is returned rather than the preceding turn.

  Args:
    text: Decoded transcript that still contains the special-token markers.

  Returns:
    The last turn body with surrounding whitespace stripped, or ``""`` when
    ``text`` contains no ``<|end_header_id|>`` marker.
  """
  # `\Z` lets end-of-string act as a terminator; without it an unterminated
  # final turn is not matched and the previous turn would be returned.
  pattern = r"<\|end_header_id\|>(.*?)(?:<\|eot_id\|>|\Z)"
  matches = re.findall(pattern, text, re.DOTALL)
  return matches[-1].strip() if matches else ""



In [None]:
from datasets import load_dataset

# Mount Google Drive
# The results CSV is written under /content/drive/MyDrive at the end of the notebook,
# so Drive has to be mounted before the inference loop finishes.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def generate_response(messages, max_new_tokens = 1024):
  """Generate the model's reply to a chat conversation.

  The conversation is rendered with the tokenizer's chat template, moved to
  the CUDA device, passed to ``model.generate`` and decoded; the reply is then
  pulled out of the decoded transcript with ``extract_last_section``.

  Args:
    messages: List of chat messages accepted by the tokenizer's chat template.
      With the ShareGPT mapping configured in this notebook these are dicts
      such as ``{"from": "human", "value": "..."}``.
    max_new_tokens: Upper bound on the number of tokens to generate.
      Defaults to 1024, the value previously hard-coded here.

  Returns:
    The text of the last turn in the decoded output, as returned by
    ``extract_last_section``.
  """
  inputs = tokenizer.apply_chat_template(
      messages,
      tokenize = True,
      add_generation_prompt = True, # Must add for generation
      return_tensors = "pt",
  ).to("cuda")

  outputs = model.generate(
      input_ids = inputs,
      max_new_tokens = max_new_tokens,
      use_cache = True,
  )
  # A single conversation is passed in, so only the first decoded sequence is needed.
  text = tokenizer.batch_decode(outputs)[0]
  return extract_last_section(text)

In [None]:
def _cell_text(value):
    """Return a CSV cell as text, mapping missing values (NaN/None) to an empty string."""
    return "" if pd.isna(value) else str(value)


# Generate one topic description per row of image_df, then store the descriptions
# in a new "topic_description" column and save the table to Google Drive.
results = []
for index, row in image_df.iterrows():
    # Empty CSV cells are read as NaN (a float); coerce them to text so one bad
    # row cannot abort the run and discard every response generated so far.
    # Everything from the first "." onwards is dropped from the name.
    name = _cell_text(row["name"]).split(".")[0]
    explanation = _cell_text(row["explanation"])

    prompt = (
        "describe the topic " + name
        + " in 300 words.The topic should be described in the context of electrical and electronic engineering."
        + " Explanation of an image on that topic is also given below for further context:\n"
        + explanation
        + "\n\n The description should cover the usage of the algorithm, system or device described by the provided context and the topic name."
        + " Avoid including the image explanaton in the description."
        + " Avoid including anything outside the given context."
    )
    messages = [
        {"from": "human", "value": prompt},
    ]

    response = generate_response(messages)
    results.append(response)

    print(f"======================================================topic description for image {index}=====================================================================")
    print(response)

# results holds exactly one entry per row, in row order, so it lines up with the frame.
image_df['topic_description'] = results

image_df.to_csv("/content/drive/MyDrive/image_with_topic_description.csv", index=False)

print("Completions generated and saved")