<a href="https://colab.research.google.com/github/ajaysuseel/MiniProject_AD/blob/main/hazard_detection_using_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BlipProcessor, BlipForConditionalGeneration

In [3]:
from google.colab import drive
import shutil
import os

# 🚀 Mount Google Drive
drive.mount('/content/drive')

# 🔹 Source (Google Drive folder)
src_folder = "/content/drive/MyDrive/models/models"

# 🔹 Destination (Colab models folder)
dest_folder = "/content/models"

# # 🗑️ Remove existing folder if it exists
# if os.path.exists(dest_folder):
#     shutil.rmtree(dest_folder)
#     print("⚠️ Existing 'models' folder deleted.")

# 🔄 Copy the folder from Drive
shutil.copytree(src_folder, dest_folder)
print("✅ Models folder copied successfully from Drive!")

Mounted at /content/drive
✅ Models folder copied successfully from Drive!


In [4]:
# Load the finetuned BLIP model and processor
blip_processor = BlipProcessor.from_pretrained("/content/models/finetuned_blip1")
blip_model = BlipForConditionalGeneration.from_pretrained("/content/models/finetuned_blip1")

# Load a lightweight LLM (e.g., distilgpt2)
llm_tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
llm_model = AutoModelForCausalLM.from_pretrained("distilgpt2")

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [12]:
import requests
from PIL import Image
from io import BytesIO

def load_image(image_path_or_url):
    """
    Loads an image from a local file or URL.
    """
    if image_path_or_url.startswith("http://") or image_path_or_url.startswith("https://"):
        response = requests.get(image_path_or_url)
        response.raise_for_status()  # Check for request errors
        image = Image.open(BytesIO(response.content)).convert("RGB")
    else:
        image = Image.open(image_path_or_url).convert("RGB")
    return image

In [13]:
def generate_caption(image_path_or_url):
    """
    Generate a caption for the input image using the finetuned BLIP model.
    """
    image = load_image(image_path_or_url)
    inputs = blip_processor(image, return_tensors="pt")
    outputs = blip_model.generate(**inputs)
    caption = blip_processor.decode(outputs[0], skip_special_tokens=True)
    return caption

In [28]:
def detect_hazard(caption):
    """
    Use the lightweight LLM to determine if the caption indicates a hazard.
    The prompt includes few-shot examples to guide the model.
    """
    few_shot_examples = (
        "Example 1:\n"
        "Caption: 'a car driving on a clear road'\n"
        "Hazard: No Hazard. The scene is typical and safe for autonomous driving.\n\n"
        "Example 2:\n"
        "Caption: 'a car skidding on an icy road with debris'\n"
        "Hazard: Hazard. The conditions indicate a dangerous scenario.\n\n"
        "Example 3:\n"
        "Caption: 'a man riding a horse drawn carriage down a road'\n"
        "Hazard: No Hazard. Although unusual, this scene does not necessarily pose a hazard for autonomous driving.\n\n"
        "Example 4:\n"
        "Caption: 'An animal crossing a road'\n"
        "Hazard: Hazard. Animal will most likely collide with car.\n\n"
        "Example 5:\n"
        "Caption: 'A toy or object lying in front of road'\n"
        "Hazard: Hazard. Car may collide with it.\n\n"
    )

    prompt = (
        few_shot_examples +
        f"Caption: '{caption}'\nHazard:"
    )

    # Ensure the tokenizer has a pad token
    llm_tokenizer.pad_token = llm_tokenizer.eos_token

    # Tokenize the prompt with padding to generate an attention mask
    encoded_input = llm_tokenizer(prompt, return_tensors="pt", padding=True)
    input_ids = encoded_input.input_ids
    attention_mask = encoded_input.attention_mask

    # Generate the LLM response, now using the attention mask
    output_ids = llm_model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=250,  # Increased max_length for longer responses
        do_sample=True,
        temperature=0.7
    )
    response = llm_tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Basic parsing: look for the first occurrence of "Hazard:" output after the few-shot examples
    if "Hazard:" in response:
        extracted = response.split("Hazard:", 1)[-1].strip()
        if extracted.startswith("Hazard"):
            hazard = True
        elif extracted.startswith("No Hazard"):
            hazard = False
        else:
            hazard = None
    else:
        hazard = None

    return hazard, response


In [19]:
def main(image_path):
    # Generate caption from the image
    caption = generate_caption(image_path)
    print("Generated Caption:", caption)

    # Use the caption to detect hazard
    hazard, explanation = detect_hazard(caption)
    print("Hazard Detected:", hazard)
    #print("LLM Explanation:", explanation)

In [9]:
IMAGE_PATH="https://raw.githubusercontent.com/ajaysuseel/MiniProject_AD/main/raw_data/pranav/images/file203.jpg"


In [30]:
image_path="/content/new.jpg"

In [31]:
if __name__ == "__main__":
    # Replace with your image file path
    image_file = image_path
    main(image_file)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Caption: a group of gi
Hazard Detected: False
