<a href="https://colab.research.google.com/github/cadairhinojosa/SCM-Practice-Cassandra-Adair-Hinojosa/blob/main/SeeingThroughWords.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Install the necessary transformers and packages
!pip install transformers torch torchvision --quiet
!pip install ultralytics --quiet
!pip install gradio --quiet


In [None]:
#import transformers and packages
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
from ultralytics import YOLO
import gradio as gr
import torch


In [None]:
# Load the BLIP image-captioning processor and model.
# Both are loaded from the same checkpoint, so the identifier is written once
# here to keep the two from drifting apart.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model_caption = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)


In [None]:
# Load the YOLO model used for object detection.
# "yolov8n.pt" is the lightweight version of the weights.
model_yolo = YOLO(model="yolov8n.pt")


In [None]:
# Labels that are treated as potentially dangerous when the object detector
# reports them. Detected labels are lower-cased before being looked up here, so
# every entry is written in lower case.
# NOTE(review): whether the loaded YOLO weights can actually emit each of these
# labels is not visible here — confirm against `model_yolo.names`.
danger_keywords = [
    "fire",
    "gun",
    "knife",
    "weapon",
    "explosion",
    "smoke",
    "car",
    "truck",
    "bus",
    "train",
    "crowd",
    "bear",
    "dog",
    "cliff",
    "motorcycle",
    "police car",
    "rain",
    "drugs",
    "police light",
    "danger sign",
    "blood",
    "crime scene",
    "police",
    "handcuffs",
    "broken",
    "glass",
    "ambulance",
    "crime",
    "fight",
]


In [None]:
# Produce a text description of an image with the BLIP captioning model.
def generate_caption(image):
    """Return a caption string for ``image``.

    The image is turned into PyTorch tensors by ``processor``, those tensors
    are passed to ``model_caption.generate``, and the first generated sequence
    is decoded back to text with special tokens removed.
    """
    encoded = processor(images=image, return_tensors="pt")
    generated = model_caption.generate(**encoded)
    first_sequence = generated[0]
    return processor.decode(first_sequence, skip_special_tokens=True)


In [None]:
# Run object detection on the image and turn the detected labels into a safety
# status. Crime-scene labels take priority over the general danger labels.

def detect_danger(image):
    """Classify an image as safe, cautionary, or a possible crime scene.

    Every box reported by ``model_yolo`` is mapped to its class name, the names
    are lower-cased, and only those listed in ``danger_keywords`` are kept.

    Args:
        image: Passed straight to ``model_yolo``; the Gradio interface in this
            notebook supplies a PIL image.

    Returns:
        One of three strings:
        - "🔴 Possible Crime Scene: <labels>" when any kept label is a
          crime-scene keyword (gun, knife, blood, fight, handcuffs);
        - "🟡 Caution: <labels>" when there are kept labels but none of them is
          a crime-scene keyword (this includes 'police' on its own);
        - "🟢 Safe" when no detected label is on the danger list.
        ``<labels>`` is the deduplicated, alphabetically sorted list of kept
        labels joined with ", ".
    """
    results = model_yolo(image)

    # Collect the class name of every detected box across all result objects.
    labels = []
    for r in results:
        labels.extend(model_yolo.names[int(c)] for c in r.boxes.cls)

    # Normalise to lower case once so every later comparison is consistent,
    # then deduplicate and sort so the message is the same on every run.
    matched = sorted({label.lower() for label in labels} & set(danger_keywords))

    # 'police' is deliberately NOT in this set: police presence alone should
    # only raise a caution, not flag a crime scene.
    crime_scene_keywords = {'gun', 'knife', 'blood', 'fight', 'handcuffs'}

    summary = ', '.join(matched)
    if any(label in crime_scene_keywords for label in matched):
        return f"🔴 Possible Crime Scene: {summary}"
    if matched:
        return f"🟡 Caution: {summary}"
    return "🟢 Safe"

In [None]:
# Process the image for analysis: generate a text caption, assess the safety
# status, and return both as one display string.
def analyze_image(img):
    """Caption an image and report its safety status.

    Args:
        img: The image to analyze, passed on to ``generate_caption`` and
            ``detect_danger``. ``None`` is accepted and produces a prompt to
            upload an image instead of an error.

    Returns:
        A string of the form "Caption: <caption>\\n\\nSafety Status: <status>",
        or a short message asking for an image when ``img`` is ``None``.
    """
    # Guard against a missing image (e.g. the form submitted with nothing
    # uploaded) so the models are never called with None.
    if img is None:
        return "No image provided. Please upload an image to analyze."

    caption = generate_caption(img)
    danger = detect_danger(img)

    # The caption can reveal context the object detector has no label for.
    # This is a substring check, so e.g. 'arrest' also matches 'arrested'.
    # When it fires it replaces the detector's status entirely.
    crime_words = ['arrest', 'weapon', 'blood', 'shooting']
    caption_lower = caption.lower()
    if any(word in caption_lower for word in crime_words):
        danger = "🔴 Possible Crime Scene (based on caption)"

    return f"Caption: {caption}\n\nSafety Status: {danger}"


In [None]:
# Build a small Gradio web interface around analyze_image and start it.
import gradio as gr

# The uploaded picture is handed to analyze_image as a PIL image.
image_input = gr.Image(type="pil")

demo = gr.Interface(
    title="🧠 Crime Scene Caption & Detection App",
    description="Upload an image to receive a description and a danger warning.",
    fn=analyze_image,
    inputs=image_input,
    outputs="text",
)
demo.launch()
