<a href="https://colab.research.google.com/github/YunusTamboli/Image-nature-detection/blob/main/imagenaturedetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies
!pip install easyocr pillow transformers torchvision torchaudio --quiet

import easyocr
import PIL.Image as Image
import cv2
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from google.colab import files
import io
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode

# OCR engine used to read any text present in the image ('en' language list).
reader = easyocr.Reader(["en"])

# BLIP captioning fallback for images with no readable text. The processor
# and the model are loaded from the same checkpoint name so they stay in sync.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# Function to capture image from camera (fixed double capture issue)
def capture_image():
    """Capture one webcam frame in the Colab front end and save it as a JPEG.

    Injects a JavaScript helper into the cell output that shows a live camera
    preview with a "Capture" button, waits for the click, and hands the frame
    back to Python as a base64 ``data:`` URL. The decoded bytes are written to
    ``captured_image.jpg`` in the current working directory, overwriting any
    previous capture.

    Returns:
        str: The file name the frame was written to ('captured_image.jpg').

    Note:
        Failures on the browser side (for example a denied camera permission)
        presumably surface as an exception raised by ``eval_js`` -- this
        function does not catch them.
    """
    js_code = Javascript('''
        async function takePhoto() {
          // One container holds both widgets so a single remove() cleans up.
          const div = document.createElement('div');
          const capture = document.createElement('button');
          capture.textContent = 'Capture';
          div.appendChild(capture);

          const video = document.createElement('video');
          video.style.display = 'block';
          div.appendChild(video);
          document.body.appendChild(div);

          let stream = null;
          try {
            stream = await navigator.mediaDevices.getUserMedia({ video: true });
            video.srcObject = stream;
            await video.play();

            // Grow the output iframe so the preview is fully visible.
            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

            // Block until the user clicks the button.
            await new Promise((resolve) => capture.onclick = resolve);

            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);

            return canvas.toDataURL('image/jpeg');
          } finally {
            // Always release the camera and remove the preview/button, so
            // repeated captures do not pile up stale widgets in the output.
            if (stream) {
              stream.getTracks().forEach((track) => track.stop());
            }
            div.remove();
          }
        }
    ''')
    display(js_code)
    data = eval_js('takePhoto()')  # Call only once here

    # The data URL looks like "data:image/jpeg;base64,<payload>"; keep the payload.
    binary = b64decode(data.split(',', 1)[1])
    filename = 'captured_image.jpg'
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename

# Function to upload image from file
def upload_image():
    """Open the Colab upload dialog and return the first uploaded file name.

    Returns:
        The first key of the mapping returned by ``files.upload()``, or
        ``None`` when that mapping is empty.
    """
    uploaded_files = files.upload()
    return next(iter(uploaded_files.keys()), None)

# Function to extract text or generate AI-based name
def process_image(image_path):
    """Suggest a file name for an image from its text or from an AI caption.

    EasyOCR is run on the image first. If it finds any text, the name is built
    from that text; otherwise a BLIP caption is generated and used instead.
    Both the description and the suggested name are printed.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        str: The suggested file name -- the description's whitespace-separated
        words joined with underscores, truncated to 50 characters, followed by
        ".jpg".
    """
    # The context manager closes the underlying file handle; this function is
    # called repeatedly from the menu loop, so a bare open() would leak one
    # handle per processed image.
    with Image.open(image_path) as img:
        # detail=0 makes readtext return plain strings only.
        words = reader.readtext(image_path, detail=0)
        text_content = " ".join(words).strip()

        if text_content:
            description = text_content
            print(f"📝 Extracted Text: {text_content}")
        else:
            # No readable text: fall back to AI caption generation.
            inputs = processor(img, return_tensors="pt")
            out = model.generate(**inputs)
            description = processor.decode(out[0], skip_special_tokens=True)
            print(f"🤖 AI Guess: {description}")

    filename = "_".join(description.split())[:50] + ".jpg"
    print(f"📂 Suggested File Name: {filename}")
    return filename

# Loop for repeated processing
while True:
    print("\nChoose option:\n1 - Capture from camera\n2 - Upload from file\n0 - Exit")
    # Strip so that input such as "1 " is not rejected as invalid.
    choice = input("Enter choice: ").strip()

    if choice == "1":
        img_path = capture_image()
        process_image(img_path)
    elif choice == "2":
        img_path = upload_image()
        # upload_image() returns None when nothing was uploaded (e.g. the
        # dialog was cancelled); skip processing instead of crashing.
        if img_path is None:
            print("No file uploaded, try again.")
            continue
        process_image(img_path)
    elif choice == "0":
        print("Exiting...")
        break
    else:
        print("Invalid choice, try again.")





Choose option:
1 - Capture from camera
2 - Upload from file
0 - Exit
Enter choice: 1


<IPython.core.display.Javascript object>

🤖 AI Guess: a man and woman are standing in front of a wall
📂 Suggested File Name: a_man_and_woman_are_standing_in_front_of_a_wall.jpg

Choose option:
1 - Capture from camera
2 - Upload from file
0 - Exit
