In [None]:
### Cell : Import libraries
import torch
import torchvision
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import json
import os

### Cell : Load and prepare YOLO model (using pre-trained)
# Builds the module-level `yolo_model` that test_yolo_detection() and the
# save cell below both read.
print("Loading YOLOv8 model...")
yolo_model = YOLO('yolov8n.pt')  # Using nano version for faster inference

# No sample-image inference is run in this cell; the message below only
# confirms that the YOLO constructor returned without raising.
print("YOLO model loaded successfully!")

### Cell : Load and prepare Text Generation model
# Builds the module-level `text_tokenizer` and `text_model` used by
# generate_description() and written to disk by the save cell.
print("Loading GPT-2 model for text generation...")
text_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Reuse the end-of-sequence token for padding; generate_description() passes
# eos_token_id as pad_token_id, so the two stay consistent.
text_tokenizer.pad_token = text_tokenizer.eos_token
text_model = GPT2LMHeadModel.from_pretrained('gpt2')

print("Text generation model loaded successfully!")

### Cell 5: Save models in appropriate formats
print("Saving models...")

# Make sure both output directories exist before anything is written
for directory in ('../saved_models', '../saved_models/text_generator'):
    os.makedirs(directory, exist_ok=True)

# YOLO: persist the weights of the wrapped torch module.
# NOTE(review): only the state_dict is written, so a loader presumably has to
# rebuild the architecture before restoring it - confirm against the consumer.
torch.save(yolo_model.model.state_dict(), '../saved_models/yolo_model.pt')

# GPT-2: model first, then tokenizer, both into the same directory
for artifact in (text_model, text_tokenizer):
    artifact.save_pretrained('../saved_models/text_generator/')

print("Models saved successfully!")

### Cell : Test the models
def test_yolo_detection(image_path):
    """Run the YOLO detector on one image and summarise every detection.

    Args:
        image_path: Image source handed straight to ``yolo_model``.

    Returns:
        A list with one dict per detected box, holding the class label
        under ``'class'`` and the score under ``'confidence'``.
    """
    # One entry per box, flattened across every result the model returns.
    return [
        {
            'class': yolo_model.names[int(detection.cls[0])],
            'confidence': float(detection.conf[0]),
        }
        for prediction in yolo_model(image_path)
        for detection in prediction.boxes
    ]

def generate_description(detected_objects):
    """Generate a free-text scene description from detected objects.

    A prompt naming up to the first five detections is built and GPT-2 is
    asked to continue it with sampling.

    Args:
        detected_objects: Sequence of dicts carrying a ``'class'`` key, such
            as the list returned by ``test_yolo_detection``.

    Returns:
        The decoded text (the prompt followed by the sampled continuation),
        or a fixed message when ``detected_objects`` is empty.
    """
    if not detected_objects:
        # With nothing to list, the prompt would read "I can see . This scene
        # appears to be" and the model would invent a scene from it.
        return "No objects were detected in this image."

    objects_list = ", ".join(obj['class'] for obj in detected_objects[:5])
    prompt = f"In this image, I can see {objects_list}. This scene appears to be"

    # Calling the tokenizer (instead of .encode) also returns the attention
    # mask. It must be passed explicitly because pad and eos share one id, so
    # generate() cannot infer which positions are padding.
    encoded = text_tokenizer(prompt, return_tensors='pt', max_length=512, truncation=True)

    with torch.no_grad():
        outputs = text_model.generate(
            encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
            max_length=150,  # counts the prompt tokens as well
            num_return_sequences=1,
            temperature=0.7,
            pad_token_id=text_tokenizer.eos_token_id,
            do_sample=True
        )

    description = text_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return description

# Test with a sample image (you can upload any image)
# NOTE: this cell only prints a banner; neither test_yolo_detection() nor
# generate_description() is actually called here.
print("Testing pipeline...")
# For testing, you can use any image from COCO dataset or your own:
# pass its path to test_yolo_detection() and feed the returned list to
# generate_description().

Loading YOLOv8 model...
YOLO model loaded successfully!
Loading GPT-2 model for text generation...




Text generation model loaded successfully!
Saving models...
Models saved successfully!
Testing pipeline...
