In [6]:
# ================== 1. Setup ==================
# Notebook shell commands: install gTTS (imported below for text-to-speech)
# and pycocotools (installed quietly).
!pip install gtts
!pip install pycocotools --quiet
import os, json, shutil, random
import torch
import torchvision
from torchvision import transforms
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split
from google.colab import drive
from gtts import gTTS
from IPython.display import Audio, display

# Mount Google Drive at /content/drive; the image and model paths used at the
# end of this cell live under /content/drive/MyDrive.
drive.mount('/content/drive')

# ================== 2. Download COCO val2017 ==================
!wget -q http://images.cocodataset.org/zips/val2017.zip
!unzip -q val2017.zip
!wget -q http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip -q annotations_trainval2017.zip

# ================== 3. Select Target Classes ==================
# The position of a class in this list is its class index (see the
# dataset-preparation step, which uses target_class_ids.index).
coco_annotation_file = 'annotations/instances_val2017.json'
target_classes = ['person', 'car', 'dog', 'cat', 'bicycle']

# Load the COCO instance-annotation JSON into memory.
with open(coco_annotation_file, 'r') as f:
    coco_data = json.load(f)

# Two-way lookup between COCO category ids and category names.
category_id_to_name = dict(
    (cat['id'], cat['name']) for cat in coco_data['categories']
)
category_name_to_id = {
    name: cat_id for cat_id, name in category_id_to_name.items()
}
# COCO ids of the selected classes, in the same order as target_classes.
target_class_ids = [category_name_to_id[name] for name in target_classes]

# ================== 4. Collect Image -> Label ==================
# Each image gets exactly one label: the category of the first annotation
# (in file order) that belongs to one of the target classes.
image_id_to_label = {}
used_image_ids = set()

for ann in coco_data['annotations']:
    if ann['category_id'] not in target_class_ids:
        continue  # annotation is for a class we do not train on
    image_id = ann['image_id']
    # setdefault keeps the first label seen for an image and ignores later ones
    image_id_to_label.setdefault(image_id, ann['category_id'])
    used_image_ids.add(image_id)

# ================== 5. Prepare Dataset ==================
save_dir = '/content/coco_classification'
images_dir = os.path.join(save_dir, 'images')
os.makedirs(images_dir, exist_ok=True)

# Map each COCO image id to its file name.
image_id_to_filename = dict(
    (img['id'], img['file_name']) for img in coco_data['images']
)

# Copy every labelled image from val2017/ into images_dir and record
# a (copied path, class index) pair for it.
data = []
for image_id, label_id in image_id_to_label.items():
    filename = image_id_to_filename[image_id]
    src, dst = os.path.join('val2017', filename), os.path.join(images_dir, filename)
    shutil.copyfile(src, dst)
    # class index (0-4) is the position of the COCO id in target_class_ids
    class_position = target_class_ids.index(label_id)
    data.append((dst, class_position))

# ================== 6. Split Dataset ==================
# Hold out 20% of the (image path, class index) pairs for validation;
# the fixed random_state keeps the split the same across runs.
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

class COCODataset(Dataset):
    """Dataset over a list of ``(image_path, label)`` pairs.

    Each item is the image at ``image_path`` opened as RGB, passed through
    ``transform`` when one is given, together with its label.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.data = data

    def __len__(self):
        """Return the number of ``(image_path, label)`` pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the pair stored at position ``idx``."""
        path, target = self.data[idx]
        rgb = Image.open(path).convert('RGB')
        # A falsy transform (e.g. None) means the PIL image is returned as-is.
        return (self.transform(rgb) if self.transform else rgb), target

# Validation preprocessing: resize to 224x224 and convert to a tensor.
transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Training preprocessing: the same steps plus a random horizontal flip.
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

train_dataset = COCODataset(train_data, transform=transform_train)
val_dataset = COCODataset(val_data, transform=transform_val)

# Both loaders use batches of 16 and 2 worker processes; only the training
# loader shuffles.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=16,
    num_workers=2,
)

# ================== 7. Model and Training ==================
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

def load_model(model_path, num_classes):
    """Rebuild the EfficientNet-B0 classifier and load its saved weights.

    Args:
        model_path: Path to a checkpoint file containing the model's
            ``state_dict``.
        num_classes: Number of outputs of the replaced final linear layer.

    Returns:
        The model, with the checkpoint weights loaded, in evaluation mode.
    """
    # weights=None: load_state_dict below overwrites every parameter, so
    # downloading the ImageNet weights first would be wasted work.
    model = efficientnet_b0(weights=None)
    head = model.classifier[1]
    model.classifier[1] = torch.nn.Linear(head.in_features, num_classes)
    # map_location="cpu" lets a checkpoint that was saved on a GPU be loaded
    # on a CPU-only runtime; without it torch.load fails there.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()  # switch the model to evaluation mode
    return model

# ================== 8. Prediction and Voice ==================

# Arabic translations of the class names
label_translations = dict(
    person='شخص',
    car='سيارة',
    dog='كلب',
    cat='قطة',
    bicycle='دراجة',
)

def speak_arabic(text, output_path='/content/drive/MyDrive/blind_helper/output.mp3'):
    """Synthesize Arabic speech for ``text`` and return it as playable audio.

    Args:
        text: Text handed to gTTS with ``lang='ar'``.
        output_path: File path the synthesized audio is saved to.

    Returns:
        An ``Audio`` display object built from ``output_path``.
    """
    gTTS(text, lang='ar').save(output_path)
    return Audio(output_path)

def load_and_predict(image_path, model_path, target_classes):
    """Classify one image and speak the predicted class in Arabic.

    Args:
        image_path: Path of the image file to classify.
        model_path: Checkpoint path passed on to ``load_model``.
        target_classes: Class names; the predicted index selects from this
            list, and its length sets the number of model outputs.

    Returns:
        The audio object returned by ``speak_arabic`` for the Arabic label
        (or for the English label when no translation exists).
    """
    # Load the model
    model = load_model(model_path, len(target_classes))

    # Image preprocessing: resize to 224x224 and convert to a tensor
    to_input = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    # Load the image and add a leading batch dimension
    rgb_image = Image.open(image_path).convert("RGB")
    batch = to_input(rgb_image).unsqueeze(0)

    # Prediction, without gradient tracking
    with torch.no_grad():
        logits = model(batch)
        class_index = torch.max(logits, 1)[1].item()
        label = target_classes[class_index]
        arabic_label = label_translations.get(label, label)

    print(f"📷 Prediction: {label} → {arabic_label}")
    return speak_arabic(arabic_label)

# ================== 9. Test Image ==================
image_path = '/content/drive/MyDrive/blind_helper/car.jpg'  # set this to the correct path of the image
model_path = '/content/drive/MyDrive/NEW_MODEL.pth'  # the correct path of the model checkpoint

# Call the function to predict the image's class and convert it to speech
speak_arabic_result = load_and_predict(image_path, model_path, target_classes)
display(speak_arabic_result)  # shows the audio player after the prediction


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
replace val2017/000000212226.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: r
new name: car
replace val2017/000000231527.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000578922.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000062808.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000119038.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000114871.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000463918.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000365745.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000320425.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000481404.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000314294.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace val2017/000000335328.jpg? [y]es, [n