<a href="https://colab.research.google.com/github/arnurabdrakhman-svg/hot-dog-identifier/blob/main/hot_dog_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio torch torchvision pillow numpy
!pip install gradio torch torchvision pillow numpy gTTS

In [None]:
import logging
import os
import tempfile

import gradio as gr
import torch
from gtts import gTTS, gTTSError
from PIL import Image
from torchvision import models, transforms

# 1. Load Model
# ResNet-50 with the IMAGENET1K_V1 weights; eval() returns the module itself,
# so the model is built and switched to inference mode in one expression.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1).eval()

# 2. Preprocessing
# Per-channel mean/std conventionally used with ImageNet-pretrained
# torchvision models.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Resize -> center-crop to 224x224 -> tensor -> normalize.
_resize = transforms.Resize(256)
_crop = transforms.CenterCrop(224)
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD)

preprocess = transforms.Compose([_resize, _crop, _to_tensor, _normalize])

# ImageNet-1k class index that the pretrained classifier uses for "hotdog".
HOTDOG_CLASS_INDEX = 934

# Announcement text -> path of its already-synthesized MP3. Only two phrases
# are ever spoken, so each one needs to be synthesized at most once.
_AUDIO_CACHE = {}


def _synthesize_speech(text):
    """Return the path of an MP3 file speaking ``text``, or None on failure.

    Each synthesis writes to its own uniquely named temporary file, so
    concurrent requests cannot overwrite one another's audio. Successful
    results are cached per phrase and reused while the file still exists.
    """
    cached_path = _AUDIO_CACHE.get(text)
    if cached_path is not None and os.path.exists(cached_path):
        return cached_path

    # Reserve a unique file name, then close the handle so gTTS can open the
    # path itself when saving.
    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    tmp.close()
    try:
        gTTS(text).save(tmp.name)
    except gTTSError:
        # Speech is a bonus feature: keep the text result usable even when
        # the text-to-speech request fails.
        logging.getLogger(__name__).warning(
            "Text-to-speech failed; returning the text result only.",
            exc_info=True,
        )
        os.remove(tmp.name)
        return None

    _AUDIO_CACHE[text] = tmp.name
    return tmp.name


def predict_with_audio(img):
    """Classify an image as hot dog / not hot dog and voice the verdict.

    Args:
        img: Image as a NumPy array (as delivered by the ``gr.Image`` input),
            or None when nothing was uploaded.

    Returns:
        A ``(result_text, audio_path)`` tuple. ``audio_path`` is the path of
        an MP3 announcing the result, or None when no image was supplied or
        speech synthesis failed.
    """
    if img is None:
        return "Upload an image!", None

    # Let Pillow infer the mode from the array shape, then convert, so
    # grayscale and RGBA inputs also reach the model as 3-channel RGB.
    img_pil = Image.fromarray(img.astype("uint8")).convert("RGB")
    input_tensor = preprocess(img_pil).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():
        output = model(input_tensor)

    predicted_idx = output.argmax(dim=1).item()

    # Logic
    is_hotdog = predicted_idx == HOTDOG_CLASS_INDEX
    result_text = "YES! It is a Hot Dog!" if is_hotdog else "NOT a Hot Dog."

    # --- BONUS: Audio Output ---
    return result_text, _synthesize_speech(result_text)

# 3. UI with Audio Component
# Two-column layout: image upload and trigger button on the left, text verdict
# and auto-playing audio announcement on the right.
with gr.Blocks(theme=gr.themes.Glass()) as app:
    # Title emoji restored: it had been stored as mis-decoded UTF-8 bytes.
    gr.Markdown("# 🌭 HotDog AI ")
    gr.Markdown("Now featuring Audio Feedback and ResNet50 Intelligence.")

    with gr.Row():
        with gr.Column():
            img_input = gr.Image(label="Scan Food")
            btn = gr.Button("Identify", variant="primary")
        with gr.Column():
            result_label = gr.Textbox(label="Result")
            audio_output = gr.Audio(label="Audio Announcement", autoplay=True)

    # predict_with_audio returns (text, audio path), matching the two outputs.
    btn.click(fn=predict_with_audio, inputs=img_input, outputs=[result_label, audio_output])

# share=True asks Gradio for a public share link in addition to the local URL.
app.launch(share=True)