In [4]:
%%writefile app.py
import streamlit as st
from PIL import Image
import numpy as np
import tensorflow as tf
import pickle
from tensorflow.keras.applications.efficientnet import EfficientNetB3, preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Define paths
# Both files are read by load_model_and_tokenizer(). The absolute /content
# prefix ties the app to that directory layout (presumably a Google Colab
# runtime -- adjust when running elsewhere).
MODEL_PATH = "/content/image_caption_model.keras"  # saved captioning model
TOKENIZER_PATH = "/content/tokenizer.pkl"  # pickled tokenizer

# Load model and tokenizer
@st.cache_resource
def load_model_and_tokenizer():
    """Load the trained captioning model and its tokenizer from disk.

    The model is read from ``MODEL_PATH`` with ``compile=False`` and the
    tokenizer is unpickled from ``TOKENIZER_PATH``.

    Returns:
        tuple: ``(model, tokenizer)``, in that order.
    """
    caption_model = tf.keras.models.load_model(MODEL_PATH, compile=False)

    # NOTE: unpickling can execute arbitrary code; TOKENIZER_PATH must only
    # ever point at a file you produced yourself.
    with open(TOKENIZER_PATH, "rb") as tokenizer_file:
        fitted_tokenizer = pickle.load(tokenizer_file)

    return caption_model, fitted_tokenizer

# Module-level handles used by the UI section at the bottom of the file.
model, tokenizer = load_model_and_tokenizer()
# max_length is passed to generate_caption(), where it is both the padded
# length of the text input and the upper bound on the number of generated
# words.
max_length = 34  # Set based on your training

@st.cache_resource
def get_feature_extractor():
    """Build the image encoder: EfficientNetB3 followed by global average pooling.

    The backbone uses ImageNet weights, has its classification head removed
    (``include_top=False``) and takes 300x300 RGB input.

    Returns:
        A Keras ``Model`` mapping an image batch to pooled feature vectors.
    """
    backbone = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(300, 300, 3))
    # Collapse the spatial grid so each image yields one vector: (None, 1536).
    pooled_features = GlobalAveragePooling2D()(backbone.output)
    return Model(inputs=backbone.input, outputs=pooled_features)


# Module-level encoder instance; extract_features() calls its predict().
feature_extractor = get_feature_extractor()

def extract_features(image):
    """Encode a PIL image as a pooled feature vector via ``feature_extractor``.

    Args:
        image: A ``PIL.Image.Image`` in any colour mode. It is converted to
            RGB before anything else, so grayscale, palette, RGBA and CMYK
            inputs all yield the three-channel array the network expects.

    Returns:
        numpy.ndarray: 2-D array with one row per image (a single row here)
        holding the flattened output of ``feature_extractor``.
    """
    # Normalise the colour mode up front. Dropping a fourth channel by slicing
    # is only right for RGBA: it misreads CMYK as RGB and does nothing for the
    # 2-D arrays produced by grayscale or palette images, which would then
    # fail inside the network.
    rgb_image = image.convert("RGB").resize((300, 300))  # EfficientNetB3 expects 300x300 input
    pixels = np.array(rgb_image)
    batch = np.expand_dims(preprocess_input(pixels), axis=0)
    # verbose=0 suppresses the Keras progress bar, matching the predict call
    # in generate_caption.
    features = feature_extractor.predict(batch, verbose=0)
    return np.reshape(features, (features.shape[0], -1))  # Flatten to (1, 1536)

# Generate caption
def generate_caption(model, tokenizer, photo_features, max_length):
    """Greedily decode a caption for one image, one word per step.

    Starting from the ``'startseq'`` marker, each step tokenizes the caption
    so far, pads it to ``max_length``, asks ``model`` for the next-word
    scores and appends the highest-scoring word. Decoding stops at
    ``'endseq'``, at an index the tokenizer does not know, or after
    ``max_length`` steps.

    Args:
        model: Captioning model whose ``predict`` accepts
            ``[photo_features, padded_sequence]``.
        tokenizer: Object providing ``texts_to_sequences`` and ``index_word``.
        photo_features: Image feature array fed to the model at every step.
        max_length: Padded sequence length and maximum number of steps.

    Returns:
        str: The generated caption without the leading ``'startseq'`` marker.
    """
    words = ['startseq']
    for _step in range(max_length):
        token_ids = tokenizer.texts_to_sequences([' '.join(words)])[0]
        padded_ids = pad_sequences([token_ids], maxlen=max_length)
        scores = model.predict([photo_features, padded_ids], verbose=0)
        next_word = tokenizer.index_word.get(np.argmax(scores))
        if next_word is None or next_word == 'endseq':
            break
        words.append(next_word)

    caption_text = ' '.join(words)
    return ' '.join(caption_text.split()[1:])  # remove 'startseq'

# Streamlit UI
# Flow: upload an image -> show it -> extract features -> decode a caption.
st.title("🖼️ Image Caption Generator")
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    try:
        # convert() forces the pixel data to be decoded, so truncated or
        # corrupt files fail here rather than later in the pipeline.
        image = Image.open(uploaded_file).convert("RGB")
    except OSError:
        # The uploader only filters by file extension; the bytes may still not
        # be a readable image. Pillow reports that as an OSError subclass.
        st.error("Could not read the uploaded file as an image. Please upload a valid JPG or PNG.")
        st.stop()

    st.image(image, caption="Uploaded Image", use_column_width=True)

    with st.spinner("Generating caption..."):
        photo_features = extract_features(image)
        caption = generate_caption(model, tokenizer, photo_features, max_length)

    st.success("Caption Generated!")
    st.write("**Caption:**", caption)



Writing app.py


In [None]:
# Start the Streamlit app in the background (stdout and stderr go to
# /content/logs.txt), start localtunnel for port 8501 in the background, and
# print this machine's public IPv4 address.
# NOTE(review): the printed IP is presumably what the localtunnel landing page
# asks for before forwarding to the app -- confirm.
!streamlit run /content/app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com


34.106.9.153
[1G[0K⠙[1G[0Kyour url is: https://shiny-news-begin.loca.lt
