In [4]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from PIL import UnidentifiedImageError
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input, concatenate
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, concatenate, GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score, accuracy_score, roc_curve, auc


In [5]:
# Directory containing images
image_folder = "C:/Users/singh/Downloads/yelp_dataset/photos"
photos_df = pd.read_json("C:/Users/singh/Downloads/yelp_dataset/photos.json", lines=True)

In [6]:

photos_df.head()


Unnamed: 0,photo_id,business_id,caption,label
0,zsvj7vloL4L5jhYyPIuVwg,Nk-SJhPlDBkAZvfsADtccA,Nice rock artwork everywhere and craploads of ...,inside
1,HCUdRJHHm_e0OCTlZetGLg,yVZtL5MmrpiivyCIrVkGgA,,outside
2,vkr8T0scuJmGVvN2HJelEA,_ab50qdWOk0DdB6XOrBitw,oyster shooter,drink
3,pve7D6NUrafHW3EAORubyw,SZU9c8V2GuREDN5KgyHFJw,Shrimp scampi,food
4,H52Er-uBg6rNrHcReWTD2w,Gzur0f0XMkrVxIwYJvOt2g,,food


In [7]:

# Count the number of occurrences of each label
label_counts = photos_df['label'].value_counts()


In [8]:

def load_images_with_metadata(photo_df, image_folder, max_images_per_label=1678):
    images, captions, labels = [], [], []
    label_counts = {}

    for index, row in photo_df.iterrows():
        photo_id = row['photo_id']
        label = row['label']
        caption = row['caption'] if 'caption' in row and isinstance(row['caption'], str) else ""

        if label not in label_counts:
            label_counts[label] = 0

        if label_counts[label] >= max_images_per_label:
            continue

        img_path = os.path.join(image_folder, f"{photo_id}.jpg")

        try:
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img) / 255.0  # Normalize the image
            images.append(img_array)
            captions.append(caption)
            labels.append(label)
            label_counts[label] += 1

        except FileNotFoundError:
            print(f"Image {img_path} not found.")
        except UnidentifiedImageError:
            print(f"Image {img_path} is not a valid image file.")
        except Exception as e:
            print(f"Error loading {img_path}: {e}")

    X = np.array(images)
    y = np.array(labels)
    captions = np.array(captions)  # Convert captions list to a NumPy array

    return X, y, captions


In [9]:
X, y, captions = load_images_with_metadata(photos_df, image_folder)

print(f"Loaded {X.shape[0]} images with shape {X.shape[1:]} and {len(y)} labels.")
print(f"Loaded {len(captions)} captions.")

Image C:/Users/singh/Downloads/yelp_dataset/photos\alXRAhs47jk-sR_fAaaluQ.jpg not found.
Image C:/Users/singh/Downloads/yelp_dataset/photos\ydm3g1wUWSxJnMPgHk2JhQ.jpg not found.
Image C:/Users/singh/Downloads/yelp_dataset/photos\J5jXHRbVqeJHAdv65GPnLQ.jpg not found.
Image C:/Users/singh/Downloads/yelp_dataset/photos\xUxbhVeTug4JZSQJC998Gg.jpg not found.
Image C:/Users/singh/Downloads/yelp_dataset/photos\RdtzaOdPDJvnKpm1UuznIQ.jpg not found.
Image C:/Users/singh/Downloads/yelp_dataset/photos\wIfV8E5Fd5dYVJIBsyLf-A.jpg not found.
Image C:/Users/singh/Downloads/yelp_dataset/photos\6qlRO3OMSCJzQE6SLjioyQ.jpg not found.
Image C:/Users/singh/Downloads/yelp_dataset/photos\8NPcztQiJ174oQ8EP0_7XQ.jpg not found.
Image C:/Users/singh/Downloads/yelp_dataset/photos\7J70KeQ5ap9mqmatn-A8lA.jpg not found.
Loaded 8390 images with shape (224, 224, 3) and 8390 labels.
Loaded 8390 captions.


In [10]:
unique_labels = np.unique(y)
label_map = {label: idx for idx, label in enumerate(unique_labels)}
y_mapped = np.vectorize(label_map.get)(y)
y_categorical = to_categorical(y_mapped, num_classes=len(unique_labels))

In [11]:

# Split data into training and test sets
X_train, X_test, y_train, y_test, captions_train, captions_test = train_test_split(
    X, y_categorical, captions, test_size=0.2, random_state=42
)


In [13]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Check the types and shapes of your data
print("X_train type:", type(X_train), "shape:", X_train.shape)
print("captions_train type:", type(captions_train), "shape:", captions_train.shape)
print("y_train type:", type(y_train), "shape:", y_train.shape)

# Filter out empty captions
non_empty_indices = [i for i, caption in enumerate(captions_train) if caption.strip() != '']
X_train_clean = X_train[non_empty_indices]
captions_train_clean = captions_train[non_empty_indices]
y_train_clean = y_train[non_empty_indices]

# Check the new shapes
print("Cleaned X_train shape:", X_train_clean.shape)
print("Cleaned captions_train shape:", captions_train_clean.shape)
print("Cleaned y_train shape:", y_train_clean.shape)

# Convert captions to string arrays if they aren't already
captions_train_clean = np.array(captions_train_clean, dtype=str)
captions_test = np.array(captions_test, dtype=str)  # Assuming captions_test is already defined

# Print to confirm
print("Updated captions_train type:", type(captions_train_clean), "shape:", captions_train_clean.shape)
print("Updated captions_test type:", type(captions_test), "shape:", captions_test.shape)

# Text vectorization (assuming you have a text_vectorizer already defined)
# Example: text_vectorizer = tf.keras.layers.TextVectorization(max_tokens=10000)
text_vectorizer = tf.keras.layers.TextVectorization(max_tokens=10000)  # Adjust as needed
text_vectorizer.adapt(captions_train_clean)  # Fit the vectorizer on training captions

# Vectorize and pad captions
padded_train = pad_sequences(text_vectorizer(captions_train_clean), padding='post', maxlen=50)  # Adjust maxlen as needed
padded_test = pad_sequences(text_vectorizer(captions_test), padding='post', maxlen=50)

X_train type: <class 'numpy.ndarray'> shape: (6712, 224, 224, 3)
captions_train type: <class 'numpy.ndarray'> shape: (6712,)
y_train type: <class 'numpy.ndarray'> shape: (6712, 5)
Cleaned X_train shape: (3252, 224, 224, 3)
Cleaned captions_train shape: (3252,)
Cleaned y_train shape: (3252, 5)
Updated captions_train type: <class 'numpy.ndarray'> shape: (3252,)
Updated captions_test type: <class 'numpy.ndarray'> shape: (1678,)


In [14]:

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Embedding, Input, concatenate, LSTM, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [15]:

# Build EfficientNet model
base_model = EfficientNetB0(input_shape=(224, 224, 3), weights='imagenet', include_top=False)

# Add custom classification layers for images
image_input = base_model.input
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global pooling layer
x = Dense(256, activation='relu')(x)  # Increased units for more complexity
x = Dropout(0.5)(x)  # Dropout for regularization
x = Dense(128, activation='relu')(x)  # Additional dense layer

# Text input processing
text_input = Input(shape=(50,), dtype='int32')  # Fixed-length padded captions
embedding_dim = 128
text_vectorized = Embedding(input_dim=10000, output_dim=embedding_dim)(text_input)  # Embed text tokens
text_vectorized = LSTM(128, return_sequences=True)(text_vectorized)  # LSTM layer for capturing sequences
text_vectorized = GlobalAveragePooling1D()(text_vectorized)  # Pooling layer for text features
text_vectorized = Dense(128, activation='relu')(text_vectorized)  # Dense layer for text features
text_vectorized = Dropout(0.5)(text_vectorized)  # Dropout for regularization

# Concatenate image and text features
combined = concatenate([x, text_vectorized])
combined = Dense(256, activation='relu')(combined)  # Further processing
combined = Dropout(0.5)(combined)  # Dropout for regularization
output = Dense(y_train.shape[1], activation='softmax')(combined)  # Output layer (adjust for your number of classes)

# Create the final model
final_model = Model(inputs=[image_input, text_input], outputs=output)

# Compile the model
final_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
final_model.summary()

In [17]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)

# Fit the model
history = final_model.fit(
    [X_train_clean, padded_train],
    y_train_clean,
    validation_data=([X_test, padded_test], y_test),
    epochs=5,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping, reduce_lr]
)



Epoch 1/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 2s/step - accuracy: 0.8242 - loss: 0.5749 - val_accuracy: 0.2056 - val_loss: 2.8805 - learning_rate: 0.0010
Epoch 2/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 2s/step - accuracy: 0.9008 - loss: 0.3295 - val_accuracy: 0.2056 - val_loss: 4.4129 - learning_rate: 0.0010
Epoch 3/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 2s/step - accuracy: 0.9172 - loss: 0.2568 - val_accuracy: 0.2080 - val_loss: 2.3070 - learning_rate: 0.0010
Epoch 4/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 2s/step - accuracy: 0.9388 - loss: 0.1878 - val_accuracy: 0.2074 - val_loss: 4.2897 - learning_rate: 0.0010
Epoch 5/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 2s/step - accuracy: 0.9473 - loss: 0.1711 - val_accuracy: 0.1621 - val_loss: 2.0382 - learning_rate: 0.0010


In [None]:
# Save the model
model_save_path = 'Efficient_Net.h5'  # Define the path to save the model
final_model.save(model_save_path)

print(f"Model saved at {model_save_path}")



Model saved at Efficient_Net.h5
