In [85]:
import sys
import os
sys.path.append(os.path.abspath('/Users/ericxia/school/Math-148-Project/food-classification'))

import json
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from data_utils.utils import keep_existing_photos, visualize_images, downsample_group
from data_utils.dataset import PhotoLabelDataset, MultimodalDataset, stratified_split_dataset
from model.resnet18 import Resnet18FineTuneModel
from model.fusion_model import FusionModel
from model.utils import get_device, train_model_single_epoch, validate_model_single_epoch, save_checkpoint, evaluate_on_test

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchvision import datasets, transforms, models

import torchvision.transforms as transforms
from torchvision.transforms import RandAugment

from torch.utils.data import DataLoader, Dataset, random_split

from transformers import AutoImageProcessor, AutoModelForImageClassification
from sentence_transformers import SentenceTransformer

from sklearn.metrics import classification_report

In [69]:
# Root folder (relative to the notebook) that all data paths below are built from.
base_dir = "../"

# Raw inputs: line-delimited JSON for businesses and photos, CSV for review summaries.
business_df = pd.read_json(f'{base_dir}data/yelp_dataset/yelp_academic_dataset_business.json', lines=True)
photos_df = pd.read_json(f'{base_dir}data/yelp_photos/photos.json', lines=True)
top_reviews_per_restaurant = pd.read_csv(f'{base_dir}data/yelp_dataset/top_reviews_per_restaurant_with_summary.csv')

# NOTE(review): keep_existing_photos presumably drops rows whose image file is
# missing from photo_dir — confirm against data_utils.utils.
photo_dir = f"{base_dir}data/yelp_photos/resized_photos"
photos_df = keep_existing_photos(photos_df, photo_dir)

# Only photos labelled as food are used from here on.
photos_df = photos_df[photos_df['label'] == 'food'].copy()

# Attach each photo's business attributes and categories.
categories_df = business_df[['business_id', 'attributes', 'categories']].copy()
photos_df = photos_df.merge(categories_df, on="business_id", how="left")

# .copy() so the column assignments below act on an independent frame, not a slice.
photos_df = photos_df[photos_df['attributes'].notna()].copy()

# The price attribute can be absent (dict.get returns None) or non-numeric;
# coerce such values to NaN and drop them instead of crashing in astype(int).
photos_df['price_range'] = pd.to_numeric(
    photos_df['attributes'].apply(lambda x: x.get('RestaurantsPriceRange2')),
    errors='coerce',
)
photos_df = photos_df[photos_df['price_range'].notna()].copy()
photos_df['price_range'] = photos_df['price_range'].astype(int)

# Collapse the four price levels into two buckets: {1, 2} -> 1 and {3, 4} -> 2.
photos_df['price_range'] = photos_df['price_range'].replace({2: 1, 3: 2, 4: 2})

# Left merge keeps every photo row even when the business has no review summary.
food_with_reviews_df = photos_df.merge(top_reviews_per_restaurant, on="business_id", how="left")

Checking images: 100%|██████████| 200100/200100 [00:07<00:00, 26596.09it/s]


In [4]:
def preprocess_image(image_path):
    """Load an image from disk and turn it into a single-image model input.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A tuple ``(image, input_tensor)``: the image converted to RGB, and the
        result of resizing to 224x224, converting to a tensor, normalizing
        per channel, and adding a leading batch dimension via ``unsqueeze(0)``.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Per-channel mean / std used for normalization.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Use a context manager so the underlying file handle is released promptly;
    # the converted image is kept and used after the file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    input_tensor = transform(image).unsqueeze(0)  # Add batch dimension

    return image, input_tensor

In [18]:
# Resolve the compute device before the model is placed on it.
device = get_device()

# The image processor and the classifier are fetched from two different hub repos.
# NOTE(review): the classifier repo name suggests a Food-101 fine-tune — confirm.
processor = AutoImageProcessor.from_pretrained("google/efficientnet-b1")
model = AutoModelForImageClassification.from_pretrained("gabrielganan/efficientnet_b1-food101")

# Move to the device and switch to eval mode; as the cell's last expression,
# the returned module is also rendered as the cell output.
model.to(device).eval()

EfficientNetForImageClassification(
  (efficientnet): EfficientNetModel(
    (embeddings): EfficientNetEmbeddings(
      (padding): ZeroPad2d((0, 1, 0, 1))
      (convolution): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=valid, bias=False)
      (batchnorm): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)
      (activation): SiLU()
    )
    (encoder): EfficientNetEncoder(
      (blocks): ModuleList(
        (0): EfficientNetBlock(
          (depthwise_conv): EfficientNetDepthwiseLayer(
            (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))
            (depthwise_conv): EfficientNetDepthwiseConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
            (depthwise_norm): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)
            (depthwise_act): SiLU()
          )
          (squeeze_excite): EfficientNetSqueezeExciteLayer(
            (squeeze): AdaptiveAvgPool2d(output

In [86]:
# Build the EfficientNet-B0 architecture only. No pretrained weights are requested
# because load_state_dict below (strict by default) replaces every parameter.
model = models.efficientnet_b0(weights=None)

# Replace the stock classification head with a 101-way output layer.
num_ftrs = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(0.2),
    nn.Linear(num_ftrs, 101)  # 101 food classes
)

# map_location="cpu" lets a checkpoint saved on a different accelerator load on
# this machine; the model is moved to the target device afterwards.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
price_ckpt_20 = torch.load("checkpoints/food101_efficientnetb0/ckpt_20", map_location="cpu")
model.load_state_dict(price_ckpt_20['model_state_dict'])
model.to(device)
# Last expression: switches to eval mode and displays the module as cell output.
model.eval()



EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [87]:
# Resolve the compute device again and rebind `model` to the result of moving it there.
device = get_device()
model = model.to(device)

In [88]:
# Column handed to the dataset as the label; the inference loop discards labels.
label = 'price_range'

# NOTE(review): there is no Resize here, so batching assumes every file in
# photo_dir already has the same dimensions — confirm.
val_transform = transforms.Compose([
    transforms.ToTensor(),  # Convert image to Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Predictions are written back onto photos_df positionally, so the dataset is
# built from photos_df itself (not the review-merged frame, whose row count can
# differ after a left merge). reset_index gives a clean 0..n-1 index in case the
# dataset looks rows up by label rather than by position.
inference_df = photos_df.reset_index(drop=True)
dataset = PhotoLabelDataset(inference_df, photo_dir, label, transform=val_transform)

batch_size = 64

# shuffle=False keeps batches in dataframe row order.
loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [97]:
# Per-batch results are collected on the CPU and concatenated once at the end.
all_predictions = []
all_confidences = []

with torch.no_grad():
    for images, _ in loader:  # labels are not needed for inference
        images = images.to(device)
        outputs = model(images)
        probs = F.softmax(outputs, dim=1)
        # One reduction yields both the top probability and its class index.
        confidences, predictions = probs.max(dim=1)

        all_predictions.append(predictions.cpu())
        all_confidences.append(confidences.cpu())

final_predictions = torch.cat(all_predictions, dim=0)
final_confidences = torch.cat(all_confidences, dim=0)

In [98]:
# The assignment below is positional, so fail loudly with a clear message if the
# number of predictions does not match the number of photo rows.
n_rows, n_preds = len(photos_df), len(final_predictions)
if n_preds != n_rows:
    raise ValueError(
        f"Got {n_preds} predictions for {n_rows} rows in photos_df; "
        "the inference dataset must contain exactly one image per photos_df row."
    )

# Convert to NumPy so the new columns get plain numeric dtypes instead of
# depending on how pandas handles torch tensors.
photos_df['food101_predictions'] = final_predictions.numpy()
photos_df['food101_confidences'] = final_confidences.numpy()

In [99]:
# Map each zero-based line number of classes.txt to the class name on that line.
class_mapping = {}
with open(f'{base_dir}/data/food101/food-101/meta/classes.txt', 'r') as file:
    classes = file.read().splitlines()  # one class name per line, newlines stripped
    class_mapping.update(enumerate(classes))

In [100]:
# Translate each predicted class index into its class name; an index missing
# from class_mapping raises KeyError.
photos_df['food101_predictions_decoded'] = photos_df['food101_predictions'].map(class_mapping.__getitem__)

In [123]:
# Display rows at positions 2 through 23 to eyeball predictions against captions and categories.
photos_df.iloc[2:24]

Unnamed: 0,photo_id,business_id,caption,label,attributes,categories,price_range,food101_predictions,food101_confidences,food101_predictions_decoded
2,Le9rMdT8YFlvqr431LctIQ,BELS5YkYjDFkpbM2FuDdaA,Fried Chick'un Sandwich,food,"{'NoiseLevel': 'u'average'', 'BusinessAcceptsC...","Food, Juice Bars & Smoothies, Vegan, Breakfast...",1,53,0.332358,hamburger
3,9kVdBkGWcKfCFzSwUXjQyw,iryqT-RFwl7erA4MDAuLyg,The Korrito! Korean BBQ sushi made burrito size!,food,"{'BikeParking': 'True', 'BusinessParking': '{'...","Caribbean, Food Stands, Street Vendors, Restau...",1,7,0.999138,bibimbap
4,dxIzVOEowFFsg2RmgFJZdQ,H3WJK0fpoPrIotWqOG_yIA,,food,"{'NoiseLevel': 'u'average'', 'WiFi': ''free'',...","Beer, Wine & Spirits, Burgers, American (New),...",1,11,0.385533,caesar_salad
5,50CbWRa6-tAKRPrq5mbABg,3lW1gubDMpnEIlo45eUKhA,,food,"{'BikeParking': 'True', 'RestaurantsPriceRange...","Mexican, Fast Food, Restaurants",1,7,0.505196,bibimbap
6,e0dD0np3hY3F8LoUtrNoPw,jTFl9Cr7RqZqwjH-CI1ZPw,These cream filled donuts can't be beat! Crea...,food,"{'BikeParking': 'False', 'OutdoorSeating': 'Fa...","Food, Grocery, Restaurants, Bakeries, Gluten-Free",1,29,0.285167,cup_cakes
7,xiyqMEgTl4B4ux047E_zqw,Ghn6flCdNlOchzLA29tkjQ,Piedmont Salad--serious yum factor,food,"{'RestaurantsAttire': ''casual'', 'Restaurants...","American (New), Bars, Nightlife, Italian, Rest...",1,11,0.893825,caesar_salad
8,K33aWlzEqKjuZFjdEuBOAA,LoyKBRN2Sp3J8AuVqD8Kug,,food,"{'RestaurantsGoodForGroups': 'False', 'Restaur...","Restaurants, Mexican, Tacos, Fast Food, Breakf...",1,16,0.937742,cheesecake
9,zNzVcwnSJ4kvjFnANIsIRg,tw_PmFjjCyg0t8Ek_kQz0Q,,food,"{'BikeParking': 'True', 'NoiseLevel': 'u'avera...","Bagels, Pizza, Bakeries, Food, Restaurants, Co...",1,27,0.980893,creme_brulee
10,0hhqi5rESZrXw3rJTrJ1gw,-1PG6k_iezwJmRZLB7f6og,Bulgogi steak tacos. Delicious!!!,food,"{'RestaurantsAttire': ''casual'', 'Ambience': ...","Food, Cocktail Bars, American (New), Restauran...",1,96,0.987984,tacos
11,XnStHMYUJEnb2t-6gCB6Hg,I5jxUHS1Dp2rg40eDzyiCA,Lamb Chops,food,"{'WheelchairAccessible': 'True', 'OutdoorSeati...","Indian, Restaurants",1,65,0.834983,mussels


In [74]:
# Show the categories string of the first row.
photos_df.iloc[0].categories

'Live/Raw Food, Restaurants, Seafood, Beer Bar, Beer, Wine & Spirits, Bars, Food, Nightlife'

In [129]:
# Write the photo table, including the added prediction columns, to a CSV in the
# current working directory; the dataframe index is not written.
photos_df.to_csv('photos_df_with_food101_predictions.csv', index=False)

In [128]:
# Spot-check a single photo, looked up by its photo id.
photo_id = "JzLZ17Epk1N3dDNqn1_R4A"
image_path = f"{base_dir}data/yelp_photos/resized_photos/{photo_id}.jpg"
image = Image.open(image_path)
# NOTE(review): Image.show() presumably opens an external viewer rather than
# rendering inline in the notebook — confirm this is intended.
image.show()

In [10]:
# WARNING: this holds every preprocessed image in memory at once. Each one is a
# 1x3x224x224 float tensor, so the full table can exhaust RAM; the recorded run
# of this cell ended in a KeyboardInterrupt. Prefer batched loading through a
# DataLoader for anything beyond a small sample.
input_tensors = []
# Only the photo id is needed, so iterate the column instead of using iterrows().
for photo_id in food_with_reviews_df['photo_id']:
    image_path = f"{base_dir}data/yelp_photos/resized_photos/{photo_id}.jpg"
    _, input_tensor = preprocess_image(image_path)
    input_tensors.append(input_tensor)

# Each tensor already carries a leading batch dimension of 1, so concatenate
# along dim 0 to get (N, 3, 224, 224); torch.stack would instead produce
# (N, 1, 3, 224, 224).
input_batch = torch.cat(input_tensors, dim=0)

KeyboardInterrupt: 