Direct Regression Nutrition Prediction

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd 'drive/MyDrive/CV_2024/FinalProject/nutri_estimate/nutri_estimate/final_model_combined'
%pwd

/content/drive/MyDrive/CV_2024/FinalProject/nutri_estimate/nutri_estimate/final_model_combined


'/content/drive/MyDrive/CV_2024/FinalProject/nutri_estimate/nutri_estimate/final_model_combined'

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import efficientnet_v2_s
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from PIL import Image
import os
import joblib

# GPU availability
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

Using device: cuda


In [4]:
transform = transforms.Compose([
    transforms.Resize((320, 320)),
    transforms.ToTensor()
])

In [5]:
scaler_directory = "./scalers"
scalers = {
    file.replace("_scaler.save", ""): joblib.load(os.path.join(scaler_directory, file))
    for file in os.listdir(scaler_directory)
    if file.endswith("_scaler.save")
}

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [6]:
class NutritionDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image_path = os.path.join(self.root_dir, row['image_link'])
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        labels = {
            'protein': row['total_protein'],
            'fat': row['total_fat'],
            'carbs': row['total_carb'],
            'mass': row['total_mass']
        }
        return image, labels

In [7]:
df = pd.read_json('../preprocess/filtered_data.json', dtype={'total_mass': 'float64'})
X_test = df[df['split'] == 'test']
root_dir = '../preprocess'
test_dataset = NutritionDataset(X_test, root_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, drop_last=False)

In [8]:
class EfficientNetBase(nn.Module):
    def __init__(self):
        super(EfficientNetBase, self).__init__()
        self.base_model = efficientnet_v2_s(weights=None)
        self.base_model.classifier = nn.Identity()

    def forward(self, x):
        return self.base_model(x)

class MultiTaskModel(nn.Module):
    def __init__(self, base_model):
        super(MultiTaskModel, self).__init__()
        self.base_model = base_model
        in_features = 1280
        self.protein_branch = self._create_branch(in_features)
        self.fat_branch = self._create_branch(in_features)
        self.carbs_branch = self._create_branch(in_features)
        self.mass_branch = self._create_branch(in_features)

    def _create_branch(self, in_features):
        return nn.Sequential(
            nn.Linear(in_features, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        x = self.base_model(x)
        return {
            'protein': self.protein_branch(x),
            'fat': self.fat_branch(x),
            'carbs': self.carbs_branch(x),
            'mass': self.mass_branch(x)
        }

In [9]:
# Load the trained model
checkpoint_path = "./final_model/final_multi_task_model.pth"
base_model = EfficientNetBase()
model = MultiTaskModel(base_model)
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

MultiTaskModel(
  (base_model): EfficientNetBase(
    (base_model): EfficientNet(
      (features): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (2): SiLU(inplace=True)
        )
        (1): Sequential(
          (0): FusedMBConv(
            (block): Sequential(
              (0): Conv2dNormActivation(
                (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
                (2): SiLU(inplace=True)
              )
            )
            (stochastic_depth): StochasticDepth(p=0.0, mode=row)
          )
          (1): FusedMBConv(
            (block): Sequential(
              (0): Conv2dNormActivation(
                (0): Conv2d(24, 24, kernel_siz

In [10]:
import sys
sys.path.append('/content/drive/MyDrive/CV_2024/FinalProject/nutri_estimate/nutri_estimate/final_model_combined')

In [11]:
!pwd

/content/drive/MyDrive/CV_2024/FinalProject/nutri_estimate/nutri_estimate/final_model_combined


In [12]:
!dir

final_model  multi_regression_depth_combine.ipynb  scalers  utilities


In [16]:
%pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
!pip install openmim
!mim install mmengine
!mim install mmcv==2.1.0
!pip install addict yapf timm torchvision

Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch==2.1.0
  Downloading https://download.pytorch.org/whl/cu121/torch-2.1.0%2Bcu121-cp310-cp310-linux_x86_64.whl (2200.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 GB[0m [31m953.5 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.16.0
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.16.0%2Bcu121-cp310-cp310-linux_x86_64.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchaudio==2.1.0
  Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.1.0%2Bcu121-cp310-cp310-linux_x86_64.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m90.9 MB/s[0m eta [36m0:00:00[0m
Collecting triton==2.1.0 (from torch==2.1.0)
  Downloading https://download.pytorch.org/whl/triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64

Collecting openmim
  Downloading openmim-0.3.9-py2.py3-none-any.whl.metadata (16 kB)
Collecting colorama (from openmim)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting model-index (from openmim)
  Downloading model_index-0.1.11-py3-none-any.whl.metadata (3.9 kB)
Collecting opendatalab (from openmim)
  Downloading opendatalab-0.0.10-py3-none-any.whl.metadata (6.4 kB)
Collecting ordered-set (from model-index->openmim)
  Downloading ordered_set-4.1.0-py3-none-any.whl.metadata (5.3 kB)
Collecting pycryptodome (from opendatalab->openmim)
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting openxlab (from opendatalab->openmim)
  Downloading openxlab-0.1.2-py3-none-any.whl.metadata (3.8 kB)
Collecting filelock~=3.14.0 (from openxlab->opendatalab->openmim)
  Downloading filelock-3.14.0-py3-none-any.whl.metadata (2.8 kB)
Collecting oss2~=2.17.0 (from openxlab->opendatalab->openmim)
  Downloading oss

Looking in links: https://download.openmmlab.com/mmcv/dist/cu121/torch2.1.0/index.html
Collecting mmengine
  Downloading mmengine-0.10.5-py3-none-any.whl.metadata (20 kB)
Collecting addict (from mmengine)
  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)
Collecting yapf (from mmengine)
  Downloading yapf-0.43.0-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Downloading mmengine-0.10.5-py3-none-any.whl (452 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m452.3/452.3 kB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading addict-2.4.0-py3-none-any.whl (3.8 kB)
Downloading yapf-0.43.0-py3-none-any.whl (256 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.2/256.2 kB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: addict, yapf, mmengine
Successfully installed addict-2.4.0 mmengine-0.10.5 yapf-0

In [13]:
import os
import torch
from torchvision import transforms
import numpy as np
from sklearn.metrics import mean_absolute_error
from utilities.load_model import load_depth_model, load_custom_model
import joblib

depth_model = load_depth_model()
multi_task_model_path = os.path.join("final_model", "final_multi_task_model.pth")
model = load_custom_model(multi_task_model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
depth_model = depth_model.to(device)

# Load scalers for inverse scaling later
scaler_directory = "scalers"
scalers = {}
for scaler_file in os.listdir(scaler_directory):
    scaler_name = scaler_file.replace("_scaler.save", "")
    scaler_path = os.path.join(scaler_directory, scaler_file)
    scalers[scaler_name] = joblib.load(scaler_path)

# Constants for Raspberry Pi camera, test set images come from Nutrition5k data that uses Raspberry Pi
FOCAL_LENGTH_MM = 2.75  # in mm
SENSOR_WIDTH_MM = 7.4   # in mm

# Eval loop
y_true, y_pred = {k: [] for k in ['protein', 'fat', 'carbs', 'mass']}, {k: [] for k in ['protein', 'fat', 'carbs', 'mass']}

with torch.no_grad():
    for batch_idx, (images, labels) in enumerate(test_loader):
        images = images.to(device)
        input_dict = {"input": images}
        output = depth_model.inference(input_dict)
        pred_depth = output[0]  # Depth prediction

        # Extract depth scaling factors for each image in the batch
        batch_scaling_factors = []
        for img_idx in range(images.size(0)):
            height, width = images[img_idx].shape[1:3]
            depth_map = pred_depth[img_idx].squeeze().cpu().numpy()

            # Focal length + scaling logic
            focal_length = FOCAL_LENGTH_MM
            sensor_width = SENSOR_WIDTH_MM
            focal_length = focal_length / sensor_width * width if focal_length else 1.0
            canonical_to_real_scale = focal_length / 1000.0 if focal_length else 1.0
            depth_map = depth_map * canonical_to_real_scale
            depth_map = np.clip(depth_map, 0, 300)

            center_x, center_y = depth_map.shape[1] // 2, depth_map.shape[0] // 2
            center_depth = depth_map[center_y, center_x] / 1000  # convert mm to meters
            scaling_factor = center_depth / 0.4

            batch_scaling_factors.append(scaling_factor)
            print(f"Image {batch_idx * images.size(0) + img_idx}: Center Depth (m) = {center_depth:.4f}, Scaling Factor = {scaling_factor:.4f}")
            print(f"  Depth Map Min = {depth_map.min():.4f}, Max = {depth_map.max():.4f}")

        outputs = model(images)

        for img_idx in range(images.size(0)):
            for key in outputs:
                raw_predictions = outputs[key][img_idx].cpu().numpy()
                scaled_predictions = raw_predictions * batch_scaling_factors[img_idx]

                # Model trained on scaled data, outputs normalized data so need to inverse the scaling
                scaler_key = f"total_{key[:-1]}" if key == "carbs" else f"total_{key}"
                if scaler_key in scalers:
                    scaled_predictions = scalers[scaler_key].inverse_transform(
                        scaled_predictions.reshape(-1, 1)
                    ).flatten()

                print(f"Predictions Shape for Key {key}, Image {img_idx}: {scaled_predictions.shape}")
                y_pred[key].append(scaled_predictions.reshape(1))
                print(f"Label Shape for Key {key}, Image {img_idx}: {labels[key][img_idx].shape}")
                y_true[key].append(np.array([labels[key][img_idx].cpu().numpy()]))

                print(f"Image {batch_idx * images.size(0) + img_idx}, Key: {key}")
                print(f"  Scaling Factor: {batch_scaling_factors[img_idx]:.4f}")
                print(f"  Scaled Predictions: {scaled_predictions}")
                print(f"  True Values: {labels[key][img_idx].cpu().numpy()}")

# Combine predictions and true values
for key in y_pred:
    y_pred[key] = np.concatenate(y_pred[key], axis=0)
    y_true[key] = np.concatenate(y_true[key], axis=0)
    print(f"Combined Values for Key: {key}")
    print(f"  Predictions - Min={y_pred[key].min():.2f}, Max={y_pred[key].max():.2f}")
    print(f"  True Values - Min={y_true[key].min():.2f}, Max={y_true[key].max():.2f}")

# Calculate and print MAE
for key in y_pred:
    mae = mean_absolute_error(y_true[key], y_pred[key])
    print(f"{key.capitalize()} MAE: {mae:.2f}")

Using cache found in /root/.cache/torch/hub/yvanyin_metric3d_main
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Image 9485, Key: protein
  Scaling Factor: 0.0002
  Scaled Predictions: [8.070388]
  True Values: 19.408112938
Predictions Shape for Key fat, Image 13: (1,)
Label Shape for Key fat, Image 13: torch.Size([])
Image 9485, Key: fat
  Scaling Factor: 0.0002
  Scaled Predictions: [4.507807]
  True Values: 3.534456612
Predictions Shape for Key carbs, Image 13: (1,)
Label Shape for Key carbs, Image 13: torch.Size([])
Image 9485, Key: carbs
  Scaling Factor: 0.0002
  Scaled Predictions: [11.449503]
  True Values: 2.233993911
Predictions Shape for Key mass, Image 13: (1,)
Label Shape for Key mass, Image 13: torch.Size([])
Image 9485, Key: mass
  Scaling Factor: 0.0002
  Scaled Predictions: [156.25307]
  True Values: 78.0
Predictions Shape for Key protein, Image 14: (1,)
Label Shape for Key protein, Image 14: torch.Size([])
Image 9486, Key: protein
  Scaling Factor: 0.0003
  Scaled Predictions: [5.15501]
  True Values: 19.408112938
