### Library Installations

In [None]:
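# NOTE: `import clip` below uses the OpenAI CLIP API (clip.load / clip.tokenize), which is
# installed from GitHub; the PyPI package named `clip` is a different project.
#!pip install fiftyone sentence-transformers git+https://github.com/openai/CLIP.git > /dev/null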

### Library Imports 

In [None]:
from google.colab import drive
import fiftyone as fo
import numpy as np
import clip
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
from scipy.stats import spearmanr
import torch.nn as nn
import torch.nn.functional as F
import random
import torch.optim as optim

# Mount Google Drive; the dataset directories used below live under /content/drive/MyDrive.
drive.mount("/content/drive")

In [None]:
# Load the raw FiftyOne export from Drive.
# `fo.Dataset.from_dir(..., name=...)` raises when a dataset with that name is
# already registered, so re-running this cell in the same session would fail.
# Reuse the registered dataset in that case to keep the cell idempotent.
dataset_path = "/content/drive/MyDrive/all_data"
if fo.dataset_exists("all_data"):
    loaded_dataset = fo.load_dataset("all_data")
else:
    loaded_dataset = fo.Dataset.from_dir(
        dataset_dir=dataset_path,
        dataset_type=fo.types.FiftyOneDataset,
        name="all_data",
    )


In [None]:
# Sanity check: number of samples in the dataset that was just loaded.
len(loaded_dataset)

### Transform the Dataset 

In [None]:
# Complete ingredient translation and cleanup
def clean_ingredient_name(name):
    """Normalize a raw ingredient label before translation lookup.

    The label is stripped of surrounding whitespace and lower-cased, then
    stray closing parentheses at the end are removed.  A trailing ``)`` is
    only treated as an artifact while the label contains more ``)`` than
    ``(``, so a balanced suffix such as ``"sauce (dunkel)"`` is preserved.
    Whitespace exposed by removing a parenthesis is stripped as well.

    Args:
        name: Raw ingredient label (str).

    Returns:
        The cleaned, lower-cased label.
    """
    cleaned = name.strip().lower()
    # Drop unmatched trailing ")" one at a time, re-stripping whitespace so
    # inputs like "reis )" do not end up with a dangling space.
    while cleaned.endswith(")") and cleaned.count(")") > cleaned.count("("):
        cleaned = cleaned[:-1].rstrip()
    return cleaned


# German -> English translation table for ingredient labels.
# Keys are looked up with the output of `clean_ingredient_name`, i.e. they must
# be lower-case and free of surrounding whitespace. Values are the canonical
# English labels, written with hyphens instead of spaces.
ingredient_mapping = {
    # Basic ingredients (vegetables, sides, salads)
    "apfelessig": "apple-vinegar",
    "apfelmus": "applesauce",
    "blumenkohl": "cauliflower",
    "brokkoli": "broccoli",
    "dill": "dill",
    "erbsen": "peas",
    "eisbergsalat": "iceberg-lettuce",
    "kartoffeln": "potatoes",
    "kartoffelwürfel": "diced-potatoes",
    "kartoffelpüree": "mashed-potatoes",
    "kohlrabi": "kohlrabi",
    "möhre": "carrot",
    "nudeln": "noodles",
    "paprika": "bell-pepper",
    "pilze": "mushrooms",
    "reis": "rice",
    "rosenkohl": "brussels-sprouts",
    "rotkohl": "red-cabbage",
    "spinat": "spinach",
    "zucchini": "zucchini",
    "zwiebel": "onion",
    "sultaninen": "raisins",
    "grüne bohnen": "green-beans",
    "grünkohl": "kale",
    "wirsing": "savoy-cabbage",
    "sauerkraut": "sauerkraut",
    "krautsalat": "coleslaw",
    "cherrytomate halbiert": "cherry-tomatoes-halved",
    "gekochtes ei": "boiled-egg",
    # Meat and fish
    "hähnchen": "chicken",
    "hähnchenschnitzel": "chicken-schnitzel",
    "hähnchenstreifen": "chicken-strips",
    "rinderbraten": "roast-beef",
    "rindergulasch": "beef-goulash",
    "rinderroulade": "beef-roulade",
    "schweinelachssteak": "pork-loin-steak",
    "schweinenackenbraten": "pork-neck-roast",
    "kasseler": "smoked-pork-chop",
    "frikadelle": "meatball",
    "hackbraten": "meatloaf",
    "seelachs": "pollock",
    "buntbarsch": "perch",
    "heringsstipp": "herring-salad",
    # Sausages and processed meat
    "schinken mettwurst": "ham-sausage",
    "speckwürfel": "bacon-cubes",
    # Sauces and condiments
    "bechamel": "bechamel-sauce",
    "bratenjus": "gravy",
    "braune sauce": "brown-sauce",
    "currysauce": "curry-sauce",
    "helle sauce": "light-sauce",
    "malzbier-senf-sauce": "malt-beer-mustard-sauce",
    "dunkle balsamico-sauce": "dark-balsamic-sauce",
    "tomaten-curry-sauce": "tomato-curry-sauce",
    "tomatensauce": "tomato-sauce",
    "vanillesauce": "vanilla-sauce",
    "zitronensauce": "lemon-sauce",
    "geräucherte paprikasauce": "smoked-paprika-sauce",
    "senf": "mustard",
    "dressing portion": "dressing-portion",
    # Dairy and cream
    "sahne": "cream",
    "pflanzencreme": "plant-based-cream",
    "gouda gerieben": "grated-gouda",
    # Grains and starches
    "haferbrei": "oatmeal",
    "linsen": "lentils",
    "eierspätzle": "egg-spaetzle",
    "schupfnudeln": "potato-noodles",
    "semmelknödel": "bread-dumplings",
    "dampfnudel": "steamed-dumpling",
    "reibekuchen": "potato-pancakes",
    # Spices
    "kümmel gemahlen": "ground-caraway",
    # Labels that already arrive in English: "goulash" is folded into the same
    # canonical label as "rindergulasch"; the other entries map to themselves.
    "goulash": "beef-goulash",
    "chickpeas": "chickpeas",
    "lentil-stew": "lentil-stew",
}


def create_cleaned_ingredient_mapping(raw_ingredients):
    """
    Map every raw ingredient label to its canonical (cleaned/translated) label.

    Each label is normalized with `clean_ingredient_name`; if the normalized
    form has an entry in `ingredient_mapping` that translation is used,
    otherwise the normalized form is kept with spaces turned into hyphens.

    Returns a dict keyed by the original, untouched labels.
    """

    def _canonical(raw_name):
        normalized = clean_ingredient_name(raw_name)
        if normalized in ingredient_mapping:
            return ingredient_mapping[normalized]
        # No translation known: keep the name, hyphenated for consistency
        return normalized.replace(" ", "-")

    return {raw_name: _canonical(raw_name) for raw_name in raw_ingredients}


# Collect every distinct raw ingredient label that occurs in the dataset
raw_ingredients = set().union(*(s["ingredient_name"] for s in loaded_dataset))
ingredient_cleanup_map = create_cleaned_ingredient_mapping(raw_ingredients)

# Vocabulary of canonical labels, sorted so the index assignment is stable
cleaned_ingredients = set(ingredient_cleanup_map.values())
all_ing = sorted(cleaned_ingredients)
ing2idx = {name: pos for pos, name in enumerate(all_ing)}
idx2ing = dict(enumerate(all_ing))

print(f"Reduced from {len(raw_ingredients)} to {len(all_ing)} unique ingredients")
print("\nSample mappings:")
# Show the first ten raw -> canonical pairs as a spot check
for orig, cleaned in list(ingredient_cleanup_map.items())[:10]:
    print(f"'{orig}' -> '{cleaned}'")

In [None]:
# # Clone the original dataset
# cleaned_dataset = loaded_dataset.clone("food_waste_cleaned")

# # Apply transformations to the cloned dataset
# for sample in cleaned_dataset:
#     original_ingredients = sample["ingredient_name"]
#     cleaned_ingredients = [ingredient_cleanup_map[ing] for ing in original_ingredients]
#     sample["ingredient_name"] = cleaned_ingredients
#     sample.save()

# # Use the cleaned dataset for training
# loaded_dataset = cleaned_dataset

# # Export your dataset to Drive
# export_path = "/content/drive/MyDrive/cleaned_dataset_final"

# # Export as FiftyOne dataset (preserves all metadata, embeddings, etc.)
# cleaned_dataset.export(
#     export_dir=export_path,
#     dataset_type=fo.types.FiftyOneDataset
# )

# print(f"Dataset exported to: {export_path}")

### Load the clean data for training 

In [None]:
# Load the cleaned FiftyOne export from Drive.
# `fo.Dataset.from_dir(..., name=...)` raises when a dataset with that name is
# already registered, so re-running this cell in the same session would fail.
# Reuse the registered dataset in that case to keep the cell idempotent.
dataset_path = "/content/drive/MyDrive/cleaned_dataset_final"
if fo.dataset_exists("cleaned_dataset_final"):
    loaded_dataset = fo.load_dataset("cleaned_dataset_final")
else:
    loaded_dataset = fo.Dataset.from_dir(
        dataset_dir=dataset_path,
        dataset_type=fo.types.FiftyOneDataset,
        name="cleaned_dataset_final",
    )

In [None]:
# CLIP model used to embed the ingredient names
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
clip_model, preprocess = clip.load("ViT-B/32", device=device)

# Ingredient vocabulary, rebuilt from the (cleaned) dataset that is now loaded
all_ing = sorted({ing for s in loaded_dataset for ing in s["ingredient_name"]})
ing2idx = {name: pos for pos, name in enumerate(all_ing)}
idx2ing = dict(enumerate(all_ing))

# One prompt per ingredient; the "a photo of" prefix gives the text encoder context
print("Encoding ingredients with CLIP...")
ingredient_texts = [f"a photo of {ing}" for ing in all_ing]

# Encode the prompts in chunks to keep memory use bounded
batch_size = 64
embeddings = []

with torch.no_grad():
    for start in range(0, len(ingredient_texts), batch_size):
        tokens = clip.tokenize(ingredient_texts[start : start + batch_size]).to(device)
        embeddings.append(clip_model.encode_text(tokens).float().cpu())

# Row i of emb_matrix is the embedding of all_ing[i]
emb_matrix = torch.cat(embeddings, dim=0).numpy()
NUM_ING = len(ing2idx)
print(f"Created embeddings shape: {emb_matrix.shape}")
print(ing2idx)

In [None]:
class FoodWasteTorchDataset(Dataset):
    """Torch dataset over a FiftyOne view.

    Each item is a triple ``(image, ingredient_embeddings, target)``:
    the transformed RGB image, one row of the module-level ``emb_matrix`` per
    listed ingredient, and a vector with one slot per vocabulary entry holding
    the sample's ``return_quantity`` values.
    """

    def __init__(self, view, img_tfms):
        self.view = view
        self.img_tfms = img_tfms
        # Sample ids are fetched once so items can be addressed by position
        self.ids = view.values("id")

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        sample = self.view[self.ids[idx]]

        picture = Image.open(sample.filepath).convert("RGB")
        img = self.img_tfms(picture)

        # One embedding row per ingredient -> variable-length (L_i, D) tensor
        rows = [emb_matrix[ing2idx[name]] for name in sample["ingredient_name"]]
        emb = torch.from_numpy(np.stack(rows))

        # Regression target: slot per vocabulary entry, zero unless a quantity is given
        tgt = torch.zeros(len(ing2idx), dtype=torch.float32)
        for name, amount in zip(sample["ingredient_name"], sample["return_quantity"]):
            if amount is None:
                continue
            tgt[ing2idx[name]] = amount
        return img, emb, tgt


def collate(batch):
    """Batch ``(image, embeddings, target)`` triples.

    Images and targets are stacked along a new leading dimension; the
    per-sample embedding tensors are returned as a plain list because their
    lengths differ from sample to sample.
    """
    images, ingredient_embs, targets = map(list, zip(*batch))
    return torch.stack(images), ingredient_embs, torch.stack(targets)


# Training transforms: resize, random horizontal flip (augmentation), tensor
# conversion and channel normalization.
tfms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Evaluation transforms: identical preprocessing but WITHOUT the random flip,
# so test metrics are deterministic and not computed on augmented images.
eval_tfms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Split the FiftyOne dataset by its "split" field
train_view = loaded_dataset.match({"split": "train"})
test_view = loaded_dataset.match({"split": "test"})
train_dataset = FoodWasteTorchDataset(train_view, tfms)
test_dataset = FoodWasteTorchDataset(test_view, eval_tfms)
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=collate,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    collate_fn=collate,
)

In [None]:
def jitter_targets(targets, epsilon=1.0):
    """Add uniform noise in ``[-epsilon, epsilon]`` to the strictly positive targets.

    Entries that are not greater than zero receive no noise, and the result is
    clamped so that no value drops below zero.
    """
    positive = (targets > 0).float()
    perturbation = torch.empty_like(targets).uniform_(-epsilon, epsilon)
    return (targets + perturbation * positive).clamp(min=0.0)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Per-ingredient count of training samples whose target slot is positive.
# The labels are read directly from the FiftyOne samples: iterating
# `train_dataset` itself would open and transform every image just to look at
# the target vector.
freq = torch.zeros(NUM_ING, dtype=torch.long)

for sample_id in train_dataset.ids:
    fo_sample = train_dataset.view[sample_id]
    amounts = {}
    for ing, amt in zip(fo_sample["ingredient_name"], fo_sample["return_quantity"]):
        if amt is not None:
            # Later entries overwrite earlier ones, mirroring how
            # FoodWasteTorchDataset fills its target vector.
            amounts[ing] = amt
    for ing, amt in amounts.items():
        if amt > 0:
            freq[ing2idx[ing]] += 1

# Inverse-log-frequency weights (+2 keeps the log strictly positive for
# ingredients that never occur), rescaled to have mean 1.
w = 1.0 / torch.log(freq.float() + 2)
w = w / w.mean()

weight_vec = w.to(device)
print("max weight:", weight_vec.max().item(), "min weight:", weight_vec.min().item())

In [None]:
class FoodWastePredictor(nn.Module):
    """
    ResNet-50 image encoder fused with attention-pooled ingredient embeddings.

    The image is encoded to a 128-d feature.  Each precomputed ingredient
    embedding (CLIP text embeddings in this notebook) is projected to 128-d by
    an MLP, and the projected tokens are pooled with attention weights obtained
    from the dot product between the raw embeddings and an image-derived query.
    The image and pooled ingredient features are fused and regressed to one
    non-negative value per vocabulary ingredient.

    Args:
        embedding_dim: Width of each ingredient embedding.
        num_ingredients: Number of output slots (vocabulary size).
    """

    def __init__(self, *, embedding_dim: int = 512, num_ingredients: int):
        super().__init__()

        # ---------- image branch ----------
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        img_feats = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(img_feats, 128)  # → (B, 128)

        # ---------- ingredient branch ----------
        self.ing_mlp = nn.Sequential(  # (B, L, E) → (B, L, 128)
            nn.Linear(embedding_dim, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
        )

        # ---------- fusion & head ----------
        self.fusion = nn.Sequential(
            nn.Linear(128 + 128, 128), nn.ReLU(inplace=True), nn.Dropout(0.2)
        )
        # Maps the image feature into embedding space to act as attention query
        self.q_proj = nn.Linear(128, embedding_dim, bias=False)
        self.regressor = nn.Linear(128, num_ingredients)  # (B, N_ing)

    def forward(self, images, artikel_emb, return_total=True):
        """
        Args:
            images: Tensor [B, 3, H, W].
            artikel_emb: Tensor [B, L, E], or a list/tuple of [L_i, E] tensors.
                Lists are zero-padded to a common length and the padded
                positions are excluded from the attention pooling.  For an
                already padded tensor the true lengths are unknown, so every
                position is attended to.
            return_total: Unused; kept so existing callers keep working.

        Returns:
            Tensor [B, num_ingredients] of non-negative predictions.
        """
        # --- image path ---
        img_feat = self.resnet(images)  # (B, 128)

        # ----- text path -----
        pad_mask = None
        if isinstance(artikel_emb, (list, tuple)):
            lengths = [seq.size(0) for seq in artikel_emb]
            artikel_emb = nn.utils.rnn.pad_sequence(list(artikel_emb), batch_first=True)
            positions = torch.arange(artikel_emb.size(1), device=artikel_emb.device)
            length_col = torch.tensor(lengths, device=artikel_emb.device).unsqueeze(1)
            pad_mask = positions.unsqueeze(0) >= length_col  # (B, L), True on padding

        token_feat = self.ing_mlp(artikel_emb)  # (B, L, 128)
        query = self.q_proj(img_feat).unsqueeze(2)  # (B, E, 1)
        attn_logits = (artikel_emb @ query).squeeze(2)  # (B, L)

        if pad_mask is not None:
            # Without masking, padded rows get logit 0 and therefore a
            # non-zero softmax weight, and ing_mlp(0) is non-zero because of
            # the biases — predictions would depend on how much padding the
            # batch happens to need.  A large finite negative (rather than
            # -inf) keeps the softmax NaN-free for empty ingredient lists.
            attn_logits = attn_logits.masked_fill(
                pad_mask, torch.finfo(attn_logits.dtype).min
            )
        attn_weights = torch.softmax(attn_logits, dim=1)
        if pad_mask is not None:
            # Guarantees exact zeros on padding, including all-padding rows
            attn_weights = attn_weights.masked_fill(pad_mask, 0.0)

        ing_feat = (attn_weights.unsqueeze(2) * token_feat).sum(dim=1)  # (B, 128)

        # ----- fuse & slot predictions -----
        combined = torch.cat((img_feat, ing_feat), dim=1)
        fused = self.fusion(combined)  # (B, 128)
        return F.relu(self.regressor(fused))


def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (plus every CUDA device, if any)."""
    for seeder in (torch.manual_seed, np.random.seed, random.seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


set_seed(42)
# Width of the precomputed ingredient embeddings. Read it from `emb_matrix`
# rather than hard-coding it, so the model stays consistent with whichever
# text encoder produced the embeddings.
embedding_dim = emb_matrix.shape[1]
model = FoodWastePredictor(embedding_dim=embedding_dim, num_ingredients=NUM_ING)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Assigning the result keeps the cell from dumping the full module repr
model = model.to(device)

In [None]:
# Loss: element-wise absolute error (L1). reduction="none" keeps the
# per-sample, per-ingredient values so they can be re-weighted before averaging.
criterion = nn.L1Loss(reduction="none")
# Optimizer: Adam over all model parameters
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Make sure the model lives on the selected device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
num_epochs = 30

print("Starting training...")
for epoch in range(num_epochs):
    model.train()  # enable dropout / batch-norm updates
    running_loss = 0.0
    for images, ingredients, targets in train_loader:
        images = images.to(device)
        ingredients = [seq.to(device) for seq in ingredients]
        # Perturb the positive targets slightly on every step
        targets = jitter_targets(targets.to(device), epsilon=1.0)

        optimizer.zero_grad()
        outputs = model(images, ingredients)

        # Per-ingredient weighted L1 term
        abs_err = criterion(outputs, targets)
        loss = (abs_err * weight_vec).mean()
        # Small auxiliary L1 term on the per-sample totals
        total_penalty = F.l1_loss(outputs.sum(1), targets.sum(1))
        total_loss = loss + 0.001 * total_penalty

        total_loss.backward()
        optimizer.step()

        # Only the weighted per-ingredient term is tracked for reporting
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}")

print("Training finished.")

In [None]:
def get_predictions(model, loader, device):
    """Run `model` over `loader` in eval mode without gradients.

    Returns ``(y_true, y_pred, true_total, pred_total)`` as NumPy arrays: the
    stacked targets and predictions of shape (N, num_ing), followed by their
    per-sample sums over the ingredient axis.
    """
    model.eval()
    true_batches = []
    pred_batches = []

    with torch.no_grad():
        for batch_imgs, batch_embs, batch_targets in loader:
            batch_imgs = batch_imgs.to(device)
            batch_embs = [emb.to(device) for emb in batch_embs]
            batch_targets = batch_targets.to(device)

            outputs = model(batch_imgs, batch_embs)

            true_batches.append(batch_targets.cpu().numpy())
            pred_batches.append(outputs.cpu().numpy())

    y_true = np.concatenate(true_batches, axis=0)  # (N, num_ing)
    y_pred = np.concatenate(pred_batches, axis=0)
    return y_true, y_pred, y_true.sum(axis=1), y_pred.sum(axis=1)


def evaluate(y_true, y_pred, true_total, pred_total):
    """Print total-waste MAE and Spearman correlation; return per-slot metrics.

    The returned dict holds MSE, RMSE, MAE and R² computed on the full
    (N, num_ing) target/prediction matrices.
    """
    mae_total = np.mean(np.abs(pred_total - true_total))
    rho, _pvalue = spearmanr(true_total, pred_total)

    print(f"\nTotal-waste MAE: {mae_total:.3f}   Spearman ρ: {rho:.3f}")

    mse = mean_squared_error(y_true, y_pred)
    return {
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "MAE": mean_absolute_error(y_true, y_pred),
        "R²": r2_score(y_true, y_pred),
    }

In [None]:
# Collect targets and predictions for the test loader.
# NOTE(review): the same call is repeated in a later cell, right before `evaluate`.
y_true, y_pred, true_total, pred_total = get_predictions(model, test_loader, device)

In [None]:
def get_predictions(model, loader, device):
    """Run `model` over `loader` in eval mode without gradients.

    Returns ``(y_true, y_pred, true_total, pred_total)`` as NumPy arrays: the
    stacked targets and predictions of shape (N, num_ing), followed by their
    per-sample sums over the ingredient axis.
    """
    model.eval()
    true_batches = []
    pred_batches = []

    with torch.no_grad():
        for batch_imgs, batch_embs, batch_targets in loader:
            batch_imgs = batch_imgs.to(device)
            batch_embs = [emb.to(device) for emb in batch_embs]
            batch_targets = batch_targets.to(device)

            outputs = model(batch_imgs, batch_embs)

            true_batches.append(batch_targets.cpu().numpy())
            pred_batches.append(outputs.cpu().numpy())

    y_true = np.concatenate(true_batches, axis=0)  # (N, num_ing)
    y_pred = np.concatenate(pred_batches, axis=0)
    return y_true, y_pred, y_true.sum(axis=1), y_pred.sum(axis=1)


def evaluate(y_true, y_pred, true_total, pred_total):
    """Print total-waste MAE and Spearman correlation; return per-slot metrics.

    The returned dict holds MSE, RMSE, MAE and R² computed on the full
    (N, num_ing) target/prediction matrices.
    """
    mae_total = np.mean(np.abs(pred_total - true_total))
    rho, _pvalue = spearmanr(true_total, pred_total)

    print(f"\nTotal-waste MAE: {mae_total:.3f}   Spearman ρ: {rho:.3f}")

    mse = mean_squared_error(y_true, y_pred)
    return {
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "MAE": mean_absolute_error(y_true, y_pred),
        "R²": r2_score(y_true, y_pred),
    }

In [None]:
# Predict on the test loader, then score the predictions
predictions = get_predictions(model, test_loader, device)
y_true, y_pred, true_total, pred_total = predictions
metrics = evaluate(*predictions)

# Right-align the metric names so the values line up
print("\nTest metrics:")
for k, v in metrics.items():
    print(f"{k:>4}: {v:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Predicted vs. true total waste per sample, both axes anchored at zero
fig, ax = plt.subplots()
ax.scatter(true_total, pred_total, alpha=0.4)
ax.set_xlabel("True total waste")
ax.set_ylabel("Predicted total")
ax.set_xlim(0)
ax.set_ylim(0)
plt.show()

In [None]:
# Mean signed error per ingredient slot; list the ten largest in magnitude,
# biggest first
bias = np.mean(y_pred - y_true, axis=0)  # shape (num_ing,)
worst = np.abs(bias).argsort()[-10:]
for i in reversed(worst):
    print(f"{idx2ing[i]:<30} bias={bias[i]:+.2f}")

In [None]:
# Mean signed error per ingredient slot; list the ten largest in magnitude,
# biggest first
bias = np.mean(y_pred - y_true, axis=0)  # shape (num_ing,)
worst = np.abs(bias).argsort()[-10:]
for i in reversed(worst):
    print(f"{idx2ing[i]:<30} bias={bias[i]:+.2f}")

In [None]:
def upload_predictions_sample_by_sample(model, test_view, device, dataset, transforms, emb_matrix, ing2idx, idx2ing):
    """Predict each sample of `test_view` individually and store the result on `dataset`.

    For every sample id in the view the image is transformed, the ingredient
    embeddings are looked up in `emb_matrix`, and the model is run on that
    single sample.  The matching sample in `dataset` then receives three
    fields: ``predicted_ingredients`` (names of the slots with a positive
    prediction), ``predicted_amounts`` (their values) and
    ``total_predicted_waste`` (sum of the prediction vector, or 0.0 when no
    slot is positive).  Each sample is saved and its id printed.
    """
    model.eval()

    with torch.no_grad():
        for sample_id in test_view.values("id"):
            fo_sample = test_view[sample_id]

            # Image -> batch of one
            picture = Image.open(fo_sample.filepath).convert("RGB")
            img = transforms(picture).unsqueeze(0)

            # Ingredient names -> (L, D) float32 embedding tensor on the device
            cleaned_ingredients = list(fo_sample.ingredient_name)
            rows = [emb_matrix[ing2idx[name]] for name in cleaned_ingredients]
            ingredient_emb = torch.from_numpy(np.stack(rows)).float().to(device)

            img = img.to(device)

            # The model takes the embeddings as a list with one entry per sample
            pred = model(img, [ingredient_emb])
            pred_np = pred.squeeze(0).cpu().numpy()  # drop the batch dimension

            # Write the positive predictions back onto the FiftyOne sample
            sample = dataset[sample_id]
            non_zero_indices = np.where(pred_np > 0)[0]
            any_positive = len(non_zero_indices) > 0

            # Both lists are naturally empty when nothing is positive
            sample["predicted_ingredients"] = [idx2ing[idx] for idx in non_zero_indices]
            sample["predicted_amounts"] = pred_np[non_zero_indices].tolist()
            sample["total_predicted_waste"] = float(pred_np.sum()) if any_positive else 0.0

            sample.save()

            print(f"Processed sample {sample_id}")

# Usage:
# Derive a deterministic pipeline from `tfms` for inference: a random
# augmentation such as RandomHorizontalFlip would make the stored predictions
# differ from run to run.
inference_tfms = transforms.Compose(
    [t for t in tfms.transforms if not isinstance(t, transforms.RandomHorizontalFlip)]
)

# NOTE(review): predictions are uploaded for `train_view`, although the
# function's parameter is named `test_view` — confirm the intended split.
upload_predictions_sample_by_sample(
    model, train_view, device, loaded_dataset,
    inference_tfms, emb_matrix, ing2idx, idx2ing
)