# This notebook builds a reference list of which foods contain which nutrients

# Importing Required Libraries

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import json

# Define Paths for Datasets and Metadata

In [2]:
# Root folder that every dataset path below is built from.
base_path = 'nutritionverse-data'

# The manual dataset lives in a folder nested inside a same-named folder;
# spell that location once so the paths derived from it cannot drift apart.
manual_path = os.path.join(base_path, 'nutritionverse-manual/nutritionverse-manual')

images_path = os.path.join(manual_path, 'images')
metadata_path = os.path.join(base_path, 'nutritionverse_dish_metadata3.csv')
# The COCO annotation file sits inside the images folder, so derive it from
# images_path instead of repeating the directory literal.
coco_json_path = os.path.join(images_path, '_annotations.coco.json')
splits_path = os.path.join(manual_path, 'updated-manual-dataset-splits.csv')

# Load COCO Image Metadata

In [3]:
# JSON text is UTF-8 by specification; pass the encoding explicitly so that
# parsing does not depend on the platform's locale default.
with open(coco_json_path, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

# Report how many image and annotation records the file contains.
print(f"COCO metadata loaded with {len(coco_data['images'])} images and {len(coco_data['annotations'])} annotations")

COCO metadata loaded with 905 images and 4392 annotations


# Inspect COCO Images Structure

In [4]:
# Tabulate the COCO image records (one row per image) so that the fields
# available for each image can be listed by column name.
coco_images = pd.DataFrame(coco_data['images'])
print("COCO images structure:", coco_images.columns.tolist(), sep="\n")

COCO images structure:
['id', 'license', 'file_name', 'height', 'width', 'date_captured']


# Create Mapping from Image ID to File Name

In [5]:
# Lookup table from a COCO image id to that image's file name.
image_id_to_filename = dict(
    (img['id'], img['file_name'])
    for img in coco_data['images']
)
print(f"Created mapping for {len(image_id_to_filename)} image IDs to filenames")

Created mapping for 905 image IDs to filenames


# Load Dataset Splits

In [6]:
# The predefined split file is optional: when it is absent, splits_df is left
# as None so later code can tell that no predefined split was loaded.
if not os.path.exists(splits_path):
    print("Dataset splits file not found. Will use random splitting.")
    splits_df = None
else:
    splits_df = pd.read_csv(splits_path)
    print(f"Dataset splits loaded with {len(splits_df)} entries")
    # The 'category' column holds the split label of each entry.
    print(f"Split distribution: {splits_df['category'].value_counts().to_dict()}")

Dataset splits loaded with 889 entries
Split distribution: {'Train': 624, 'Val': 265}


# Load Dish Metadata

In [7]:
# Read the dish-level metadata table (one row per dish).
metadata_df = pd.read_csv(metadata_path)

# Print a short summary, then show the first rows as the cell's rich output.
print(
    "Dataset Metadata Overview:",
    f"Total entries: {len(metadata_df)}",
    sep="\n",
)
metadata_df.head()

Dataset Metadata Overview:
Total entries: 251


Unnamed: 0,dish_id,total_food_weight,total_calories,total_fats,total_carbohydrates,total_protein,total_calcium,total_iron,total_magnesium,total_potassium,...,fat(g)_7,carbohydrates(g)_7,protein(g)_7,calcium(mg)_7,iron(mg)_7,magnesium(mg)_7,potassium(mg)_7,sodium(mg)_7,vitamin_d(µg)_7,vitamin_b12(µg)_7
0,1,165.0,95.73,0.3336,22.7958,0.5049,0.01233,0.000199,0.00888,0.19104,...,,,,,,,,,,
1,2,127.0,186.99,1.8662,21.1603,19.8902,0.11461,0.001804,0.05289,0.2405,...,,,,,,,,,,
2,3,156.0,92.04,0.312,21.9336,0.4212,0.00936,0.000172,0.0078,0.16224,...,,,,,,,,,,
3,4,84.0,74.76,0.7224,0.0,15.96,0.08064,0.000244,0.03612,0.1932,...,,,,,,,,,,
4,5,307.0,399.1,16.3631,42.4888,20.0778,0.34077,0.003899,0.06447,0.55874,...,,,,,,,,,,


# Extract Nutritional Information Per Food Type

In [8]:
# Output nutrient name -> prefix of the per-item metadata column that holds it.
# The full column name is '<prefix>_<i>' for item slot i, e.g. 'calories(kCal)_1'.
# The key order here is also the column order of the resulting table.
NUTRIENT_COLUMN_PREFIXES = {
    'calories': 'calories(kCal)',
    'fat': 'fat(g)',
    'carbohydrates': 'carbohydrates(g)',
    'protein': 'protein(g)',
    'calcium': 'calcium(mg)',
    'iron': 'iron(mg)',
    'magnesium': 'magnesium(mg)',
    'potassium': 'potassium(mg)',
    'sodium': 'sodium(mg)',
    'vitamin_d': 'vitamin_d(µg)',
    'vitamin_b12': 'vitamin_b12(µg)',
}

# Build {food_type: {nutrient: value}} by scanning item slots 1..7 of every dish.
#
# NOTE(review): values are copied, not aggregated. When a food type occurs in
# several dishes or slots, the last non-missing value per nutrient wins, and the
# values are taken as stored for that item (no normalisation by weight here).
# Confirm this is the intended meaning of the reference table.
food_nutrient = {}
for index, item in metadata_df.iterrows():
    for i in range(1, 8):
        food_type_col = f'food_item_type_{i}'

        # Skip slots that do not exist in the table or are empty for this dish.
        if food_type_col not in item or pd.isna(item[food_type_col]):
            continue

        food_type = item[food_type_col]

        # First sighting of this food type: start every nutrient at 0 so that
        # nutrients which are never reported still appear in the output.
        if food_type not in food_nutrient:
            food_nutrient[food_type] = dict.fromkeys(NUTRIENT_COLUMN_PREFIXES, 0)

        # Overwrite each nutrient for which this item slot has a value;
        # missing (NaN) entries leave the previously stored value untouched.
        for nutrient, column_prefix in NUTRIENT_COLUMN_PREFIXES.items():
            value = item[f'{column_prefix}_{i}']
            if pd.notna(value):
                food_nutrient[food_type][nutrient] = value


# Create Final Nutritional DataFrame

In [9]:
# Turn the {food_type: {nutrient: value}} dictionary into a table with one row
# per food type: the dictionary keys become the index, are moved into a regular
# column by reset_index(), and that column is then named 'food_type'.
food_nutrient_df = (
    pd.DataFrame.from_dict(food_nutrient, orient='index')
    .reset_index()
    .rename(columns={'index': 'food_type'})
)

# Show the first rows as the cell's output
food_nutrient_df.head()

Unnamed: 0,food_type,calories,fat,carbohydrates,protein,calcium,iron,magnesium,potassium,sodium,vitamin_d,vitamin_b12
0,red-apple,92.04,0.312,21.9336,0.4212,0.00936,0.000172,0.0078,0.16224,0.00156,0.0,0.0
1,carrot,3.69,0.0216,0.8622,0.0837,0.00297,2.7e-05,0.00108,0.0288,0.00621,0.0,0.0
2,half-bread-loaf,112.23,1.1438,21.1603,3.9302,0.03397,0.001561,0.01677,0.0473,0.22059,1.29e-07,1.72e-08
3,lobster,74.76,0.7224,0.0,15.96,0.08064,0.000244,0.03612,0.1932,0.40824,0.0,1.1984e-06
4,lasagna,399.1,16.3631,42.4888,20.0778,0.34077,0.003899,0.06447,0.55874,0.87188,0.0,1.7499e-06


# Save Nutrition Data to CSV

In [10]:
# Write the reference table to the working directory; index=False keeps the
# positional row index out of the file.
output_csv_path = 'food_nutrients.csv'
food_nutrient_df.to_csv(output_csv_path, index=False)