First, we will build a Jupyter notebook and try to obtain the best possible result. After that, we will split the notebook into separate Python scripts for deployment, if needed.

# Getting the data ready

In [1]:
import os
import zipfile
import tarfile
from pathlib import Path
import requests

In [2]:
# Download and unpack the Food-101 archive once; later runs reuse the folder.
data_path = Path('data')
image_path = data_path / "food-101"
archive_path = data_path / 'food-101.tar.gz'
dataset_url = "http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz"

if image_path.exists():
    print(f'{image_path} Already exists!!!')
else:
    # `image_path` doubles as the "dataset is ready" marker checked above, so
    # only its parent is created before the download has succeeded. Otherwise
    # a failed download would leave an empty folder and the next run would
    # wrongly report that the data already exists.
    data_path.mkdir(parents=True, exist_ok=True)

    try:
        print("Downloading food101 dataset. . . . ")
        # Stream the response to disk in chunks instead of buffering the whole
        # archive in memory through `response.content`.
        with requests.get(dataset_url, stream=True, timeout=60) as response:
            # Fail loudly on HTTP errors rather than saving an error page as the archive.
            response.raise_for_status()
            with open(archive_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        image_path.mkdir(parents=True, exist_ok=True)
        # NOTE(review): if the archive contains its own top-level `food-101/`
        # folder, the images end up in `image_path / "food-101" / "images"`
        # rather than `image_path / "images"` -- confirm against the path used
        # by the later cells.
        with tarfile.open(archive_path, 'r:gz') as tar:
            tar.extractall(path=image_path)
    finally:
        # Remove the (possibly partial) archive whether or not the steps above succeeded.
        if archive_path.exists():
            os.remove(archive_path)

    print("Extraction complete.")

In [12]:
# Visualize the structure of the directory

def print_directory_tree(path, indent=""):
    """
    Print the sub-directory hierarchy below `path` as a tree.

    The directory is traversed recursively and every sub-directory is printed
    on its own line, in sorted order. Only directories are shown; files are
    omitted.

    Parameters:
        path (str or os.PathLike): The directory whose tree is printed.
        indent (str): Prefix written before each entry. It grows by one level
                      per recursion step and should be left at its default
                      when calling the function.

    Returns:
        None. The tree is written to standard output.
    """
    # Sorted listing keeps the output stable across file systems.
    for item in sorted(os.listdir(path)):
        full_path = os.path.join(path, item)

        # Files are skipped; only directories are printed and descended into.
        if os.path.isdir(full_path):
            print(f"{indent}├── {item}/")
            print_directory_tree(full_path, indent + "│   ")

# Example: show the folder hierarchy under the data root
directory_path = "data"
print_directory_tree(path=directory_path)

├── food-101/
│   ├── Test/
│   │   ├── apple_pie/
│   │   ├── baby_back_ribs/
│   │   ├── baklava/
│   │   ├── beef_carpaccio/
│   │   ├── beef_tartare/
│   │   ├── beet_salad/
│   │   ├── beignets/
│   │   ├── bibimbap/
│   │   ├── bread_pudding/
│   │   ├── breakfast_burrito/
│   │   ├── bruschetta/
│   │   ├── caesar_salad/
│   │   ├── cannoli/
│   │   ├── caprese_salad/
│   │   ├── carrot_cake/
│   │   ├── ceviche/
│   │   ├── cheese_plate/
│   │   ├── cheesecake/
│   │   ├── chicken_curry/
│   │   ├── chicken_quesadilla/
│   │   ├── chicken_wings/
│   │   ├── chocolate_cake/
│   │   ├── chocolate_mousse/
│   │   ├── churros/
│   │   ├── clam_chowder/
│   │   ├── club_sandwich/
│   │   ├── crab_cakes/
│   │   ├── creme_brulee/
│   │   ├── croque_madame/
│   │   ├── cup_cakes/
│   │   ├── deviled_eggs/
│   │   ├── donuts/
│   │   ├── dumplings/
│   │   ├── edamame/
│   │   ├── eggs_benedict/
│   │   ├── escargots/
│   │   ├── falafel/
│   │   ├── filet_mignon/
│   │   ├── fish_and_

In [13]:
def walk_through_dir(path):
    """
    Print a one-line summary for `path` and every directory below it.

    Args:
        path (str or pathlib.Path): directory at which the walk starts.

    Returns:
        None. For each directory visited, one line is printed with:
          the number of sub-directories it contains,
          the number of files it contains (reported as "images"),
          the path of the directory itself.
    """
    for current_dir, subdirs, files in os.walk(path):
        n_dirs, n_files = len(subdirs), len(files)
        print(f"There are {n_dirs} directories and {n_files} images in '{current_dir}'.")

In [11]:
walk_through_dir(image_path)


NameError: name 'walk_through_dir' is not defined

In [4]:
# Folder that is listed for class names and used as the source of the Train/Test split
new_image_path = image_path.joinpath('images')

In [5]:
new_image_path

PosixPath('data/food-101/images')

# Creating Custom DataLoader



In [6]:
from typing import Tuple, Dict, List

In [7]:
# One class per sub-directory of the images folder, with underscores shown as
# spaces. Stray files (e.g. `.DS_Store`) are skipped so that the class list
# agrees with `split_data`, which only treats directories as categories.
class_names_found = [
    entry.replace('_', ' ')
    for entry in sorted(os.listdir(new_image_path))
    if os.path.isdir(os.path.join(new_image_path, entry))
]

In [8]:
# Map every class name to its position in the sorted class list
classes = class_names_found
class_to_idx = dict(zip(classes, range(len(classes))))

## Moving to Train and Test Directory

In [20]:
import os
import shutil
import random

# Source of the images and the destination root of each split
source_dir = new_image_path
train_dir = image_path.joinpath("Train")  # Replace with the path where Train data will be saved
test_dir = image_path.joinpath("Test")  # Replace with the path where Test data will be saved
# Fraction of each category that goes to Train; the remainder goes to Test
split_ratio = 0.7  # 70% for train, 30% for test

# Function to create directories
def create_dir(path):
    """
    Create `path`, including any missing parent directories.

    Calling it for a directory that already exists is a no-op.

    Args:
        path (str or os.PathLike): directory to create.
    """
    # `exist_ok=True` replaces the separate exists-check, which could race
    # with another process creating the directory in between.
    os.makedirs(path, exist_ok=True)

# Function to split files into Train and Test
def split_data(source, train, test, ratio, seed=42):
    """
    Copy the files of every category folder in `source` into a Train and a Test tree.

    For each sub-directory of `source`, a folder with the same name is created
    under both `train` and `test`. The files of that category are shuffled
    reproducibly, the first `int(len(files) * ratio)` of them are copied to
    the Train folder and the remainder to the Test folder. The originals are
    left untouched (files are copied, not moved).

    Args:
        source (str or os.PathLike): directory holding one sub-directory per category.
        train (str or os.PathLike): root directory of the training split.
        test (str or os.PathLike): root directory of the test split.
        ratio (float): fraction of each category's files that goes to `train`.
        seed (int): seed of the per-category shuffle. The same seed always
            yields the same split, so re-running never mixes Train and Test.
    """
    for category in sorted(os.listdir(source)):
        category_path = os.path.join(source, category)

        # Entries that are not directories are not categories.
        if not os.path.isdir(category_path):
            continue

        train_category = os.path.join(train, category)
        test_category = os.path.join(test, category)
        os.makedirs(train_category, exist_ok=True)
        os.makedirs(test_category, exist_ok=True)

        # Keep regular files only: `shutil.copy` cannot copy a sub-directory.
        # Sorting first makes the shuffle independent of the arbitrary order
        # in which `os.listdir` returns entries.
        images = sorted(
            name
            for name in os.listdir(category_path)
            if os.path.isfile(os.path.join(category_path, name))
        )
        # A fresh generator per category keeps each category's split
        # independent of which other categories are present.
        random.Random(seed).shuffle(images)

        split_index = int(len(images) * ratio)

        for img in images[:split_index]:
            shutil.copy(os.path.join(category_path, img), os.path.join(train_category, img))
        for img in images[split_index:]:
            shutil.copy(os.path.join(category_path, img), os.path.join(test_category, img))

# Create both split roots, then distribute the images between them
for split_root in (train_dir, test_dir):
    create_dir(split_root)
split_data(source=source_dir, train=train_dir, test=test_dir, ratio=split_ratio)

print("Data has been split into Train and Test successfully!")

Data has been split into Train and Test successfully!


In [23]:
# Roots of the Train and Test image folders
train_path = image_path.joinpath("Train")
test_path = image_path.joinpath("Test")