In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated "<directory>:<URL-encoded download URL>" entries, parsed by the download loop below.
# NOTE(review): the encoded URL is a signed link whose query string carries
# X-Goog-Date=20240404T092325Z and X-Goog-Expires=259200, so it has presumably
# expired and must be regenerated before this cell can download anything - confirm.
DATA_SOURCE_MAPPING = 'food-101:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F8544%2F11959%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240404%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240404T092325Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D0acfc5342939d262d40c6bc1b7650504595b42e5324b3f2f308a550fcdf603ecde04f81c3c8d69c74018ef5d64835acca759c4fcb62298acfa18ffe3efb0cd9ef9bd90284f46e566b8654b609697d56c417f51ee0a4a2221063ae2c82eacaea228101e6dae98370ca728b35fddcc1d935b5a23a056a7ba58c61a07acda4917199bb7bea9478ca16ee6bd3ae9be46764b202c9eb5bdff3a04c4570bc54f6d52a7c34efb6028fee9741d14346b9e770e50b3d910be60e0634d319d2f75b2802a7abe413a5651044e0825369f5980781a165632d5b038d4df0f3f25949d6dd19e8c94759751907ef3d19877874c56b9b6771ba35584c4971acd3ece5d2f2191dc8b'

# Directory under which each data source is extracted.
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created next to the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# Not referenced by any of the visible cells.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source into a temporary file and unpack it
# under KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<URL-encoded URL>". Split on the first colon
    # only, so an unexpected extra colon cannot break the unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The Content-Length header can be missing; in that case the
            # progress bar simply stays empty instead of raising on int(None).
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here
                # would replace the module and break the next tar download.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
import numpy as np
import pandas as pd
import matplotlib as plot
import seaborn as sns


In [None]:
#import necessary libraries
import os
import copy
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torchvision
import tensorflow as  tf
from torchvision import models
from sklearn.utils import shuffle
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.font_manager
from collections import OrderedDict

**Reading the data**

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

In [None]:
import os

# Show the directory that relative paths in the following cells resolve against.
working_directory = os.getcwd()
print(working_directory)

In [None]:
if "food-101" in os.listdir():
    print("Dataset already exists")
else:
    print("Downloading the data...")
    !wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
    print("Dataset downloaded!")
    print("Extracting data..")
    !tar xzvf food-101.tar.gz > /dev/null 2>&1
    print("Extraction done!")

In [None]:
# Build the reduced class list used by the rest of the notebook: the first 20
# entries of 'classes.txt' followed by a catch-all "other" class.
# (The same read/trim/write/print sequence used to be pasted twice in this
# cell; one pass produces the same file.)
# NOTE(review): this relative path is resolved against the current working
# directory, and the `%cd /kaggle/input/food-101/` only happens in a later
# cell - confirm this cell works on a fresh kernel.
with open('food-101/food-101/meta/classes.txt', 'r') as input_file:
    lines = input_file.readlines()

# Keep the first 20 class names and add "other" as the 21st line.
first_20_lines = lines[:20]
first_20_lines.append("other\n")

# Write the reduced list to 'classes_mod.txt' in the working directory.
with open('/kaggle/working/classes_mod.txt', 'w') as output_file:
    output_file.writelines(first_20_lines)

print("Modified file 'classes_mod.txt' created successfully and here is the file:")
df = pd.read_csv('/kaggle/working/classes_mod.txt', sep='\t', header=None)
print(df)

In [None]:
# Change the working directory so that the relative 'food-101/food-101/...'
# paths used below are resolved under /kaggle/input/food-101/.
%cd /kaggle/input/food-101/
print("Files in the input directory")
print(os.listdir("food-101/food-101/meta"))

# Preview the first lines of each metadata file.
!head 'food-101/food-101/meta/classes.txt'
!head 'food-101/food-101/meta/train.txt'
!head 'food-101/food-101/meta/labels.txt'
!head 'food-101/food-101/meta/test.txt'

In [None]:
# Create 'classes_mod.txt': the first 20 class names from 'classes.txt'
# followed by one extra "other" line.
with open('food-101/food-101/meta/classes.txt', 'r') as input_file:
    lines = list(input_file)

# 20 original classes + the catch-all class as the 21st entry.
first_20_lines = lines[:20] + ["other\n"]

with open('/kaggle/working/classes_mod.txt', 'w') as output_file:
    output_file.write(''.join(first_20_lines))

# Read the result back so the written content can be checked by eye.
print("Modified file 'classes_mod.txt' created successfully and here is the file:")
df = pd.read_csv('/kaggle/working/classes_mod.txt', sep='\t', header=None)
print(df)

In [None]:
# Collect every line of 'classes_mod.txt' (whitespace stripped) into a set for
# fast membership tests; the set holds whatever that file contains.
with open('/kaggle/working/classes_mod.txt', 'r') as classes_file:
    first_20_classes = {entry.strip() for entry in classes_file}

In [None]:
# Rewrite 'train.txt' into 'train_mod.txt': lines whose class (the text before
# the first '/') is in first_20_classes are copied unchanged, every other line
# has its class replaced by "other".
with open('food-101/food-101/meta/train.txt', 'r') as input_file, open('/kaggle/working/train_mod.txt', 'w') as output_file:
    for raw_line in input_file:
        parts = raw_line.strip().split('/')
        keep_class = parts[0] in first_20_classes
        output_file.write(raw_line if keep_class else f'other/{parts[1]}\n')


In [None]:
# Rewrite 'test.txt' into 'test_mod.txt' with the same rule as the training
# split: classes outside first_20_classes are relabelled "other".
with open('food-101/food-101/meta/test.txt', 'r') as input_file, open('/kaggle/working/test_mod.txt', 'w') as output_file:
    for line in input_file:
        parts = line.strip().split('/')
        if parts[0] not in first_20_classes:
            # Unknown class: keep the image id, swap the class name.
            output_file.write(f'other/{parts[1]}\n')
            continue
        # Known class: copy the line unchanged.
        output_file.write(line)

# Preview both rewritten split files.
print("Modified file 'train_mod.txt' created successfully and here is the file:")
train_mod_path = '/kaggle/working/train_mod.txt'
df = pd.read_csv(train_mod_path, sep='\t', header=None)
print(df)
print("Modified file 'test_mod.txt' created successfully and here is the file:")
test_mod_path = '/kaggle/working/test_mod.txt'
df = pd.read_csv(test_mod_path, sep='\t', header=None)
print(df)

**Create the image path files**

In [None]:
def read_train_images_from_file(file_path, base_directory='food-101/food-101/images', image_extension='.jpg', num_lines=None,
                                output_path='/kaggle/working/sorted_full_paths.txt'):
    """Turn a split listing into a file of image paths.

    Each line of ``file_path`` is stripped, suffixed with ``image_extension``
    and joined onto ``base_directory``; the results are written to
    ``output_path``, one per line and in the input order. Every input line
    yields exactly one output line.

    Args:
        file_path: Text file with one image identifier per line.
        base_directory: Directory prefix prepended to every identifier.
        image_extension: Suffix appended to every identifier.
        num_lines: If given, only ``lines[:num_lines]`` are processed;
            ``None`` processes the whole file.
        output_path: Destination file (overwritten). Defaults to the path
            that was previously hard-coded.
    """
    with open(file_path, 'r') as file, open(output_path, 'w') as output_file:
        # Slicing with None keeps every line, so one expression covers both cases.
        lines = file.readlines()[:num_lines]
        for line in lines:
            image_path = line.strip()  # drop the newline and surrounding whitespace
            full_image_path = os.path.join(base_directory, image_path + image_extension)
            output_file.write(full_image_path + "\n")

In [None]:
def read_test_images_from_file(file_path, base_directory='food-101/food-101/images', image_extension='.jpg', num_lines=None,
                               output_path='/kaggle/working/test_full_paths.txt'):
    """Turn a split listing into a file of image paths.

    Each line of ``file_path`` is stripped, suffixed with ``image_extension``
    and joined onto ``base_directory``; the results are written to
    ``output_path``, one per line and in the input order. Every input line
    yields exactly one output line.

    Args:
        file_path: Text file with one image identifier per line.
        base_directory: Directory prefix prepended to every identifier.
        image_extension: Suffix appended to every identifier.
        num_lines: If given, only ``lines[:num_lines]`` are processed;
            ``None`` processes the whole file.
        output_path: Destination file (overwritten). Defaults to the path
            that was previously hard-coded.
    """
    with open(file_path, 'r') as file, open(output_path, 'w') as output_file:
        # Slicing with None keeps every line, so one expression covers both cases.
        lines = file.readlines()[:num_lines]
        for line in lines:
            image_path = line.strip()  # drop the newline and surrounding whitespace
            full_image_path = os.path.join(base_directory, image_path + image_extension)
            output_file.write(full_image_path + "\n")

In [None]:
# Write the full image paths for both splits; the listings come from the
# original (unmodified) train/test files.
read_train_images_from_file('food-101/food-101/meta/train.txt')
read_test_images_from_file('food-101/food-101/meta/test.txt')
# The old message had an unbalanced quote, promised a file listing that was
# never printed, and did not mention the test file.
print("'sorted_full_paths.txt' and 'test_full_paths.txt' created successfully.")

**Create Train and Test data**

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
# Both generators multiply pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Load the training image paths, one entry per line.
with open('/kaggle/working/sorted_full_paths.txt', 'r') as file:
    paths_text = file.read()
image_paths = paths_text.splitlines()

# Derive one label per line of 'train_mod.txt': the text before the first '/',
# with underscores replaced by spaces.
labels = []
with open('/kaggle/working/train_mod.txt', 'r') as file:
    for line in file:
        labels.append(line.partition('/')[0].replace('_', ' '))

In [None]:
# Pair each image path with its label; both lists must have the same length.
df_train = pd.DataFrame({'filename': image_paths, 'label': labels})
print(df_train)

# Stream augmented batches of 100 images, with one-hot ('categorical') labels
# taken from the 'label' column.
flow_options = dict(
    x_col='filename',
    y_col='label',
    batch_size=100,
    class_mode='categorical',
)
train_generator = train_datagen.flow_from_dataframe(dataframe=df_train, **flow_options)

In [None]:

def barplot_vis(imgs_dataframe):
    """Draw a bar chart of the number of images per class.

    Labels that are not in the global ``classes_21`` are counted as "other".
    One bar is drawn for every entry of ``classes_21``, in that order; a
    class without any image gets a bar of height 0 instead of raising.

    Args:
        imgs_dataframe: DataFrame with a ``label`` column. It is not modified.

    NOTE(review): ``classes_21`` is not defined in the visible cells; it is
    assumed to be a sequence of label strings in the same spelling as the
    ``label`` column - confirm.
    """
    fig, ax = plt.subplots()

    # Collapse every label outside classes_21 into "other" on a copy.
    new_labels = [row if row in classes_21 else "other" for row in imgs_dataframe.label]
    tmp_imgs_dataframe = imgs_dataframe.copy(deep=True)
    tmp_imgs_dataframe['label'] = new_labels

    # Count rows per label; .get(..., 0) covers classes with no rows, where
    # groupby(...).get_group(...) would raise KeyError.
    label_counts = tmp_imgs_dataframe['label'].value_counts()
    heights = [int(label_counts.get(group, 0)) for group in classes_21]

    # The figure and the heights were previously built but never drawn.
    positions = range(len(heights))
    ax.bar(positions, heights)
    ax.set_xticks(list(positions))
    ax.set_xticklabels(list(classes_21), rotation=90)
    ax.set_xlabel("class")
    ax.set_ylabel("number of images")
    ax.set_title("Images per class")

In [None]:
# Distribution strategy that mirrors the model across the devices TensorFlow
# detects; the print below reports how many replicas that is.
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# Import from tensorflow.keras, matching the ImageDataGenerator import used
# earlier, so the model and the data pipeline come from the same Keras build.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D


In [None]:
# Build your model here
with strategy.scope():
    # Load the pre-trained model (excluding the top classification layers)
    #base_model = VGG16(weights='imagenet', include_top=False, input_shape=(240, 320, 3))
    base_model = VGG16(weights='imagenet', include_top=False)

    # Add custom classification layers
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    predictions = Dense(21, activation='softmax')(x)  # Assuming 21 classes
