<a href="https://colab.research.google.com/github/ZemelakGoraga/image_classification/blob/main/Copy_of_food_image_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam

In [None]:
# Upload the kaggle.json API token (downloaded from the Kaggle account settings page).
from google.colab import files

# Bind the result to a name: files.upload() returns {saved_name: file_bytes}, and
# leaving it as the cell's last expression echoes the API key into the saved
# notebook output.
uploaded = files.upload()

# Colab stores a repeated upload under a new name such as "kaggle (5).json", while
# the setup cell copies "kaggle.json". Write the fresh bytes to that name so a
# stale token is never the one that gets installed.
for saved_name, token_bytes in uploaded.items():
    with open("kaggle.json", "wb") as token_file:
        token_file.write(token_bytes)
    print(f"Stored upload '{saved_name}' as kaggle.json")

Saving kaggle.json to kaggle (5).json


{'kaggle (5).json': b'{"username":"zemelakgoraga","key":"<REDACTED>"}'}

In [None]:
# Kaggle CLI setup and dataset download.
# Step 1: install the Kaggle API client (quiet mode).
!pip install -q kaggle

# Step 2: the Kaggle API client expects its token in ~/.kaggle,
# so create that directory and copy kaggle.json from the working directory into it.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Restrict the token to owner read/write; this avoids a warning on Kaggle tool startup.
!chmod 600 ~/.kaggle/kaggle.json


# Step 3: download the food image classification dataset archive.
# NOTE(review): --force asks the CLI to download even when a local copy exists,
# so every run re-fetches the archive (about 1.68 GB per the recorded output);
# drop the flag if re-downloading is not intended.
!kaggle datasets download -d harishkumardatalab/food-image-classification-dataset --force


Downloading food-image-classification-dataset.zip to /content
 99% 1.67G/1.68G [00:09<00:00, 159MB/s]
100% 1.68G/1.68G [00:09<00:00, 181MB/s]


In [None]:
# Unzip the downloaded dataset archive next to the ZIP file and summarise its contents.
import zipfile

# Path to the ZIP file written by the Kaggle CLI download step
zip_file_path = "/content/food-image-classification-dataset.zip"
# Extract beside the archive so the dataset folder lands in the working directory
extract_dir = os.path.dirname(zip_file_path)

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    # Keep the member list for inspection in later cells
    file_paths = zip_ref.namelist()
    # Actually unpack the archive; listing the names alone leaves no files on disk
    zip_ref.extractall(extract_dir)

# Print a short preview instead of every path (the full listing is tens of
# thousands of lines and gets truncated by the notebook front end).
print(f"Extracted {len(file_paths)} entries to {extract_dir}")
for path in file_paths[:10]:
    print(path)

Streaming output truncated to the last 5000 lines.
Food Classification dataset/idli/277.jpg
Food Classification dataset/idli/278.jpg
Food Classification dataset/idli/280.jpg
Food Classification dataset/idli/282.jpg
Food Classification dataset/idli/283.jpg
Food Classification dataset/idli/284.jpg
Food Classification dataset/idli/285.jpg
Food Classification dataset/idli/286.jpg
Food Classification dataset/idli/287.jpg
Food Classification dataset/idli/288.jpg
Food Classification dataset/idli/290.jpg
Food Classification dataset/idli/292.jpg
Food Classification dataset/idli/293.jpg
Food Classification dataset/idli/295.jpg
Food Classification dataset/idli/296.jpg
Food Classification dataset/idli/297.jpg
Food Classification dataset/idli/299.jpg
Food Classification dataset/idli/300.jpg
Food Classification dataset/idli/301.jpg
Food Classification dataset/idli/302.jpg
Food Classification dataset/idli/303.jpg
Food Classification dataset/idli/305.jpg
Food Classification dataset/idli/

In [None]:
# List the working directory to find the extracted dataset folder; in this
# project it is named 'Food Classification dataset' (see the recorded output).
!ls

'Food Classification dataset'		'kaggle (2).json'   kaggle.json
 food-image-classification-dataset.zip	'kaggle (3).json'   sample_data
 image_classification			'kaggle (4).json'
'kaggle (1).json'			'kaggle (5).json'


In [None]:


# Dataset root directory, given relative to the current working directory.
# The preprocessing cell treats each entry inside it as one class folder.
dataset_dir = 'Food Classification dataset'

In [None]:
# Data preprocessing: load every image, encode the labels as integers,
# split into train/test sets and configure training-time augmentation.
image_size = (224, 224)

# Only sub-directories are classes; stray files in the dataset root are ignored.
# Sorting keeps the class order deterministic across runs and machines.
class_names = sorted(
    entry for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
)

images = []
labels = []          # class name (string) of each successfully loaded image
skipped_paths = []   # files OpenCV could not decode

for class_name in class_names:
    class_dir = os.path.join(dataset_dir, class_name)
    for image_name in sorted(os.listdir(class_dir)):
        image_path = os.path.join(class_dir, image_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (it does not raise) for unreadable or
            # non-image files; skip them instead of crashing in cvtColor.
            skipped_paths.append(image_path)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB format
        image = cv2.resize(image, image_size)
        images.append(image)
        labels.append(class_name)

if skipped_paths:
    print(f"Skipped {len(skipped_paths)} unreadable file(s)")

X = np.array(images)

# The models are compiled with sparse_categorical_crossentropy, which needs
# integer class ids rather than class-name strings.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)
# Index i of class_names is the name behind integer label i.
class_names = list(label_encoder.classes_)
num_classes = len(class_names)

# Split the dataset into training and testing sets; stratify so every class
# keeps the same proportion in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The generator below rescales training batches to [0, 1]; apply the same
# scaling to the held-out images so validation/evaluation see the same range.
# NOTE: this makes a float32 copy of the test split (4x its uint8 size).
X_test = X_test.astype("float32") / 255.0

# Data augmentation (applied to training batches only)
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    rescale=1.0/255.0
)
# datagen.fit() is intentionally not called: it only computes statistics for
# featurewise centering/normalisation/ZCA, none of which are enabled here.


In [None]:
# Sanity check: report the dimensions of the training images and their labels.
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")


In [None]:

# Build a CNN model: three conv/pool stages followed by a dense classifier head.
model = Sequential()

# First stage also declares the expected input: 224x224 RGB images.
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Remaining stages double the filter count each time.
for n_filters in (64, 128):
    model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten, one hidden layer with dropout, softmax over classes.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train the model on augmented batches drawn from the training split produced
# by the preprocessing cell. X_train / y_train must NOT be reassigned here:
# the former `X_train = ...` placeholders replaced the data with Ellipsis and
# made fit() fail.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    epochs=10,
    validation_data=(X_test, y_test),
)


In [None]:
# Score the trained CNN on the held-out test split and report both metrics.
evaluation = model.evaluate(X_test, y_test)
loss, accuracy = evaluation
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')


NameError: ignored

In [None]:
# Classification report and confusion matrix for the CNN on the test split.
y_pred = model.predict(X_test)
# Each row of y_pred holds per-class probabilities; take the most likely class id.
y_pred_classes = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred_classes))

conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Draw the heatmap with matplotlib directly: seaborn (`sns`) is never imported
# in this notebook, so the former sns.heatmap call raised NameError.
fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(conf_matrix, cmap='Blues')

# Annotate every cell with its count; switch to white text on dark cells.
text_threshold = conf_matrix.max() / 2
for (row, col), count in np.ndenumerate(conf_matrix):
    ax.text(col, row, f'{count:d}', ha='center', va='center',
            color='white' if count > text_threshold else 'black')

ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

NameError: ignored

In [None]:
# Using transfer learning with MobileNetV2: an ImageNet-pretrained convolutional
# base (without its original classifier) topped by a new dense head.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

model_tl = Sequential()
model_tl.add(base_model)
# New classification head sized for this dataset's classes.
model_tl.add(Flatten())
model_tl.add(Dense(256, activation='relu'))
model_tl.add(Dropout(0.5))
model_tl.add(Dense(num_classes, activation='softmax'))

In [None]:
# Compile the transfer-learning model with a small learning rate.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by current Keras optimizers.
model_tl.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Data preprocessing for transfer learning
# Load images, resize to (224, 224), and normalize
# Split into training and testing sets

# Train the transfer learning model

# Evaluate the transfer learning model

# Save and export the models for future use

# Hypotheses and questions
# 1. Are certain classes more accurately predicted than others?
# 2. How does the addition of data augmentation affect model performance?
# 3. Does the transfer learning model outperform the CNN model in terms of accuracy?

# Data analysis and visualization to answer the hypotheses
# Analyze class-wise accuracy
# Compare performance metrics of the CNN model with and without data augmentation
# Compare performance metrics of the CNN model and the transfer learning model

# Visualize the results

In [None]:
# `df` was never defined anywhere in this notebook, so `df.head()` raised
# NameError on a fresh kernel. Build a per-class image-count table from the
# `labels` list collected in the preprocessing cell and preview it.
# NOTE(review): the original intent for `df` is unknown — confirm this is the
# table that was meant to be inspected.
df = (
    pd.Series(labels, name="class_name")
    .value_counts()
    .rename_axis("class_name")
    .reset_index(name="image_count")
)
df.head()