<a href="https://colab.research.google.com/github/hanhduyenng/Portfolio-Projects/blob/main/meat_freshness_image_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<target directory>:<URL-encoded download URL>" entries;
# the loop below splits each entry on ':' and unquotes the URL before fetching it.
# NOTE(review): the URL carries X-Goog-Date / X-Goog-Expires query parameters, so it
# looks like a time-limited signed link that must be regenerated once it has
# expired — confirm before relying on this cell.
DATA_SOURCE_MAPPING = 'meat-freshness-image-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F2304742%2F3878217%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240609%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240609T025526Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D62a281ae1cb224c13306bd0e548331818facc21d2369f201b213e852efdeaf46594047ae2319667f9125fcabfbf5a87f003cab2fb05eaaeed1ba638acefbde673d9e68c96bbaf208dd92641a4c43d5c4579ad28342e3f89a6478fd3d531cfaa0fa6ccef3825f63b68cc62eef67e582873d1acc256eb56173a112117ad7e3df371f483e9983d4d8edb8201158f31ec4780be895afaa734f1d68d964501b9bd5742a6471b143beae26547567abdda83476c1f9521d6fea0afe7f265c857923c3de57c692547bad46437651a40d94d580bb4187d9dd254fd011dc86a13903fc3673a69dfa34710e0ded0fbaeb68f84b6f65389eea55301e3bc77a06ed71071520b6'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<url-encoded URL>" entry of DATA_SOURCE_MAPPING into a
# temporary file and unpack it under KAGGLE_INPUT_PATH/<directory>.
# A failed download is reported and skipped so the remaining entries are still tried.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # maxsplit=1: only the first ':' separates the directory name from the URL
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; without it the total size is unknown and
            # the progress bar simply stays empty instead of crashing on int(None).
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f"Downloading {directory}, {content_length or 'unknown'} bytes compressed")
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_length)) if total_length > 0 else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by name
            # and would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name so the `tarfile` module is not
                # overwritten by the opened archive object.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        # HTTPError is handled first (it is a subclass of OSError): the usual cause
        # is an expired download link.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path} ({e})')
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path} ({e})')

print('Data source import complete.')


Downloading meat-freshness-image-dataset, 62254533 bytes compressed
Downloaded and uncompressed: meat-freshness-image-dataset
Data source import complete.


In [2]:
# importing modules
import cv2
import numpy as np
import os
import pandas as pd
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
# Accumulators for the preprocessed images (x) and their integer labels (y).
train_x, train_y = [], []
test_x, test_y = [], []

In [4]:
# Each file name starts with its class, followed by '-'; collecting the text
# before the first '-' for every file shows which prefixes occur in the folder.

file_name = os.listdir('../input/meat-freshness-image-dataset/Meat Freshness.v1-new-dataset.multiclass/train/')
names = [entry.partition('-')[0] for entry in file_name]
set(names)

{'FRESH', 'HALF', 'SPOILED', '_classes.csv'}

In [5]:
# performing label encoding
class_label_encoding = {
    'SPOILED': 0,
    'HALF': 1,
    'FRESH': 2
}

In [6]:
train_dir = '../input/meat-freshness-image-dataset/Meat Freshness.v1-new-dataset.multiclass/train/'

# Start from empty lists so that re-running this cell does not append every
# image a second time.
train_x, train_y = [], []

# sorted(): os.listdir order is arbitrary, and the seeded train/validation split
# later in the notebook is only reproducible if the images arrive in a fixed order.
for file_name in sorted(os.listdir(train_dir)):
    class_name = file_name.split('-')[0]
    if class_name not in class_label_encoding:
        continue # ignore '_classes.csv' and any other file without a known class prefix
    img = cv2.imread(os.path.join(train_dir, file_name)) # read the image (BGR)
    if img is None: # cv2.imread returns None instead of raising when a file cannot be decoded
        print(f'Skipping unreadable image: {file_name}')
        continue
    img = img.astype('float32')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # convert to rgb format
    img = cv2.resize(img, (128, 128), interpolation=cv2.INTER_AREA) # resize the image
    img /= 255. # normalising pixel values
    train_x.append(img)
    train_y.append(class_label_encoding[class_name])

In [7]:
# The dataset's 'valid' folder is kept aside as the held-out test set
# (test_x / test_y) that the final evaluation cell uses.
val_dir = '../input/meat-freshness-image-dataset/Meat Freshness.v1-new-dataset.multiclass/valid/'

# Start from empty lists so that re-running this cell does not append every
# image a second time.
test_x, test_y = [], []

# sorted(): gives a fixed order regardless of how the filesystem lists the folder
for file_name in sorted(os.listdir(val_dir)):
    class_name = file_name.split('-')[0]
    if class_name not in class_label_encoding:
        continue # ignore '_classes.csv' and any other file without a known class prefix
    img = cv2.imread(os.path.join(val_dir, file_name)) # read the colour image (BGR)
    if img is None: # cv2.imread returns None instead of raising when a file cannot be decoded
        print(f'Skipping unreadable image: {file_name}')
        continue
    img = img.astype('float32')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # convert to rgb format
    img = cv2.resize(img, (128, 128), interpolation=cv2.INTER_AREA) # resize the image
    img /= 255. # normalising pixel values
    test_x.append(img)
    test_y.append(class_label_encoding[class_name])

In [8]:
# Sanity check: number of images / labels loaded for each split.
print('shape of train x:', np.asarray(train_x).shape)
print(f'len of train y: {len(train_y)}')

print('shape of test x:', np.asarray(test_x).shape)

print(f'len of test y: {len(test_y)}')

shape of train x: (1815, 128, 128, 3)
len of train y: 1815
shape of test x: (451, 128, 128, 3)
len of test y: 451


In [9]:
# let's look at the class distribution
print('Train distribution')
print(pd.Series(train_y).value_counts())

print('Test distribution')
pd.Series(test_y).value_counts()

Train distribution
2    675
1    630
0    510
Name: count, dtype: int64
Test distribution


2    178
1    159
0    114
Name: count, dtype: int64

In [10]:
# Hold out 30% of the training images for validation. Stratifying on the labels
# keeps the class proportions the same in both parts; the seed fixes the split.
x_train, x_val, y_train, y_val = train_test_split(
    train_x,
    train_y,
    test_size=0.3,
    random_state=42,
    stratify=train_y,
)

# the unsplit lists are no longer needed - release them to free up memory
del train_x, train_y

In [11]:
# performing one hot encoding and converting the lists to numpy arrays before training
# NOTE: run this cell once per split - it overwrites its own inputs, so running it
# again would one-hot encode labels that are already one-hot.
x_train = np.array(x_train)
x_val = np.array(x_val)
# num_classes pins the one-hot width to the number of known classes; without it
# to_categorical derives the width from the largest label present in the data.
y_train = to_categorical(y_train, num_classes=len(class_label_encoding))
y_val = to_categorical(y_val, num_classes=len(class_label_encoding))

In [12]:
# CNN classifier: two Conv-Conv-MaxPool stages, dropout, then a dense head that
# ends in a 3-way softmax.
model = Sequential()

model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3))) # images were resized to 128x128x3 when loaded
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D())

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D())

model.add(layers.Dropout(0.25))

model.add(layers.Flatten())

model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.4))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(3, activation='softmax')) # 3 classes

# Keyword arguments on purpose: the third positional parameter of compile() is
# `metrics` in tf.keras 2.x but `loss_weights` in Keras 3, so passing the metrics
# list positionally is not portable between the two.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 conv2d_1 (Conv2D)           (None, 124, 124, 64)      18496     
                                                                 
 max_pooling2d (MaxPooling2  (None, 62, 62, 64)        0         
 D)                                                              
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 60, 64)        36928     
                                                                 
 conv2d_3 (Conv2D)           (None, 58, 58, 128)       73856     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 29, 29, 128)       0         
 g2D)                                                   

In [13]:
# Random augmentation applied to image batches: small rotations, zooms and
# shifts, plus vertical flips.
data_generator = ImageDataGenerator(
    rotation_range=15,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    vertical_flip=True,
)

In [None]:
# Train on randomly augmented batches, but validate on the untouched validation
# images: EarlyStopping monitors val_loss by default, and augmenting the validation
# data would add random noise to the very metric that decides when to stop and
# which weights to restore.
history = model.fit(
    data_generator.flow(x_train, y_train, batch_size=128),
    validation_data=(x_val, y_val),
    epochs=100,  # upper bound; early stopping normally ends training sooner
    callbacks=[EarlyStopping(patience=8, restore_best_weights=True)],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100

In [None]:
# Training curves. Accuracy and loss get separate panels because they live on
# different scales, and the val_* series are labelled "validation": they come
# from the validation split, not from the held-out test set evaluated afterwards.
fig, (acc_ax, loss_ax) = pyplot.subplots(1, 2, figsize=(12, 4))

acc_ax.plot(history.history['accuracy'], label='train')
acc_ax.plot(history.history['val_accuracy'], label='validation')
acc_ax.set(title='Accuracy', xlabel='epoch', ylabel='accuracy')
acc_ax.legend()

loss_ax.plot(history.history['loss'], label='train')
loss_ax.plot(history.history['val_loss'], label='validation')
loss_ax.set(title='Loss', xlabel='epoch', ylabel='loss')
loss_ax.legend()

fig.tight_layout()
pyplot.show()

In [None]:
# Final check on the held-out images, which were never used for training or validation.
# num_classes pins the one-hot width to the number of known classes so it always
# matches the model's output, even if the highest label id were absent from test_y.
loss, acc = model.evaluate(
    np.array(test_x),
    to_categorical(test_y, num_classes=len(class_label_encoding)),
)
print('Testing accuracy on unseen data:', round(acc*100,3))
print('Testing loss on unseen data:', round(loss,3))

In [None]:
# Persist the trained network to the notebook's working directory.
model.save(filepath='trained_model.h5')