In [1]:
#https://www.pyimagesearch.com/2019/05/20/transfer-learning-with-keras-and-deep-learning/
import tensorflow as tf

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0' # Set to -1 if CPU should be used CPU = -1 , GPU = 0

gpus = tf.config.experimental.list_physical_devices('GPU')
cpus = tf.config.experimental.list_physical_devices('CPU')

if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
elif cpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        logical_cpus= tf.config.experimental.list_logical_devices('CPU')
        print(len(cpus), "Physical CPU,", len(logical_cpus), "Logical CPU")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

1 Physical CPU, 1 Logical CPU


In [2]:
# +Add data before loading

!wget -O gdrivedl 'https://f.mjh.nz/gdrivedl'
!ls -l
!pwd

"""
!bash /kaggle/working/gdrivedl https://drive.google.com/file/d/1UdpT3LhiomMs6PQVinCkRwSUC0hD3D19/view?usp=sharing
!ls -l /kaggle/working
inputdir = "/kaggle/working"
"""

!bash /content/gdrivedl https://drive.google.com/file/d/1UdpT3LhiomMs6PQVinCkRwSUC0hD3D19/view?usp=sharing
!ls -l /content
inputdir = "/content"

--2020-12-12 22:50:56--  https://f.mjh.nz/gdrivedl
Resolving f.mjh.nz (f.mjh.nz)... 172.67.162.157, 104.28.30.233, 104.28.31.233, ...
Connecting to f.mjh.nz (f.mjh.nz)|172.67.162.157|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1362 (1.3K) [application/octet-stream]
Saving to: ‘gdrivedl’


2020-12-12 22:50:56 (17.3 MB/s) - ‘gdrivedl’ saved [1362/1362]

total 8
-rw-r--r-- 1 root root 1362 Jan 29  2019 gdrivedl
drwxr-xr-x 1 root root 4096 Dec  2 22:04 sample_data
/content
File ID: 1UdpT3LhiomMs6PQVinCkRwSUC0hD3D19
Downloading: https://docs.google.com/uc?export=download&id=1UdpT3LhiomMs6PQVinCkRwSUC0hD3D19 > .87.file
Downloading: https://docs.google.com/uc?export=download&id=1UdpT3LhiomMs6PQVinCkRwSUC0hD3D19&confirm=DrcI > .87.file
Moving: .87.file > Food-5K.zip
Saved: Food-5K.zip
DONE!
total 436476
-rw-r--r-- 1 root root 446938948 Dec 12 22:51 Food-5K.zip
-rw-r--r-- 1 root root      1362 Jan 29  2019 gdrivedl
drwxr-xr-x 1 root root      4096 Dec  2 22:04 samp

In [3]:
# import the necessary packages
import os

# initialize the path to the *original* input directory of images
ORIG_INPUT_DATASET = os.path.join(inputdir, "Food-5K.zip")

# initialize the base path to the *new* directory that will contain
# our images after computing the training and testing split
import tempfile
TEMPDIR = tempfile.gettempdir()
BASE_PATH = os.path.join(TEMPDIR, "dataset.zip")

# define the names of the training, testing, and validation
# directories
TRAIN = "training"
TEST = "evaluation"
VAL = "validation"

# initialize the list of class label names
CLASSES = ["non_food", "food"]

# set the batch size
BATCH_SIZE = 32

# initialize the label encoder file path and the output directory to
# where the extracted features (in CSV file format) will be stored
LE_PATH = os.path.sep.join([TEMPDIR, "le.cpickle"])
#BASE_CSV_PATH = "output"
BASE_CSV_PATH = TEMPDIR

# set the path to the serialized model after training
MODEL_PATH = os.path.sep.join([TEMPDIR, "model.cpickle"])

In [4]:
!pip install imutils



In [5]:
# import the necessary packages
from imutils import paths
import shutil
# import the necessary packages
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
import numpy as np
import pickle
import random
import os

In [6]:
import zipfile
import cv2
zf = zipfile.ZipFile(ORIG_INPUT_DATASET)
imagelist = zf.namelist()

print("[INFO] processing split images...")

ozf = zipfile.ZipFile(BASE_PATH, 'w')

# loop over the data splits
for split in (TRAIN, TEST, VAL):
	# grab all image paths in the current split
	#print("[INFO] processing '{} split'...".format(split))
	#p = os.path.sep.join([ORIG_INPUT_DATASET, split])
	#imagePaths = list(paths.list_images(p))
	imagePaths = [ix for ix in imagelist if ix.startswith(split)]

	# loop over the image paths
	for imagePath in imagePaths:
		# extract class label from the filename
		#filename = imagePath.split(os.path.sep)[-1]
		filename = imagePath.split('/')[-1]
		label = CLASSES[int(filename.split("_")[0])]

		# construct the path to the output directory
		#dirPath = os.path.sep.join([BASE_PATH, split, label])
		dirPath = '/'.join([split, label])

		# if the output directory does not exist, create it
		#if not os.path.exists(dirPath):
		#	os.makedirs(dirPath)

		# construct the path to the output image file and copy it
		#p = os.path.sep.join([dirPath, filename])
		#shutil.copy2(imagePath, p)
		#image = cv2.imdecode(np.frombuffer(zf.read(imagePath), np.uint8), 1)
		#ozf.writestr(dirPath + '/' + filename, image, compress_type=zipfile.ZIP_STORED)
		ozf.writestr(dirPath + '/' + filename, zf.read(imagePath), compress_type=zipfile.ZIP_STORED)
zf.close()
ozf.close()

[INFO] processing split images...


In [7]:
# load the VGG16 network and initialize the label encoder
print("[INFO] loading network...")
model = VGG16(weights="imagenet", include_top=False)

[INFO] loading network...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [8]:
import cv2
zf = zipfile.ZipFile(BASE_PATH)
imagelist = zf.namelist()

print("[INFO] processing split images...")

le = None

# loop over the data splits
for split in (TRAIN, TEST, VAL):
	# grab all image paths in the current split
	#print("[INFO] processing '{} split'...".format(split))
	#p = os.path.sep.join([BASE_PATH, split])
	#imagePaths = list(paths.list_images(p))
	imagePaths = [ix for ix in imagelist if ix.startswith(split)]

	# randomly shuffle the image paths and then extract the class
	# labels from the file paths
	random.shuffle(imagePaths)
	#labels = [p.split(os.path.sep)[-2] for p in imagePaths]
	labels = [p.split('/')[1] for p in imagePaths]
	print(imagePaths[:5], labels[:5])

	# if the label encoder is None, create it
	if le is None:
		le = LabelEncoder()
		le.fit(labels)

	# open the output CSV file for writing
	csvPath = os.path.sep.join([BASE_CSV_PATH,"{}.csv".format(split)])
	csv = open(csvPath, "w")

	# loop over the images in batches
	for (b, i) in enumerate(range(0, len(imagePaths), BATCH_SIZE)):
		# extract the batch of images and labels, then initialize the
		# list of actual images that will be passed through the network
		# for feature extraction
		print("[INFO] processing batch {}/{}".format(b + 1,int(np.ceil(len(imagePaths) / float(BATCH_SIZE)))))
		batchPaths = imagePaths[i:i + BATCH_SIZE]
		#print(list(le.classes_), imagePaths[:5], labels[:5])
		batchLabels = le.transform(labels[i:i + BATCH_SIZE])
		batchImages = []

		# loop over the images and labels in the current batch
		for imagePath in batchPaths:
			# load the input image using the Keras helper utility
			# while ensuring the image is resized to 224x224 pixels
			#image = load_img(imagePath, target_size=(224, 224))
			#image = img_to_array(image)
			image = cv2.imdecode(np.frombuffer(zf.read(imagePath), np.uint8), 1)
			image = cv2.resize(image, (224, 224))

			# preprocess the image by (1) expanding the dimensions and
			# (2) subtracting the mean RGB pixel intensity from the
			# ImageNet dataset
			image = np.expand_dims(image, axis=0)
			image = imagenet_utils.preprocess_input(image)

			# add the image to the batch
			batchImages.append(image)

		# pass the images through the network and use the outputs as
		# our actual features, then reshape the features into a
		# flattened volume
		batchImages = np.vstack(batchImages)
		features = model.predict(batchImages, batch_size=BATCH_SIZE)
		print(features.shape)
		features = features.reshape((features.shape[0], 7 * 7 * 512))

		# loop over the class labels and extracted features
		for (label, vec) in zip(batchLabels, features):
			# construct a row that exists of the class label and
			# extracted features
			vec = ",".join([str(v) for v in vec])
			csv.write("{},{}\n".format(label, vec))

	# close the CSV file
	csv.close()

[INFO] processing split images...
['training/non_food/0_564.jpg', 'training/non_food/0_1449.jpg', 'training/non_food/0_1233.jpg', 'training/non_food/0_1483.jpg', 'training/food/1_15.jpg'] ['non_food', 'non_food', 'non_food', 'non_food', 'food']
[INFO] processing batch 1/94
(32, 7, 7, 512)
[INFO] processing batch 2/94
(32, 7, 7, 512)
[INFO] processing batch 3/94
(32, 7, 7, 512)
[INFO] processing batch 4/94
(32, 7, 7, 512)
[INFO] processing batch 5/94
(32, 7, 7, 512)
[INFO] processing batch 6/94
(32, 7, 7, 512)
[INFO] processing batch 7/94
(32, 7, 7, 512)
[INFO] processing batch 8/94
(32, 7, 7, 512)
[INFO] processing batch 9/94
(32, 7, 7, 512)
[INFO] processing batch 10/94
(32, 7, 7, 512)
[INFO] processing batch 11/94
(32, 7, 7, 512)
[INFO] processing batch 12/94
(32, 7, 7, 512)
[INFO] processing batch 13/94
(32, 7, 7, 512)
[INFO] processing batch 14/94
(32, 7, 7, 512)
[INFO] processing batch 15/94
(32, 7, 7, 512)
[INFO] processing batch 16/94
(32, 7, 7, 512)
[INFO] processing batch 17/9

In [9]:
# serialize the label encoder to disk
f = open(LE_PATH, "wb")
f.write(pickle.dumps(le))
f.close()

In [10]:
def load_data_split(splitPath):
	# initialize the data and labels
	data = []
	labels = []

	# loop over the rows in the data split file
	for row in open(splitPath):
		# extract the class label and features from the row
		row = row.strip().split(",")
		label = row[0]
		features = np.array(row[1:], dtype="float")

		# update the data and label lists
		data.append(features)
		labels.append(label)

	# convert the data and labels to NumPy arrays
	data = np.array(data)
	labels = np.array(labels)

	# return a tuple of the data and labels
	return (data, labels)

In [11]:
# derive the paths to the training and testing CSV files
trainingPath = os.path.sep.join([BASE_CSV_PATH,"{}.csv".format(TRAIN)])
testingPath = os.path.sep.join([BASE_CSV_PATH,"{}.csv".format(TEST)])

In [12]:
# load the data from disk
print("[INFO] loading data...")
(trainX, trainY) = load_data_split(trainingPath)
(testX, testY) = load_data_split(testingPath)

[INFO] loading data...


In [13]:
# load the label encoder from disk
le = pickle.loads(open(LE_PATH, "rb").read())

In [14]:
# train the model
print("[INFO] training model...")
model = LogisticRegression(solver="lbfgs", multi_class="auto", max_iter=1000)
model.fit(trainX, trainY)

[INFO] training model...


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [15]:
# evaluate the model
print("[INFO] evaluating...")
preds = model.predict(testX)
print(classification_report(testY, preds, target_names=le.classes_))

[INFO] evaluating...
              precision    recall  f1-score   support

        food       0.98      0.97      0.97       500
    non_food       0.97      0.98      0.98       500

    accuracy                           0.97      1000
   macro avg       0.98      0.97      0.97      1000
weighted avg       0.98      0.97      0.97      1000



In [16]:
# serialize the model to disk
print("[INFO] saving model...")
f = open(MODEL_PATH, "wb")
f.write(pickle.dumps(model))
f.close()

[INFO] saving model...
