In [1]:
import os

# initialize the path to the *original* input directory of images
ORIG_INPUT_DATASET = "Food-5K"

# initialize the base path to the *new* directory that will contain
# our images after computing the training and testing split
BASE_PATH = "dataset"

# define the names of the training, testing, and validation
# directories
TRAIN = "training"
TEST = "evaluation"
VAL = "validation"

# initialize the list of class label names
CLASSES = ["non_food", "food"]

# set the batch size
BATCH_SIZE = 32

# initialize the label encoder file path and the output directory to
# where the extracted features (in CSV file format) will be stored
LE_PATH = os.path.sep.join(["output", "le.cpickle"])
BASE_CSV_PATH = "output"

# set the path to the serialized model after training
MODEL_PATH = os.path.sep.join(["output", "model.cpickle"])


In [8]:
# import the necessary packages
#pyt is a floder consisting config file
from pyt import config
from imutils import paths
import shutil
import os

# loop over the data splits
for split in (config.TRAIN, config.TEST, config.VAL):
    # grab all image paths in the current split
    print("[INFO] processing '{} split'...".format(split))
    p = os.path.sep.join([config.ORIG_INPUT_DATASET, split])
    print(p)
    print("-----------------------------------------")
    print(os.path.sep)
    imagePaths = list(paths.list_images(p))
    print(imagePaths)


    # loop over the image paths
    for imagePath in imagePaths:
        # extract class label from the filename
        print(imagePath.split(os.path.sep)[-1])
        filename = imagePath.split(os.path.sep)[-1]
        label = config.CLASSES[int(filename.split("_")[0])]
        print(label)

        # construct the path to the output directory
        dirPath = os.path.sep.join([config.BASE_PATH, split, label])
        
        print(dirPath)

        # if the output directory does not exist, create it
        if not os.path.exists(dirPath):
            print("yessssssssssssSssssssssssssssss")
            os.makedirs(dirPath)
#construct the path to the output image file and copy it
        print("hi")
        p = os.path.sep.join([dirPath, filename])
        shutil.copy2(imagePath, p)

[INFO] processing 'training split'...
Food-5K\training
-----------------------------------------
\
['Food-5K\\training\\0_0.jpg', 'Food-5K\\training\\0_1.jpg', 'Food-5K\\training\\0_10.jpg', 'Food-5K\\training\\0_100.jpg', 'Food-5K\\training\\0_1000.jpg', 'Food-5K\\training\\0_1001.jpg', 'Food-5K\\training\\0_1002.jpg', 'Food-5K\\training\\0_1003.jpg', 'Food-5K\\training\\0_1004.jpg', 'Food-5K\\training\\0_1005.jpg', 'Food-5K\\training\\0_1006.jpg', 'Food-5K\\training\\0_1007.jpg', 'Food-5K\\training\\0_1008.jpg', 'Food-5K\\training\\0_1009.jpg', 'Food-5K\\training\\0_101.jpg', 'Food-5K\\training\\0_1010.jpg', 'Food-5K\\training\\0_1011.jpg', 'Food-5K\\training\\0_1012.jpg', 'Food-5K\\training\\0_1013.jpg', 'Food-5K\\training\\0_1014.jpg', 'Food-5K\\training\\0_1015.jpg', 'Food-5K\\training\\0_1016.jpg', 'Food-5K\\training\\0_1017.jpg', 'Food-5K\\training\\0_1018.jpg', 'Food-5K\\training\\0_1019.jpg', 'Food-5K\\training\\0_102.jpg', 'Food-5K\\training\\0_1020.jpg', 'Food-5K\\training\\

In [3]:
# import the necessary packages
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from pyimagesearch import config
from imutils import paths
import numpy as np
import pickle
import random
import os

# load the VGG16 network and initialize the label encoder
print("[INFO] loading network...")
model = VGG16(weights="imagenet", include_top=False)
le = None

# loop over the data splits
for split in (config.TRAIN, config.TEST, config.VAL):
    # grab all image paths in the current split
    print("[INFO] processing '{} split'...".format(split))
    p = os.path.sep.join([config.BASE_PATH, split])
    imagePaths = list(paths.list_images(p))
    # randomly shuffle the image paths and then extract the class
    # labels from the file paths
    random.shuffle(imagePaths)
    labels = [p.split(os.path.sep)[-2] for p in imagePaths]

    # if the label encoder is None, create it
    if le is None:
        le = LabelEncoder()
        le.fit(labels)

    # open the output CSV file for writing
    csvPath = os.path.sep.join([config.BASE_CSV_PATH,"{}.csv".format(split)])
    csv = open(csvPath, "w")

    # loop over the images in batches
    for (b, i) in enumerate(range(0, len(imagePaths), config.BATCH_SIZE)):
        # extract the batch of images and labels, then initialize the
        # list of actual images that will be passed through the network
        # for feature extraction
        print("[INFO] processing batch {}/{}".format(b + 1,int(np.ceil(len(imagePaths) / float(config.BATCH_SIZE)))))
        batchPaths = imagePaths[i:i + config.BATCH_SIZE]
        batchLabels = le.transform(labels[i:i + config.BATCH_SIZE])
        batchImages = []

        # loop over the images and labels in the current batch
        for imagePath in batchPaths:
            # load the input image using the Keras helper utility
            # while ensuring the image is resized to 224x224 pixels
            image = load_img(imagePath, target_size=(224, 224))
            image = img_to_array(image)

            # preprocess the image by (1) expanding the dimensions and
            # (2) subtracting the mean RGB pixel intensity from the
            # ImageNet dataset
            image = np.expand_dims(image, axis=0)
            image = preprocess_input(image)

            # add the image to the batch
            batchImages.append(image)

# pass the images through the network and use the outputs as
# our actual features, then reshape the features into a
        # flattened volume
        batchImages = np.vstack(batchImages)   # making the list into an array
        features = model.predict(batchImages, batch_size=config.BATCH_SIZE)
        features = features.reshape((features.shape[0], 7 * 7 * 512))
        print(features.shape)

        # loop over the class labels and extracted features
        for (label, vec) in zip(batchLabels, features):
            # construct a row that exists of the class label and
            # extracted features
            vec = ",".join([str(v) for v in vec])
            csv.write("{},{}\n".format(label, vec))

    # close the CSV file
    csv.close()

# serialize the label encoder to disk
f = open(config.LE_PATH, "wb")
f.write(pickle.dumps(le))
f.close()

[INFO] loading network...
[INFO] processing 'training split'...
[INFO] processing batch 1/94
(32, 25088)
[INFO] processing batch 2/94
(32, 25088)
[INFO] processing batch 3/94
(32, 25088)
[INFO] processing batch 4/94
(32, 25088)
[INFO] processing batch 5/94
(32, 25088)
[INFO] processing batch 6/94
(32, 25088)
[INFO] processing batch 7/94
(32, 25088)
[INFO] processing batch 8/94
(32, 25088)
[INFO] processing batch 9/94
(32, 25088)
[INFO] processing batch 10/94
(32, 25088)
[INFO] processing batch 11/94
(32, 25088)
[INFO] processing batch 12/94
(32, 25088)
[INFO] processing batch 13/94
(32, 25088)
[INFO] processing batch 14/94
(32, 25088)
[INFO] processing batch 15/94
(32, 25088)
[INFO] processing batch 16/94
(32, 25088)
[INFO] processing batch 17/94
(32, 25088)
[INFO] processing batch 18/94
(32, 25088)
[INFO] processing batch 19/94
(32, 25088)
[INFO] processing batch 20/94
(32, 25088)
[INFO] processing batch 21/94
(32, 25088)
[INFO] processing batch 22/94
(32, 25088)
[INFO] processing bat

In [6]:
# import the necessary packages
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from pyimagesearch import config
import numpy as np
import pickle
import os

def load_data_split(splitPath):
    # initialize the data and labels
    data = []
    labels = []

    # loop over the rows in the data split file
    for row in open(splitPath):
        # extract the class label and features from the row
        row = row.strip().split(",")
        label = row[0]
        features = np.array(row[1:], dtype="float")

        # update the data and label lists
        data.append(features)
        labels.append(label)

    # convert the data and labels to NumPy arrays
    data = np.array(data)
    labels = np.array(labels)

    # return a tuple of the data and labels
    return (data, labels)

# derive the paths to the training and testing CSV files
trainingPath = os.path.sep.join([config.BASE_CSV_PATH,"{}.csv".format(config.TRAIN)])
testingPath = os.path.sep.join([config.BASE_CSV_PATH,"{}.csv".format(config.TEST)])

# load the data from disk
print("[INFO] loading data...")
(trainX, trainY) = load_data_split(trainingPath)
(testX, testY) = load_data_split(testingPath)

# load the label encoder from disk
#le = pickle.loads(open(config.LE_PATH, "rb").read())

# train the model
print("[INFO] training model...")
model = LogisticRegression(solver="lbfgs", multi_class="auto",max_iter=150)
#lbfgs is used for regularisation
model.fit(trainX, trainY)

# evaluate the model
print("[INFO] evaluating...")
preds = model.predict(testX)
print(classification_report(testY, preds,target_names=le.classes_))

# serialize the model to disk
print("[INFO] saving model...")
f = open(config.MODEL_PATH, "wb")
f.write(pickle.dumps(model))
f.close()

[INFO] loading data...
[INFO] training model...
[INFO] evaluating...
              precision    recall  f1-score   support

        food       0.99      0.98      0.98       500
    non_food       0.98      0.99      0.99       500

    accuracy                           0.98      1000
   macro avg       0.99      0.98      0.98      1000
weighted avg       0.99      0.98      0.98      1000

[INFO] saving model...
