In [None]:
import numpy as np
import os
import shutil
import random
import pickle

from tqdm import tqdm
from keras.applications.inception_v3 import InceptionV3,preprocess_input,decode_predictions
from keras.preprocessing.image import img_to_array,load_img
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

In [None]:
# parse train.csv: skip the header row and keep the first two
# comma-separated columns of every remaining line

with open('./train.csv/train.csv', mode='r') as reading:
    trainClass = [t.split(',')[0:2] for t in reading.readlines()[1:]]
print(len(trainClass))

In [None]:
# build the dataset tree: one sub-folder per class (second column of
# trainClass), each receiving a copy of the images that belong to it

for classes in trainClass:
    imageId, className = classes[0], classes[1]
    classDir = "./dataset/" + className
    if not os.path.exists(classDir):
        os.makedirs(classDir)
    shutil.copy2(
        './images/{}.jpg'.format(imageId),
        './dataset/{}/{}.jpg'.format(className, imageId),
    )

In [None]:
# instantiate InceptionV3 with include_top=False (no top layer);
# the network is used below for feature extraction

model = InceptionV3(include_top=False)

In [None]:
# collect every image path under ./dataset together with its class
# label (the name of the folder it sits in), then fit the label encoder

imagesPath = []
labels = []
le = LabelEncoder()
# sorted so the listing order does not depend on the filesystem
dirs = sorted(os.listdir('./dataset/'))
for d in dirs:
    classDir = os.path.join('./dataset', d)
    # skip stray files (e.g. .DS_Store) that are not class folders
    if not os.path.isdir(classDir):
        continue
    for f in sorted(os.listdir(classDir)):
        # os.path.join keeps the path valid on every OS; a literal
        # '.\\dataset' prefix only resolves on Windows
        imagesPath.append(os.path.join(classDir, f))
        labels.append(d)
le.fit(labels)


In [None]:
# feed all training images to the InceptionV3 model in mini-batches
# and store one "label,f1,f2,..." text row per image

batchSize = 32
exctractedFeatures = []
for i in tqdm(range(0, len(imagesPath), batchSize)):

    batchPaths = imagesPath[i:i + batchSize]
    batchLabels = le.transform(labels[i:i + batchSize])
    batchImages = []
    for imagePath in batchPaths:
        # load the input image using the Keras helper utility
        # while ensuring the image is resized to 299x299 pixels
        image = load_img(imagePath, target_size=(299, 299))
        image = img_to_array(image)

        # (1) add a leading batch axis so the images can be stacked and
        # (2) apply the InceptionV3 preprocessing, which per the Keras
        # documentation rescales pixel values to the [-1, 1] range
        image = np.expand_dims(image, axis=0)
        image = preprocess_input(image)

        # add the image to the batch
        batchImages.append(image)

    batchImages = np.vstack(batchImages)
    features = model.predict(batchImages, batch_size=batchSize)
    # flatten each feature map to one vector; -1 lets NumPy infer the
    # length instead of hard-coding the 8 * 8 * 2048 output size
    features = features.reshape((features.shape[0], -1))

    # construct a row that consists of the encoded class label followed
    # by the extracted features, all comma-separated
    for (label, vec) in zip(batchLabels, features):
        vec = ",".join([str(v) for v in vec])
        exctractedFeatures.append(','.join([str(label), vec]))

In [None]:
# save extracted features to a csv file, one image per line

with open("exctractedFeatures.csv", mode='wt', encoding='utf-8') as myfile:
    for lines in exctractedFeatures:
        # rows carry no line terminator of their own, so add one;
        # otherwise every row is fused into a single unreadable line
        myfile.write(lines + '\n')

In [None]:
# helpers for splitting the extracted-feature rows into train and test sets

def _parseRows(rows, desc):
    """Parse "label,f1,f2,..." text rows into (features, labels) NumPy arrays."""
    dataX = []
    dataY = []
    for row in tqdm(rows, desc=desc):
        # first field is the class label, the rest are the features
        row = row.strip().split(",")
        dataX.append(np.array(row[1:], dtype="float"))
        dataY.append(row[0])
    return (np.array(dataX), np.array(dataY))


def splitDatas(splitPath, trainRate=0.7, seed=None):
    """Randomly split feature rows into a training and a test set.

    Parameters
    ----------
    splitPath : iterable of str
        Rows formatted as "label,f1,f2,...". The input is copied before
        shuffling, so the caller's sequence keeps its order.
    trainRate : float, default 0.7
        Fraction of rows assigned to the training set; the train size is
        int(len(rows) * trainRate) and the remainder becomes the test set.
    seed : int or None, default None
        When given, the shuffle uses a private random.Random(seed) so the
        split is reproducible; when None, the module-level random
        generator is used.

    Returns
    -------
    tuple
        (trainX, trainY, testX, testY) as NumPy arrays. Features are
        floats; labels are kept as the strings found in the rows.
    """
    # shuffle a copy: shuffling splitPath itself would silently reorder
    # the caller's list
    rows = list(splitPath)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(rows)

    trainLength = int(len(rows) * trainRate)
    trainX, trainY = _parseRows(rows[:trainLength], "TrainSet")
    testX, testY = _parseRows(rows[trainLength:], "TestSet")
    # return a tuple of the data and labels
    return (trainX, trainY, testX, testY)

# split the extracted feature rows with the default train rate
trainX, trainY, testX, testY = splitDatas(exctractedFeatures)

In [None]:
# fit a multinomial logistic regression classifier (lbfgs solver,
# at most 30 iterations) on the extracted training features

model2 = LogisticRegression(
    solver="lbfgs",
    multi_class='multinomial',
    max_iter=30,
)
model2.fit(trainX, trainY)

In [None]:
# evaluate the logistic regression model on testX and testY

preds = model2.predict(testX)
# mean accuracy computed from the predictions already in hand;
# model2.score(testX, testY) would run a second full prediction
# pass over testX to arrive at the same number
score = float(np.mean(preds == testY))
print(score)

In [None]:
# save logistic regression model and label encoder

# context managers make sure each file is flushed and closed as soon
# as its dump finishes, instead of leaving the handles open
with open('LE.pickle', mode='wb') as leFile:
    pickle.dump(le, leFile)
with open('LeafLinearregresion.model', mode='wb') as modelFile:
    pickle.dump(model2, modelFile)

In [None]:
# read test.csv and collect the test image ids (first column, header
# row skipped); blank lines are ignored and ids are stripped so a stray
# newline or space never ends up inside an image path
with open('./test.csv/test.csv', mode='r') as reading:
    test = [t.split(',')[0].strip() for t in reading.readlines()[1:] if t.strip()]

# load, preprocess and classify test images one at a time
for i in test:
    image = load_img('./images/' + i + '.jpg', target_size=(299, 299))
    image = img_to_array(image)
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)
    features = model.predict(image)
    # flatten to a single feature vector; -1 infers the vector length
    features = features.reshape((features.shape[0], -1))
    preds = model2.predict(features)
    # model2 predicts the encoded label; map it back to the class name
    # using the label encoder's inverse transform
    print('{}.jpg is classified as {}'.format(i, le.inverse_transform([int(preds[0])])[0]))
