In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import math
from imblearn.over_sampling import RandomOverSampler
from scipy.stats import ttest_ind
from sqlalchemy import create_engine
from scipy.stats.mstats import winsorize
from scipy.stats import boxcox
from scipy.stats import jarque_bera
from scipy.stats import normaltest
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import cross_val_score
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import make_scorer
from sklearn.datasets import load_files

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV, ElasticNetCV
from statsmodels.tools.eval_measures import mse, rmse
from wordcloud import WordCloud
import statsmodels.api as sm
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.svm import SVC
from sklearn import tree
from sklearn import ensemble
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn import neighbors
from IPython.display import Image


import pydotplus
from sklearn import ensemble

import warnings

%matplotlib inline
sns.set()

warnings.filterwarnings('ignore')
import time
import cv2
import glob

In [78]:
import tensorflow as tf
import keras

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, Conv3D, MaxPooling2D
from keras.layers import LSTM, Input, TimeDistributed
from keras.models import Model
from keras.optimizers import RMSprop

from keras import backend as K

In [97]:
#import the data
start_time = time.time()

train_dir = 'Data/fruits-360_dataset/fruits-360/Training' 
test_dir = 'Data/fruits-360_dataset/fruits-360/Test'

def load_dataset(path): 
    data = load_files(path) 
    files = np.array(data['filenames']) 
    targets = np.array(data['target']) 
    target_labels = np.array(data['target_names']) 
    return files,targets,target_labels

x_train, y_train, target_labels = load_dataset(train_dir) 
x_test, y_test,_ = load_dataset(test_dir)

print("--- %s seconds ---" % (time.time() - start_time))

--- 29.746604919433594 seconds ---


In [63]:
#convert image files to matrices
start_time = time.time()

x_train_mat = []
x_test_mat = []

for img in x_train:
    n= cv2.imread(img)
    x_train_mat.append(n)

for img in x_test:
    n= cv2.imread(img)
    x_test_mat.append(n)
    
print("--- %s seconds ---" % (time.time() - start_time))

--- 27.11545491218567 seconds ---


In [85]:
#convert data to numpy arrays
start_time = time.time()

x_train_matnp = np.array(x_train_mat)
x_test_matnp = np.array(x_test_mat)
print("--- %s seconds ---" % (time.time() - start_time))

In [86]:
#reshape and normalize data

start_time = time.time()

img_rows, img_cols = 100, 100
num_classes = 120

if K.image_data_format() == 'channels_first':
    x_train_matnp = x_train_matnp.reshape(x_train_matnp.shape[0], 3, img_rows, img_cols)
    x_test_matnp = x_test_matnp.reshape(x_test_matnp.shape[0], 3, img_rows, img_cols)
    input_shape = (3, img_rows, img_cols)
else:
    x_train_matnp = x_train_matnp.reshape(x_train_matnp.shape[0], img_rows, img_cols, 3)
    x_test_matnp = x_test_matnp.reshape(x_test_matnp.shape[0], img_rows, img_cols, 3)
    input_shape = (img_rows, img_cols, 3)

x_train_matnp = x_train_matnp.astype('float32')
x_test_matnp = x_test_matnp.astype('float32')
x_train_matnp /= 255
x_test_matnp /= 255
print('x_train_matnp shape:', x_train_matnp.shape)
print(x_train_matnp.shape[0], 'train samples')
print(x_test_matnp.shape[0], 'test samples')

print("--- %s seconds ---" % (time.time() - start_time))

x_train_matnp shape: (60498, 100, 100, 3)
60498 train samples
20622 test samples
--- 18.63475775718689 seconds ---


In [105]:
print(y_test)
print(y_test.shape)

[  0 104  83 ...  58  64  16]
(20622,)


In [106]:
# convert class vectors to binary class matrices

y_train_cat = keras.utils.to_categorical(y_train)
y_test_cat = keras.utils.to_categorical(y_test)

In [107]:
print(y_test_cat)
print(y_test_cat.shape)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
(20622, 120)


In [108]:
start_time = time.time()

x_train_matnp_big, x_train_matnp_small, y_train_cat_big, y_train_cat_small = train_test_split(
    x_train_matnp,
    y_train_cat,
    test_size=0.01,
    random_state=42)

print("--- %s seconds ---" % (time.time() - start_time))

--- 159.30167293548584 seconds ---


In [109]:
start_time = time.time()

x_test_matnp_big, x_test_matnp_small, y_test_cat_big, y_test_cat_small = train_test_split(
    x_test_matnp,
    y_test_cat,
    test_size=0.01,
    random_state=42)

print("--- %s seconds ---" % (time.time() - start_time))

--- 39.61524987220764 seconds ---


In [111]:
y_test_cat_small.shape

(207, 120)

In [115]:
model = Sequential()

model.add(Conv2D(16, kernel_size=(5, 5),
                 activation='relu',
                 input_shape=(100,100,3)))
model.add(MaxPooling2D(pool_size=(2, 2), strides = 2))
model.add(Conv2D(32, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides = 2))
model.add(Conv2D(64, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides = 2))
model.add(Conv2D(128, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides = 2))
model.add(Dense(1024, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 96, 96, 16)        1216      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 48, 48, 16)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 44, 44, 32)        12832     
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 22, 22, 32)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 18, 18, 64)        51264     
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 9, 9, 64)          0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 5, 5, 128)        

In [116]:
model.fit(x_train_matnp_small, y_train_cat_small,
          epochs=10,
          verbose=1,
          validation_data=(x_test_matnp_small, y_test_cat_small))
score = model.evaluate(x_test_matnp_small, y_test_cat_small, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

ValueError: Error when checking target: expected dense_12 to have 4 dimensions, but got array with shape (605, 120)