# Keras CNN

(Dataset is secret)

### Read Data

In [1]:
import cv2
import glob
import pandas as pd

abst_filename_list = [img for img in glob.glob("../dataset/*.jpg")]

filename_list = [name.split("./dataset\\")[1] for name in abst_filename_list]
filename_list = [name.split(".jpg")[0] for name in filename_list]

model_id_list = list()
product_id_list = list()
image_id_list = list()

for info in [filename.split("_") for filename in filename_list]:
    model_id_list.append(info[0])
    product_id_list.append(info[1])
    image_id_list.append(info[2])
    
dataset_list = list(zip(abst_filename_list, filename_list, model_id_list, product_id_list, image_id_list))

df = pd.DataFrame(dataset_list, columns=["absolute_path", "file_name", "model_id", "product_id_list", "image_id_list"])

del abst_filename_list, model_id_list, product_id_list, image_id_list, filename_list, dataset_list

In [2]:
converted_id_dict = dict()
converted_id_list = list()

labels = [int(id) for id in df["model_id"]]
labels_set = list(set(labels))

for i, id_ in enumerate(labels_set):
    converted_id_dict[id_] = i

for label in labels:
    converted_id_list.append(converted_id_dict.get(label))

df["label"] = converted_id_list

In [3]:
df.head()

Unnamed: 0,absolute_path,file_name,model_id,product_id_list,image_id_list,label
0,../train\10047545284_10226223039_0.jpg,10047545284_10226223039_0,10047545284,10226223039,0,134
1,../train\10047545284_10226256889_0.jpg,10047545284_10226256889_0,10047545284,10226256889,0,134
2,../train\10047545284_10226293526_0.jpg,10047545284_10226293526_0,10047545284,10226293526,0,134
3,../train\10047545284_10321296333_0.jpg,10047545284_10321296333_0,10047545284,10321296333,0,134
4,../train\10047545284_10321296355_0.jpg,10047545284_10321296355_0,10047545284,10321296355,0,134


### Read Features

In [4]:
import os
import numpy as np

import tensorflow.keras as k

In [5]:
dataset = list()
label_dataset = list()

img_size = 50

for path in df["absolute_path"]:
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    img = cv2.resize(img, (img_size, img_size), interpolation=cv2.INTER_CUBIC)
    dataset.append(np.array(img))
    
reshaped_dataset = np.array([np.reshape(data, (img_size, img_size, 3)) for data in dataset])
label_dataset = np.array([[label] for label in df["label"]])

In [6]:
print("Dataset Shape : {}".format(reshaped_dataset.shape))
print("Label Shape : {}".format(label_dataset.shape))

Dataset Shape : (10618, 50, 50, 3)
Label Shape : (10618, 1)


In [7]:
num_classes = 153

label_dataset_one_hot = k.utils.to_categorical(label_dataset, num_classes)

In [8]:
reshaped_dataset = reshaped_dataset.astype("float32")
reshaped_dataset /= 255.

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    reshaped_dataset,
    label_dataset_one_hot,
    test_size=0.3,
    random_state=1
)

### Making Model

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten

In [11]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding="same", activation="relu", input_shape=X_train.shape[1:]))
model.add(Conv2D(32, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding="same", activation="relu"))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(1024, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation="softmax"))

In [12]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 50, 50, 32)        896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 48, 48, 32)        9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 24, 24, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 24, 24, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 22, 22, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 11, 11, 64)        0         
__________

In [13]:
epochs = 100
batch_size = 128
lr = 0.0001
decay = 1e-6

optimizer = k.optimizers.RMSprop(lr=lr, decay=decay)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

In [14]:
history = model.fit(
    X_train, 
    y_train, 
    batch_size=batch_size, 
    epochs=epochs, validation_data=(X_test, y_test), 
    shuffle=True, 
    verbose=2
)

Train on 7432 samples, validate on 3186 samples
Epoch 1/100
 - 9s - loss: 5.0194 - acc: 0.0131 - val_loss: 4.9868 - val_acc: 0.0468
Epoch 2/100
 - 3s - loss: 4.9385 - acc: 0.0447 - val_loss: 4.7647 - val_acc: 0.0995
Epoch 3/100
 - 3s - loss: 4.6089 - acc: 0.1010 - val_loss: 4.1752 - val_acc: 0.1877
Epoch 4/100
 - 3s - loss: 4.0979 - acc: 0.1759 - val_loss: 3.6156 - val_acc: 0.3258
Epoch 5/100
 - 3s - loss: 3.6283 - acc: 0.2550 - val_loss: 3.0122 - val_acc: 0.4567
Epoch 6/100
 - 3s - loss: 3.1828 - acc: 0.3291 - val_loss: 2.5170 - val_acc: 0.5832
Epoch 7/100
 - 3s - loss: 2.7812 - acc: 0.4084 - val_loss: 1.9558 - val_acc: 0.6466
Epoch 8/100
 - 3s - loss: 2.4783 - acc: 0.4658 - val_loss: 1.7088 - val_acc: 0.7059
Epoch 9/100
 - 3s - loss: 2.1595 - acc: 0.5250 - val_loss: 1.3993 - val_acc: 0.7363
Epoch 10/100
 - 3s - loss: 1.9394 - acc: 0.5717 - val_loss: 1.1870 - val_acc: 0.7665
Epoch 11/100
 - 3s - loss: 1.7326 - acc: 0.6167 - val_loss: 1.0616 - val_acc: 0.7847
Epoch 12/100
 - 3s - loss:

Epoch 97/100
 - 3s - loss: 0.0471 - acc: 0.9852 - val_loss: 0.4666 - val_acc: 0.9309
Epoch 98/100
 - 3s - loss: 0.0451 - acc: 0.9860 - val_loss: 0.4696 - val_acc: 0.9303
Epoch 99/100
 - 3s - loss: 0.0432 - acc: 0.9879 - val_loss: 0.4649 - val_acc: 0.9313
Epoch 100/100
 - 3s - loss: 0.0450 - acc: 0.9875 - val_loss: 0.4670 - val_acc: 0.9309


In [15]:
print("Accuracy : %.2f" % history.history["acc"][-1])
print("Valid Accuracy : %.2f" % history.history["val_acc"][-1])

Accuracy : 0.99
Valid Accuracy : 0.93


In [16]:
# Saving Model and Weights
model_json = model.to_json()
with open("./d2_model.json", "w") as json_file:
    json_file.write(model_json)

model.save_weights("./d2_weights.h5")

### Predict Data

In [17]:
y_pred = model.predict(reshaped_dataset)
y_pred_label = np.argmax(y_pred, axis=1)

In [18]:
y_pred_label

array([134, 134, 134, ..., 111, 111, 111], dtype=int64)

### Save to files

In [19]:
# Writing to .txt
f = open("./labels_pred.txt", "w")
for label in y_pred_label:
    label_input = str(label)
    f.write(label_input + "\n")
f.close()

In [20]:
# Writing to .csv (with clustering)
csv_dict = {"file_name": df["file_name"], "labels_pred": y_pred_label}
df_csv = pd.DataFrame(data=csv_dict)
df_csv.to_csv("./pred_db.csv")