In [None]:
# Mount Google Drive at /content/drive/ so the dataset stored there can be read.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
cd "/content/drive/MyDrive/SEM 6 - COURSES/CSE542 - SML/Kaggle Challenge 2/"

In [None]:
!unzip 'SML_T.zip'

In [6]:
import numpy as np
import pandas as pd
import os
%matplotlib inline
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.layers import Dense, MaxPool2D, Conv2D, Dropout, Flatten, GlobalAveragePooling2D, BatchNormalization, Activation, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import glob
import cv2
import re
import pathlib
import zipfile

In [7]:
# Load the training label table; pandas infers the header row from the file.
train_labels = pd.read_csv("SML_Train.csv", header="infer")

# Distinct values of the 'category' column, in order of first appearance.
classes = train_labels['category'].unique()
print(classes)

[20 19 23  9  6 17 10  1  2 24  4 15 11 16  3 12  0 13 18  8 21 22  7 14
  5]


In [8]:
# Root directories handed to flow_from_directory for the training and test images.
parent_path_train = "SML_Train"
parent_path_test = "SML_Test"

In [9]:
# No augmentation or rescaling is configured: images are only resized and batched.
train_datagen = ImageDataGenerator()

# Resize to 64x64 so the training batches match the input_shape=(64, 64, 3) declared
# by the models in this notebook and the 64x64 size used by the test generator.
# The previous 32x32 setting trained the network at a different resolution than the
# one it is asked to predict on.
train_generator = train_datagen.flow_from_directory(
    directory=parent_path_train,
    target_size=(64, 64),
    batch_size=128,
    shuffle=True,
)

Found 16000 images belonging to 25 classes.


In [10]:
# Inspect the type of the object returned by flow_from_directory.
type(train_generator)

keras.preprocessing.image.DirectoryIterator

In [26]:
#Defining the model architecture
def define_model(input_shape=(64, 64, 3), num_classes=25):
    """Build and compile a stacked-convolution CNN image classifier.

    The network has four convolutional stages (2, 3, 3 and 3 conv layers with
    64/92, 128, 256 and 512 filters). Every convolution is 3x3, stride 1,
    'same'-padded, ReLU-activated and followed by batch normalisation; every
    stage ends with 2x2 max pooling. The flattened features then pass through
    two ReLU dense layers (512 and 128 units) and a softmax output layer.

    Args:
        input_shape: (height, width, channels) of the input images.
            Defaults to the previously hard-coded (64, 64, 3).
        num_classes: Number of units in the softmax output layer.
            Defaults to the previously hard-coded 25.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    # Filter count of every conv layer, grouped by stage; a stage ends with max pooling.
    stage_filters = [(64, 92), (128, 128, 128), (256, 256, 256), (512, 512, 512)]

    model = Sequential()
    is_first_layer = True
    for filters_in_stage in stage_filters:
        for n_filters in filters_in_stage:
            conv_kwargs = dict(filters=n_filters, kernel_size=(3, 3), strides=(1, 1),
                               activation='relu', padding='same', use_bias=False)
            if is_first_layer:
                # Only the first layer of the Sequential model declares the input shape.
                conv_kwargs['input_shape'] = input_shape
                is_first_layer = False
            model.add(Conv2D(**conv_kwargs))
            model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))

    model.add(Flatten())

    # Each dense layer gets its own ReLU. Previously Dense(512) fed Dense(128) with no
    # activation in between, so the two layers collapsed into a single linear map.
    model.add(Dense(units=512, activation='relu'))
    model.add(Dense(units=128))
    model.add(Activation('relu'))

    model.add(Dense(units=num_classes, activation='softmax'))
    # compile model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the network and print a progress marker once it has been built.
# NOTE: `model` is rebound by the Sequential model constructed in a later cell.
model = define_model()
print('Code Segment Running: 1Done')

Code Segment Running: 1Done


In [29]:
# Training hyperparameters
# NOTE(review): EPOCHS, validation_rate, IMAGE_SIZE and BATCH_SIZE are not referenced by
# the visible cells (fit uses epochs=10, the generators use batch_size=128) — confirm
# whether they should be wired in.
EPOCHS = 1000              # number of training epochs
hidden_nodes1 = 128        # hidden-layer node count 1
hidden_nodes2 = 256        # hidden-layer node count 2
hidden_nodes3 = 512        # hidden-layer node count 3
output_nodes  = 1024       # fully connected layer node count
validation_rate = 0.2      # ratio of validation data to training data
IMAGE_SIZE = 64            # input image size
BATCH_SIZE = 500           # number of images per training batch

In [30]:
# Build the CNN: three convolutional stages followed by a pooled dense head.
model = Sequential()

# Convolutional stages. Each stage is two 3x3 'same'-padded convolutions, each
# followed by ReLU, then 2x2 max pooling and 25% dropout. The very first
# convolution additionally declares the 64x64, 3-channel input.
for stage_index, stage_nodes in enumerate((hidden_nodes1, hidden_nodes2, hidden_nodes3)):
    if stage_index == 0:
        model.add(Conv2D(stage_nodes, (3, 3), padding='same', input_shape=(64, 64, 3)))
    else:
        model.add(Conv2D(stage_nodes, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(stage_nodes, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))

# Head: global average pooling, one fully connected ReLU layer with 50% dropout,
# then a 25-unit softmax output (one unit per class).
for head_layer in (
    GlobalAveragePooling2D(),
    Dense(output_nodes),
    Activation('relu'),
    Dropout(0.50),
    Dense(25),
    Activation('softmax'),
):
    model.add(head_layer)

In [37]:
# Configure training: categorical cross-entropy loss (used when labels are one-hot
# encoded), the RMSprop optimization algorithm, and accuracy as the reported metric.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [38]:
# Train for 10 epochs on the directory generator, loading batches with 8 worker processes.
model.fit(train_generator,epochs=10,workers=8,use_multiprocessing=True)

Epoch 1/10


2023-04-26 11:19:48.048162: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-04-26 11:19:48.654323: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_2/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7eff6de85930>

In [39]:
# Test-set generator: images are resized to 64x64 and batched; no labels are
# produced (class_mode=None) and shuffling is off so the prediction order is fixed.
test_datagen = ImageDataGenerator()

test_gen = test_datagen.flow_from_directory(
    directory=parent_path_test,
    target_size=(64, 64),
    batch_size=128,
    class_mode=None,
    shuffle=False,
)

Found 1500 images belonging to 1 classes.


In [40]:
# Predict one class-probability vector per test image.
predictions_vecs = model.predict(test_gen)

# argmax yields the training generator's class *index*, not the category itself:
# flow_from_directory numbers the class sub-directories in alphanumeric order
# ('0', '1', '10', '11', ..., '2', '20', ...), so index 2 is directory '10', not '2'.
# Translate every index back to the directory name it stands for.
# NOTE(review): assumes the training sub-directories are named after the category
# ids — confirm against the SML_Train layout.
index_to_category = {index: name for name, index in train_generator.class_indices.items()}
predicted_indices = np.argmax(predictions_vecs, axis=1)
predictions_final = np.array([index_to_category[index] for index in predicted_indices])

 2/12 [====>.........................] - ETA: 0s

2023-04-26 11:21:28.159101: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




In [36]:
# Take the file names from the generator itself: predictions come back in
# test_gen.filenames order, whereas os.listdir returns entries in arbitrary order
# and could pair an id with another image's prediction.
test_files = [os.path.basename(path) for path in test_gen.filenames]
# Save the predictions to a CSV file
test_df = pd.DataFrame({'id': test_files, 'category': predictions_final})
test_df.to_csv('now.csv', index=False)