In [82]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from matplotlib import pyplot as plt
import cv2
import random
import os

In [83]:
DATA_DIR = "/Users/abhraneelsaha/Desktop/oral_cancer_data"
SET = ["train","test","val"]
CATEGORIES = ["Normal","OSCC"]

In [109]:
#processing for training set

training_data = []
validation_data = []
PARENT_PATH = os.path.join(DATA_DIR,SET[0])
for category in CATEGORIES:
    category_num = CATEGORIES.index(category)
    PATH = os.path.join(PARENT_PATH,category)
    for img in os.listdir(PATH):
        IMG_PATH = os.path.join(PATH,img)
        img_array = cv2.imread(IMG_PATH, cv2.IMREAD_GRAYSCALE)
        img_array.resize(224,224)
        training_data.append([img_array,category_num])
        
#processing for validation set

PARENT_PATH = os.path.join(DATA_DIR,SET[2])
for category in CATEGORIES:
    category_num = CATEGORIES.index(category)
    PATH = os.path.join(PARENT_PATH,category)
    for img in os.listdir(PATH):
        IMG_PATH = os.path.join(PATH,img)
        img_array = cv2.imread(IMG_PATH, cv2.IMREAD_GRAYSCALE)
        img_array.resize(224,224)
        training_data.append([img_array,category_num])
        
        
        

In [73]:
# data_aug = tf.keras.Sequential([
# #     tf.keras.layers.experimental.preprocessing.RandomFlip("vertical",
# #                                                          input_shape = (224,224)),
#     tf.keras.layers.experimental.preprocessing.RandomRotation(0.1)
# #     tf.keras.layers.experimental.preprocessing.RandomZoom(0.1)
# ])

datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip=True,
    fill_mode = "nearest")

In [110]:
random.shuffle(training_data)

In [111]:
x_train = []
y_train = []

for feature,label in training_data:
    x_train.append(feature)
    y_train.append(label)

x_train = np.array(x_train).reshape(-1,224,224,1)
y_train = np.array(y_train).reshape(-1,1)
x_train = x_train/255

In [112]:
x_train.shape

(5066, 224, 224, 1)

In [77]:
datagen.fit(x_train)

i = 0  
for batch in datagen.flow(x_train,batch_size = 1,save_to_dir = "/Users/abhraneelsaha/Desktop/oral_cancer_data/preview",
                   save_prefix = "img", save_format = "jpeg"):
    
    i += 1
    if i>20:
        break

In [116]:
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Conv2D(64,(3,3),input_shape = x_train.shape[1:],padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.Conv2D(64,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size =(2,2),strides = (2,2)))
          
model.add(tf.keras.layers.Conv2D(128,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.Conv2D(128,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size =(2,2),strides = (2,2)))

model.add(tf.keras.layers.Conv2D(256,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.Conv2D(256,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.Conv2D(256,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size =(2,2),strides = (2,2)))

model.add(tf.keras.layers.Conv2D(512,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.Conv2D(512,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.Conv2D(512,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size =(2,2),strides = (2,2)))

model.add(tf.keras.layers.Conv2D(512,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.Conv2D(512,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.Conv2D(512,(3,3),padding = 'same', activation = 'relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size =(2,2),strides = (2,2)))

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(4096, activation = 'relu'))
model.add(tf.keras.layers.Dense(4096, activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1))
model.add(tf.keras.layers.Activation("sigmoid"))

model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

MODEL_DATA = model.summary()



Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_25 (Conv2D)          (None, 224, 224, 64)      640       
                                                                 
 conv2d_26 (Conv2D)          (None, 224, 224, 64)      36928     
                                                                 
 max_pooling2d_12 (MaxPoolin  (None, 112, 112, 64)     0         
 g2D)                                                            
                                                                 
 conv2d_27 (Conv2D)          (None, 112, 112, 128)     73856     
                                                                 
 conv2d_28 (Conv2D)          (None, 112, 112, 128)     147584    
                                                                 
 max_pooling2d_13 (MaxPoolin  (None, 56, 56, 128)      0         
 g2D)                                                

In [115]:
model.fit(x_train, y_train, batch_size = 32, validation_split = 0.3, epochs = 20)

In [None]:
#processing for testing set

testing_data = []

PARENT_PATH = os.path.join(DATA_DIR,SET[1])
for category in CATEGORIES:
    category_num = CATEGORIES.index(category)
    PATH = os.path.join(PARENT_PATH,category)
    for img in os.listdir(PATH):
        IMG_PATH = os.path.join(PATH,img)
        img_array = cv2.imread(IMG_PATH, cv2.IMREAD_GRAYSCALE)
        img_resize_array = img_array.resize(224,224)
        testing_data.append([img_array,category_num])
        


In [None]:
x_test = []
y_test = []

for feature,label in testing_data:
    x_test.append(feature)
    y_test.append(label)

x_test = np.array(x_test).reshape(-1,224,224,1)
y_test = np.array(y_test)
x_test = x_test/255