<a href="https://colab.research.google.com/github/heesukjang/Income_Prediction/blob/main/Kesha_2nd_Notebook_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import os
import numpy as np
import pandas as pd
import cv2

#colab imports
from google.colab import drive
from google.colab.patches import cv2_imshow

#tensorflow imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import plot_model, to_categorical, Sequence
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Mount Google Drive inside the Colab VM; the project paths used later in
# this notebook all live under this mount point.
gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Replace directory_path with the location of your own copy of the project.
# Every dataset split folder below is derived from it, so this is the only
# line that has to change when the project is moved.
directory_path = '/content/gdrive/MyDrive/207_final_project'
dataset_directory = directory_path + '/Dataset'
val_image_directory = dataset_directory + '/Validate'
train_image_directory = dataset_directory + '/Train'
test_image_directory = dataset_directory + '/Test'

In [None]:
def get_paths(directory):
  """Collect every PNG below ``directory`` together with a class label.

  The tree under ``directory`` is walked recursively. Each file whose name
  ends in ``.png`` (case-insensitive) is recorded, and its label is built
  from the first path component below ``directory`` (the class folder).

  Args:
    directory: Root folder of one dataset split.

  Returns:
    A tuple ``(idc_image_path, idc_image_label, directory_name)``:
      * ``idc_image_path``: list of PNG file paths.
      * ``idc_image_label``: list of ``'class_<folder>'`` strings, aligned
        with ``idc_image_path``.
      * ``directory_name``: last component of ``directory``.
    When no PNG is found the two lists are empty.
  """
  idc_image_path = []
  idc_image_label = []

  # Taken from the argument itself instead of a fixed position in the
  # absolute path, so the function works for roots at any depth and still
  # returns a name when the folder contains no images.
  directory_name = os.path.basename(os.path.normpath(directory))

  for current_dir, subdirs, files in os.walk(directory):
    # Sorting in place steers os.walk, giving a reproducible traversal order.
    subdirs.sort()
    for file_name in sorted(files):
      if not file_name.lower().endswith('.png'):
        continue
      path = os.path.join(current_dir, file_name)
      # The first component relative to the split root is the class folder.
      relative_parts = os.path.relpath(path, directory).split(os.sep)
      idc_image_path.append(path)
      idc_image_label.append('class_' + relative_parts[0])

  return idc_image_path, idc_image_label, directory_name

In [None]:
# Gather (paths, labels, split folder name) for each of the three dataset splits.
train_paths, train_labels, train_dir = get_paths(directory=train_image_directory)
val_paths, val_labels, val_dir = get_paths(directory=val_image_directory)
test_paths, test_labels, test_dir = get_paths(directory=test_image_directory)

In [None]:
# Sanity check: each split should report as many labels as image paths.
# Printed in the order train, test, validation.
for split_paths, split_labels in ((train_paths, train_labels),
                                  (test_paths, test_labels),
                                  (val_paths, val_labels)):
  print(len(split_paths), len(split_labels))

800 800
200 200
200 200


In [None]:
def create_dataframes(idc_image_path, idc_image_label, directory_name, output_dir=None):
  """Shuffle the (path, label) pairs of one split and save them as a CSV.

  Args:
    idc_image_path: List of image file paths.
    idc_image_label: List of labels aligned with ``idc_image_path``.
    directory_name: Name of the split; lower-cased and used as the prefix of
      the CSV file name.
    output_dir: Folder the CSV is written to. Defaults to the notebook-level
      ``directory_path`` when omitted.

  Returns:
    The path of the written CSV file, suitable for ``pd.read_csv``.
    (``DataFrame.to_csv`` returns ``None`` when given a path, so the path
    itself is returned instead of the ``to_csv`` result.)
  """
  if output_dir is None:
    output_dir = directory_path

  same_name = directory_name.lower() + '_'

  # Table handed to the Keras generators: one row per image.
  idc_df = pd.DataFrame({'path': idc_image_path,
                         'label': idc_image_label})

  # frac=1 returns every row in random order, i.e. a shuffle.
  df = idc_df.sample(frac = 1)
  # Show a preview only; printing the full frame floods the cell output.
  print(df.head())

  csv_file = os.path.join(output_dir, same_name + 'idc_dataframe.csv')
  # index=False keeps the shuffled index out of the file, so reading it back
  # yields only the 'path' and 'label' columns.
  df.to_csv(csv_file, index=False)
  return csv_file

In [None]:
def _persist_and_reload(split_paths, split_labels, split_dir):
  """Run create_dataframes for one split, then read its result with pandas.

  Returns both the value produced by create_dataframes and the DataFrame
  obtained by passing that value to pd.read_csv.
  NOTE(review): pd.read_csv needs a file path or buffer here - confirm that
  create_dataframes returns one.
  """
  csv_reference = create_dataframes(split_paths, split_labels, split_dir)
  return csv_reference, pd.read_csv(csv_reference)


# Despite the `_generator` suffix, these three names hold pandas DataFrames;
# the Keras iterators are built from them in a later cell.
train_dataframe, train_generator = _persist_and_reload(train_paths, train_labels, train_dir)

test_dataframe, test_generator = _persist_and_reload(test_paths, test_labels, test_dir)

val_dataframe, val_generator = _persist_and_reload(val_paths, val_labels, val_dir)

In [None]:
# One generator object is shared by both splits. It is created without any
# augmentation or normalisation options, so images are only resized and
# converted to grayscale by the iterators below.
data_generator = ImageDataGenerator()


def _flow_from_split(split_dataframe):
  """Return a batch iterator over the images listed in ``split_dataframe``.

  ``split_dataframe`` must have a 'path' column (image file paths) and a
  'label' column (class names). Batches of 32 grayscale 50x50 images with
  one-hot labels are produced, reshuffled every epoch.

  featurewise_center / featurewise_std_normalization used to be passed to
  flow_from_dataframe here. They are ImageDataGenerator constructor options;
  flow_from_dataframe only swallows them through **kwargs and does not apply
  them, so they have been removed (runtime behaviour is unchanged). To really
  normalise, set them on ImageDataGenerator(...) and call
  data_generator.fit(...) on a sample of training images first.
  """
  return data_generator.flow_from_dataframe(
      split_dataframe,
      directory=None,  # values in x_col are used as complete file paths
      x_col='path',
      y_col='label',
      weight_col=None,
      # readjust the target size based on max size of images
      target_size=(50, 50),
      color_mode="grayscale",
      class_mode="categorical",
      batch_size=32,
      shuffle=True,
  )


train_data_generator = _flow_from_split(train_generator)
validation_data_generator = _flow_from_split(val_generator)

# TODO: test_data_generator has not been created yet.

Found 800 validated image filenames belonging to 2 classes.
Found 200 validated image filenames belonging to 2 classes.


In [None]:
# Input tensor shape expected by the network: height x width x channels.
# A single channel corresponds to grayscale images.
img_height, img_width, img_channel = 50, 50, 1

In [None]:
def get_doc_id_model():
  """Build the (uncompiled) CNN classifier.

  Architecture, in order:
    * three blocks of Conv2D (3x3, 'same' padding, ReLU) followed by 2x2
      max pooling with stride 2, using 32, 64 and 128 filters;
    * Flatten;
    * Dense(256, ReLU);
    * Dense(2, softmax) output layer.

  The input shape is read from the notebook-level img_height, img_width and
  img_channel values.

  Returns:
    A tf.keras.Sequential model.
  """
  stack = []

  for block_index, n_filters in enumerate((32, 64, 128)):
    conv_options = dict(filters=n_filters,
                        kernel_size=(3, 3),
                        padding='same',
                        activation='relu')
    # Only the very first layer declares the input shape.
    if block_index == 0:
      conv_options['input_shape'] = (img_height, img_width, img_channel)

    stack.append(keras.layers.Conv2D(**conv_options))
    stack.append(keras.layers.MaxPooling2D(pool_size=(2, 2),
                                           strides=(2, 2)))

  stack.append(keras.layers.Flatten())
  stack.append(keras.layers.Dense(units=256, activation='relu'))
  # Optional extra hidden layer (disabled):
  #   keras.layers.Dense(units=512, activation='relu')
  stack.append(keras.layers.Dense(units=2, activation='softmax'))

  return tf.keras.Sequential(stack)

In [None]:
# Instantiate the network and configure it for training: Adam optimiser,
# categorical cross-entropy loss (labels are one-hot), accuracy as metric.
model = get_doc_id_model()

adam_optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam_optimizer,
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 50, 50, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 25, 25, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 25, 25, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 12, 12, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 12, 12, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 6, 6, 128)        0

In [None]:
# Stop training when validation accuracy has not improved for 5 consecutive
# epochs. The model is compiled with metrics=['accuracy'], which tf.keras 2.x
# logs as 'val_accuracy' for the validation set; monitoring 'val_acc' would
# only produce a "metric not available" warning and never stop training.
# mode='max' states explicitly that a higher value is better.
val_acc_early_stopping = EarlyStopping(monitor = 'val_accuracy',
                                       patience = 5,
                                       verbose = 1,
                                       mode = 'max')

In [None]:
# Train for a fixed 7 epochs, evaluating on the validation iterator after
# each one. Early stopping is currently switched off (callback commented out).
hist = model.fit(
    x=train_data_generator,
    validation_data=validation_data_generator,
    epochs=7,
    #  callbacks=[val_acc_early_stopping],
)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
