In [32]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from glob import glob
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_files       
#from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.metrics import log_loss

train_data = '/Users/jetcalz07/Desktop/projects/sf_dd/data/imgs/train/'
test_data = '/Users/jetcalz07/Desktop/projects/sf_dd/data/imgs/test/'

In [33]:
# Function to read indiv image
def get_cv2_image(path, img_rows, img_cols, color_type=1):
    # Loading as Grayscale image
    if color_type == 1:
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    elif color_type == 3:
        img = cv2.imread(path, cv2.IMREAD_COLOR)
    # Reduce size
    img = cv2.resize(img, (img_rows, img_cols)) 
    return img

In [38]:
num_classes = 10
classes = ['c' + str(x) for x in range(num_classes)]

# Load train batch into image and label arrays
def load_train(img_rows, img_cols, color_type=1):
    train_imgs = []
    train_labels = []
    
    # Go through each folder
    for class_ in classes:
        files = glob(os.path.join(train_data, class_, '*.jpg'))
        for file in tqdm(files):
            img = get_cv2_image(file, img_rows, img_cols, color_type)
            train_imgs.append(img)
            train_labels.append(class_)
            
    return train_imgs, train_labels


# Split into train and val
def normalize_and_split_train_val(img_rows, img_cols, color_type=1):
    X, labels = load_train(img_rows, img_cols, color_type)
    y = np_utils.to_categorical(labels, 10)
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Convert dtype and reshape for batches
    x_train = np.array(x_train, dtype=np.uint8).reshape(-1,img_rows,img_cols,color_type)
    x_test = np.array(x_test, dtype=np.uint8).reshape(-1,img_rows,img_cols,color_type)
    
    return x_train, x_val, y_train, y_val
    
    
# Same for test
def load_test(img_rows, img_cols, color_type=1, size=200000):
    X_test = []
    
    files = glob(os.path.join(test_data, '*.jpg'))
    total = 0
    for file in files:
        if total >= size:
            break
        img = get_cv2_image(file, img_rows, img_cols, color_type)
        X_test.append(img)
        total += 1
            
    return X_test

def read_and_normalize_test_data(img_rows, img_cols, color_type=1, size=200000):
    test_data = load_test(img_rows, img_cols, color_type, size)
    test_data = np.array(test_data, dtype=np.uint8).reshape(-1,img_rows,img_cols,color_type)
    
    return test_data

#### Add normalization step and val split ^^

In [35]:
img_rows = 64
img_cols = 64
color_type = 1

train_imgs, train_labels = load_train(img_rows, img_cols, color_type)
x_train, x_val, y_train, y_val = normalize_and_split_train_val(img_rows, img_cols, color_type)

print(f"Sample Image Label: {train_labels[0]}")
plt.imshow(train_imgs[0], cmap='gray')

100%|██████████████████████████████████████| 2489/2489 [00:03<00:00, 721.47it/s]
100%|██████████████████████████████████████| 2267/2267 [00:03<00:00, 719.39it/s]
100%|██████████████████████████████████████| 2317/2317 [00:03<00:00, 727.56it/s]
100%|██████████████████████████████████████| 2346/2346 [00:03<00:00, 724.72it/s]
100%|██████████████████████████████████████| 2326/2326 [00:03<00:00, 723.04it/s]
100%|██████████████████████████████████████| 2312/2312 [00:03<00:00, 721.56it/s]
100%|██████████████████████████████████████| 2325/2325 [00:03<00:00, 725.59it/s]
100%|██████████████████████████████████████| 2002/2002 [00:02<00:00, 716.54it/s]
100%|██████████████████████████████████████| 1911/1911 [00:02<00:00, 727.43it/s]
100%|██████████████████████████████████████| 2129/2129 [00:02<00:00, 723.29it/s]
100%|██████████████████████████████████████| 2489/2489 [00:03<00:00, 737.22it/s]
100%|██████████████████████████████████████| 2267/2267 [00:03<00:00, 725.17it/s]
100%|███████████████████████

NameError: name 'np_utils' is not defined

In [39]:
nb_test_samples = 200
test_files = read_and_normalize_test_data(img_rows, img_cols, color_type, nb_test_samples)
print('Test shape:', test_files.shape)

Test shape: (200, 64, 64, 1)


### Simple CNN

In [None]:
# import tensorflow as tf
# from tensorflow import keras
# from tensorflow.keras.layers import Embedding, Input, Dense, Lambda
# from tensorflow.keras.models import Model