Data - BitMap in numpy 
Choosing 10 categories from QuickDraw dataset :
    - cat
    - bear
    - car
    - eye
    - hat
    - frog
    - crown
    - guitar
    - pig
    - coffee cup
100K image per elements

# Main method

In [1]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
import os
import random

2022-11-29 11:10:37.959002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import glob

In [7]:
def load_data(root, test_size=0.2, max_items_per_class= 1000):
    
    all_files = glob.glob(os.path.join(root, '*.npy'))

    #initialize variables 
    X = np.empty([0, 784])
    y = np.empty([0])
    class_names = []

    #load a subset of the data to memory 
    for idx, file in enumerate(all_files):
        print(file,"loaded")
        data = np.load(file)
        data = data[0: max_items_per_class, :]
        labels = np.full(data.shape[0], idx)

        X = np.concatenate((X, data), axis=0)
        y = np.append(y, labels)

        class_name, ext = os.path.splitext(os.path.basename(file))
        class_names.append(class_name)

    data = None
    labels = None

    #shuffle (to be sure)
    permutation = np.random.permutation(y.shape[0])
    X = X[permutation, :]
    y = y[permutation]
    
    #separate into training and testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=True)

    return X_train, X_test, y_train, y_test, class_names

In [8]:
X_train, X_test, y_train, y_test, class_names = load_data("data_npy", test_size=0.2, max_items_per_class= 1000)

data_npy/full_numpy_bitmap_hat.npy loaded
data_npy/full_numpy_bitmap_cat.npy loaded
data_npy/full_numpy_bitmap_eye.npy loaded
data_npy/full_numpy_bitmap_car.npy loaded
data_npy/full_numpy_bitmap_guitar.npy loaded
data_npy/full_numpy_bitmap_frog.npy loaded
data_npy/full_numpy_bitmap_coffee cup.npy loaded
data_npy/full_numpy_bitmap_crown.npy loaded
data_npy/full_numpy_bitmap_bear.npy loaded
data_npy/full_numpy_bitmap_pig.npy loaded


In [11]:
class_names

['full_numpy_bitmap_hat',
 'full_numpy_bitmap_cat',
 'full_numpy_bitmap_eye',
 'full_numpy_bitmap_car',
 'full_numpy_bitmap_guitar',
 'full_numpy_bitmap_frog',
 'full_numpy_bitmap_coffee cup',
 'full_numpy_bitmap_crown',
 'full_numpy_bitmap_bear',
 'full_numpy_bitmap_pig']

In [37]:
# Reshape and normalize
from tensorflow.keras.utils import to_categorical

image_size=28
num_classes = len(class_names)

X_train = X_train.reshape(X_train.shape[0], image_size, image_size, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], image_size, image_size, 1).astype('float32')

X_train /= 255.0
X_test /= 255.0

# Convert class vectors to class matrices, one hot encoded
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

In [38]:
y_train.shape

(6700, 10)

In [39]:
X_train.shape

(6700, 28, 28, 1)

In [40]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam

In [41]:
model = Sequential()
model.add(Convolution2D(16, (3, 3),
                        padding='same',
                        input_shape=X_train.shape[1:], activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(32, (3, 3), padding='same', activation= 'relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(64, (3, 3), padding='same', activation= 'relu'))
model.add(MaxPooling2D(pool_size =(2,2)))
model.add(Flatten())

model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax')) 
# Train model
adam = Adam()
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
print(model.summary())

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 28, 28, 16)        160       
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 14, 14, 16)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 14, 14, 32)        4640      
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 7, 7, 32)         0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 7, 7, 64)          18496     
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 3, 3, 64)        

In [42]:
history = model.fit(X_train, y_train, batch_size= 32, epochs= 10, verbose= 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [43]:
def plot_loss_accuracy(history):

    with plt.style.context('seaborn-deep'):

        fig, ax = plt.subplots(1, 2, figsize=(15, 4))

        ## Plot Losses and Accuracies
        x_axis = np.arange(len(history.history['loss']))

        ax[0].set_title("Loss")
        ax[0].plot(x_axis, history.history['loss'], color="blue", linestyle=":", marker="X", label="Train Loss")
        ax[0].plot(x_axis, history.history['val_loss'], color="orange", linestyle="-", marker="X", label="Val Loss")

        ax[1].set_title("Accuracy")
        ax[1].plot(x_axis, history.history['accuracy'], color="blue", linestyle=":", marker="X", label="Train Accuracy")
        ax[1].plot(x_axis,
                   history.history['val_accuracy'],
                   color="orange",
                   linestyle="-",
                   marker="X",
                   label="Val Accuracy")

        ## Customization
        ax[0].grid(axis="x", linewidth=0.5)
        ax[0].grid(axis="y", linewidth=0.5)
        ax[0].legend()
        ax[1].grid(axis="x", linewidth=0.5)
        ax[1].grid(axis="y", linewidth=0.5)
        ax[1].legend()

        plt.show()

In [44]:
plot_loss_accuracy(history)

KeyError: 'val_loss'

# Backup Method

In [1]:
dataset = "data_npy"
files = os.listdir(dataset)
max_item_per_cl = 1500
class_name = []

size = 0

for name in files :
    #Evaluate the size of the dataset
    
    data= np.load(os.path.join(dataset, name))
    data= data[:max_item_per_cl]
    size += data.shape[0]


#create 2 buffers to stock data
X = np.zeros((size, 28, 28)) #images
y = np.zeros((size,)) #targets


i=0
t=0
for name in files :
    #open each dataset and add a new class
    class_name.append(name.replace("full_numpy_bitmap_", "").replace(".npy", ""))
    data= np.load(os.path.join(dataset, name))
    data= data[:max_item_per_cl]
    #add image to the buffers
    X[i:i + data.shape[0]] = np.invert(data.reshape(-1, 28, 28))
    y[i:i + data.shape[0]] = t
    #iterate
    i += data.shape[0]
    t += 1

#Shuffle dataset
idx = np.arange(size)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

X, X_val, y, y_val = train_test_split(X, y, test_size= 0.33)

print("X.shape", X.shape)
print("y.shape", y.shape)

print("X_val.shape", X_val.shape)
print("y.shape", y_val.shape)

print(class_name)

NameError: name 'os' is not defined

## Normalization (avoid scale effect)

In [7]:
print("mean and std", X.mean(), X.std())
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X.reshape(-1, 28*28))
X_val_scaled = scaler.fit_transform(X_val.reshape(-1, 28*28))

X_scaled = X_scaled.reshape(-1, 28, 28, 1)
X_val_scaled = X_val_scaled.reshape(-1, 28, 28, 1)

mean and std 209.52174370494467 85.21704864839893


## Create Dataset (object tensor)

In [11]:
train_data = tf.data.Dataset.from_tensor_slices(X_scaled)
val_data = tf.data.Dataset.from_tensor_slices(X_val_scaled)

### biblio

Challenges & lectures

--> data-intuition-on-convolutions
--> data-cifar-classification
--> data-transfer-learning
--> data-autoencoder
—-> data-recap_cnn

docs

--> https://www.tensorflow.org/api_docs/python/tf/data/Dataset
--> https://www.youtube.com/watch?v=rsMVCPIq8iY --> loading data
--> https://www.youtube.com/watch?v=sdIINp0-CAA --> tensorflow