In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import layers, models
import keras
import matplotlib.pyplot as plt
import cv2
import os
import random
# Sanity check: ask TensorFlow which GPUs it can see. As the cell's last
# expression, the returned list is displayed as the cell output.
tf.config.list_physical_devices('GPU')

2024-08-12 01:46:52.596456: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-12 01:46:52.609400: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-12 01:46:52.613427: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-12 01:46:52.624250: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
I0000 00:00:1723441614.268686  3898

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
class My_Custom_Generator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads image batches from disk on demand.

    Each batch is read with ``cv2.imread``, resized to ``img_size`` and scaled
    by 1/255. Labels are sliced with the same indices as the file names and
    returned unchanged (converted to a NumPy array).

    Args:
        image_filenames: Sliceable collection of image paths.
        labels: Sliceable collection of labels aligned with ``image_filenames``.
        batch_size: Maximum number of samples per batch.
        img_size: Target size handed to ``cv2.resize`` (OpenCV's ``dsize``,
            i.e. ``(width, height)``).
        **kwargs: Forwarded to ``keras.utils.Sequence.__init__``.

    NOTE(review): ``cv2.imread`` returns channels in BGR order and no
    conversion is done here; any other consumer of the trained model must feed
    images in the same order.
    """

    def __init__(self, image_filenames, labels, batch_size, img_size, **kwargs) :
        # Keras 3 expects Sequence/PyDataset subclasses to initialise the base
        # class; skipping it triggers a warning on every fit() call.
        super().__init__(**kwargs)
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.image_size = img_size


    def __len__(self) :
        """Return the number of batches; the last one may be partial."""
        # Convert to a plain Python int rather than returning a NumPy scalar.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))


    def __getitem__(self, idx) :
        """Return batch ``idx`` as ``(images, labels)``.

        Raises:
            OSError: If an image in the batch cannot be read by OpenCV.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.image_filenames[start:stop]
        batch_y = self.labels[start:stop]

        images = []
        for file_name in batch_x:
            image = cv2.imread(str(file_name))
            if image is None:
                # imread signals failure by returning None; without this check
                # the failure surfaces inside cv2.resize without the file name.
                raise OSError(f'Could not read image file: {file_name}')
            images.append(cv2.resize(image, self.image_size))

        return np.array(images)/255.0, np.array(batch_y)

In [3]:
def getData(dataset, n=None, k=99999999, m=99999999):
    """Collect image paths and one-hot labels from a ``split/label/file`` tree.

    The dataset root is expected to contain one directory per split, each of
    which contains one directory per class label.

    Args:
        dataset: ``'ImageNet'`` or ``'Games'``; selects the root directory.
        n: Optional mapping ``split name -> max files per label``. Splits that
            are missing from the mapping (or ``n=None``) are not capped.
        k: Maximum number of splits to read.
        m: Maximum number of labels to use (first ``m`` in sorted order).

    Returns:
        Tuple ``(data, dataLabels, splits)`` where ``data[split]`` is a dict
        with ``'X'`` (array of file paths) and ``'y'`` (array of one-hot
        vectors), ``dataLabels`` is the sorted label list taken from the first
        split read, and ``splits`` is the sorted list of all split names found.

    Raises:
        ValueError: If ``dataset`` is not a known dataset name.
    """
    roots = {
        'ImageNet': "./Data/150_subset/",
        'Games': "./Data/Games/DATA BACKUP/",
    }
    if dataset not in roots:
        raise ValueError(
            f'Unknown dataset {dataset!r}; expected one of {sorted(roots)}')
    target = roots[dataset]
    limits = {} if n is None else n

    data = {}
    dataLabels = []
    # os.listdir order is arbitrary; sorting makes splits[:k] predictable.
    splits = sorted(os.listdir(target))
    for split in splits[:k]:
        split_dir = f'{target}{split}/'
        if not dataLabels: #avoid potentially different read order
            dataLabels = sorted(os.listdir(split_dir))
        active_labels = dataLabels[:m]
        cap = limits.get(split)  # None means "take every file"

        paths, targets = [], []
        for label_idx, label in enumerate(active_labels):
            # Sort before shuffling so that a seeded `random` yields the same
            # sample regardless of the order the filesystem lists files in.
            files = sorted(os.listdir(f'{split_dir}{label}/'))
            random.shuffle(files)
            for file in files[:cap]:
                paths.append(f'{split_dir}{label}/{file}')
                one_hot = np.zeros(len(active_labels), dtype=int)
                one_hot[label_idx] = 1
                targets.append(one_hot)
        data[split] = {'X': np.asarray(paths), 'y': np.asarray(targets)}
    return data, dataLabels, splits

In [4]:
def makeModel(inputShape=(256, 256, 3), labelCount=5):
    """Build a small strided-convolution image classifier.

    Three strided ``Conv2D`` layers downsample the input, followed by a
    256-unit dense layer and a ``labelCount``-way output layer.

    Args:
        inputShape: Shape of one input image, ``(height, width, channels)``.
        labelCount: Number of output classes.

    Returns:
        An uncompiled ``keras.Model``.
    """
    # Drop state left over from previously built models in this session.
    keras.backend.clear_session()

    inputs = keras.Input(shape=inputShape)

    # Shape comments below are for the default 256x256 input ("valid" padding).
    x = layers.Conv2D(filters=32, kernel_size=3, strides=3, activation="relu")(inputs) #(85,85,32)

    x = layers.Conv2D(filters=64, kernel_size=3, strides=3, activation="relu")(x) #(28,28,64)

    x = layers.Conv2D(filters=128, kernel_size=8, strides=8, activation="relu")(x) #(3,3,128)

    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu')(x)

    # The model is trained on one-hot labels with categorical cross-entropy,
    # which expects a probability distribution over classes; softmax provides
    # one, whereas independent sigmoids do not sum to 1.
    outputs = layers.Dense(labelCount, activation='softmax')(x)

    return keras.Model(inputs=inputs, outputs=outputs)

In [5]:
# Exponential learning-rate decay: start at 1e-2 and scale by 0.90 per 5000
# steps.
# NOTE(review): no visible cell passes `lr_schedule` to an optimizer (BulkTrain
# compiles with optimizer='Adam') -- confirm whether it should be wired in.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    1e-2, decay_steps=5000, decay_rate=0.90)

In [6]:
# Callbacks handed to model.fit: keep the best model seen so far (judged by
# validation accuracy) in "callback.keras".
callbacks = [
    keras.callbacks.ModelCheckpoint(
        "callback.keras",
        monitor="val_accuracy",
        save_best_only=True,
    ),
]

In [7]:
def BulkTrain(callbacks, datasets = ['Games', 'ImageNet'], M=50, Ns=[1000, 750, 500, 375, 250, 175, 100, 75, 50, 25], batch_size=256, image_size = (256,256)):
    """Train and save one CNN per (dataset, training-set size) combination.

    For every dataset in ``datasets`` and every ``n`` in ``Ns`` a fresh model
    is built, trained for 25 epochs on at most ``n`` images per label, and
    saved to ``models/cnn_{dataset}_{M}_N-{n}.keras``.

    Args:
        callbacks: Keras callbacks passed to every ``model.fit`` call.
        datasets: Dataset names understood by ``getData``.
        M: Maximum number of labels to use.
        Ns: Per-label caps on the number of training images, one run per value.
        batch_size: Batch size for the training and validation generators.
        image_size: Size passed to ``cv2.resize`` by the generators, i.e.
            ``(width, height)``.

    NOTE(review): the same callback objects are reused for every run; a
    ``ModelCheckpoint`` with ``save_best_only=True`` may carry its best score
    over from one fit to the next -- confirm that this is intended.
    """
    # Create the output directory up front so that a missing directory cannot
    # waste a full training run when model.save is reached.
    os.makedirs('models', exist_ok=True)

    # cv2.resize takes (width, height) but the resulting arrays are
    # (height, width, channels); derive the model input from image_size so the
    # two cannot drift apart.
    input_shape = (image_size[1], image_size[0], 3)

    for dataset in datasets:
        for n in Ns:

            N={ #max samples per split #This block defines N according to testing needs
                'test':9999999, #only used for evaluation, so more only increases granularity
                'train':n,
                'val':65
            }

            data, dataLabels, splits = getData(dataset, n=N,  m=M)

            # getData returns samples grouped by label and the generator cuts
            # contiguous slices, so without shuffling a batch would contain
            # only one or two classes.
            train_X, train_y = data['train']['X'], data['train']['y']
            order = np.random.permutation(len(train_X))
            train_X, train_y = train_X[order], train_y[order]

            my_training_batch_generator = My_Custom_Generator(train_X, train_y, batch_size, image_size)
            my_validation_batch_generator = My_Custom_Generator(data['val']['X'], data['val']['y'], batch_size, image_size)
            model = makeModel(inputShape=input_shape, labelCount=train_y[0].shape[0])

            model.compile(optimizer='Adam',
                loss="categorical_crossentropy",
                metrics=['accuracy'])

            model.fit(my_training_batch_generator,
                epochs = 25,
                verbose = 0,
                callbacks=callbacks,
                validation_data = my_validation_batch_generator,
               )

            model.save(f'models/cnn_{dataset}_{M:03d}_N-{n:04d}.keras')
            print(f'Saved [models/cnn_{dataset}_{M:03d}_N-{n:04d}.keras]')

In [8]:
# Train the ImageNet models for the selected per-label training-set caps.
BulkTrain(
    callbacks,
    datasets=['ImageNet'],
    Ns=[750, 375, 175, 75],
)

I0000 00:00:1723441614.706203  389841 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1723441614.706314  389841 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1723441614.706364  389841 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1723441614.831842  389841 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1723441614.831940  389841 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-08-12

Saved [models/cnn_ImageNet_050_N-0750.keras]
Saved [models/cnn_ImageNet_050_N-0375.keras]
Saved [models/cnn_ImageNet_050_N-0175.keras]





Saved [models/cnn_ImageNet_050_N-0075.keras]
