In [1]:
import tensorflow as tf
import keras
import numpy as np
import os
import cv2
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping


2023-11-20 22:52:12.294477: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [2]:
train_image_folder = '/raid/mpsych/OMAMA/DATA/data/train'
train_npz_folder = '/raid/mpsych/OMAMA/DATA/data/2d_resized_512/images'

In [3]:
# Get lists of image and npz files
image_files = [os.path.join(train_image_folder, f) for f in os.listdir(train_image_folder) if f.endswith('.png')]
npz_files = [os.path.join(train_npz_folder, f) for f in os.listdir(train_npz_folder) if f.endswith('.npz')]


In [8]:
len(image_files)

50000

In [9]:
len(npz_files)

50000

In [6]:
image_files = image_files[:50000]

In [7]:
npz_files = npz_files[:50000]

In [10]:
# Split the dataset (adjust the test_size as needed)
train_files, test_files = train_test_split(list(zip(image_files, npz_files)), test_size=0.3, random_state=42)
val_files, test_files = train_test_split(test_files, test_size=0.5, random_state=42)

In [11]:
img_height = 512
img_width = 512
batch_size = 32

In [12]:
def custom_data_generator(file_list, batch_size, img_height, img_width):
    total_files = len(file_list)
    indices = np.arange(total_files)
    np.random.shuffle(indices)

    while True:
        for i in range(0, total_files, batch_size):
            batch_indices = indices[i:i + batch_size]
            batch_images = []
            batch_npz = []
            batch_labels = []

            for idx in batch_indices:
                img_file, npz_file = file_list[idx]

                # Process image file
                image = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
                image = cv2.resize(image, (img_width, img_height))
                image = np.expand_dims(image, axis=-1)
                batch_images.append(image)

                # Process npz file
                with np.load(npz_file, allow_pickle=True) as data:
                    npz = data['data']
                npz = np.expand_dims(npz, axis=-1)
                batch_npz.append(npz)

                # Assign class labels
                batch_labels.append([1, 0] if img_file.endswith('.png') else [0, 1])

            combined_batch = np.concatenate((batch_images, batch_npz), axis=-1)
            yield (combined_batch, np.array(batch_labels))




In [13]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, min_delta=0.001, mode='min')

In [14]:
NUMBER_OF_CLASSES = 2

In [15]:
# Create data generators
train_generator = custom_data_generator(train_files, batch_size, img_height, img_width)
val_generator = custom_data_generator(val_files, batch_size, img_height, img_width)
test_generator = custom_data_generator(test_files, batch_size, img_height, img_width)


In [16]:
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(32, kernel_size=(3, 3),
                             activation='relu',
                             input_shape=(img_height, img_width, 1)))
model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Dropout(0.25))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(NUMBER_OF_CLASSES, activation='softmax'))

2023-11-20 22:54:55.993230: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2023-11-20 22:54:56.149727: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:4e:00.0 name: A100-SXM4-40GB computeCapability: 8.0
coreClock: 1.41GHz coreCount: 108 deviceMemorySize: 39.59GiB deviceMemoryBandwidth: 1.41TiB/s
2023-11-20 22:54:56.149768: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2023-11-20 22:54:56.153097: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2023-11-20 22:54:56.153144: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11
2023-11-20 22:54:56.154170: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10
2

In [17]:
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer="adadelta",
              metrics=['accuracy'])

In [18]:
# Model training
try:
    history = model.fit(
        train_generator,
        steps_per_epoch=len(train_files) // batch_size,
        epochs=3,
        validation_data=val_generator,
        validation_steps=len(val_files) // batch_size,
        verbose=1,
        callbacks=[early_stopping]
    )
except Exception as e:
    print("An error occurred during training:", str(e))

# Model evaluation on the test set
test_loss, test_accuracy = model.evaluate(test_generator, steps=len(test_files) // batch_size)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

2023-11-20 22:55:48.028057: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2023-11-20 22:55:48.047183: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2245720000 Hz


Epoch 1/3


2023-11-20 22:56:01.073204: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8
2023-11-20 22:56:01.688470: I tensorflow/stream_executor/cuda/cuda_dnn.cc:359] Loaded cuDNN version 8201
2023-11-20 22:56:02.391547: W tensorflow/stream_executor/gpu/asm_compiler.cc:191] Falling back to the CUDA driver for PTX compilation; ptxas does not support CC 8.0
2023-11-20 22:56:02.391570: W tensorflow/stream_executor/gpu/asm_compiler.cc:194] Used ptxas at ptxas
2023-11-20 22:56:02.391632: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Unimplemented: ptxas ptxas too old. Falling back to the driver to compile.
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2023-11-20 22:56:03.833429: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2023-11-20 22:56:04.624668: I tensorflow/stream_exec

Epoch 2/3
Epoch 3/3
Test Loss: 0.0, Test Accuracy: 1.0


In [19]:
# 3 epochs
test_file = '/raid/mpsych/OMAMA/DATA/data/train/sample_40069.png'
test_image = cv2.imread(test_file, cv2.IMREAD_GRAYSCALE)
test_image = cv2.resize(test_image, (img_width, img_height))
test_image = np.expand_dims(test_image, axis=-1)
test_image = test_image / 255.0
test_image = np.expand_dims(test_image, axis=0)  # Add batch dimension

predictions = model.predict(test_image)
print("Predictions:", predictions)
predicted_class = np.argmax(predictions)
print("Predicted Class:", predicted_class)

Predictions: [[0.55975455 0.44024542]]
Predicted Class: 0


In [26]:
# Load and process a synthetic (PNG) test image
test_png_file = '/raid/mpsych/OMAMA/DATA/data/train/sample_10446.png'
test_image = cv2.imread(test_file, cv2.IMREAD_GRAYSCALE)
test_image = cv2.resize(test_image, (img_width, img_height))
test_image = np.expand_dims(test_image, axis=-1)
test_image = test_image / 255.0
test_image = np.expand_dims(test_image, axis=0)  # Add batch dimension

predictions = model.predict(test_image)
print("Predictions:", predictions)
predicted_class = np.argmax(predictions)
print("Predicted Class:", predicted_class)

Predictions: [[0.63113284 0.36886716]]
Predicted Class: 0


In [31]:
test_npz_file = '/raid/mpsych/OMAMA/DATA/data/2d_resized_512/images/100220136299296817993264225430810813957.npz'

with np.load(test_npz_file, allow_pickle=True) as data:
    test_npz = data['data']

# Preprocess the NPZ data as per your training data preprocessing
# For example, if resizing and normalization were applied during training
test_npz = cv2.resize(test_npz, (img_width, img_height))
test_npz = np.expand_dims(test_npz, axis=-1)  # Add channel dimension if necessary
test_npz = test_npz / 255.0  # Normalize if it was done during training
test_npz = np.expand_dims(test_npz, axis=0)  # Add batch dimension

# Make prediction
prediction = model.predict(test_npz)
print("Predictions:", prediction)
predicted_class = np.argmax(prediction)
class_label = 'Real' if predicted_class == 0 else 'Synthetic'
print(f"Predicted Class: {class_label}, Probability: {np.max(prediction)}")


Predictions: [[0.9863598  0.01364024]]
Predicted Class: Real, Probability: 0.9863597750663757


In [9]:
# 3 epochs
# test_file = '/raid/mpsych/OMAMA/DATA/data/train/sample_40069.png'
# test_image = cv2.imread(test_file, cv2.IMREAD_GRAYSCALE)
# test_image = cv2.resize(test_image, (img_width, img_height))
# test_image = np.expand_dims(test_image, axis=-1)
# test_image = test_image / 255.0
# test_image = np.expand_dims(test_image, axis=0)  # Add batch dimension

# predictions = model.predict(test_image)
# print("Predictions:", predictions)
# predicted_class = np.argmax(predictions)
# print("Predicted Class:", predicted_class)

Predictions: [[0.4999916 0.5000084]]
Predicted Class: 1
