In [None]:
!pip install datasets keras-tuner

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from datasets import load_dataset
import numpy as np
import matplotlib.pyplot as plt
from kerastuner import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters

In [None]:
# Load dataset from Hugging Face
# The returned object is indexed by split name further down ('train' and 'test').
dataset = load_dataset("yuighj123/covid-19-classification")

def preprocess_data(examples):
    """Normalize a batch of images (used with ``dataset.map(..., batched=True)``).

    Args:
        examples: Mapping holding an ``'image'`` sequence (ndarrays or anything
            ``np.array`` can convert) and a parallel ``'label'`` sequence.

    Returns:
        dict with two entries:
            ``'images'``: list of float32 arrays, each divided by 255; 2-D
            inputs gain a trailing channel axis.
            ``'labels'``: the incoming labels, passed through untouched.
    """
    def _to_scaled_array(raw):
        # Anything that is not already an ndarray is converted first.
        pixels = raw if isinstance(raw, np.ndarray) else np.array(raw)

        # (height, width) -> (height, width, 1)
        if pixels.ndim == 2:
            pixels = pixels[..., np.newaxis]

        # Consistent dtype, then scale by 1/255.
        return pixels.astype(np.float32) / 255.0

    return {
        'images': [_to_scaled_array(raw) for raw in examples['image']],
        'labels': examples['label'],
    }

# Run the batch-level normalization over every split, 32 examples per call.
dataset = dataset.map(preprocess_data, batch_size=32, batched=True)

# Pull out the two splits. On a Hugging Face `Dataset`, the first positional
# argument of `shuffle` is the random seed (not a buffer size), so it is
# spelled out by keyword here.
test_dataset = dataset['test']
train_dataset = dataset['train'].shuffle(seed=1000)

In [None]:
# Prepare the data
def preprocess_data(example):
    """Turn one example into a ``(normalized_image, label)`` pair.

    This is the per-element function handed to ``tf.data.Dataset.map`` by
    ``create_tf_dataset``.

    NOTE(review): this shares its name with the batch-level helper defined
    earlier in the notebook; once this cell runs, the earlier definition is
    shadowed.

    Args:
        example: Mapping with an ``'image'`` entry (PIL image, array-like or
            ``tf.Tensor``) and a ``'label'`` entry.

    Returns:
        Tuple ``(image, label)``: ``image`` is cast to float32 and divided by
        255; ``label`` is ``example['label']`` unchanged.
    """
    image = example['image']
    # Inside `Dataset.map` the element arrives as a symbolic tf.Tensor, and
    # calling `np.array()` on a symbolic tensor raises. Only non-tensor inputs
    # (e.g. PIL images) are routed through NumPy.
    if not tf.is_tensor(image):
        image = np.array(image)
    image = tf.cast(image, tf.float32) / 255.0  # scale by 1/255
    return image, example['label']

# Convert to TensorFlow dataset
def create_tf_dataset(hf_dataset, batch_size=32):
    """Wrap ``hf_dataset`` in a batched, prefetching ``tf.data`` pipeline.

    Args:
        hf_dataset: Source data handed directly to
            ``tf.data.Dataset.from_tensor_slices``.
        batch_size: Number of elements per batch (default 32).

    Returns:
        The ``tf.data.Dataset`` obtained by slicing the input, mapping
        ``preprocess_data`` over each element, batching and prefetching,
        in that order.

    NOTE(review): ``from_tensor_slices`` needs input it can convert to
    tensors; confirm that a Hugging Face split object is accepted as-is.
    """
    autotune = tf.data.AUTOTUNE
    return (
        tf.data.Dataset.from_tensor_slices(hf_dataset)
        .map(preprocess_data, num_parallel_calls=autotune)
        .batch(batch_size)
        .prefetch(autotune)
    )

In [None]:
# Build the tf.data pipelines for both splits with the default batch size.
# These assignments replace the `train_dataset` / `test_dataset` values bound
# earlier in the notebook.
train_dataset, test_dataset = [
    create_tf_dataset(dataset[split_name]) for split_name in ('train', 'test')
]

In [None]:
# Create an image data generator with augmentation
# NOTE(review): `datagen` is not referenced by any later cell visible here.
datagen = ImageDataGenerator(
    rotation_range=20,        # random rotations
    width_shift_range=0.2,    # random horizontal shifts
    height_shift_range=0.2,   # random vertical shifts
    horizontal_flip=True,     # random left/right flips
    zoom_range=0.2,           # random zoom
    shear_range=0.2,          # random shear
    fill_mode='nearest'       # how pixels exposed by a transform are filled
)

# No `datagen.fit(...)` call is needed: fitting only computes the dataset
# statistics used by featurewise_center / featurewise_std_normalization /
# zca_whitening, and none of those options is enabled above. The previous
# `datagen.fit(x_train)` raised NameError because `x_train` is not defined
# anywhere in the visible notebook.


In [None]:
# Define the model-building function
def build_model(hp, input_shape=(32, 32, 3), num_classes=10):
    """Build and compile a tunable CNN classifier for Keras Tuner.

    Architecture: Conv2D -> MaxPooling2D, followed by 1 to 3 further
    Conv2D -> MaxPooling2D stages, Flatten, one hidden Dense layer and a
    softmax output layer.

    Args:
        hp: Hyperparameter container; queried for ``conv_N_filter`` and
            ``conv_N_kernel`` (N = 1..4), ``n_conv_layers``,
            ``dense_1_units`` and ``learning_rate``.
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to the previously hard-coded ``(32, 32, 3)``.
            NOTE(review): confirm this matches the images actually fed in.
        num_classes: Number of output units. Defaults to the previously
            hard-coded ``10``. NOTE(review): confirm against the number of
            labels in the dataset.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    model = keras.Sequential()

    # 'same' padding keeps the spatial size through every convolution, so only
    # the pooling layers shrink the feature map. With the default 'valid'
    # padding a 32x32 input is down to at most 2x2 before the fourth
    # convolution, which then fails for every kernel size offered here
    # whenever n_conv_layers == 3.
    model.add(keras.layers.Conv2D(
        filters=hp.Int('conv_1_filter', min_value=32, max_value=128, step=32),
        kernel_size=hp.Choice('conv_1_kernel', values=[3, 5]),
        activation='relu',
        padding='same',
        input_shape=input_shape
    ))
    model.add(keras.layers.MaxPooling2D((2, 2)))

    # Additional conv stages; their hyperparameters are numbered from 2.
    for i in range(hp.Int('n_conv_layers', 1, 3)):
        model.add(keras.layers.Conv2D(
            filters=hp.Int(f'conv_{i+2}_filter', min_value=32, max_value=128, step=32),
            kernel_size=hp.Choice(f'conv_{i+2}_kernel', values=[3, 5]),
            activation='relu',
            padding='same'
        ))
        model.add(keras.layers.MaxPooling2D((2, 2)))

    # Classification head.
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(
        units=hp.Int('dense_1_units', min_value=32, max_value=128, step=32),
        activation='relu'
    ))
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    # Sparse loss: labels are expected as integer class ids, not one-hot rows.
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
        ),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [None]:
# Set up the tuner
# Candidate models come from `build_model`; trials are compared on `val_accuracy`.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=1,
    directory='my_dir',
    project_name='covid19_tuning'
)

# Perform hyperparameter search
# NOTE(review): the test split doubles as validation data for the search (and
# for the training run below), so the test accuracy printed at the end is
# measured on data that guided model selection. Consider a separate
# validation split.
tuner.search(train_dataset, epochs=10, validation_data=test_dataset)

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Train the best model
# `history` is read by the plotting cell that follows.
history = best_model.fit(train_dataset, epochs=50, validation_data=test_dataset)

# Evaluate the model
test_loss, test_acc = best_model.evaluate(test_dataset)
print(f'Test accuracy: {test_acc:.3f}')

In [None]:
# Plot accuracy and loss curves
# Two side-by-side panels: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    # (train key, validation key, train label, validation label, title, y-axis label)
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
)
for ax, (train_key, val_key, train_label, val_label, title, y_label) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

fig.tight_layout()
plt.show()

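If the hyperparameter search above takes too long to run, run the code below instead: it trains a small, fixed CNN on a copy of the dataset stored in Google Drive.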

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers

import matplotlib.pyplot as plt

In [None]:
# Mount your Google Drive at /content/drive (requires Google Colab).
# The train/test directories used further down live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Both generators multiply pixel values by the same factor.
PIXEL_SCALE = 1/255

# Generator with light random augmentation (zoom, rotation, shifts).
TrainDataGenerator = ImageDataGenerator(
    rescale=PIXEL_SCALE,
    zoom_range=0.15,
    rotation_range=12,
    width_shift_range=0.05,
    height_shift_range=0.05,
)

# Generator that only rescales.
TestDataGenerator = ImageDataGenerator(rescale=PIXEL_SCALE)

In [None]:
# Dataset locations inside the mounted Google Drive.
train_data_dir = '/content/drive/MyDrive/AnomalyGPT/Covid19-dataset/train'
test_data_dir = '/content/drive/MyDrive/AnomalyGPT/Covid19-dataset/test'

# Training batches come from the augmenting generator. (The two generators
# were previously swapped, so the training images were never augmented while
# the test images were randomly distorted.)
# 512x512 grayscale matches the model's (512, 512, 1) input, and "categorical"
# yields one-hot labels as required by the categorical_crossentropy loss.
train_data = TrainDataGenerator.flow_from_directory(train_data_dir,
                                                target_size = (512, 512),
                                                batch_size = 16,
                                                class_mode = "categorical",
                                                color_mode = 'grayscale')

# Test batches are only rescaled, never augmented.
test_data = TestDataGenerator.flow_from_directory(test_data_dir,
                                                target_size = (512, 512),
                                                batch_size = 2,
                                                class_mode = "categorical",
                                                color_mode = 'grayscale')

In [None]:
# Small CNN for 512x512 single-channel images with a 3-way softmax output.
# The model is created once; the earlier extra `Sequential()` instance was
# discarded immediately and only shifted the auto-generated model name.
model = tf.keras.Sequential([
    # Input layer
    tf.keras.Input(shape=(512, 512, 1)),

    # Three convolution + pooling stages.
    layers.Conv2D(filters=16, kernel_size=(5, 5), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=16, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=16, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Final convolution with only 4 filters, so fewer values reach Flatten.
    layers.Conv2D(4, 3, strides=1, activation="relu"),

    layers.Flatten(),

    # Classification head.
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    # Output layer
    layers.Dense(3, activation="softmax"),
])

In [None]:
# Print the layer-by-layer architecture.
model.summary()

# Training configuration: Adam optimizer, one-hot (categorical) cross-entropy
# loss, accuracy reported as the metric.
compile_options = dict(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.compile(**compile_options)

In [None]:
# Train for 10 epochs, validating on `test_data` after every epoch.
# `validation_split` is no longer passed: Keras does not support it for
# generator input, and an explicit `validation_data` overrides it anyway.
history = model.fit(train_data, epochs = 10, verbose = 1, validation_data = test_data)

In [None]:
# Plot accuracy and loss curves
def _draw_curves(ax, train_values, val_values, train_label, val_label, title, y_label):
    """Draw one training-vs-validation panel on ``ax``."""
    ax.plot(train_values, label=train_label)
    ax.plot(val_values, label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()


fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: accuracy per epoch.
_draw_curves(
    accuracy_ax,
    history.history['accuracy'],
    history.history['val_accuracy'],
    'Training Accuracy',
    'Validation Accuracy',
    'Model Accuracy',
    'Accuracy',
)

# Right panel: loss per epoch.
_draw_curves(
    loss_ax,
    history.history['loss'],
    history.history['val_loss'],
    'Training Loss',
    'Validation Loss',
    'Model Loss',
    'Loss',
)

fig.tight_layout()
plt.show()