In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.python.client import device_lib


2024-10-02 12:18:43.530662: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-02 12:18:43.562105: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-02 12:18:43.570019: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-02 12:18:43.629916: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Report whether TensorFlow can see any GPU devices
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU detected for TensorFlow.")
else:
    print("TensorFlow is using the following GPU(s):")
    # One device per line
    print(*gpus, sep="\n")


In [None]:
# tensorflow is already imported in the first cell; this re-import is a no-op
import tensorflow as tf
# Print the installed TensorFlow version string
print(tf.__version__)


In [None]:
import time

# Two large random matrices (10000 x 10000 each) to multiply
matrix1 = tf.random.normal([10000, 10000])
matrix2 = tf.random.normal([10000, 10000])

# Time the matrix multiplication. perf_counter() is a monotonic, high-resolution
# clock intended for measuring elapsed time.
start_time = time.perf_counter()
result = tf.matmul(matrix1, matrix2)
# tf.matmul can return as soon as the operation has been enqueued on the device.
# Copying the result to host memory blocks until the multiplication has actually
# finished, so the measured time also includes that device-to-host copy.
result.numpy()
print("GPU computation time:", time.perf_counter() - start_time)

# Device the result tensor lives on (a CPU device string if no GPU was used)
print("GPU used:", result.device)


In [None]:
import tensorflow as tf
# Count the GPU devices visible to TensorFlow
gpu_count = len(tf.config.list_physical_devices('GPU'))
print("Num GPUs Available: ", gpu_count)


In [None]:
# Ask TensorFlow to allocate GPU memory on demand (memory growth) for every
# visible GPU instead of reserving it up front.
# NOTE(review): set_memory_growth raises RuntimeError once the GPUs have been
# initialized, and earlier cells already run ops - confirm this cell's position.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Reported rather than re-raised so the notebook keeps running
    print(err)

In [None]:
# Enumerate every local device TensorFlow reports, showing its name and type
devices = device_lib.list_local_devices()
print("Available devices:")
for local_device in devices:
    print(local_device.name, local_device.device_type)


In [None]:
# Paths (relative, so they resolve against the notebook's working directory)
data_dir = 'data/training'  # Folder with .tif images
csv_file = 'data/training.csv'  # CSV file with image_id and the label column (read later as 'is_homogenous')


In [None]:
# Target size every image is resized to when it is loaded
IMG_SIZE = (224, 224)
# Number of samples per batch produced by the data generators
BATCH_SIZE = 16

In [None]:
# Load the image-id / label table from the CSV file
df = pd.read_csv(filepath_or_buffer=csv_file)

In [None]:
# Column headers in the CSV may carry stray whitespace; normalise them first
header_names = df.columns.str.strip()
df.columns = header_names

# With clean headers, 'image_id' can be looked up directly
image_id_values = df['image_id'].values
print(image_id_values)


In [None]:
def load_and_preprocess_image(image_path):
    """Read one image from disk and return it as an array scaled by 1/255.

    The image is resized to ``IMG_SIZE`` while loading, converted to an array,
    and its pixel values are divided by 255 (0-255 -> 0-1).

    NOTE(review): the network built later uses ImageNet-pretrained VGG16
    weights; confirm that plain 0-1 scaling (rather than the VGG16-specific
    ``preprocess_input``) is the intended preprocessing.
    """
    pixels = img_to_array(load_img(image_path, target_size=IMG_SIZE))
    return pixels / 255.0

In [None]:
# Display the DataFrame's column names
print(df.columns)



In [None]:
# 3. Build the list of image file paths and the matching label array.
# Each image_id is left-padded with zeros to three characters to form "<id>.tif".
image_paths = [
    os.path.join(data_dir, str(img_id).zfill(3) + ".tif")
    for img_id in df['image_id']
]
labels = df['is_homogenous'].values

In [None]:
# Preview only the first few paths instead of dumping the entire list into the output
image_paths[:5]

In [None]:
# 4. Read and preprocess every image, then collect them into a single array
images = np.asarray(list(map(load_and_preprocess_image, image_paths)))



In [None]:
# 5. Hold out 20% of the samples for validation; the remaining 80% are used for training.
# random_state is fixed so the same split is produced on every run.
# NOTE(review): the split is not stratified, so the class ratio in the
# validation set can differ from the full data set.
split_arrays = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = split_arrays


In [None]:
# 6. Augmentation settings applied to the training images
augmentation_options = {
    'rotation_range': 20,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'vertical_flip': True,
    'fill_mode': 'nearest',
}
train_datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Validation images get a generator with no augmentation options set
val_datagen = ImageDataGenerator()


In [None]:
# 7. Wrap the training and validation arrays in batch iterators
train_generator = train_datagen.flow(x=X_train, y=y_train, batch_size=BATCH_SIZE)
val_generator = val_datagen.flow(x=X_val, y=y_val, batch_size=BATCH_SIZE)


In [None]:
# 8. Load the pre-trained VGG16 model without the top (classification) layers.
# The input shape is derived from IMG_SIZE so it always matches the size the
# images are resized to when loaded; the trailing 3 is the channel count.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))


In [None]:
# Freeze every layer of the pre-trained base so its weights are not updated during training
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

In [None]:
# 10. Stack a small classification head on top of the pre-trained base model
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  # dropout for regularization
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit for binary classification

In [None]:
# 11. Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
optimizer = Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# 12. Fit the model on the training batches, validating against the validation batches
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=10,  # tune the number of epochs as needed
)

In [None]:
# 13. Measure loss and accuracy on the validation batches
val_loss, val_acc = model.evaluate(val_generator)
accuracy_percent = val_acc * 100
print(f"Validation Accuracy: {accuracy_percent:.2f}%")

In [None]:
# Persist the trained model to disk (the .h5 extension selects the HDF5 format)
model.save(filepath='vgg16_homogeneous_classification.h5')


In [None]:
from tensorflow.keras.models import load_model


In [None]:
# Reload the saved model from disk into a separate variable
model1 = load_model(filepath='vgg16_homogeneous_classification.h5')


In [None]:
# Evaluate the reloaded model directly on the validation arrays
# (this rebinds val_loss from the earlier generator-based evaluation)
val_loss, val_accuracy = model1.evaluate(x=X_val, y=y_val)


In [None]:
# Report the validation loss, and the accuracy as a percentage, one per line
print(
    f'Validation Loss: {val_loss}',
    f'Validation Accuracy: {val_accuracy * 100:.2f}%',
    sep='\n',
)

In [None]:
# Predicted probabilities for the validation images; the model's single sigmoid
# output unit makes this a column of shape (n_samples, 1).
predictions = model.predict(X_val)
# Threshold at 0.5 to get binary labels, then flatten to 1-D so the labels line
# up element-wise with y_val. Left as a column, an expression such as
# (y_val == 0) & (predicted_labels == 0) broadcasts to an (n, n) matrix.
predicted_labels = (predictions >= 0.5).astype(int).ravel()


In [None]:
from sklearn.metrics import classification_report


In [None]:
# labels=[0, 1] pins the report rows to the two target_names (0 -> Heterogeneous,
# 1 -> Homogeneous) and keeps the call from raising when only one class
# happens to appear in y_val and the predictions.
print(classification_report(y_val, predicted_labels, labels=[0, 1], target_names=['Heterogeneous', 'Homogeneous']))


In [None]:

# Custom score = (a_0 / n_0) * (a_1 / n_1): the product of the per-class
# fractions of correctly predicted samples.
# y_val holds the true labels and predicted_labels the predicted labels.

# Flatten both label arrays to 1-D before comparing them. A prediction column of
# shape (n, 1) compared against a 1-D y_val of shape (n,) broadcasts to an
# (n, n) matrix, which silently inflates the a_0 / a_1 counts.
true_labels_flat = np.asarray(y_val).ravel()
predicted_labels_flat = np.asarray(predicted_labels).ravel()
if true_labels_flat.shape != predicted_labels_flat.shape:
    raise ValueError(
        f"y_val has {true_labels_flat.size} labels but predicted_labels has "
        f"{predicted_labels_flat.size}"
    )

# Step 1: Calculate n_0 and n_1
n_0 = np.sum(true_labels_flat == 0)  # Number of true heterogeneous cells
n_1 = np.sum(true_labels_flat == 1)  # Number of true homogeneous cells

# Step 2: Calculate a_0 and a_1
a_0 = np.sum((true_labels_flat == 0) & (predicted_labels_flat == 0))  # Correctly predicted as heterogeneous
a_1 = np.sum((true_labels_flat == 1) & (predicted_labels_flat == 1))  # Correctly predicted as homogeneous

# Step 3: Calculate the score
if n_0 == 0 or n_1 == 0:
    score = 0  # Handle edge cases where there are no samples of a class
else:
    score = (a_0 * a_1) / (n_0 * n_1)

print(f'Score: {score}')
