<a href="https://colab.research.google.com/github/cadenlpicard/bacterial_morphology_classification/blob/main/Bacterial_Morphology_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow keras numpy opencv-python


In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, classification_report

# Define paths
# All locations live under /content/drive/MyDrive (presumably a mounted Google
# Drive in Colab -- no mount call is visible in this cell, so confirm the drive
# is mounted before running).
# train_dir / val_dir are read as "one sub-folder per class" directories;
# test_dir holds the images named in the file at test_filenames_path
# (one filename per line, joined onto test_dir when predicting).
train_dir = '/content/drive/MyDrive/Machine Learning/Competitions/bacteria_classification/split_dataset/split_dataset/train'
val_dir = '/content/drive/MyDrive/Machine Learning/Competitions/bacteria_classification/split_dataset/split_dataset/validation'
test_dir = '/content/drive/MyDrive/Machine Learning/Competitions/bacteria_classification/split_dataset/split_dataset/test'
test_filenames_path = '/content/drive/MyDrive/Machine Learning/Competitions/bacteria_classification/split_dataset/test_filenames.txt'

# Labels
# Maps each class folder name to the integer label written to
# training_labels.txt; the keys are also used as display names in the
# classification report.
# NOTE(review): Keras' flow_from_directory assigns class indices in
# alphanumeric folder order unless `classes=` is passed, which would give
# bacilli=0, cocci=1, spirilla=2 -- not this numbering. Confirm which mapping
# applies wherever model output indices are interpreted.
LABEL_MAP = {'cocci': 0, 'bacilli': 1, 'spirilla': 2}

# Step 1: Generate training_labels.txt based on the folder structure
# Every image found in train_dir/<class_name>/ becomes one "<filename>,<label>"
# line, with the label taken from LABEL_MAP.
training_labels_path = '/content/drive/MyDrive/Machine Learning/Competitions/bacteria_classification/training_labels.txt'
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
with open(training_labels_path, 'w', encoding='utf-8') as f:
    for class_name, label in LABEL_MAP.items():
        class_dir = os.path.join(train_dir, class_name)
        # os.listdir returns entries in arbitrary order; sort so the label
        # file is identical from run to run.
        for filename in sorted(os.listdir(class_dir)):
            # Compare case-insensitively so files such as "IMG_01.JPG" or
            # "scan.jpeg" are not silently left out of the label file.
            if filename.lower().endswith(IMAGE_EXTENSIONS):
                f.write(f"{filename},{label}\n")
print(f"Training labels saved to {training_labels_path}")

# Model parameters
IMG_SIZE = (128, 128)  # (height, width) every image is resized to
BATCH_SIZE = 32
EPOCHS = 10

# Load data with ImageDataGenerator
# Pixel values are only rescaled from [0, 255] to [0, 1]; no augmentation.
train_datagen = ImageDataGenerator(rescale=1.0/255)
val_datagen = ImageDataGenerator(rescale=1.0/255)

# Without `classes=`, flow_from_directory numbers the class folders in
# alphanumeric order (bacilli=0, cocci=1, spirilla=2), which contradicts
# LABEL_MAP (cocci=0, bacilli=1, spirilla=2). Passing the class names in
# LABEL_MAP order makes the one-hot targets -- and therefore the model's output
# indices -- use exactly the label ids defined in LABEL_MAP.
CLASS_NAMES = list(LABEL_MAP)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,
    class_mode='categorical',
    shuffle=True
)

# Validation data is not shuffled so that predictions made from this generator
# line up with `validation_generator.classes`.
validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    classes=CLASS_NAMES,
    class_mode='categorical',
    shuffle=False
)

# Model definition
# A small CNN: three convolution + max-pooling stages with 32, 64 and 128
# filters, followed by a 128-unit dense layer with dropout and a 3-way
# softmax output.
model = Sequential()

# Stage 1 (also declares the RGB input shape)
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                 input_shape=(*IMG_SIZE, 3)))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Stage 2
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Stage 3
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Classifier head
model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(units=3, activation='softmax'))

# Categorical cross-entropy pairs with the one-hot targets produced by
# class_mode='categorical'.
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit the model on the training generator for EPOCHS epochs, using the
# validation generator as validation data.
print("Starting Training...")
model.fit(
    x=train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
)

# Save the trained model (model.save stores the whole model, not just the
# weights) to an .h5 file on Drive.
model_save_path = '/content/drive/MyDrive/Machine Learning/Competitions/bacteria_classification/bacteria_classification_model.h5'
model.save(filepath=model_save_path)
print(f"Model saved to {model_save_path}")

# Step 2: Predict labels on test set
# Load test filenames, skipping blank lines (e.g. a trailing newline): an empty
# name would resolve to test_dir itself and make load_img fail.
with open(test_filenames_path, 'r') as f:
    test_filenames = [line.strip() for line in f if line.strip()]

# The model's output index is the training generator's class index, which is
# not necessarily LABEL_MAP's numbering (flow_from_directory orders class
# folders alphanumerically unless told otherwise). Translate
# output index -> class name -> LABEL_MAP label explicitly.
index_to_label = {
    index: LABEL_MAP[class_name]
    for class_name, index in train_generator.class_indices.items()
}

# Prepare predictions list
predictions = []

print("Predicting on Test Set...")
# Predict in batches: one model.predict call per image is slow and, at the
# default verbosity, prints its own progress bar for every image.
for start in tqdm(range(0, len(test_filenames), BATCH_SIZE), desc="Predicting"):
    batch_filenames = test_filenames[start:start + BATCH_SIZE]

    # Same preprocessing as the training generators: resize, scale to [0, 1].
    batch = np.stack([
        img_to_array(load_img(os.path.join(test_dir, filename), target_size=IMG_SIZE)) / 255.0
        for filename in batch_filenames
    ])

    # Predict
    batch_probabilities = model.predict(batch, verbose=0)
    batch_indices = np.argmax(batch_probabilities, axis=1)

    # Append predicted labels (LABEL_MAP values: 0, 1, or 2)
    predictions.extend(index_to_label[int(index)] for index in batch_indices)

# Save predictions to CSV file
predictions_save_path = '/content/drive/MyDrive/Machine Learning/Competitions/bacteria_classification/predictions.csv'
predictions_data = list(zip(test_filenames, predictions))  # Combine filenames and predictions

# Create a DataFrame and save as CSV
predictions_df = pd.DataFrame(predictions_data, columns=['filename', 'label'])
predictions_df.to_csv(predictions_save_path, index=False)

print(f"Predictions saved to {predictions_save_path}")

# Step 3: Evaluate the model on the validation set
print("Evaluating on Validation Set...")
# The validation generator was created with shuffle=False, so the order of
# `classes` matches the order in which predict() sees the images.
val_labels = validation_generator.classes  # True labels for the validation set
val_predictions = model.predict(validation_generator)
val_pred_classes = np.argmax(val_predictions, axis=1)  # Predicted labels

# Take class names and ids from the generator itself rather than from
# LABEL_MAP: the generator's index order (alphanumeric by default) is what
# val_labels and the model outputs actually use, so this keeps every report
# row attached to the right class name.
val_class_indices = validation_generator.class_indices
val_class_names = sorted(val_class_indices, key=val_class_indices.get)
val_class_ids = [val_class_indices[name] for name in val_class_names]

# Calculate accuracy and display classification report
val_accuracy = accuracy_score(val_labels, val_pred_classes)
# Passing `labels` explicitly keeps names and rows aligned even when a class
# is absent from both the true and the predicted labels.
classification_rep = classification_report(
    val_labels,
    val_pred_classes,
    labels=val_class_ids,
    target_names=val_class_names,
)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print("Classification Report:\n", classification_rep)
