<a href="https://colab.research.google.com/github/amitgal21/Final_Project/blob/main/Predict_Bacteria_Type.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This part of the code predicts the bacterial type shown in an image using a deep-learning classifier. Segmentation images are not used here, because this step performs a prediction (classification) rather than an analysis of image statistics.

This code demonstrates the process of preparing and training an image classification model using TensorFlow and Keras, with the VGG16 architecture as the base. Initially, it loads and processes images from a given directory, resizing each image to a standard size and normalizing them (scaling the pixel values of each image to a range between 0 and 1). The labels for the images are derived from the names of the folders containing them and are then converted to a numeric format using LabelEncoder and subsequently translated into a one-hot format to fit the model's requirements for classification. Additionally, the code performs a split of the data into training and validation sets and creates a data augmentation configuration to increase the diversity of the training data and prevent overfitting.

After preparing the data, the code defines a model with VGG16 as the non-trainable base (meaning its pretrained weights remain fixed) and adds a new classification head comprising a global average pooling layer, dense layers, dropout layers to prevent overfitting, and an output layer with a softmax activation function that assigns each image to one of the classes. The model is compiled with the Adam optimizer and categorical cross-entropy loss, which is typical for multi-class classification problems. Finally, the model is trained on data flows generated by ImageDataGenerator (the training flow applies the augmentation configuration, while the validation flow is left unaugmented) and is ultimately saved for future use.








In [None]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import json
from sklearn.model_selection import train_test_split

def load_images_and_labels(data_dir, target_size=(224, 224)):
    """Load every non-segmentation ``.tif`` image found under *data_dir*.

    The tree is walked recursively in sorted order so that the returned
    samples come back in the same order on every run and every filesystem;
    a seeded train/validation split downstream is only reproducible if the
    input order is.

    Args:
        data_dir: Root directory to scan. Each image is labelled with the
            name of the folder that directly contains it.
        target_size: Size forwarded to ``load_img`` for resizing.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a NumPy array built
        from the per-image arrays (pixel values divided by 255) and
        ``labels`` is a list of folder names, one per image. When no
        matching file exists, ``images`` is an empty array and ``labels``
        an empty list.
    """
    images = []
    labels = []
    for subdir, dirs, files in os.walk(data_dir):
        # os.walk yields entries in arbitrary filesystem order; sorting
        # ``dirs`` in place also fixes the order in which os.walk descends.
        dirs.sort()
        label = os.path.basename(subdir)
        for file in sorted(files):
            lowered = file.lower()
            # Only raw .tif images are used; segmentation masks are skipped.
            if not lowered.endswith('.tif') or '_segmentation' in lowered:
                continue
            filepath = os.path.join(subdir, file)
            img = tf.keras.preprocessing.image.load_img(filepath, target_size=target_size)
            img = tf.keras.preprocessing.image.img_to_array(img)
            img /= 255.0  # Normalize images
            images.append(img)
            labels.append(label)
    return np.array(images), labels

# ---------------------------------------------------------------------------
# Training pipeline: load the data, encode the labels, build a frozen-VGG16
# classifier, train it and save both the model and the class-index mapping.
# ---------------------------------------------------------------------------

# One batch size shared by both generators so the two flows cannot drift apart.
BATCH_SIZE = 32

data_dir = '/content/drive/MyDrive/Part_B/Datset3/Learn'
images, labels = load_images_and_labels(data_dir)

# Folder names -> integer ids -> one-hot rows (required by categorical_crossentropy).
le = LabelEncoder()
labels_enc = le.fit_transform(labels)
labels_enc = to_categorical(labels_enc, num_classes=len(le.classes_))

# Persist the class-name -> index mapping so the prediction step can translate
# the model's output index back into a class name.
class_indices = {class_label: index for index, class_label in enumerate(le.classes_)}
class_indices_file_path = '/content/drive/MyDrive/Part_B/Trained_Models/class_indices.json'
# Make sure the output folder exists before anything is written into it.
os.makedirs(os.path.dirname(class_indices_file_path), exist_ok=True)
with open(class_indices_file_path, 'w', encoding='utf-8') as file:
    json.dump(class_indices, file)

images_train, images_val, labels_train, labels_val = train_test_split(images, labels_enc, test_size=0.2, random_state=42)

# Augmentation is applied to the training data only.
train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

# Validation images are passed through unchanged.
val_datagen = ImageDataGenerator()

train_generator = train_datagen.flow(images_train, labels_train, batch_size=BATCH_SIZE)
validation_generator = val_datagen.flow(images_val, labels_val, batch_size=BATCH_SIZE)

base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze base model

# Classification head on top of the frozen convolutional base.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(len(le.classes_), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
# ``lr`` is a deprecated alias that newer Keras releases reject;
# ``learning_rate`` is the supported argument name.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

# len(generator) is the number of batches per pass, including the final
# partial batch. The previous ``len(data) // 32`` dropped that batch and
# evaluated to 0 steps whenever a split held fewer than 32 samples.
model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=20)  # Increased number of epochs

model_save_path = '/content/drive/MyDrive/Part_B/Trained_Models/vgg16_model_improved.h5'
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
model.save(model_save_path)

print("Model saved successfully at", model_save_path)


In [None]:
import os
import numpy as np
import json
from keras.models import load_model
from keras.preprocessing.image import load_img, img_to_array

def preprocess_image(image, target_size=(224, 224)):
    """Load one image file and turn it into a single-item batch for the model.

    The steps mirror the preprocessing used when the training data was
    loaded: read the file with ``load_img``, convert it with
    ``img_to_array`` and divide the pixel values by 255.

    Args:
        image: Path of the image file; forwarded to ``load_img``.
        target_size: Size forwarded to ``load_img`` for resizing.

    Returns:
        The image array with a leading batch axis of length 1 added.
    """
    # load_img defaults to color_mode='rgb', so the loaded image always has
    # three channels and no separate grayscale-to-RGB handling is needed.
    img = load_img(image, target_size=target_size)
    img = img_to_array(img)
    img /= 255.0
    return np.expand_dims(img, axis=0)

# Load the trained model
model_path = '/content/drive/MyDrive/Part_B/Trained_Models/vgg16_model_improved.h5'
model = load_model(model_path)

# Load class indices (class name -> index) and invert them so the model's
# output index can be translated back into a class name.
class_indices_file_path = '/content/drive/MyDrive/Part_B/Trained_Models/class_indices.json'
with open(class_indices_file_path, 'r') as file:
    class_indices = json.load(file)
index_to_class = {v: k for k, v in class_indices.items()}

root_path = '/content/drive/MyDrive/Part_B/Datset3/dataset4'

# Iterate over all directories and subdirectories
for subdir, dirs, files in os.walk(root_path):
    # Sort in place so folders and files are visited in a stable order.
    dirs.sort()
    for filename in sorted(files):
        lowered = filename.lower()
        # Same filter as the training loader: accept .tif in any letter case
        # and skip segmentation masks, which the classifier was never
        # trained on.
        if not lowered.endswith('.tif') or '_segmentation' in lowered:
            continue
        image_path = os.path.join(subdir, filename)
        new_image = preprocess_image(image_path)
        # verbose=0 keeps the per-image progress bar from burying the
        # prediction lines printed below.
        predictions = model.predict(new_image, verbose=0)
        # Convert to a plain int so the lookup key has the same type as the
        # indices read from the JSON file.
        predicted_class_index = int(np.argmax(predictions, axis=1)[0])
        predicted_class_name = index_to_class[predicted_class_index]
        print(f"File: {filename}, Predicted class: {predicted_class_name} in folder {os.path.basename(subdir)}")
