In [3]:
import os
import shutil
import random
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models

# Define paths
# The dataset root can be overridden with the PHAGE_DATASET_DIR environment
# variable so the notebook is not tied to one machine's directory layout.
# When the variable is unset (or empty) the original location is used.
dataset_folder = os.environ.get('PHAGE_DATASET_DIR') or (
    'C:\\Users\\Alexandra\\Documents\\SPRING_2024\\BMEN_689\\Project_2\\Jupyter_Notebook\\ResizedPNG_Phage_EMs\\'
)

# Names of the output sub-folders created inside the dataset folder. They are
# skipped when scanning for classes so that re-running the split does not treat
# its own output as additional classes.
_SPLIT_FOLDER_NAMES = ('train', 'validation', 'test')


def _copy_and_normalize(file_names, src_folder, dst_folder):
    """Copy each named file from src_folder into dst_folder, then call normalize_image on the copy."""
    for file_name in file_names:
        dst_path = os.path.join(dst_folder, file_name)
        shutil.copy(os.path.join(src_folder, file_name), dst_path)
        normalize_image(dst_path)


# Function to split dataset into train/validation/test and normalize images
def split_and_normalize_dataset(data_folder, train_size=0.6, val_size=0.2, test_size=0.2):
    """Split every class folder of a dataset into train/validation/test copies.

    Each sub-directory of ``data_folder`` is treated as one class. Its files are
    split with ``train_test_split`` (fixed ``random_state=42``) and copied to
    ``<data_folder>/train/<class>``, ``<data_folder>/validation/<class>`` and
    ``<data_folder>/test/<class>``; ``normalize_image`` is called on every copy.
    The original files are left in place.

    Args:
        data_folder: Directory that contains one sub-directory per class.
        train_size: Accepted for backward compatibility but not used; the
            training set is whatever remains after ``val_size + test_size``
            has been held out.
        val_size: Fraction of each class assigned to the validation set.
        test_size: Fraction of each class assigned to the test set.

    Raises:
        ValueError: Propagated from ``train_test_split`` when a class has too
            few files to be split with the requested fractions.
    """
    holdout_fraction = val_size + test_size

    # Sorted listings make the split reproducible: os.listdir returns entries
    # in arbitrary order, which would otherwise defeat the fixed random_state.
    for class_folder in sorted(os.listdir(data_folder)):
        # Skip the output folders of a previous run and hidden folders such as
        # notebook checkpoint directories; neither is a class.
        if class_folder in _SPLIT_FOLDER_NAMES or class_folder.startswith('.'):
            continue
        class_path = os.path.join(data_folder, class_folder)
        if not os.path.isdir(class_path):
            continue

        # Only regular files can be copied; nested directories are ignored.
        files = sorted(
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        if not files:
            continue

        train_files, holdout_files = train_test_split(
            files, test_size=holdout_fraction, random_state=42)
        val_files, test_files = train_test_split(
            holdout_files, test_size=test_size / holdout_fraction, random_state=42)

        # Create the per-class output folder for each split and fill it.
        for split_name, split_files in (('train', train_files),
                                        ('validation', val_files),
                                        ('test', test_files)):
            split_folder = os.path.join(data_folder, split_name, class_folder)
            os.makedirs(split_folder, exist_ok=True)
            _copy_and_normalize(split_files, class_path, split_folder)

# Function to normalize image
def normalize_image(image_path):
    """Load an image and return its pixel values scaled to the range [0, 1].

    The file on disk is deliberately left untouched. Writing the float32
    [0, 1] array back with ``cv2.imwrite`` to an 8-bit image format converts
    it to unsigned 8-bit, so every pixel would end up as 0 or 1 and the image
    content would be lost. The scaled values therefore only exist in memory;
    apply the same 1/255 scaling wherever the files are loaded for training.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image as a float32 array with values between 0 and 1.

    Raises:
        ValueError: If ``cv2.imread`` cannot read the file (it returns ``None``
            for missing or undecodable files instead of raising).
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")
    # Normalize pixel values between 0 and 1
    return image.astype('float32') / 255.0

# Run the split on the dataset folder: each class folder's images are copied into
# train/validation/test sub-folders and normalize_image is applied to every copy
split_and_normalize_dataset(data_folder=dataset_folder)

# Define the CNN model
def create_large_cnn(input_shape, num_classes):
    """Build (without compiling) a sequential CNN classifier.

    The network consists of six Conv2D (3x3, ReLU) + MaxPooling2D (2x2) stages,
    a Flatten layer, three Dense (ReLU) layers each followed by Dropout(0.5),
    and a final softmax Dense layer with ``num_classes`` units.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv2D layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The assembled ``models.Sequential`` instance.
    """
    conv_filters = (32, 64, 128, 128, 256, 256)
    dense_units = (512, 256, 128)

    network = models.Sequential()

    # Convolutional stages; only the very first layer is given the input shape
    for stage, filters in enumerate(conv_filters):
        conv_options = {'activation': 'relu'}
        if stage == 0:
            conv_options['input_shape'] = input_shape
        network.add(layers.Conv2D(filters, (3, 3), **conv_options))
        network.add(layers.MaxPooling2D((2, 2)))

    # Collapse the feature maps into a vector for the dense head
    network.add(layers.Flatten())

    # Fully connected head, with dropout after every hidden layer for regularization
    for units in dense_units:
        network.add(layers.Dense(units, activation='relu'))
        network.add(layers.Dropout(0.5))

    # Softmax output for multi-class classification
    network.add(layers.Dense(num_classes, activation='softmax'))

    return network

# Model configuration
num_classes = 3  # one softmax output unit per category
input_shape = (5056, 4224, 3)  # last entry is the channel count: RGB images are assumed

# Build the network, then configure it for training
model = create_large_cnn(input_shape=input_shape, num_classes=num_classes)
model.compile(
    loss='categorical_crossentropy',  # multi-class loss
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
model.summary()

KeyboardInterrupt: 

In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# NOTE: this cell relies on names created by the setup cell above (`os`,
# `dataset_folder`, `model`). Run that cell first in the same kernel session,
# otherwise this cell fails with a NameError.

# Data is expected in per-class subdirectories under 'train', 'validation', and 'test'
train_dir = os.path.join(dataset_folder, 'train')
validation_dir = os.path.join(dataset_folder, 'validation')
test_dir = os.path.join(dataset_folder, 'test')

# Image files are decoded to 0-255 pixel values, so the scaling to [0, 1]
# has to be applied here, at load time.
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Note: Adjust batch size and target size as necessary
# NOTE(review): one batch of 32 float32 images at this resolution is about
# 8 GB of input data alone; lower batch_size (or target_size together with the
# model's input_shape) if memory runs out.
batch_size = 32
target_size = (5056, 4224)  # Same as the input_shape of the model without the channel dimension

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical')  # Use 'categorical' for multi-class classification

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical')

# Train the model
# No explicit step counts are passed: the directory iterators know their own
# length (including the final partial batch). `samples // batch_size` would
# evaluate to 0 whenever a split holds fewer images than one batch, and would
# otherwise silently drop the last partial batch.
epochs = 10  # Start with a small number of epochs and adjust based on your dataset size and computational resources
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator)

# Evaluate the model on the test data
test_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)  # keep a fixed order so evaluation is repeatable

test_loss, test_acc = model.evaluate(test_generator)
print(f"Test accuracy: {test_acc*100:.2f}%")





NameError: name 'os' is not defined