In [None]:
#Import Packages

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical
import os 
import json
import pandas as pd
import shutil
import tensorflow as tf
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.applications import VGG16
import matplotlib.pyplot as plt

### Clean and Process Data

In [None]:

# Load the annotation JSON.
# Raw string: in a normal literal the '\t' and '\a' sequences would be read as
# TAB and BEL escapes and silently corrupt the path.
annotations_file = r'path\to\annotations'
with open(annotations_file, 'r') as file:
    annotations = json.load(file)

# Parse annotations into a flat list with one record per annotated region
data_directory = r'\path\to\directory'
data = []
for filename, file_data in annotations.items():

    # The annotation key is mapped to a PNG name by keeping everything before
    # its first '.' and appending '.png'.
    png_name = filename.split('.')[0] + '.png'
    image_path = os.path.join(data_directory, png_name)

    # Only the dimensions are needed; the context manager releases the file
    # handle instead of leaving one open per annotated image.
    with Image.open(image_path) as image:
        width, height = image.size

    for region in file_data['regions']:

        shape_attrs = region['shape_attributes']
        region_attrs = region['region_attributes']
        record = {
            'filename': png_name,
            'name': shape_attrs['name'],
            'width': width,
            'height': height,
            'all_points_x': shape_attrs['all_points_x'],
            'all_points_y': shape_attrs['all_points_y'],
            'Type': region_attrs['Type']
        }

        # Don't include duplicates
        if record not in data:
            data.append(record)

In [None]:
def extract_regions(data, images_dir, output_dir):
    """Cut every annotated polygon out of its image and save it per label.

    Args:
        data: iterable of dicts with the keys 'filename', 'Type',
            'all_points_x' and 'all_points_y'.
        images_dir: directory the 'filename' entries are resolved against.
        output_dir: root output directory; created if missing. Each crop is
            written to a sub-directory named after the record's 'Type'.

    Pixels outside the polygon are black in the saved crop. Records whose
    image cannot be read, whose polygon is empty, or whose bounding box lies
    outside the image are reported and skipped.
    """
    os.makedirs(output_dir, exist_ok=True)

    for img in data:

        filename = img['filename']
        image_path = os.path.join(images_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Image {filename} not found.")
            continue

        label = img['Type']
        all_points_x = img['all_points_x']
        all_points_y = img['all_points_y']
        points = np.array(list(zip(all_points_x, all_points_y)), dtype=np.int32)
        if points.size == 0:
            # Nothing to fill or bound; skip instead of failing inside OpenCV.
            print(f"Skipping region in {filename}: no polygon points.")
            continue

        # Create a mask that is 255 inside the polygon and 0 elsewhere
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, [points], 255)

        # Extract the region using the mask
        masked_image = cv2.bitwise_and(image, image, mask=mask)

        # Find bounding box and crop region. Slice bounds are clamped at 0
        # because a negative index would wrap around to the far image edge.
        x, y, w, h = cv2.boundingRect(points)
        top, left = max(y, 0), max(x, 0)
        bottom, right = max(y + h, 0), max(x + w, 0)
        cropped_image = masked_image[top:bottom, left:right]
        if cropped_image.size == 0:
            print(f"Skipping region in {filename}: bounding box is outside the image.")
            continue

        # Save the extracted region
        label_dir = os.path.join(output_dir, label)
        os.makedirs(label_dir, exist_ok=True)

        # The name embeds the source filename (including its extension) and
        # the bounding-box origin; regions of one image that share an origin
        # map to the same file.
        roi_filename = os.path.join(label_dir, f"{filename}_{x}_{y}.png")
        if cv2.imwrite(roi_filename, cropped_image):
            print(f"Saved ROI to {roi_filename}")
        else:
            # imwrite reports failure through its return value
            print(f"Failed to save ROI to {roi_filename}")

# Directory paths
# Raw strings keep the backslashes literal; in a normal literal '\t' would be
# read as a TAB character and corrupt the path.
images_dir = r'path\to\directory'
output_dir = r'path\to\output'

# Write one cropped image per annotated region, grouped by label
extract_regions(data, images_dir, output_dir)

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define paths
# Raw strings keep the backslashes literal; in a normal literal '\t' would be
# read as a TAB character and corrupt the path.
dataset_dir = r'path\to\dataset'
output_dir = r'path\to\train_test_val'
train_dir = os.path.join(output_dir, 'train')
val_dir = os.path.join(output_dir, 'val')
test_dir = os.path.join(output_dir, 'test')

# Create output directories if they do not exist
for dir_path in (train_dir, val_dir, test_dir):
    os.makedirs(dir_path, exist_ok=True)

# Function to split and copy data
def split_and_copy_data(class_dir, train_dir, val_dir, test_dir, val_size=0.2, test_size=0.1, random_state=42):
    """Split one class folder into train/val/test and copy the files over.

    Args:
        class_dir: directory holding the images of a single class; its last
            path component is used as the class name.
        train_dir, val_dir, test_dir: roots of the three splits. A
            sub-directory named after the class is created in each of them.
        val_size: fraction of the class assigned to validation.
        test_size: fraction of the class assigned to test.
        random_state: seed passed to both train_test_split calls so that
            re-running produces the same split.

    Classes too small to split are not an error: a single image goes to
    train, and a single held-out image goes to validation.
    """
    # Sorted so the seeded split does not depend on directory listing order
    images = sorted(
        f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))
    )

    # normpath drops a trailing separator, which would make basename return ''
    class_name = os.path.basename(os.path.normpath(class_dir))

    # Create class directories in train, val, test directories
    for split_dir in (train_dir, val_dir, test_dir):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

    # First split: train versus everything held out (val + test)
    holdout_size = val_size + test_size
    if len(images) < 2:
        train_images, temp_images = images, []
    else:
        train_images, temp_images = train_test_split(
            images, test_size=holdout_size, random_state=random_state
        )

    # Second split: divide the held-out part between val and test, keeping
    # the requested val:test proportion
    if len(temp_images) < 2:
        val_images, test_images = temp_images, []
    else:
        val_images, test_images = train_test_split(
            temp_images, test_size=test_size / holdout_size, random_state=random_state
        )

    # Copy images to respective directories
    for split_dir, split_images in (
        (train_dir, train_images),
        (val_dir, val_images),
        (test_dir, test_images),
    ):
        for image in split_images:
            shutil.copy(os.path.join(class_dir, image), os.path.join(split_dir, class_name, image))

# Every sub-directory of the dataset root is treated as one class and split;
# plain files at the top level are ignored.
for class_name in os.listdir(dataset_dir):
    class_dir = os.path.join(dataset_dir, class_name)
    if not os.path.isdir(class_dir):
        continue
    split_and_copy_data(class_dir, train_dir, val_dir, test_dir)




### Train Model (VGG 16)

In [None]:

# Directories
# Raw strings without a trailing backslash: a backslash directly before the
# closing quote escapes the quote and leaves the literal unterminated.
train_dir = r'path\to\train_test_val\train'
validation_dir = r'path\to\train_test_val\val'

# Image Data Generator with Data Augmentation (training only)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
# Validation images are only rescaled so the metrics reflect unmodified data
validation_datagen = ImageDataGenerator(rescale=1./255)

#Create generators (adjust size if needed)
train_generator = train_datagen.flow_from_directory(train_dir, target_size=(64, 64), batch_size=32, class_mode='categorical')
validation_generator = validation_datagen.flow_from_directory(validation_dir, target_size=(64, 64), batch_size=32, class_mode='categorical')

# Callbacks: multiply the learning rate by 0.2 after 5 epochs without
# val_loss improvement; stop after 10 such epochs and restore the best weights
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Model Building
# Convolutional base: VGG16 without its classifier head. The input shape is
# taken from the training generator so it always matches the target_size the
# images are resized to.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=train_generator.image_shape)
# NOTE(review): the base is frozen so only the new head is trained; set
# trainable = True to fine-tune the convolutional layers as well.
base_model.trainable = False

model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    # One output unit per class directory found by the generator
    Dense(train_generator.num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Training: at most 50 epochs, validated after each epoch; the callbacks may
# lower the learning rate or end the run early.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    callbacks=[reduce_lr, early_stopping],
)

# Persist the trained model to disk
model.save('model_name.keras')

In [None]:
# Raw string without a trailing backslash: a backslash directly before the
# closing quote escapes the quote and leaves the literal unterminated.
test_dir = r'path\to\train_test_val\test'

# Create an ImageDataGenerator for the test set (no augmentation needed for testing).
# The rescale factor must equal the one used for training (1/255).
test_datagen = ImageDataGenerator(rescale=1./255)
# Define sizes (adjust if needed)
image_height=64
batch_size=32
image_width=64
# Create a test generator
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(image_height, image_width),  # Specify the same size as used during training
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False  # keep file order so predictions line up with .classes
)
# Predict the class probabilities for the test set.
# len(test_generator) is the exact number of batches; 'samples // batch_size + 1'
# asks for one batch too many whenever the sample count is a multiple of the
# batch size.
predictions = model.predict(test_generator, steps=len(test_generator))

# Get the true and predicted labels
true_labels = test_generator.classes
predicted_labels = np.argmax(predictions, axis=1)

# Class names and their integer ids, in the generator's index order
class_names = list(test_generator.class_indices.keys())
class_ids = list(test_generator.class_indices.values())

# Print the classification report. Passing labels explicitly keeps the report
# aligned with target_names even if a class has no test samples.
print(classification_report(true_labels, predicted_labels, labels=class_ids, target_names=class_names))

# Print the confusion matrix (rows: true class, columns: predicted class)
print(confusion_matrix(true_labels, predicted_labels, labels=class_ids))

### Predict Other Images 

In [None]:
# Reload the model saved by the training step.
# load_model is reached through the already-imported tf namespace; the bare
# name is not imported anywhere in this notebook.
model_path = 'model_name.keras'
model = tf.keras.models.load_model(model_path)

In [None]:
# Raw string without a trailing backslash: a backslash directly before the
# closing quote escapes the quote and leaves the literal unterminated.
test_dir = r'path\to\train_test_val\test'

# Create an ImageDataGenerator for the test set (no augmentation needed for testing)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Create a test generator (adjust size if needed)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(64,64),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

# Invert the name -> index mapping so a predicted index can be turned back
# into its class name
class_labels = {v: k for k, v in test_generator.class_indices.items()}

In [None]:
def load_and_preprocess_image(img_path, target_size):
    """Load one image as a model-ready batch of size 1.

    Args:
        img_path: path of the image file.
        target_size: (height, width) the image is resized to.

    Returns:
        Float array of shape (1, height, width, 3) scaled by 1/255, the same
        rescaling the data generators in this notebook apply.
    """
    img = tf.keras.utils.load_img(img_path, target_size=target_size)
    img_array = tf.keras.utils.img_to_array(img) / 255.0
    # The model expects a leading batch dimension
    return np.expand_dims(img_array, axis=0)


# Raw string keeps the backslashes literal ('\t' would otherwise become a TAB)
image_folder = r'path\to\images'
# Must match the target_size the model was trained with (adjust if needed)
target_size = (64, 64)

results = []
for img_name in os.listdir(image_folder):
    img_path = os.path.join(image_folder, img_name)
    if os.path.isfile(img_path):
        # Load and preprocess the image
        img_array = load_and_preprocess_image(img_path, target_size)

        # Predict the class probabilities
        predictions = model.predict(img_array)

        # Get the predicted class index
        predicted_class_index = np.argmax(predictions, axis=1)[0]

        # Get the predicted class label
        predicted_class_label = class_labels[predicted_class_index]

        # Store the result
        results.append((img_name, predicted_class_label))

        print(f"Image: {img_name}, Predicted class: {predicted_class_label}")