# Final project FDS

Bone fracture detection on x-ray images using a CNN.

Group members: Aiman Nadeem, Thale Krohn-Pettersen, Kirsten Nord.

## Table of contents
1. [Setup and data preprocessing](#setup)
2. [Model definition](#model)
3. [Training and validation](#training)
4. [Analysis](#analysis)

## Setup and data preprocessing <a name="setup"></a>

- Installing required packages
- Importing necessary libraries
- Downloading dataset and re-splitting the dataset into 80% training, 10% validation and 10% testing.

In [14]:
if True:
    #%pip install kagglehub scikit-learn
    #%pip install tensorflow
    #%pip install pillow
    %pip install matplotlib

Collecting matplotlib
  Using cached matplotlib-3.7.5-cp38-cp38-macosx_11_0_arm64.whl.metadata (5.7 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.1.1-cp38-cp38-macosx_11_0_arm64.whl.metadata (5.9 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.55.1-cp38-cp38-macosx_10_9_universal2.whl.metadata (164 kB)
Collecting kiwisolver>=1.0.1 (from matplotlib)
  Downloading kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl.metadata (6.3 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.1.4-py3-none-any.whl.metadata (5.1 kB)
Collecting importlib-resources>=3.2.0 (from matplotlib)
  Using cached importlib_resources-6.4.5-py3-none-any.whl.metadata (4.0 kB)
Using cached matplotlib-3.7.5-cp38-cp38-macosx_11_0_arm64.whl (7.3 MB)
Using cached contourpy-1.1.1-cp38-cp38-macosx_11_0_arm64.whl (232 kB)
Using cached cycle

In [15]:
# Import the required libraries
import os
import random
import shutil
import kagglehub
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from tensorflow.keras.metrics import Precision, Recall

In [16]:
# Download the dataset from Kaggle
dataset_path = kagglehub.dataset_download("osamajalilhassan/bone-fracture-dataset")
print(f"Dataset downloaded to {dataset_path}")

Dataset downloaded to /Users/thalekp/.cache/kagglehub/datasets/osamajalilhassan/bone-fracture-dataset/versions/1


In [17]:
def is_image_file(filename):
    return filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff'))

def split_dataset(dataset_path, output_path, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1):
    os.makedirs(output_path, exist_ok=True)  # Ensure output directory exists

    splits = ['training', 'testing']
    categories = ['fractured', 'not_fractured']

    for split in splits:
        for category in categories:
            class_dir = os.path.join(dataset_path, split, category)
            
            # Get all image files in the category
            images = [f for f in os.listdir(class_dir) if is_image_file(f)]
            random.shuffle(images)

            # Calculate split sizes
            train_end = int(len(images) * train_ratio)
            val_end = train_end + int(len(images) * val_ratio)

            train_images = images[:train_end]
            val_images = images[train_end:val_end]
            test_images = images[val_end:]

            # Create subdirectories for train, val, test
            for split_name, split_images in zip(
                ['train', 'val', 'test'], [train_images, val_images, test_images]
            ):
                split_dir = os.path.join(output_path, split_name, category)
                os.makedirs(split_dir, exist_ok=True)

                # Copy images to respective split folder
                for image in split_images:
                    src = os.path.join(class_dir, image)
                    dst = os.path.join(split_dir, image)
                    try:
                        shutil.copy(src, dst)
                    except PermissionError as e:
                        print(f"Skipping {src} due to permission error: {e}")
                        
    print(f"Dataset split into train, validation, and test sets at {output_path}")

# Perform the dataset split
dataset_path = dataset_path + "/BoneFractureDataset"
output_path = "data"
split_dataset(dataset_path, output_path)

Dataset split into train, validation, and test sets at data


In [18]:
def count_images_in_split(split_path):
    # Count the number of image files in a given split directory (train, val, or test)
    total_images = 0
    for category in ['fractured', 'not_fractured']:
        category_path = os.path.join(split_path, category)
        if os.path.exists(category_path):
            category_images = os.listdir(category_path)
            total_images += len(category_images)
    return total_images

def verify_split(output_path):
    # Verify the directory structure and the number of images in each split
    splits = ['train', 'val', 'test']
    print("Verifying dataset split...")

    for split in splits:
        split_path = os.path.join(output_path, split)
        if os.path.exists(split_path):
            print(f"Number of images in {split} split:")
            num_images = count_images_in_split(split_path)
            print(f"  Total images in {split}: {num_images}")
        else:
            print(f"  No {split} directory found!")

# Perform the verification
verify_split(output_path)

Verifying dataset split...
Number of images in train split:
  Total images in train: 9461
Number of images in val split:
  Total images in val: 3871
Number of images in test split:
  Total images in test: 3904


## Model definition <a name="model"></a>
...

In [19]:
# Code here
base_dir = output_path
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

In [20]:
train_datagen = ImageDataGenerator(
    rescale=1.0/255,  # Normalize pixel values to [0, 1]
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_test_datagen = ImageDataGenerator(rescale=1.0/255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),  # Resize images to 150x150
    batch_size=32,
    class_mode='binary'
)

val_generator = val_test_datagen.flow_from_directory(
    val_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)


Found 9461 images belonging to 2 classes.
Found 3871 images belonging to 2 classes.
Found 3904 images belonging to 2 classes.


In [21]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification: fractured or not
])


In [22]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(), Recall()]
)


## Training and validation <a name="training"></a>
...

In [None]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)




In [24]:
model.save('fracture_classifier.h5')

  saving_api.save_model(


## Analysis <a name="analysis"></a>
...

In [26]:
loss, acc, precision, recall  = model.evaluate(test_generator)
f1_score = 2 * (precision * recall) / (precision + recall + 1e-7)
print(f"Accuracy: {acc:.2f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")

Accuracy: 0.61
Precision: 0.5837
Recall: 0.6977
F1 Score: 0.6356


In [None]:
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
