# Final project FDS

Bone fracture detection on X-ray images using a convolutional neural network (CNN).

Group members: Aiman Nadeem, Thale Krohn-Pettersen, Kirsten Nord.

## Table of contents
1. [Setup and data preprocessing](#setup)
2. [Model definition](#model)
3. [Training and validation](#training)
4. [Analysis](#analysis)

## Setup and data preprocessing <a name="setup"></a>

- Installing required packages
- Importing necessary libraries
- Downloading the dataset and re-splitting it into 80% training, 10% validation and 10% testing.

In [1]:
if True:
    %pip install kagglehub scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import the required libraries
import os
import random
import shutil
import kagglehub

In [3]:
# Fetch the bone-fracture dataset through kagglehub and report where the
# downloaded files ended up on disk.
dataset_path = kagglehub.dataset_download(
    "osamajalilhassan/bone-fracture-dataset"
)
print(f"Dataset downloaded to {dataset_path}")

Dataset downloaded to C:\Users\aiman\.cache\kagglehub\datasets\osamajalilhassan\bone-fracture-dataset\versions\1


In [None]:
def is_image_file(filename):
    """Return True if ``filename`` ends with a known image extension.

    The comparison is case-insensitive, so ``"scan.PNG"`` is accepted.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    lowered = filename.lower()
    return any(lowered.endswith(suffix) for suffix in image_suffixes)

def split_dataset(dataset_path, output_path, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1, seed=42):
    """Copy the dataset's images into train/val/test folders under ``output_path``.

    Every ``<dataset_path>/<training|testing>/<fractured|not_fractured>`` folder
    is shuffled and partitioned on its own, and the resulting pieces are copied
    to ``<output_path>/<train|val|test>/<category>``.

    Args:
        dataset_path: Root folder containing the ``training`` and ``testing``
            folders of the original dataset.
        output_path: Destination root; created if it does not exist.
        train_ratio: Fraction of each source folder copied to ``train``.
        val_ratio: Fraction of each source folder copied to ``val``.
        test_ratio: Expected fraction for ``test``. The test split always
            receives whatever is left after train and val (so no image is
            dropped by rounding); a warning is issued when the three ratios do
            not sum to 1.
        seed: Seed for the shuffle. With a fixed seed the same files land in the
            same split on every run, so re-running the cell does not scatter an
            image across several splits. Pass ``None`` to get a different
            split on each call.

    Raises:
        ValueError: If a ratio is negative or ``train_ratio + val_ratio``
            exceeds 1.

    Note:
        Files already present under ``output_path`` are not removed. Delete the
        folder before re-splitting with a different seed or different ratios,
        otherwise images from the previous run remain in their old split.
    """
    import warnings

    if min(train_ratio, val_ratio, test_ratio) < 0:
        raise ValueError("Split ratios must be non-negative.")
    if train_ratio + val_ratio > 1.0 + 1e-9:
        raise ValueError("train_ratio + val_ratio must not exceed 1.")
    if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
        warnings.warn(
            "Split ratios do not sum to 1; the test split receives the images "
            "left over after the train and validation splits."
        )

    os.makedirs(output_path, exist_ok=True)  # Ensure output directory exists

    # A private generator keeps the split reproducible without touching the
    # global random state used elsewhere in the notebook.
    rng = random.Random(seed)

    splits = ['training', 'testing']
    categories = ['fractured', 'not_fractured']

    for split in splits:
        for category in categories:
            class_dir = os.path.join(dataset_path, split, category)

            # os.listdir returns entries in arbitrary order, so sort before
            # shuffling to make the seeded shuffle deterministic.
            images = sorted(f for f in os.listdir(class_dir) if is_image_file(f))
            rng.shuffle(images)

            # Calculate split boundaries
            train_end = int(len(images) * train_ratio)
            val_end = train_end + int(len(images) * val_ratio)

            partitions = {
                'train': images[:train_end],
                'val': images[train_end:val_end],
                'test': images[val_end:],
            }

            for split_name, split_images in partitions.items():
                split_dir = os.path.join(output_path, split_name, category)
                os.makedirs(split_dir, exist_ok=True)

                # Copy images to their split folder; unreadable files are
                # reported and skipped rather than aborting the whole split.
                for image in split_images:
                    src = os.path.join(class_dir, image)
                    dst = os.path.join(split_dir, image)
                    try:
                        shutil.copy(src, dst)
                    except PermissionError as e:
                        print(f"Skipping {src} due to permission error: {e}")

    print(f"Dataset split into train, validation, and test sets at {output_path}")

# Perform the dataset split.
# The images live in a "BoneFractureDataset" folder inside the download
# directory. Only append it when it is not already the last path component, so
# re-running this cell does not keep extending dataset_path.
if os.path.basename(os.path.normpath(dataset_path)) != "BoneFractureDataset":
    dataset_path = os.path.join(dataset_path, "BoneFractureDataset")
output_path = "data"
split_dataset(dataset_path, output_path)

In [None]:
def count_images_in_split(split_path):
    """Count the image files in a split directory (train, val, or test).

    Looks in the ``fractured`` and ``not_fractured`` subfolders of
    ``split_path``; a missing subfolder contributes zero. Only regular files
    accepted by ``is_image_file`` (defined earlier in this notebook) are
    counted, so stray files and nested folders do not inflate the total.

    Args:
        split_path: Path to one split folder, e.g. ``data/train``.

    Returns:
        Total number of image files across both category subfolders.
    """
    total_images = 0
    for category in ['fractured', 'not_fractured']:
        category_path = os.path.join(split_path, category)
        if not os.path.isdir(category_path):
            continue
        total_images += sum(
            1
            for entry in os.listdir(category_path)
            if is_image_file(entry)
            and os.path.isfile(os.path.join(category_path, entry))
        )
    return total_images

def verify_split(output_path):
    """Print the image count of each train/val/test folder under ``output_path``.

    A split folder that does not exist is reported instead of counted.
    """
    print("Verifying dataset split...")

    for split_name in ('train', 'val', 'test'):
        split_dir = os.path.join(output_path, split_name)

        # Guard clause: report a missing split and move on to the next one.
        if not os.path.exists(split_dir):
            print(f"  No {split_name} directory found!")
            continue

        print(f"Number of images in {split_name} split:")
        image_total = count_images_in_split(split_dir)
        print(f"  Total images in {split_name}: {image_total}")

# Perform the verification: print the image count found in each of the
# train/val/test folders under output_path.
verify_split(output_path)

Verifying dataset split...
Number of images in train split:
  Total images in train: 7570
Number of images in val split:
  Total images in val: 946
Number of images in test split:
  Total images in test: 947


## Model definition <a name="model"></a>
...

In [None]:
# Code here

## Training and validation <a name="training"></a>
...

In [None]:
# Code here

## Analysis <a name="analysis"></a>
...

In [None]:
# Code here