1. Data loading and cleaning
2. Object Segmentation
    1. U-Net
    2. YOLO seg

In [4]:
# Fetch the teeth-segmentation dataset from Kaggle and extract the archive
# into the current working directory.

import kaggle

# Authenticate the Kaggle API client before requesting the download.
api = kaggle.api
api.authenticate()

api.dataset_download_files(
    'humansintheloop/teeth-segmentation-on-dental-x-ray-images',
    path='.',
    unzip=True,
)

Dataset URL: https://www.kaggle.com/datasets/humansintheloop/teeth-segmentation-on-dental-x-ray-images


In [5]:
import os
import shutil

import json

def create_directories(base_dir):
    """Build the YOLO-style folder skeleton underneath ``base_dir``.

    The resulting tree is ``images/{train,test,valid}`` and
    ``labels/{train,test,valid}``. Folders that already exist are left
    untouched, so the function can be called repeatedly.

    Args:
        base_dir: Root folder of the dataset; created if it is missing.
    """
    kinds = ('images', 'labels')
    splits = ('train', 'test', 'valid')

    # Top-level folders first ...
    for kind in kinds:
        os.makedirs(os.path.join(base_dir, kind), exist_ok=True)

    # ... then one folder per split inside each of them.
    for kind in kinds:
        for split in splits:
            os.makedirs(os.path.join(base_dir, kind + '/' + split), exist_ok=True)
        
def convert_annotation(label_file, image_w, image_h):
    """Convert one polygon annotation JSON into a YOLO segmentation label file.

    The JSON is expected to hold an ``objects`` list whose entries carry a
    ``classTitle`` and a polygon under ``points.exterior`` (pixel coordinates).
    Each object becomes one output line of the form::

        <class> <x1> <y1> <x2> <y2> ...

    with coordinates divided by the image width/height and written with six
    decimals, all separated by single spaces.

    Args:
        label_file: Path to the annotation, ending in ``.jpg.json``.
        image_w: Width of the matching image in pixels.
        image_h: Height of the matching image in pixels.

    Returns:
        Path of the written ``.txt`` file (``.jpg.json`` replaced by ``.txt``).

    Raises:
        ValueError: If ``label_file`` does not contain ``.jpg.json``; without
            this guard the output path would equal the input path and the
            JSON would be overwritten.
    """
    new_label_file = label_file.replace('.jpg.json', '.txt')
    if new_label_file == label_file:
        raise ValueError(
            f"Expected an annotation path containing '.jpg.json', got {label_file!r}"
        )

    with open(label_file, "r") as f:
        data = json.load(f)

    lines = []
    for obj in data['objects']:
        # NOTE(review): the class title is written verbatim. YOLO trainers
        # normally expect a zero-based integer class index - confirm the
        # titles are usable as such or map them before training.
        tooth = obj['classTitle']           # Tooth label
        points = obj['points']['exterior']  # Polygon points

        # The class goes first exactly once, followed by flat x y x y ... values.
        yolo_seg = [str(tooth)]
        for x, y in points:
            yolo_seg.append(f"{x / image_w:.6f}")
            yolo_seg.append(f"{y / image_h:.6f}")

        lines.append(" ".join(yolo_seg))

    with open(new_label_file, 'w') as f:
        for line in lines:
            f.write(line + '\n')

    return new_label_file


In [6]:
# Create a dataset compliant with the YOLO format
import os
import shutil
from sklearn.model_selection import train_test_split
from PIL import Image

base_dir = 'teeth_segmentation'
source_img_dir = 'Teeth Segmentation PNG/d2/img'
source_ann_dir = 'Teeth Segmentation PNG/d2/ann'


def _convert_and_move(image_paths, split, ann_dir, target_dir):
    """Convert the annotation of every image in ``image_paths`` and move the
    image / label pair into ``target_dir``'s ``images/<split>`` and
    ``labels/<split>`` folders."""
    for image_path in image_paths:
        # Build the annotation path from the annotation directory rather than
        # string-replacing 'img' -> 'ann', which would also rewrite any 'img'
        # that happens to appear in the file name itself.
        json_path = os.path.join(ann_dir, os.path.basename(image_path)) + '.json'

        # Close the image handle before the file is moved.
        with Image.open(image_path) as img:
            width, height = img.size

        convert_annotation(json_path, width, height)

        shutil.move(image_path, os.path.join(target_dir, 'images', split))
        shutil.move(json_path.replace('.jpg.json', '.txt'),
                    os.path.join(target_dir, 'labels', split))


# os.listdir returns entries in arbitrary order; sort them so that the fixed
# random_state really yields the same split on every machine / run.
image_names = sorted(os.listdir(source_img_dir))

# 80% train, 10% test, 10% validation
train_names, temp_names = train_test_split(image_names, test_size=0.2, random_state=42)
test_names, valid_names = train_test_split(temp_names, test_size=0.5, random_state=42)

train_data = [os.path.join(source_img_dir, image) for image in train_names]
test_data = [os.path.join(source_img_dir, image) for image in test_names]
valid_data = [os.path.join(source_img_dir, image) for image in valid_names]

print('Train test split created')
print(f'Train data: {len(train_data)}')
print(f'Test data: {len(test_data)}')
print(f'Valid data: {len(valid_data)}')

create_directories(base_dir)

print('Directories created')

# NOTE: files are moved (not copied), so this cell can only be run once per
# freshly extracted download.
for split, image_paths in (('train', train_data), ('test', test_data), ('valid', valid_data)):
    _convert_and_move(image_paths, split, source_ann_dir, base_dir)
    print(f'{split.capitalize()} data converted')

Train test split created
Train data: 478
Test data: 60
Valid data: 60
Directories created
Train data converted
Test data converted
Valid data converted


### U-Net

In [None]:
# Create the dataset
from torch.utils.data import Dataset

class TeethDataset(Dataset):
    """Image / label-text pairs read from a ``images`` + ``labels`` folder tree.

    Images are looked up under ``<base_dir>/images`` and labels under
    ``<base_dir>/labels``. Label files share the image's relative path with
    the extension replaced by ``.txt``.

    Args:
        base_dir: Dataset root containing the ``images`` and ``labels`` folders.
        transform: Optional callable applied to the PIL image.
        split: Optional sub-folder name (e.g. ``'train'``, ``'test'``,
            ``'valid'``). When given, only ``images/<split>`` and
            ``labels/<split>`` are used. When omitted, every file found
            anywhere below ``images`` is included.

    Raises:
        FileNotFoundError: If the image folder does not exist.
    """

    def __init__(self, base_dir, transform=None, split=None):

        self.base_dir = base_dir
        self.transform = transform
        self.split = split

        self.images_dir = os.path.join(base_dir, 'images')
        self.labels_dir = os.path.join(base_dir, 'labels')
        if split is not None:
            self.images_dir = os.path.join(self.images_dir, split)
            self.labels_dir = os.path.join(self.labels_dir, split)

        if not os.path.isdir(self.images_dir):
            raise FileNotFoundError(f"Image folder not found: {self.images_dir}")

        # Collect files only (never sub-folder names), as paths relative to
        # images_dir, and sort them so indexing is deterministic.
        found = []
        for root, _dirs, files in os.walk(self.images_dir):
            rel_root = os.path.relpath(root, self.images_dir)
            for name in files:
                found.append(name if rel_root == '.' else os.path.join(rel_root, name))
        self.images = sorted(found)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` where ``label`` is the stripped text of
        the matching label file and ``image`` is the (optionally transformed)
        RGB image."""
        rel_path = self.images[idx]
        img_path = os.path.join(self.images_dir, rel_path)
        label_path = os.path.join(self.labels_dir, os.path.splitext(rel_path)[0] + '.txt')

        # Force three channels so 3-channel transforms / models work for
        # grayscale files too; convert() returns a loaded copy, so the file
        # handle can be closed right away.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        with open(label_path, 'r') as f:
            label = f.read().strip()

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Create the datasets
from torchvision import transforms
import matplotlib.pyplot as plt

transform = transforms.Compose([
    transforms.Resize((416, 416)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# NOTE(review): all three datasets are built from the same base_dir with
# identical arguments, so they index the same files. Restrict each one to its
# own split before using them for training / evaluation.
train_dataset = TeethDataset(base_dir, transform=transform)
test_dataset = TeethDataset(base_dir, transform=transform)
valid_dataset = TeethDataset(base_dir, transform=transform)

# Plot a sample image with its segmentation polygons
image, label = train_dataset[0]

# `image` is a (C, H, W) tensor: undo Normalize(mean=0.5, std=0.5) and move
# the channel axis last so matplotlib can display it.
display_image = (image * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0).numpy()
height, width = display_image.shape[:2]

fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(display_image)
ax.axis('off')

# `label` is the raw text of the label file: one polygon per line.
for seg_mask in label.splitlines():

    # Accept both "x,y" pairs and space-separated coordinates.
    tokens = seg_mask.replace(',', ' ').split()
    tooth = tokens[0] if tokens else None
    coords = tokens[1:]

    # An odd number of remaining tokens means the class token is repeated
    # in front of the coordinates; drop the repetition.
    if len(coords) % 2 == 1:
        coords = coords[1:]
    if not coords:
        continue

    points = [float(point) for point in coords]

    # Coordinates are normalised to [0, 1]; scale them to the displayed size.
    x = [value * width for value in points[::2]]
    y = [value * height for value in points[1::2]]

    ax.fill(x, y, edgecolor='r', fill=False)

plt.show()

In [None]:
# Implement the architecture of a base U-net
import torch.nn as nn

import torch


def _double_conv(in_channels, out_channels):
    """Two 3x3 convolutions with ReLU; padding=1 keeps the spatial size."""
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    )


class UNet(nn.Module):
    """Small two-level U-Net.

    The encoder halves the resolution twice; the decoder doubles it twice and
    concatenates the encoder feature map of the same level (skip connection)
    before each decoder block. The output therefore has the same height and
    width as the input.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Number of output channels (raw scores, no activation).
    """

    def __init__(self, in_channels, out_channels):
        super(UNet, self).__init__()

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Contracting path
        self.enc1 = _double_conv(in_channels, 64)
        self.enc2 = _double_conv(64, 128)

        self.bottleneck = _double_conv(128, 256)

        # Expanding path: each decoder block sees upsampled + skip channels.
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = _double_conv(256, 128)
        self.up1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = _double_conv(128, 64)

        self.head = nn.Conv2d(64, out_channels, kernel_size=1)

    @staticmethod
    def _merge(upsampled, skip):
        """Concatenate ``upsampled`` with ``skip`` along the channel axis."""
        # Pooling floors odd sizes, so the upsampled map can be one pixel
        # short; resize it to the skip tensor before concatenating.
        if upsampled.shape[-2:] != skip.shape[-2:]:
            upsampled = nn.functional.interpolate(upsampled, size=skip.shape[-2:], mode='nearest')
        return torch.cat([skip, upsampled], dim=1)

    def forward(self, x):
        skip1 = self.enc1(x)                   # full resolution
        skip2 = self.enc2(self.pool(skip1))    # 1/2 resolution
        x = self.bottleneck(self.pool(skip2))  # 1/4 resolution

        x = self.dec2(self._merge(self.up2(x), skip2))
        x = self.dec1(self._merge(self.up1(x), skip1))
        return self.head(x)


In [1]:
# Implement IoU (intersection over union) to measure the quality of the model's predictions

def IoU(pred, target):
    """Intersection over union of two masks.

    Both tensors are flattened and compared element-wise, so they must hold
    the same number of elements. The intersection is the sum of the
    element-wise product; the union is ``pred.sum() + target.sum()`` minus
    that intersection.

    Args:
        pred: Predicted mask tensor.
        target: Ground-truth mask tensor.

    Returns:
        A scalar tensor ``intersection / (union + 1e-6)``.
    """
    # reshape (unlike view) also works on non-contiguous tensors, e.g. the
    # result of a permute / transpose / slice.
    pred = pred.reshape(-1)
    target = target.reshape(-1)

    intersection = (pred * target).sum()
    union = pred.sum() + target.sum() - intersection

    return intersection / (union + 1e-6) # Add a small value to avoid division by zero

In [None]:
# Implement combined BCE + Dice loss
from torch import nn

class DiceBCELoss(nn.Module):
    """Sum of binary cross-entropy and soft Dice loss for mask prediction.

    Args:
        image_size: Stored as ``self.img_size``; not used in the computation.
        from_logits: When True (default) ``predictions`` are raw scores: BCE
            is computed with ``BCEWithLogitsLoss`` and a sigmoid is applied
            before the Dice term. When False ``predictions`` must already be
            probabilities in [0, 1] and ``BCELoss`` is used.
        smooth: Small constant added to numerator and denominator of the
            Dice ratio so that two empty masks give a Dice score of 1.
    """

    def __init__(self, image_size=None, from_logits=True, smooth=1e-6):
        super(DiceBCELoss, self).__init__()

        self.img_size = image_size
        self.from_logits = from_logits
        self.smooth = smooth
        self.bce_loss = nn.BCEWithLogitsLoss() if from_logits else nn.BCELoss()

    def forward(self, predictions, targets):
        """Return ``BCE + (1 - Dice)`` averaged over the batch.

        ``predictions`` and ``targets`` must have the same shape, with the
        batch on the first axis; ``targets`` is cast to the dtype of
        ``predictions``.
        """
        targets = targets.to(predictions.dtype)

        bce_loss = self.bce_loss(predictions, targets)

        probs = predictions.sigmoid() if self.from_logits else predictions

        # One row per sample so the Dice score is computed per mask.
        probs = probs.reshape(probs.shape[0], -1)
        flat_targets = targets.reshape(targets.shape[0], -1)

        intersection = (probs * flat_targets).sum(dim=1)
        # Dice divides by the sum of both mask sizes (not by the IoU union).
        total = probs.sum(dim=1) + flat_targets.sum(dim=1)
        dice_score = (2 * intersection + self.smooth) / (total + self.smooth)
        dice_loss = (1 - dice_score).mean()

        return bce_loss + dice_loss

    def compute_loss(self, predictions, targets):
        """Alias of :meth:`forward`, kept for callers using the explicit name."""
        return self.forward(predictions, targets)


In [None]:
# Implement the fit loop

def train_fn(model, train_loader, optimizer, criterion, device, phase):
    """Run one pass over ``train_loader`` and report mean loss and mean IoU.

    Args:
        model: Network to train or evaluate.
        train_loader: Iterable of ``(images, labels)`` batches; both elements
            are moved to ``device`` with ``.to(device)``.
        optimizer: Optimizer stepped once per batch when ``phase == 'train'``.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        device: Device the batches are moved to.
        phase: ``'train'`` enables training mode and weight updates; any other
            value puts the model in eval mode and disables gradients.

    Returns:
        Tuple ``(mean_loss, mean_IoU)`` averaged over the number of batches.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    import torch  # local import: this cell does not import torch itself

    is_training = phase == 'train'
    if is_training:
        model.train()
    else:
        model.eval()

    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError("train_loader is empty; cannot average over zero batches")

    running_loss = 0.0
    running_IoU = 0.0

    # Gradients are only tracked while training.
    with torch.set_grad_enabled(is_training):
        for images, labels in train_loader:

            images = images.to(device)
            labels = labels.to(device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()

            # Reuse this batch's outputs instead of a second forward pass.
            # NOTE(review): assumes the model emits raw logits and that
            # `labels` is a 0/1 mask shaped like `outputs` - confirm.
            pred_mask = (outputs.detach().sigmoid() > 0.5).to(outputs.dtype)
            running_IoU += float(IoU(pred_mask, labels))

    return running_loss / num_batches, running_IoU / num_batches

In [None]:
# Implement the main training loop

import torch

# Fall back to the CPU when no CUDA device is available.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
batch_size = 32
num_workers = 8
lr = 1e-3
num_epochs = 10

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

model = UNet(3, 1).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# The model has a single output channel, so this is binary segmentation:
# CrossEntropyLoss over one class is degenerate, use a per-pixel binary loss.
# NOTE(review): requires float 0/1 mask targets shaped like the model output.
criterion = nn.BCEWithLogitsLoss()

loss_train = []
loss_test = []
iou_train = []
iou_test = []

# NOTE(review): per-epoch monitoring uses test_loader while valid_loader is
# never consumed - confirm which split is meant for model selection.
for epoch in range(num_epochs):

    # train_fn returns (mean loss, mean IoU); unpack both.
    train_loss, train_iou = train_fn(model, train_loader, optimizer, criterion, device, 'train')
    test_loss, test_iou = train_fn(model, test_loader, optimizer, criterion, device, 'test')

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.4f}, Train IoU: {train_iou:.4f}, "
          f"Test Loss: {test_loss:.4f}, Test IoU: {test_iou:.4f}")

    loss_train.append(train_loss)
    loss_test.append(test_loss)
    iou_train.append(train_iou)
    iou_test.append(test_iou)






