In [7]:
import pandas as pd
import os

# Load the annotations CSV file
annotations = pd.read_csv('train/_annotations.csv')

# Directory containing the filtered images
filtered_images_dir = 'train/filtered_images'

# Get the list of filenames in the filtered_images directory
filtered_images = [f for f in os.listdir(filtered_images_dir) if os.path.isfile(os.path.join(filtered_images_dir, f))]

# Filter the annotations based on filenames
filtered_annotations = annotations[annotations['filename'].isin(filtered_images)]

# Map the class labels
class_mapping = {
    'Class 0- No endodontic treatment': 0,
    'Class 1- complete endodontic treatment': 1,
    'Class 2- incomplete endodontic treatment': 1,
    'cavity': 0,
}

# Apply the class mapping
filtered_annotations['class'] = filtered_annotations['class'].map(class_mapping)

# Remove entries where class == 0
filtered_annotations = filtered_annotations[filtered_annotations['class'] != 0]

# Save the filtered and updated annotations to a new CSV file
filtered_annotations.to_csv('filtered_annotations.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_annotations['class'] = filtered_annotations['class'].map(class_mapping)


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
import numpy as np
import cv2
import os
from PIL import Image
from sklearn.model_selection import train_test_split

class DentalDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.annotations.iloc[idx, 0])
        image = cv2.imread(img_name, cv2.IMREAD_GRAYSCALE)
        
        # Create mask from bounding box
        mask = np.zeros((640, 640), dtype=np.float32)
        if self.annotations.iloc[idx, 3] == 1:  # if treated tooth
            xmin = self.annotations.iloc[idx, 4]
            ymin = self.annotations.iloc[idx, 5]
            xmax = self.annotations.iloc[idx, 6]
            ymax = self.annotations.iloc[idx, 7]
            mask[ymin:ymax, xmin:xmax] = 1.0

        # Convert to PIL Images
        image = Image.fromarray(image)
        mask = Image.fromarray(mask)

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()
        
        # Encoder
        self.enc1 = self._double_conv(1, 64)
        self.enc2 = self._double_conv(64, 128)
        self.enc3 = self._double_conv(128, 256)
        self.enc4 = self._double_conv(256, 512)
        
        # Decoder
        self.dec1 = self._double_conv(512 + 256, 256)
        self.dec2 = self._double_conv(256 + 128, 128)
        self.dec3 = self._double_conv(128 + 64, 64)
        self.final = nn.Conv2d(64, 1, kernel_size=1)
        
        self.pool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def _double_conv(self, in_ch, out_ch):
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        # Encoder
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool(e1))
        e3 = self.enc3(self.pool(e2))
        e4 = self.enc4(self.pool(e3))
        
        # Decoder
        d1 = self.dec1(torch.cat([self.upsample(e4), e3], dim=1))
        d2 = self.dec2(torch.cat([self.upsample(d1), e2], dim=1))
        d3 = self.dec3(torch.cat([self.upsample(d2), e1], dim=1))
        
        return torch.sigmoid(self.final(d3))

def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs):
    best_val_loss = float('inf')
    
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0
        for images, masks in train_loader:
            images = images.to(device)
            masks = masks.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for images, masks in val_loader:
                images = images.to(device)
                masks = masks.to(device)
                outputs = model(images)
                val_loss += criterion(outputs, masks).item()
        
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        
        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Training Loss: {train_loss:.4f}')
        print(f'Validation Loss: {val_loss:.4f}')
        
        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')

def segment_and_save(model, test_loader, output_dir, device):
    model.eval()
    os.makedirs(output_dir, exist_ok=True)
    
    with torch.no_grad():
        for idx, (images, _) in enumerate(test_loader):
            images = images.to(device)
            outputs = model(images)
            
            # Convert predictions to binary masks
            predictions = (outputs > 0.5).float()
            
            # Save segmented images
            for i in range(predictions.shape[0]):
                mask = predictions[i].cpu().numpy().squeeze()
                mask = (mask * 255).astype(np.uint8)
                cv2.imwrite(os.path.join(output_dir, f'segmented_{idx}_{i}.png'), mask)

def main():
    # Hyperparameters
    BATCH_SIZE = 4
    LEARNING_RATE = 0.001
    NUM_EPOCHS = 50
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Transforms
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    
    # Create datasets
    dataset = DentalDataset(
        csv_file='filtered_annotations.csv',
        img_dir='train/filtered_images',
        transform=transform
    )
    
    # Split dataset
    train_size = int(0.7 * len(dataset))
    val_size = int(0.15 * len(dataset))
    test_size = len(dataset) - train_size - val_size
    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size]
    )
    
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
    
    # Initialize model, criterion, and optimizer
    model = UNet().to(DEVICE)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    
    # Train model
    train_model(model, train_loader, val_loader, criterion, optimizer, DEVICE, NUM_EPOCHS)
    
    # Load best model and perform segmentation
    model.load_state_dict(torch.load('best_model.pth'))
    segment_and_save(model, test_loader, 'segmented_output', DEVICE)

if __name__ == '__main__':
    main()

Epoch 1/50:
Training Loss: 0.5016
Validation Loss: 0.5415


KeyboardInterrupt: 

In [8]:
import pandas as pd
import os
import cv2

# Load the filtered annotations CSV file
annotations = pd.read_csv('filtered_annotations.csv')

# Directory containing the filtered images
filtered_images_dir = 'train/filtered_images'

# Directory to save images with bounding boxes
output_dir = 'output_images'
os.makedirs(output_dir, exist_ok=True)

# Iterate over each annotation
for idx, row in annotations.iterrows():
    img_name = row['filename']
    img_path = os.path.join(filtered_images_dir, img_name)
    
    # Load the image
    image = cv2.imread(img_path)
    
    # Check if the image was loaded successfully
    if image is not None:
        # Get bounding box coordinates
        x_min = int(row['xmin'])
        y_min = int(row['ymin'])
        x_max = int(row['xmax'])
        y_max = int(row['ymax'])
        
        # Draw the bounding box on the image
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
        
        # Save the image with bounding box
        output_path = os.path.join(output_dir, img_name)
        cv2.imwrite(output_path, image)
    else:
        print(f"Warning: Could not load image {img_path}")

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
import cv2
import numpy as np
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split


# Simple CNN for classification and localization
class DentalNet(nn.Module):
    def __init__(self):
        super(DentalNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.classifier = nn.Sequential(
            nn.Linear(128 * 80 * 80, 512),
            nn.ReLU(),
            nn.Linear(512, 5)  # 1 for class + 4 for bbox coordinates
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

# Dataset class
class DentalDataset(Dataset):
    def __init__(self, csv_file, img_dir):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.annotations.iloc[idx, 0])
        image = Image.open(img_name)
        image = self.transform(image)

        # Get class and bbox
        label = self.annotations.iloc[idx, 3]  # class
        bbox = self.annotations.iloc[idx, 4:8].values.astype(float)  # xmin, ymin, xmax, ymax

        target = np.concatenate(([label], bbox))
        return image, torch.FloatTensor(target)

def extract_treated_teeth(model, img_dir, csv_file, output_dir,dataloader):
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load data
    dataset = DentalDataset(csv_file, img_dir)
    # dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    with torch.no_grad():
        for i, (images, targets) in enumerate(dataloader):
            images = images.to(device)
            outputs = model(images)

            # Get original image name
            img_name = dataset.annotations.iloc[i, 0]

            # Get predictions
            pred_class = outputs[0][0].item() > 0.5  # threshold at 0.5
            pred_bbox = outputs[0][1:].cpu().numpy()

            if pred_class:  # if treated tooth detected
                # Load original image
                orig_img = cv2.imread(os.path.join(img_dir, img_name))

                # Extract coordinates
                xmin, ymin, xmax, ymax = map(int, pred_bbox)

                # Crop the tooth
                cropped_tooth = orig_img[ymin:ymax, xmin:xmax]

                # Save cropped image
                output_path = os.path.join(output_dir, f'treated_{img_name}')
                cv2.imwrite(output_path, cropped_tooth)

# Training function
def train_model(model, train_loader, criterion, optimizer, num_epochs=50):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for images, targets in train_loader:
            images, targets = images.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            # Calculate loss
            class_loss = criterion(outputs[:, 0], targets[:, 0])
            bbox_loss = criterion(outputs[:, 1:], targets[:, 1:])
            loss = class_loss + bbox_loss

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')

# Usage
def main():
    # Initialize model
    model = DentalNet()

    # Define loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Create dataset
    dataset = DentalDataset('filtered_annotations.csv', '')
    # Load dataset and split into train/test
    dataset = DentalDataset('filtered_annotations.csv', '')
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

    # Train model
    train_model(model, train_loader, criterion, optimizer)

    # Save model
    torch.save(model.state_dict(), 'dental_model.pth')

    # Extract treated teeth
    extract_treated_teeth(
        model,
        '',
        'filtered_annotations.csv',
        'output_treated_teeth',
        test_loader
    )

if __name__ == '__main__':
    main()

Purpose and effect of each component:

1. **Input Layer**:
- The network expects grayscale images (1 channel)
- Input size is 640x640 pixels (original image size)

2. **Feature Extraction Layers (self.features)**:

```python
nn.Conv2d(1, 32, 3, padding=1)
```
- First Convolutional Layer
- Input: 1 channel (grayscale)
- Output: 32 channels
- Kernel size: 3x3
- Padding=1: Maintains spatial dimensions
- Purpose: Detects basic features like edges, gradients
- Output size: 640x640x32

```python
nn.ReLU()
```
- Activation function
- Purpose: Introduces non-linearity
- Converts negative values to zero
- Helps network learn complex patterns
- Reduces vanishing gradient problem

```python
nn.MaxPool2d(2)
```
- First Max Pooling Layer
- Kernel size: 2x2
- Stride: 2
- Purpose: 
  * Reduces spatial dimensions by half
  * Provides translation invariance
  * Reduces computation
- Output size: 320x320x32

```python
nn.Conv2d(32, 64, 3, padding=1)
```
- Second Convolutional Layer
- Input: 32 channels
- Output: 64 channels
- Kernel: 3x3
- Purpose: Detects more complex features
- Output size: 320x320x64

```python
nn.ReLU()
nn.MaxPool2d(2)
```
- Second activation and pooling
- Further reduces dimensions
- Output size: 160x160x64

```python
nn.Conv2d(64, 128, 3, padding=1)
```
- Third Convolutional Layer
- Input: 64 channels
- Output: 128 channels
- Purpose: Detects high-level features
- Output size: 160x160x128

```python
nn.ReLU()
nn.MaxPool2d(2)
```
- Final activation and pooling
- Output size: 80x80x128

3. **Classifier Layers**:

```python
x = x.view(x.size(0), -1)
```
- Flattening operation
- Converts 3D feature maps to 1D vector
- Size: 128 * 80 * 80 = 819,200 features

```python
nn.Linear(128 * 80 * 80, 512)
```
- First Fully Connected Layer
- Input: 819,200 features
- Output: 512 neurons
- Purpose: 
  * Combines all features
  * Learns high-level representations
  * Reduces dimensionality

```python
nn.ReLU()
```
- Activation for fully connected layer
- Maintains non-linearity

```python
nn.Linear(512, 5)
```
- Final Output Layer
- Input: 512 features
- Output: 5 values
  * First value: Classification score (treated/untreated)
  * Next 4 values: Bounding box coordinates (xmin, ymin, xmax, ymax)

4. **Architecture Design Reasoning**:

A. Convolutional Layers:
- Increasing channel depth (32 → 64 → 128)
  * Captures increasingly complex features
  * Earlier layers: basic edges
  * Later layers: tooth structures, filling patterns

B. Pooling Layers:
- Reduces dimensionality gradually
- Original: 640x640
- After pooling: 320x320 → 160x160 → 80x80
- Benefits:
  * Reduces computation
  * Provides spatial invariance
  * Helps focus on important features

C. Feature Hierarchy:
- Layer 1: Basic edges, contrasts
- Layer 2: Simple shapes, boundaries
- Layer 3: Complex patterns, tooth structures

D. Output Design:
- Joint learning of classification and localization
- Classification: Single value for treated/untreated
- Localization: 4 values for bounding box
- Shares feature extraction for both tasks

5. **Training Considerations**:

A. Parameter Count:
- Convolutional layers: Relatively few parameters
- Fully connected layers: Most parameters
- Total parameters: Large due to 128*80*80 to 512 transition

B. Potential Improvements:
- Add batch normalization for better training
- Include dropout for regularization
- Use global average pooling to reduce parameters
- Add skip connections for better gradient flow