In [4]:
import os
import pydicom
from skimage.transform import resize
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
import numpy as np
from PIL import Image

In [None]:
# Directory where DICOM files are stored. Set the DICOM_DATA_DIR environment
# variable to point the notebook at another copy of the data without editing
# code; the original location remains the default.
dicom_dir_path = os.environ.get(
    'DICOM_DATA_DIR',
    '/Users/arjunmoorthy/Desktop/Research_Capstone/Image Data/CapstoneData',
)

In [2]:
class DICOMDataset(Dataset):
    """Map-style dataset of DICOM images stored in per-class folders.

    ``root_dir`` is searched for one sub-directory per entry of
    ``self.classes`` (``non-cancer`` and ``cancer``). Every file with a
    ``.dcm`` extension (any letter case) found anywhere below a class
    directory becomes one sample, labelled with the index of that class
    (0 for non-cancer, 1 for cancer). A class directory that does not exist
    simply contributes no samples.

    Args:
        root_dir: Directory holding the class sub-directories.
        transform: Optional callable applied to the PIL image of each sample.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = ['non-cancer', 'cancer']
        self.paths = []
        self.labels = []

        for idx, cls in enumerate(self.classes):
            class_dir = os.path.join(root_dir, cls)
            for subdir, dirs, files in os.walk(class_dir):
                # os.walk yields entries in filesystem order; sorting (in place
                # for dirs, so the traversal itself is ordered) keeps sample
                # indices stable across machines and runs.
                dirs.sort()
                for file in sorted(files):
                    # Accept ".DCM" as well as ".dcm".
                    if file.lower().endswith(".dcm"):
                        self.paths.append(os.path.join(subdir, file))
                        self.labels.append(idx)  # Assign 0 for non-cancer, 1 for cancer

    def __len__(self):
        """Return the number of DICOM files discovered under ``root_dir``."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        ``image`` is the RGB PIL image built from the DICOM file, passed
        through ``self.transform`` when one was given; ``label`` is the
        integer class index.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        dicom_path = self.paths[idx]
        image = self._load_dicom(dicom_path)
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

    @staticmethod
    def _normalize_to_uint8(array):
        """Clip negatives to zero and rescale so the largest value maps to 255.

        Returns a ``uint8`` array with the same shape as ``array``. When the
        input has no positive, finite maximum (all-black, empty, or NaN peak)
        there is nothing to rescale, so an all-zero array is returned instead
        of dividing by zero.
        """
        clipped = np.maximum(np.asarray(array, dtype=float), 0)
        peak = clipped.max() if clipped.size else 0.0
        if not peak > 0:
            return np.zeros(clipped.shape, dtype=np.uint8)
        return (clipped / peak * 255.0).astype(np.uint8)

    def _load_dicom(self, path):
        """Read the DICOM file at ``path`` and return it as an RGB PIL image."""
        dicom = pydicom.dcmread(path)
        array = self._normalize_to_uint8(dicom.pixel_array)
        # NOTE(review): assumes pixel_array is a single 2-D grayscale frame — confirm for this data.
        rgb_array = np.stack([array] * 3, axis=-1)  # Convert grayscale to RGB
        return Image.fromarray(rgb_array)

In [None]:
# Preprocessing applied to every image: Resize(256), CenterCrop(224),
# conversion to a tensor, then per-channel standardization.
# NOTE(review): the mean/std values are presumably the ImageNet statistics
# matching the pre-trained ResNet weights — confirm.
transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Create the dataset from the configured data directory (`dicom_dir_path`)
# rather than a placeholder path, which would leave the dataset empty.
root_dir = dicom_dir_path
dicom_dataset = DICOMDataset(root_dir=root_dir, transform=transform)

# Create a DataLoader that yields shuffled mini-batches of (images, labels)
batch_size = 4
dataloader = DataLoader(dicom_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Start from a ResNet-18 initialised with pre-trained weights.
# NOTE(review): newer torchvision releases prefer a `weights=` argument over
# `pretrained=` — confirm against the installed version.
model = models.resnet18(pretrained=True)

# Swap the final fully connected layer for a 2-output head (binary classification)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=2)

In [None]:
# Relies on `transform` and `dicom_dir_path` defined in earlier cells.

# Create the dataset from the shared data-directory setting instead of
# repeating the absolute path here.
root_dir = dicom_dir_path
dicom_dataset = DICOMDataset(root_dir=root_dir, transform=transform)

# Fail early with a readable message; an empty dataset would otherwise only
# surface as an error once the shuffling DataLoader is built.
if len(dicom_dataset) == 0:
    raise RuntimeError(
        f"No .dcm files found under {root_dir!r}; "
        f"expected sub-directories {dicom_dataset.classes}"
    )

# Create a DataLoader
batch_size = 4
dataloader = DataLoader(dicom_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU,
# and place the model on it.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Cross-entropy loss, optimised with SGD plus momentum
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0

    # DICOMDataset returns (image, label) pairs, so every batch unpacks into
    # an image tensor and a label tensor.
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # `loss` is the batch mean; weight it by the batch size so that
        # epoch_loss below is a true per-sample average.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(dicom_dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')


In [None]:
# Build one dataset over the whole data directory and split it by index.
# DICOMDataset takes only (root_dir, transform), so the split is expressed
# with Subset views instead of extra constructor arguments.
full_dataset = DICOMDataset(dicom_dir_path, transform=transform)
num_classes = len(full_dataset.classes)

# Stratified 80/20 split with a fixed seed so the partition is reproducible.
# NOTE(review): this splits individual images. If one patient contributes
# several images, switch to a patient-level (grouped) split to avoid leakage
# between train and test.
train_indices, test_indices = train_test_split(
    np.arange(len(full_dataset)),
    test_size=0.2,
    stratify=full_dataset.labels,
    random_state=42,
)
train_dataset = torch.utils.data.Subset(full_dataset, train_indices.tolist())
test_dataset = torch.utils.data.Subset(full_dataset, test_indices.tolist())

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# Model setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ResNet-50 with pre-trained weights; the final layer is replaced so the
# output size matches the number of dataset classes.
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
def train_model(model, train_loader, criterion, optimizer, num_epochs=10, device=None):
    """Train ``model`` on ``train_loader`` and report the loss after each epoch.

    Args:
        model: The ``nn.Module`` to optimise; put into training mode here.
        train_loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU for a parameter-free model), so the
            function does not depend on a notebook-level ``device`` variable.

    Returns:
        A list with the average per-sample loss of every epoch.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    history = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        samples_seen = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight each batch loss by its size so a short final batch does
            # not skew the epoch average.
            batch_size = images.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

        if samples_seen == 0:
            raise ValueError("train_loader yielded no samples; nothing to train on")

        epoch_loss = running_loss / samples_seen
        history.append(epoch_loss)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss}")

    return history

# Run training with the default number of epochs
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
)