<a href="https://colab.research.google.com/github/Helenessli/BreakingTheBinary/blob/main/Step_2_Multiclass_Classifier_5epochs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Trim the dataset so only images with one label are kept
import pandas as pd
df = pd.read_csv('/content/drive/MyDrive/Datasets/train_df.csv')
df.head()
len(df)
new_df = df[df['Target'].str.len() <= 3]
print(new_df.head())
print(len(new_df))

from sklearn.model_selection import train_test_split

train, test = train_test_split(new_df, test_size=0.2)
print(len(train), len(test))


                                      SOPInstanceUID Target  MultiImageType  \
0  1.2.826.0.1.3680043.8.498.10025629581362719970...     0              1.0   
1  1.2.826.0.1.3680043.8.498.10036150326276641158...    15              1.0   
2  1.2.826.0.1.3680043.8.498.10038426859954986240...    12              1.0   
3  1.2.826.0.1.3680043.8.498.10050991192143676483...    14              NaN   
4  1.2.826.0.1.3680043.8.498.10053309524595490852...     3              1.0   

  ImageType ImageType1  InstanceCreationDate  InstanceCreationTime  \
0  ORIGINAL    PRIMARY                   NaN                   NaN   
1  ORIGINAL    PRIMARY                   NaN                   NaN   
2  ORIGINAL    PRIMARY                   NaN                   NaN   
3       NaN        NaN                   NaN                   NaN   
4  ORIGINAL    PRIMARY                   NaN                   NaN   

                 SOPClassUID  StudyDate  SeriesDate  ...  WindowWidth  \
0  1.2.840.10008.5.1.4.1.1.1   

In [3]:
#SKIP THIS (only do it once)
train.to_csv('traindf.csv')
test.to_csv('testdf.csv')

In [4]:
# Copy the dataset to Colab
!cp -r '/content/drive/MyDrive/Datasets/images' '/content/'


In [5]:
#basic imports
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torchvision.models.densenet import DenseNet121_Weights
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset

In [6]:
# Obtain the labels of from csv file
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = self.data.iloc[idx, 65]  # Assuming 'image_path' is in the 65th column
        image = Image.open(img_name).convert('RGB')
        label = torch.tensor(self.data.iloc[idx, 2], dtype=torch.long)  # Assuming 'Target' is in the second column

        if self.transform:
            image = self.transform(image)

        return image, label

In [7]:
# cuda availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Transformations for the images
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [8]:
# Load the custom dataset
#csv_file_path = 'train_df.csv'
#custom_dataset = CustomImageDataset(csv_file=csv_file_path, transform=transform)

# Initialize the CustomImageDataset
train_dataset = CustomImageDataset(csv_file='traindf.csv', transform=transform)
test_dataset = CustomImageDataset(csv_file='testdf.csv', transform=transform)

# Accessing elements from the dataset
#image, label = custom_dataset[0]  # Example: Accessing the first element in the dataset

# Create DataLoader instances
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64)

In [9]:
# Pretrained, densenet model for imagenet
densenet_model = models.densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)


# freezing all layers in the model
for param in densenet_model.parameters():
    param.requires_grad = False

# Define the model (using a pre-trained DenseNet)
num_classes = 22  # Number of body parts
densenet_model = models.densenet121(pretrained=True)
num_ftrs = densenet_model.classifier.in_features
densenet_model.classifier = nn.Linear(num_ftrs, num_classes)
densenet_model = densenet_model.to(device)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 184MB/s]


In [10]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(densenet_model.parameters(), lr=0.001, momentum=0.9)

In [12]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    densenet_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = densenet_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Calculate training loss
    epoch_loss = running_loss / len(train_loader)

  # Training accuracy
    densenet_model.eval()
    traincorrect = 0
    traintotal = 0
    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = densenet_model(inputs)
            _, predicted = outputs.max(1)
            traintotal += labels.size(0)
            traincorrect += predicted.eq(labels).sum().item()

    # Validation accuracy
    densenet_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = densenet_model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    # Print stats at the end of each epoch
    print(f"Epoch {epoch + 1}/{num_epochs}, "
          f"Training Loss: {epoch_loss:.4f}, "
          f"Training Accuracy: {100 * traincorrect / traintotal:.2f}%"
          f"Test Accuracy: {100 * correct / total:.2f}%")

Epoch 1/5, Training Loss: 2.2077, Training Accuracy: 58.49%Test Accuracy: 60.25%
Epoch 2/5, Training Loss: 1.5057, Training Accuracy: 71.11%Test Accuracy: 70.50%
Epoch 3/5, Training Loss: 1.0476, Training Accuracy: 80.14%Test Accuracy: 77.33%
Epoch 4/5, Training Loss: 0.7970, Training Accuracy: 85.51%Test Accuracy: 81.06%
Epoch 5/5, Training Loss: 0.6455, Training Accuracy: 89.02%Test Accuracy: 84.16%


In [13]:
# Saving the model
torch.save(densenet_model, '/content/drive/MyDrive/OutputModelBinaries/steptwo_5epochs.pth')