In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

from torchvision import datasets, transforms
from torchvision.datasets import VisionDataset
from torch.utils.data import DataLoader, random_split

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

ModuleNotFoundError: No module named 'sklearn'

In [15]:
pip install sklearn


Collecting sklearn
  Downloading sklearn-0.0.post12.tar.gz (2.6 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  × Getting requirements to build wheel did not run successfully.
  │ exit code: 1
  ╰─> [15 lines of output]
      The 'sklearn' PyPI package is deprecated, use 'scikit-learn'
      rather than 'sklearn' for pip commands.
      
      Here is how to fix this error in the main use cases:
      - use 'pip install scikit-learn' rather than 'pip install sklearn'
      - replace 'sklearn' by 'scikit-learn' in your pip requirements files
        (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)
      - if the 'sklearn' package is used by one of your dependencies,
        it would be great if you take some time to track which package uses
        'sklearn' instead of 'scikit-learn' and report it to their issue tracker
      - as a last resort, set the environment variable
        SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error
      
      More information is available at
      https://github.com/scikit-learn/sklearn-

(a)

In [6]:
# Forward slashes resolve on Windows, Linux and macOS alike, and avoid
# backslash sequences such as '\T' that Python parses as (invalid) escapes.
train_dataset = datasets.ImageFolder('tumor/Training')
test_dataset = datasets.ImageFolder('tumor/Testing')

# Hold out 20% of the training images for validation. The seeded generator
# makes the split reproducible across kernel restarts.
valid_size = int(0.2 * len(train_dataset))
train_size = len(train_dataset) - valid_size
train_dataset_split, valid_dataset_split = random_split(
    train_dataset,
    [train_size, valid_size],
    generator=torch.Generator().manual_seed(42),
)

(b)

In [8]:
# Steps shared by both pipelines: replicate the single grey channel to three
# channels, then convert to a tensor and normalise with the channel statistics
# conventionally paired with ImageNet-pretrained torchvision models.
_grayscale_to_3ch = transforms.Grayscale(num_output_channels=3)
_tensor_and_normalize = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Training pipeline: random crop/flip/rotation/affine augmentation at 224x224.
train_transforms = transforms.Compose(
    [
        _grayscale_to_3ch,
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05), scale=(0.95, 1.05)),
        *_tensor_and_normalize,
    ]
)

# Validation/test pipeline: deterministic resize followed by a centre crop.
valid_test_transforms = transforms.Compose(
    [
        _grayscale_to_3ch,
        transforms.Resize(256),
        transforms.CenterCrop(224),
        *_tensor_and_normalize,
    ]
)

In [9]:
class TransformWrapper(VisionDataset):
    """Dataset view that applies a transform to another dataset's samples.

    Lets several views of the same underlying data (for example the two
    halves produced by ``random_split``) each use their own transform.

    Args:
        dataset: Any indexable dataset yielding ``(sample, target)`` pairs.
        transform: Optional callable applied to each sample; the target is
            returned untouched.
    """

    def __init__(self, dataset, transform=None):
        # No directory is read here, so ``root`` is an empty placeholder; the
        # base class is handed ``transform`` and it is read back later as
        # ``self.transform``.
        super().__init__(root='', transform=transform)
        self.dataset = dataset

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(sample, target)`` at ``index``, transforming the sample if configured."""
        sample, target = self.dataset[index]
        if not self.transform:
            return sample, target
        return self.transform(sample), target


train_dataset = TransformWrapper(train_dataset_split, transform=train_transforms)
valid_dataset = TransformWrapper(valid_dataset_split, transform=valid_test_transforms)
# NOTE: this rebinds ``test_dataset`` from the raw ImageFolder to its wrapped
# version, so the cell is not idempotent: re-run the dataset-loading cell
# before re-running this one, otherwise the test set is wrapped twice.
test_dataset = TransformWrapper(test_dataset, transform=valid_test_transforms)

BATCH_SIZE = 32
# Worker processes must pickle the dataset. TransformWrapper is defined inside
# the notebook, so spawned workers (the default start method on Windows) cannot
# import it and DataLoader iteration fails or hangs. Load in the main process;
# raise this only after moving TransformWrapper into an importable .py module.
NUM_WORKERS = 0

# Only the training loader shuffles; validation/test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

(c)

In [12]:
# Start from a ResNet-50 with pretrained weights.
model = models.resnet50(pretrained=True)

# Freeze every existing parameter so the pretrained backbone is not updated.
for backbone_param in model.parameters():
    backbone_param.requires_grad_(False)

# Swap the final fully connected layer for a fresh 2-output head. It is
# created after the freeze, so its parameters remain trainable.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=2)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Bingyan/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100.0%


In [None]:
# Cross-entropy loss over the class logits produced by the model.
criterion = nn.CrossEntropyLoss()
# Adam is given only the parameters of ``model.fc``, so optimisation steps
# touch the classification head and nothing else.
optimizer = optim.Adam(params=model.fc.parameters(), lr=1e-3)

In [None]:
def calculate_metrics(outputs, labels):
    """Score a set of predictions against their ground-truth labels.

    Args:
        outputs: Tensor of per-class scores; the predicted class is the
            arg-max along dimension 1.
        labels: Tensor of integer class labels aligned with ``outputs``.

    Returns:
        Tuple ``(precision, recall, f1, accuracy)``. Precision, recall and F1
        use ``average='binary'``, i.e. they describe scikit-learn's default
        positive label (1) and require a two-class problem.
    """
    y_pred = torch.max(outputs, 1)[1].cpu().numpy()
    y_true = labels.cpu().numpy()
    binary_scores = [
        scorer(y_true, y_pred, average='binary')
        for scorer in (precision_score, recall_score, f1_score)
    ]
    return (*binary_scores, accuracy_score(y_true, y_pred))

num_epochs = 10

for epoch in range(num_epochs):
    # ---- Training pass -------------------------------------------------
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch average is per-sample even when the last batch is short.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)

    # ---- Validation pass -----------------------------------------------
    model.eval()
    val_running_loss = 0.0
    val_outputs, val_labels = [], []
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * inputs.size(0)
            val_outputs.append(outputs.cpu())
            val_labels.append(labels.cpu())

    val_loss = val_running_loss / len(valid_loader.dataset)

    # Score the whole validation set in one call. Averaging per-batch
    # precision/recall/F1 is not equal to the dataset-level metric and is
    # distorted by batches that contain no positive predictions.
    precision, recall, f1, accuracy = calculate_metrics(
        torch.cat(val_outputs), torch.cat(val_labels)
    )

    # Adjacent f-string literals are concatenated; a single-quoted string
    # literal cannot itself span several source lines.
    print(
        f'Epoch {epoch+1}/{num_epochs}, '
        f'Loss: {epoch_loss:.4f}, '
        f'Val Loss: {val_loss:.4f}, '
        f'Precision: {precision:.4f}, '
        f'Recall: {recall:.4f}, '
        f'F1: {f1:.4f}, '
        f'Accuracy: {accuracy:.4f}'
    )

(d)

In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def evaluate_model(model, test_loader, criterion, device):
    """Run ``model`` over ``test_loader`` and summarise its performance.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_loader: Iterable of ``(inputs, labels)`` batches whose
            ``.dataset`` length is used to average the loss.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(test_loss, precision, recall, f1, accuracy, all_preds,
        all_labels)``. ``test_loss`` is the per-sample average loss;
        precision/recall/F1 use ``average='binary'``; ``all_preds`` and
        ``all_labels`` are flat lists of predicted and true class indices.
    """
    model.eval()
    loss_sum = 0.0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            batch_loss = criterion(logits, batch_labels)
            # Weight the batch-mean loss by the batch size for a per-sample average.
            loss_sum += batch_loss.item() * batch_inputs.size(0)

            batch_preds = torch.max(logits, 1)[1]
            all_preds.extend(batch_preds.cpu().numpy())
            all_labels.extend(batch_labels.cpu().numpy())

    test_loss = loss_sum / len(test_loader.dataset)

    precision, recall, f1 = (
        scorer(all_labels, all_preds, average='binary')
        for scorer in (precision_score, recall_score, f1_score)
    )
    accuracy = accuracy_score(all_labels, all_preds)

    return test_loss, precision, recall, f1, accuracy, all_preds, all_labels

# Evaluate once on the held-out test set and keep the raw predictions/labels
# for the confusion matrix.
test_loss, precision, recall, f1, accuracy, all_preds, all_labels = evaluate_model(
    model, test_loader, criterion, device
)

# Adjacent f-string literals are concatenated; a single-quoted string literal
# cannot itself span several source lines.
print(
    f'Test Loss: {test_loss:.4f}, '
    f'Precision: {precision:.4f}, '
    f'Recall: {recall:.4f}, '
    f'F1: {f1:.4f}, '
    f'Accuracy: {accuracy:.4f}'
)

In [None]:
# Recover the class names from the underlying folder dataset instead of
# hard-coding them. ImageFolder numbers classes by sorted folder name, so a
# hand-written label list can silently end up in the wrong order.
base_dataset = test_loader.dataset
while not hasattr(base_dataset, 'classes') and hasattr(base_dataset, 'dataset'):
    base_dataset = base_dataset.dataset  # peel off wrappers
class_names = list(base_dataset.classes)

# Pin the label order so row/column i always corresponds to class index i,
# even if some class never appears in the predictions.
conf_matrix = confusion_matrix(
    all_labels, all_preds, labels=list(range(len(class_names)))
)

fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')

plt.show()