In [1]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision
from tqdm import tqdm
#from google.colab import drive
import albumentations as A
import torch.nn.functional as F
import math

#drive.mount('/content/drive')


In [2]:
labels = ['PNEUMONIA', 'NORMAL']
img_size = 224

def get_training_data(data_dir):
    data = []

    for label in labels:
        path = os.path.join(data_dir, label)
        class_num = labels.index(label)

        for img in tqdm(os.listdir(path)):
            try:
                # Load and resize the image
                img_arr = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
                resized_arr = cv2.resize(img_arr, (img_size, img_size))  # Resize the image

                # Add the image and label as a pair
                data.append([resized_arr, class_num])
            except Exception as e:
                print(f"Error loading image {img}: {e}")

    # Convert the list to a NumPy array
    data = np.array(data, dtype=object)  # Use dtype=object to allow image-label pairing
    return data

# Load the data
train_data = get_training_data('/Users/giuliasaresini/Documents/tesi_vs/Medicine/Progetto/chest_xray/train')
test_data = get_training_data('/Users/giuliasaresini/Documents/tesi_vs/Medicine/Progetto/chest_xray/test')
val_data = get_training_data('/Users/giuliasaresini/Documents/tesi_vs/Medicine/Progetto/chest_xray/val')

100%|██████████| 3875/3875 [00:07<00:00, 502.77it/s]
100%|██████████| 1341/1341 [00:10<00:00, 131.73it/s]
100%|██████████| 390/390 [00:00<00:00, 585.38it/s]
100%|██████████| 234/234 [00:00<00:00, 246.04it/s]
100%|██████████| 8/8 [00:00<00:00, 587.38it/s]
100%|██████████| 8/8 [00:00<00:00, 230.07it/s]


In [3]:
from sklearn.utils import shuffle

# Function to normalize the images
def normalize_images(data):
    images = []
    labels = []
    
    for img, label in data:
        # Normalization: each pixel is divided by 255
        normalized_img = img / 255.0
        images.append(normalized_img)
        labels.append(label)
    
    # Convert the images and labels into separate arrays
    images = np.array(images)
    labels = np.array(labels)
    
    return images, labels

# Normalize the images in the training dataset
train_images, train_labels = normalize_images(train_data)
val_images, val_labels = normalize_images(val_data)
test_images, test_labels = normalize_images(test_data)

# Shuffle the training and validation data
train_images, train_labels = shuffle(train_images, train_labels, random_state=42)
val_images, val_labels = shuffle(val_images, val_labels, random_state=42)

# Check the shape and an example of the normalized and shuffled data
print(f"Shape of normalized and shuffled train images: {train_images.shape}")
print(f"Shape of normalized and shuffled validation images: {val_images.shape}")

Shape of normalized and shuffled train images: (5216, 224, 224)
Shape of normalized and shuffled validation images: (16, 224, 224)


In [4]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle

# Identify the images of the "Normal" and "Pneumonia" classes
normal_images = train_images[train_labels == 1]  # Only "Normal" images
normal_labels = train_labels[train_labels == 1]  # Corresponding labels
pneumonia_images = train_images[train_labels == 0]  # Only "Pneumonia" images
pneumonia_labels = train_labels[train_labels == 0]  # Corresponding labels

# Add a dimension for the channel (1 for grayscale images)
normal_images = np.expand_dims(normal_images, axis=-1)
pneumonia_images = np.expand_dims(pneumonia_images, axis=-1)

# Determine the number of target images to balance the dataset
target_normal_images_count = pneumonia_images.shape[0]
current_normal_images_count = normal_images.shape[0]
images_to_generate = target_normal_images_count - current_normal_images_count

# Create an ImageDataGenerator for augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Generate augmented images to balance the dataset
augmented_normal_images = []
if images_to_generate > 0:
    augmented_data_gen = datagen.flow(normal_images, normal_labels, batch_size=32, shuffle=False)
    for _ in range(images_to_generate // 32 + 1):
        batch_images, _ = next(augmented_data_gen)
        augmented_normal_images.append(batch_images)

    # Concatenate all the generated images
    augmented_normal_images = np.vstack(augmented_normal_images)[:images_to_generate]
else:
    augmented_normal_images = np.empty((0, *normal_images.shape[1:]))

# Concatenate the original and augmented images for the "Normal" class
balanced_normal_images = np.concatenate([normal_images, augmented_normal_images])
balanced_normal_labels = np.full(balanced_normal_images.shape[0], 1)

# Create the final balanced dataset
augmented_train_images = np.concatenate([pneumonia_images, balanced_normal_images])
augmented_train_labels = np.concatenate([pneumonia_labels, balanced_normal_labels])

# Shuffle the dataset randomly
augmented_train_images, augmented_train_labels = shuffle(augmented_train_images, augmented_train_labels)

In [22]:
class ResNet(nn.Module):
    def __init__(self, num_classes=2, softmax=True):
      super(ResNet, self).__init__()
      self.resnet = torchvision.models.resnet18(pretrained=True)
      num_ftrs = self.resnet.fc.out_features
      self.fc = nn.Linear(num_ftrs, num_classes)
      self.bn = nn.BatchNorm1d(num_ftrs)
      self.relu = nn.ReLU()
      self.softmax = torch.nn.Softmax(dim=1) if softmax else None
      self.change_conv1()

    def forward(self, x):
      x = self.resnet(x)
      x = self.bn(x)
      x = self.relu(x)
      x = self.fc(x)
      if self.softmax:
        x = self.softmax(x)
      return x

    def change_conv1(self):
      original_conv1 = self.resnet.conv1

      #Create a new convolutional layer with 1 input channel instead of 3
      new_conv1 = nn.Conv2d(
        in_channels=1,  # Grayscale has 1 channel
        out_channels=original_conv1.out_channels,
        kernel_size=original_conv1.kernel_size,
        stride=original_conv1.stride,
        padding=original_conv1.padding,
        bias=original_conv1.bias is not None
)

      # Initialize the new conv layer's weights by averaging the RGB weights
      with torch.no_grad():
        new_conv1.weight = nn.Parameter(original_conv1.weight.mean(dim=1, keepdim=True))

        #Replace the original conv1 with the new one
        self.resnet.conv1 = new_conv1

class KANLinear_v2(nn.Module):
    def __init__(self, in_features, out_features, grid_size=10, spline_order=3,
                 scale_noise=0.05, scale_base=1.0, scale_spline=1.0,
                 enable_standalone_scale_spline=True, base_activation=nn.ReLU,
                 grid_eps=0.01, grid_range=[0, 1], dropout_prob=0.2):
        super(KANLinear_v2, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.grid_size = grid_size
        self.spline_order = spline_order

        h = (grid_range[1] - grid_range[0]) / grid_size
        grid = ((torch.arange(-spline_order, grid_size + spline_order + 1) * h
                 + grid_range[0]).expand(in_features, -1).contiguous())
        self.register_buffer("grid", grid)

        self.base_weight = nn.Parameter(torch.Tensor(out_features, in_features))
        self.spline_weight = nn.Parameter(
            torch.Tensor(out_features, in_features, grid_size + spline_order)
        )
        if enable_standalone_scale_spline:
            self.spline_scaler = nn.Parameter(torch.Tensor(out_features, in_features))

        self.scale_noise = scale_noise
        self.scale_base = scale_base
        self.scale_spline = scale_spline
        self.enable_standalone_scale_spline = enable_standalone_scale_spline
        self.base_activation = base_activation()
        self.grid_eps = grid_eps
        self.dropout = nn.Dropout(p=dropout_prob)

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.base_weight, a=math.sqrt(5) * self.scale_base)
        with torch.no_grad():
            noise = ((torch.rand(self.grid_size + 1, self.in_features, self.out_features) - 0.5)
                     * self.scale_noise / self.grid_size)
            self.spline_weight.data.copy_(
                self.scale_spline * self.curve2coeff(self.grid.T[self.spline_order : -self.spline_order], noise)
            )
            if self.enable_standalone_scale_spline:
                nn.init.kaiming_uniform_(self.spline_scaler, a=math.sqrt(5) * self.scale_spline)

    def b_splines(self, x):
        grid = self.grid
        x = x.unsqueeze(-1)
        bases = ((x >= grid[:, :-1]) & (x < grid[:, 1:])).to(x.dtype)
        for k in range(1, self.spline_order + 1):
            bases = ((x - grid[:, :-(k+1)]) / (grid[:, k:-1] - grid[:, :-(k+1)]) * bases[:, :, :-1]
                     + (grid[:, k+1:] - x) / (grid[:, k+1:] - grid[:, 1:-k]) * bases[:, :, 1:])
        return bases.contiguous()

    def curve2coeff(self, x, y):
        A = self.b_splines(x).transpose(0, 1)
        B = y.transpose(0, 1)
        solution = torch.linalg.lstsq(A, B).solution
        return solution.permute(2, 0, 1).contiguous()

    def forward(self, x):
        x = x.view(x.size(0), -1)
        base_output = F.linear(self.base_activation(x), self.base_weight)
        spline_output = F.linear(
            self.b_splines(x).view(x.size(0), -1),
            self.spline_weight.view(self.out_features, -1)
        )
        spline_output = self.dropout(spline_output)  # Dropout per generalizzazione
        return base_output + spline_output

class FKAN_ResNet(nn.Module):
    def __init__(self, num_classes=2, softmax=True):
        super(FKAN_ResNet, self).__init__()
        self.backbone = torchvision.models.resnet18(pretrained=True)
        self.kan_layer1 = KANLinear_v2(256, 128, grid_size=8)
        self.bn1 = nn.BatchNorm1d(128)
        self.kan_layer2 = KANLinear_v2(128, 64, grid_size=8)
        self.bn2 = nn.BatchNorm1d(64)
        self.kan_layer3 = KANLinear_v2(64, num_classes, grid_size=5)
        self.softmax = torch.nn.Softmax(dim=1) if softmax else None
        self.change_conv1()
        self.modify_fc_layer()

    def forward(self, x):
        x = self.backbone(x)
        x = self.kan_layer1(x)
        x = self.bn1(x)
        x = self.kan_layer2(x)
        x = self.bn2(x)
        x = self.kan_layer3(x)
        if self.softmax:
            x = self.softmax(x)
        return x

    def change_conv1(self):
        original_conv1 = self.backbone.conv1
        new_conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=original_conv1.out_channels,
            kernel_size=original_conv1.kernel_size,
            stride=original_conv1.stride,
            padding=original_conv1.padding,
            bias=original_conv1.bias is not None
        )
        with torch.no_grad():
            new_conv1.weight = nn.Parameter(original_conv1.weight.mean(dim=1, keepdim=True))
        self.backbone.conv1 = new_conv1

    def modify_fc_layer(self):
        num_ftrs = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(num_ftrs, 256)

model = FKAN_ResNet(num_classes=2, softmax=True)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)
print(device)


cpu


In [23]:
from torchsummary import summary

summary(model, (1, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           3,136
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [9]:
from torch.utils.data import TensorDataset, DataLoader

# Convert the images and labels to PyTorch tensors

# Apply the transformation to training and validation images
train_images_tensor = torch.stack([torch.tensor(img, dtype=torch.float) for img in augmented_train_images])
val_images_tensor = torch.stack([torch.tensor(img, dtype=torch.float) for img in val_images]).unsqueeze(1)

# Now permute them
train_images_tensor = train_images_tensor.permute(0, 3, 1, 2)  # (N, 1, 244, 244)
val_images_tensor = val_images_tensor.permute(0, 1, 2, 3)      # (N, 1, 244, 244)
print(train_images_tensor.shape, val_images_tensor.shape)

# The tensors are now in the shape (N, 1, 244, 244), where N is the number of images

train_labels_tensor = torch.tensor(augmented_train_labels, dtype=torch.long)
val_labels_tensor = torch.tensor(val_labels, dtype=torch.long)

# Create the dataset and DataLoader
train_dataset = TensorDataset(train_images_tensor, train_labels_tensor)
val_dataset = TensorDataset(val_images_tensor, val_labels_tensor)

# Define the batch size
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=True)
print('Done!')

torch.Size([7750, 1, 224, 224]) torch.Size([16, 1, 224, 224])
Done!


### **Training**

In [19]:
from sklearn.metrics import classification_report
criterion = nn.CrossEntropyLoss()  # For multi-class or binary classification
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)  # AdamW with L2 regularization

# Now the data is ready for training and validation

# Function to calculate relevant metrics

# Training function with Early Stopping
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=100, patience=10):
    patience_counter = 0
    best_validation_score = 0
    for epoch in range(num_epochs):
        model.train()
        p_bar = tqdm(train_loader)
        running_loss = 0

        for i, (images, labels) in enumerate(p_bar):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            p_bar.set_description(f"Epoch {epoch+1}/{num_epochs} - Loss: {running_loss / (i + 1)}")


        if (epoch + 1) % 2 == 0:
            model.eval()
            p_bar = tqdm(val_loader)
            all_preds = []
            all_labels = []
            with torch.no_grad():
                for i, (images, labels) in enumerate(p_bar):
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    _, preds = torch.max(outputs, 1)
                    all_preds.extend(preds.cpu().numpy())
                    all_labels.extend(labels.cpu().numpy())
                    p_bar.set_description(f'Epoch {epoch+1}/{num_epochs} - Validation Batch: {i}')

            class_report = classification_report(all_labels, all_preds, target_names=['Pneumonia', 'Normal'], output_dict=True)
            validation_accuracy = class_report['accuracy']
            validation_f1_score = class_report['weighted avg']['f1-score']

            print(f"Epoch {epoch+1}/{num_epochs} - Validation Accuracy: {validation_accuracy} - Validation F1 Score: {validation_f1_score:.4f}")
            if validation_f1_score > best_validation_score:
                best_validation_score = validation_f1_score
                patience_counter = 0
                torch.save(model.state_dict(), os.path.join('best_model_resnet.pth'))

            else:
                patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs.")
            break

# Start training
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=100, patience=10)

Epoch 1/100 - Loss: 0.34874547703945935: 100%|██████████| 484/484 [29:48<00:00,  3.70s/it]
Epoch 2/100 - Loss: 0.3417824539394418: 100%|██████████| 484/484 [29:09<00:00,  3.62s/it] 
Epoch 2/100 - Validation Batch: 1: 100%|██████████| 2/2 [00:00<00:00,  3.30it/s]


Epoch 2/100 - Validation Accuracy: 0.6875 - Validation F1 Score: 0.6537


Epoch 3/100 - Loss: 0.3526126408860797:  52%|█████▏    | 252/484 [12:19:27<11:20:46, 176.06s/it] 


KeyboardInterrupt: 

### **Testing**

In [28]:
state_dict = torch.load('/content/drive/MyDrive/model_results/best_model_resnet.pth')
model.load_state_dict(state_dict)

  state_dict = torch.load('/content/drive/MyDrive/model_results/best_model_resnet.pth')


<All keys matched successfully>

In [8]:
test_images_tensor = torch.stack([torch.tensor(img, dtype=torch.float) for img in test_images]).unsqueeze(1)  # Applying the same transformation as for train/val
test_images_tensor = test_images_tensor.permute(0, 1, 2, 3)
print(test_images_tensor.shape)

test_labels_tensor = torch.tensor(test_labels, dtype=torch.long)  # or torch.float if binary classification

# Create the dataset and DataLoader for the test set
test_dataset = TensorDataset(test_images_tensor, test_labels_tensor)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=True)

all_predictions = []
all_labels = []
for images, labels in test_loader:
  images, labels = images.to(device), labels
  outputs = model(images)
  _, preds = torch.max(outputs, 1)
  all_predictions.extend(preds.cpu().numpy())
  all_labels.extend(labels.numpy())

class_report = classification_report(all_labels, all_predictions, target_names=['Pneumonia', 'Normal'])
print(class_report)



torch.Size([624, 1, 224, 224])
              precision    recall  f1-score   support

   Pneumonia       0.77      1.00      0.87       390
      Normal       0.99      0.51      0.67       234

    accuracy                           0.81       624
   macro avg       0.88      0.75      0.77       624
weighted avg       0.85      0.81      0.80       624

