# Trabalho 01 - SCC0270 Redes Neurais

Este notebook contém a implementação do Fine-tuning de redes neurais para a classificação das imagens da base MedPix-2.0

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
import json 
from PIL import Image
import os 
from sklearn.metrics import classification_report

In [5]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
class MedicalImageDataset(Dataset):
    def __init__(self, json_path, images_dir, transform=None):
        self.images_dir = images_dir
        self.transform = transform

        # Load JSON or JSONL
        if json_path.endswith('.jsonl'):
            with open(json_path, 'r') as f:
                self.data = [json.loads(line) for line in f]
        else:
            with open(json_path, 'r') as f:
                self.data = json.load(f)

        # Build a list of (image_path, label) pairs
        self.samples = []
        for entry in self.data:
            image_file = entry['image'] + '.png' 
            label_str = entry['Type'].strip().upper()
            if label_str in ['MRI', 'CT']:
                label = 0 if label_str == 'MRI' else 1
                full_path = os.path.join(images_dir, image_file)
                if os.path.isfile(full_path):  # Ensure the file exists
                    self.samples.append((full_path, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        image_path, label = self.samples[idx]
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label


In [17]:
base_path = './MedPix-2.0'
images_dir = os.path.join(base_path, 'images')
train_json = os.path.join(base_path, 'splitted_dataset/descriptions_train.jsonl')
test_json  = os.path.join(base_path, 'splitted_dataset/descriptions_test.jsonl')
dev_json  = os.path.join(base_path, 'splitted_dataset/descriptions_dev.jsonl')

In [22]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [23]:
test_dataset = MedicalImageDataset(test_json, images_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False) 

dev_dataset = MedicalImageDataset(dev_json, images_dir, transform=transform) 
dev_loader = DataLoader(dev_dataset, batch_size=32, shuffle=False)


In [25]:
# Carrega a arquitetura da ResNet50
model = models.resnet50(weights=None)  # Não precisa dos pesos padrão agora

# Ajusta a última camada (precisa ser igual ao usado no treino!)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)  # 2 classes: MR e CT

# Carrega pesos treinados
model.load_state_dict(torch.load('./model/resnet50_mri_ct_02.pth', map_location=device))
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [26]:
def evaluate_model(model, dataloader, device):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    # Gera relatório
    report = classification_report(y_true, y_pred)
    print(report)


In [27]:
evaluate_model(model, test_loader, device)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.99      0.99       100

    accuracy                           0.99       100
   macro avg       0.50      0.49      0.50       100
weighted avg       1.00      0.99      0.99       100



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Vemos que com uma ResNet50, com o fine-tuning nas 3 últimas camadas, a rede aprendeu a classificar apenas as imagens da Classe 1 (CT), com uma precisão de 100%. 

Como é um problema de classificação binária, temos um classificador útil, porém, não temos precisão alguma na identificação das imagens do tipo MR.