# Trabalho 01 - SCC0270 Redes Neurais

Este notebook contém a implementação do Fine-tuning de redes neurais para a classificação das imagens da base MedPix-2.0

In [46]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
import json 
from PIL import Image
import os 
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

In [47]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [49]:
labels = [
  'Chest, Pulmonary',
  'Genitourinary',
  'Head and Neck',
  'Cardiovascular',
  'Brain and Neuro',
  'Abdomen',
  'Spine',
  'Eye and Orbit',
  'Gastrointestinal',
  'Vascular',
  'Endocrine',
  'Musculoskeletal',
  'Pathology',
  'Generalized',
  'Hematopoietic',
  'Dental, Oral, or Tooth',
  'Nerve, central',
  'Breast and Mammography',
  'Bethesda, MD',
  'Ophthalmology',
  'Nerve, peripheral'
]
labels = sorted(labels) 

le = LabelEncoder()
le.fit(labels) 

print("Label Encoder Classes:", le.classes_) 
labels_encoded = le.transform(labels)
print("\nLabels Encoded:", labels_encoded)

encoder_map = {label: label_encoded for label, label_encoded in zip(labels, labels_encoded)}
print("\nEncoder Map:", encoder_map)

Label Encoder Classes: ['Abdomen' 'Bethesda, MD' 'Brain and Neuro' 'Breast and Mammography'
 'Cardiovascular' 'Chest, Pulmonary' 'Dental, Oral, or Tooth' 'Endocrine'
 'Eye and Orbit' 'Gastrointestinal' 'Generalized' 'Genitourinary'
 'Head and Neck' 'Hematopoietic' 'Musculoskeletal' 'Nerve, central'
 'Nerve, peripheral' 'Ophthalmology' 'Pathology' 'Spine' 'Vascular']

Labels Encoded: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]

Encoder Map: {'Abdomen': np.int64(0), 'Bethesda, MD': np.int64(1), 'Brain and Neuro': np.int64(2), 'Breast and Mammography': np.int64(3), 'Cardiovascular': np.int64(4), 'Chest, Pulmonary': np.int64(5), 'Dental, Oral, or Tooth': np.int64(6), 'Endocrine': np.int64(7), 'Eye and Orbit': np.int64(8), 'Gastrointestinal': np.int64(9), 'Generalized': np.int64(10), 'Genitourinary': np.int64(11), 'Head and Neck': np.int64(12), 'Hematopoietic': np.int64(13), 'Musculoskeletal': np.int64(14), 'Nerve, central': np.int64(15), 'Nerve, peripheral': np.int64(1

In [50]:
class MedicalImageDatasetModality(Dataset):
    def __init__(self, json_path, images_dir, transform=None):
        self.images_dir = images_dir
        self.transform = transform

        # Load JSON or JSONL
        if json_path.endswith('.jsonl'):
            with open(json_path, 'r') as f:
                self.data = [json.loads(line) for line in f]
        else:
            with open(json_path, 'r') as f:
                self.data = json.load(f)

        # Build a list of (image_path, label) pairs
        self.samples = []
        for entry in self.data:
            image_file = entry['image'] + '.png' 
            label_str = entry['Type'].strip().upper()
            if label_str in ['MR', 'CT']:
                label = 0 if label_str == 'MR' else 1
                full_path = os.path.join(images_dir, image_file)
                if os.path.isfile(full_path):  # Ensure the file exists
                    self.samples.append((full_path, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        image_path, label = self.samples[idx]
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label 
    

class MedicalImageDatasetLocation(Dataset):
        def __init__(self, json_path, images_dir, transform=None):
            self.images_dir = images_dir
            self.transform = transform

            # Load JSON
            with open(json_path, 'r') as f:
                self.data = [json.loads(line) for line in f]

            self.samples = []
            for entry in self.data:
                # print(entry['Location'])
                image_file = entry['image'] + '.png'  # assumes .jpg extension
                label_str = entry['Location']
                label_encoded = encoder_map.get(label_str, None)
                if label_encoded is None:
                    print(f"Warning: Label '{label_str}' not found in encoder map.")
                    continue
                # print("Label encoded", label_encoded)
                full_path = os.path.join(images_dir, image_file)
                if os.path.isfile(full_path):
                    self.samples.append((full_path, label_encoded))

        def __len__(self):
            return len(self.samples)

        def __getitem__(self, idx):
            image_path, label_encoded = self.samples[idx]
            image = Image.open(image_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label_encoded

In [51]:
base_path = './MedPix-2.0'
images_dir = os.path.join(base_path, 'images')
train_json = os.path.join(base_path, 'splitted_dataset/descriptions_train.jsonl')
test_json  = os.path.join(base_path, 'splitted_dataset/descriptions_test.jsonl')
dev_json  = os.path.join(base_path, 'splitted_dataset/descriptions_dev.jsonl')

In [52]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [53]:
test_dataset_modality = MedicalImageDatasetModality(test_json, images_dir, transform=transform)
test_loader_modality = DataLoader(test_dataset_modality, batch_size=32, shuffle=False) 

dev_dataset_modality = MedicalImageDatasetModality(dev_json, images_dir, transform=transform) 
dev_loader_modality = DataLoader(dev_dataset_modality, batch_size=32, shuffle=False)

test_dataset_location = MedicalImageDatasetLocation(test_json, images_dir, transform=transform)
test_loader_location = DataLoader(test_dataset_location, batch_size=32, shuffle=False) 

In [64]:
import pandas as pd
pd.DataFrame(test_loader_location.dataset.data)['Location'].value_counts()

Location
Brain and Neuro           51
Gastrointestinal          31
Spine                     26
Chest, Pulmonary          24
Musculoskeletal           14
Genitourinary             11
Generalized               10
Head and Neck             10
Eye and Orbit              7
Vascular                   6
Ophthalmology              4
Breast and Mammography     4
Abdomen                    1
Cardiovascular             1
Name: count, dtype: int64

In [40]:
# Carrega a arquitetura da ResNet50
model = models.resnet50(weights=None)  # Não precisa dos pesos padrão agora

# Ajusta a última camada (precisa ser igual ao usado no treino!)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)  # 2 classes: MR e CT

# Carrega pesos treinados
model.load_state_dict(torch.load('./model/resnet50_mri_ct_02.pth', map_location=device))
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [54]:
def evaluate_model(model, dataloader, device):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    # Gera relatório
    report = classification_report(y_true, y_pred)
    print(report)


In [21]:
evaluate_model(model, test_loader_modality, device)

              precision    recall  f1-score   support

           0       0.99      0.98      0.98       100
           1       0.98      0.99      0.99       100

    accuracy                           0.98       200
   macro avg       0.99      0.98      0.98       200
weighted avg       0.99      0.98      0.98       200



Vemos que com uma ResNet50, com o fine-tuning nas 3 últimas camadas, a rede apresentou uma acurácia de 98%, para o problema de classificação de do tipo das imagens.
Um modelo, quase que perfeito. 

Foram treinadas apenas duas epochs.

## Avaliação do Modelo para `Location`

In [55]:
# Carrega a arquitetura da ResNet50
model_location = models.resnet50(weights=None) 

# Ajusta a última camada (precisa ser igual ao usado no treino!)
num_ftrs = model_location.fc.in_features
model_location.fc = nn.Linear(num_ftrs, 21)  # 21 classes

# Carrega pesos treinados
model_location.load_state_dict(torch.load('./model/resnet50_location_02_epochs.pth', map_location=device))
model_location.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [56]:
evaluate_model(model_location, test_loader_location, device)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           2       0.68      0.94      0.79        51
           3       0.00      0.00      0.00         4
           4       0.00      0.00      0.00         1
           5       0.70      0.88      0.78        24
           8       1.00      0.29      0.44         7
           9       0.75      0.29      0.42        31
          10       0.00      0.00      0.00        10
          11       0.18      0.27      0.21        11
          12       0.43      0.30      0.35        10
          14       0.48      0.71      0.57        14
          17       0.00      0.00      0.00         4
          19       0.82      0.69      0.75        26
          20       0.00      0.00      0.00         6

    accuracy                           0.57       200
   macro avg       0.36      0.31      0.31       200
weighted avg       0.58      0.57      0.54       200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Usando uma ResNet50 com apenas 02 epochs e o retreinamento das 3 últimas camadas obtemos uma acurácia de 57%, superior aos 52% reportado pelos autores.
Informamos que observamos uma variância no treinamento dos modelos, devido à inicialização dos pesos.