## Imports

In [134]:
import warnings
import random
import pandas as pd
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.functional as transform
from torchvision.io import read_image
from sklearn.metrics import confusion_matrix

## Preparar los datos
### Dataframes
1. El dataset consiste en 10 000 imagenes que se ordenan en carpetas train/test y benign/malign dentro de estas.
2. Tenemos 1 000 imagenes para test y 9 000 para train.
3. Para crear el Dataframe utilizamos pandas. Incluimos en este la url de las imagenes y si es maligno o benigno.

In [135]:
warnings.filterwarnings("ignore")

# Train
def train_df(train_path):
    classes = [] # Benignas o malignas
    class_paths = [] # Paths de las imagenes

    # Obtener el contenido de la carpeta Train (Benigna y Maligna)
    files = os.listdir(train_path)
    for file in files: 
        label_dir = os.path.join(train_path, file)
        label = os.listdir(label_dir)

        # Obtener el contenido de la carpeta Benigna y Maligna (Imagenes)
        for image in label:
            if not image.startswith('.'): # Descartar ficheros ocultos
                class_paths.append(label_dir + '/' + image) 
                classes.append(file) 

    # Crear el dataframe
    class_paths = pd.Series(class_paths, name='Class Path')
    image_classes = pd.Series(classes, name='Class') 
    tr_df = pd.concat([class_paths, image_classes], axis=1)
    
    return tr_df

tr_df = train_df('./melanoma_cancer_dataset/train')

# Test
def test_df(test_path):
    classes = [] # Benignas o malignas
    class_paths = [] # Ruta a las imagenes

    files = os.listdir(test_path) # Archivos
    for file in files: 
        label_dir = os.path.join(test_path, file) # Path del directorio
        label = os.listdir(label_dir) # Imagenes dentro del directorio
        for image in label:
            if not image.startswith('.'):
                class_paths.append(label_dir + '/' + image) 
                classes.append(file) # Añade el nombre del archivo

    # Series de pandas
    class_paths = pd.Series(class_paths, name='Class Path')
    image_classes = pd.Series(classes, name='Class') 

    # Crea el dataframe
    ts_df = pd.concat([class_paths, image_classes], axis=1)
    return ts_df

ts_df = test_df('./melanoma_cancer_dataset/test')

#### Normalización
Normalizar los datos Benigno y Maligno 0 o 1.

In [136]:
tr_df['Class'].replace({'benign': 0, 'malignant': 1}, inplace=True)
ts_df['Class'].replace({'benign': 0, 'malignant': 1}, inplace=True) 

print(f'Train dataframe: \n {tr_df} \n')
print(f'Test dataframe: \n {ts_df} \n')

Train dataframe: 
                                              Class Path  Class
0     ./melanoma_cancer_dataset/train\benign/melanom...      0
1     ./melanoma_cancer_dataset/train\benign/melanom...      0
2     ./melanoma_cancer_dataset/train\benign/melanom...      0
3     ./melanoma_cancer_dataset/train\benign/melanom...      0
4     ./melanoma_cancer_dataset/train\benign/melanom...      0
...                                                 ...    ...
9600  ./melanoma_cancer_dataset/train\malignant/mela...      1
9601  ./melanoma_cancer_dataset/train\malignant/mela...      1
9602  ./melanoma_cancer_dataset/train\malignant/mela...      1
9603  ./melanoma_cancer_dataset/train\malignant/mela...      1
9604  ./melanoma_cancer_dataset/train\malignant/mela...      1

[9605 rows x 2 columns] 

Test dataframe: 
                                             Class Path  Class
0    ./melanoma_cancer_dataset/test\benign/melanoma...      0
1    ./melanoma_cancer_dataset/test\benign/melanoma...  

#### Diferenciar los datos
Indicar al Dataframe train y test cuales son los datos X y cuales son los resultados Y.

In [146]:
# Train
trainX = tr_df.values[:, :-1]
trainY = tr_df.values[:, -1]

# Test
testX = ts_df.values[:, :-1]
testY = ts_df.values[:, -1]

Convertir las Y a tensor.

In [147]:
trainY = torch.tensor(trainY.astype(int))
#trainY = trainY.type(torch.LongTensor)

testY = torch.tensor(testY.astype(int))
#testY = testY.type(torch.LongTensor)

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

### Dataset
Al obtener el dataset devolverá la imagen como tensor X y los resultados Y.
- Los paths se convierten a imagen tensor con read_image 
- Se normalizan los datos de los píxeles de 0-255 a 0-1

In [139]:
class myDataset(Dataset):
    def __init__(self, X, Y):
        self.image_path = X[:, 0]
        self.Y = Y
        
    def __len__(self):
        return len(self.Y)*4

    def __getitem__(self, idx):
        image = read_image(str(self.image_path[int(idx/4)]))/255.0 # Convertir a tensor y normalizar
        return self.__rotate__(image, idx%4), self.Y[int(idx/4)] # Lo devuelves directamente como una imagen
    
    def __rotate__(self, PIL_image, quarter):
        return transform.rotate(PIL_image, 90*quarter) # Girar imagen
    
train_dataset = myDataset(trainX, trainY)
test_dataset = myDataset(testX, testY)

### Dataloader
Objeto que utilizaremos en el train loop y test loop

In [140]:
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

## Red neuronal
1. Crear el dispositivo
2. Definir la clase *Module* con la función forward
    - Convulacional (como es en color, cada imagen se multiplica x 3)
    - Capas normales 
3. Crear el modelo y pasarlo a la GPU

In [141]:
# Ya están implementadas las clases de las capas para hacer el forward
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            # 3 imatges de 224 x 224
            nn.Conv2d(3, 96, (11, 11), stride=4), # Entrada, salida, filtro, filtro cada... 53x53x96 mida - filtre /stride
            nn.MaxPool2d((3, 3)), # 17x17x96

            nn.Conv2d(96, 256, (3, 3), padding=4), #15x15x256
            nn.MaxPool2d((3, 3), stride=2), # 5x5x256

            nn.Conv2d(256, 256, (2, 2)), # 
            nn.MaxPool2d((3, 3), stride=2),
            
            nn.Flatten(),

            nn.Dropout2d(p=0.5, inplace=True), # Para que no haya sobreentrenamiento

            nn.Linear(9216, 3000), 
            nn.ReLU(),
            nn.Linear(3000, 300),
            nn.ReLU(),
            nn.Linear(300, 30),
            nn.ReLU(),
            nn.Linear(30, 2),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x) # Crea las capas
        return logits
    
# Device
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

model = NeuralNetwork().to(device) # Otiene los valores predichos

### Train y test
Definimos las funciones para train y test.

In [144]:
batch_size=64

def train_loop(train_dataloader, model, loss_fn, optimizer):
    for batch, (X, Y) in enumerate(train_dataloader):
        X = X.to(device) 
        Y = Y.to(device) 
        
        pred = model(X) # Forward, ya ha calculado todos los gradientes
        loss = loss_fn(pred, Y) # Crear la función de costo: error

        loss.backward() # Le pasa el error al gradiente
        optimizer.step() # Actualiza los valores
        optimizer.zero_grad() # Pone el gradiente a 0

def test_loop(test_dataloader, model, loss_fn):
    size = len(test_dataloader.dataset)
    num_batches = len(test_dataloader)
    
    test_loss, correct = 0, 0

    # No calcula el gradiente automaticamente
    with torch.no_grad():
        for X, Y in test_dataloader:
            
            X = X.to(device)
            Y = Y.to(device)
            
            pred = model(X) # Forward
            test_loss += loss_fn(pred, Y).item() # Error
            predicted_labels = pred.argmax(1) 
            correct += (predicted_labels == Y).type(torch.float).sum().item() # Accuracy
            

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    # Confusion matrix
    Y = Y.cpu().numpy() # Tensors to arrays
    predicted_labels = predicted_labels.cpu().numpy()
    cm = confusion_matrix(Y, predicted_labels) # Confusion matrix
    print(f"Confusion Matrix:\n{cm}") # TP FP, TN FN

    TP = cm[0, 0]  # True positive
    TN = cm[1, 1]  # True negative
    FP = cm[0, 1]  # False positive
    FN = cm[1, 0]  # False negative
    specificity = (TN / (TN + FP))*100
    sensitivity = (TP / (TP + FN))*100
    print(f"Sensitivity: {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}\n")


print ()

Llamar a train y test

In [143]:
learning_rate = 0.0001
loss_fn = nn.CrossEntropyLoss()
model = NeuralNetwork().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

torch.save(model.state_dict(), f'./models/model{random.randint(0,10000)}.pth')

Epoch 1
-------------------------------


KeyboardInterrupt: 