## Cargar el dataset
Cargar el dataset que son imagenes que están dentro de carpetas separadas por test y train y cada una separada por maligno y benigno.

Crearemos un dataset en el que por cada fila se incluye el path y si es benigno o maligno. Crearemos uno para el train y otro para el test.

### Crear el dataframe de train

In [354]:
import os
import pandas as pd

# Train
def train_df(train_path):
    classes = [] # Benignas o malignas
    class_paths = [] # Paths de las imagenes
    image_names = [] # Nombre de las imagenes

    files = os.listdir(train_path) # Archivos
    for file in files: 
        label_dir = os.path.join(train_path, file) # Path del directorio
        label = os.listdir(label_dir) # Imagenes del directorio
        for image in label:
            image_names.append(image) 
            class_paths.append(label_dir) 
            classes.append(file) # Añade el nombre del archivo

    # Series de pandas
    class_paths = pd.Series(class_paths, name='Class Path')
    image_names = pd.Series(image_names, name='Image name')
    image_classes = pd.Series(classes, name='Class') 

    # Crea el dataframe
    tr_df = pd.concat([class_paths, image_names, image_classes], axis=1)
    return tr_df

tr_df = train_df('./skin-cancer-dataset/train')
print(len(tr_df))

4001


### Crear el dataframe de test

In [355]:
# Test
def test_df(test_path):
    classes = [] # Benignas o malignas
    class_paths = [] # Ruta a las imagenes
    image_names = [] # Nombre de las imagenes

    files = os.listdir(test_path) # Archivos
    for file in files: 
        label_dir = os.path.join(test_path, file) # Path del directorio
        label = os.listdir(label_dir) # Imagenes dentro del directorio
        for image in label:
            image_names.append(image) 
            class_paths.append(label_dir) 
            classes.append(file) # Añade el nombre del archivo

    # Series de pandas
    class_paths = pd.Series(class_paths, name='Class Path')
    image_names = pd.Series(image_names, name='Image name')
    image_classes = pd.Series(classes, name='Class') 

    # Crea el dataframe
    ts_df = pd.concat([class_paths, image_names, image_classes], axis=1)
    return ts_df

ts_df = test_df('./skin-cancer-dataset/test')
print(len(ts_df))

2000


### Normalizar los datos entre 0 y 1

In [356]:
tr_df['Class'].replace({'Benign': 0, 'Malignant': 1}, inplace=True)
ts_df['Class'].replace({'Benign': 0, 'Malignant': 1}, inplace=True) 
print(tr_df, ts_df)

                                 Class Path Image name  Class
0     ./skin-cancer-dataset/train/Malignant     63.jpg      1
1     ./skin-cancer-dataset/train/Malignant    823.jpg      1
2     ./skin-cancer-dataset/train/Malignant   1409.jpg      1
3     ./skin-cancer-dataset/train/Malignant    189.jpg      1
4     ./skin-cancer-dataset/train/Malignant     77.jpg      1
...                                     ...        ...    ...
3996     ./skin-cancer-dataset/train/Benign    190.jpg      0
3997     ./skin-cancer-dataset/train/Benign   1404.jpg      0
3998     ./skin-cancer-dataset/train/Benign   1410.jpg      0
3999     ./skin-cancer-dataset/train/Benign    184.jpg      0
4000     ./skin-cancer-dataset/train/Benign   1376.jpg      0

[4001 rows x 3 columns]                                 Class Path Image name  Class
0     ./skin-cancer-dataset/test/Malignant   6400.jpg      1
1     ./skin-cancer-dataset/test/Malignant   6366.jpg      1
2     ./skin-cancer-dataset/test/Malignant   637

### Diferenciar los datos
#### Partir las X Y
Indicar al Dataframe train y test cuales son los datos y cuales son los resultados

In [357]:
# Train
trainX = tr_df.values[:, :-1]
trainY = tr_df.values[:, -1]

# Test
testX = ts_df.values[:, :-1]
testY = ts_df.values[:, -1]

### Clase Dataset
Read image te lo convierte en tensor

In [358]:
from torch.utils.data import Dataset
from pathlib import Path
from torchvision.io import read_image

class myDataset(Dataset):
    def __init__(self, X, Y):
        self.image_path = X[:, 0]
        self.image_name = X[:, 1]
        self.Y = Y
        
    def __len__(self):
        return len(self.Y)

    def __getitem__(self, idx):
        return read_image(str(Path(self.image_path[idx]) / self.image_name[idx])), self.Y[idx] # Lo devuelves directamente como una imagen

#### Crear el propi dataset
Passar al train_dataloader i test_dataloader un objecte dataset, nosaltres hem de crear aquet dataset extenent de la clase Dataset.

In [359]:
from torch.utils.data import DataLoader

train_dataset = myDataset(trainX, trainY)
test_dataset = myDataset(testX, testY)

train_dataloader = DataLoader(train_dataset, batch_size=64)
test_dataloader = DataLoader(test_dataset, batch_size=64)

### Crear la red neuronal
1. Crear el dispositivo
2. Definir la clase *Module* con la función forward
3. Crear el modelo y pasarlo a la GPU

In [360]:
from torch import nn

# Ya están implementadas las clases de las capas para hacer el forward
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Conv2d(1, 30, (5, 5)), # 30 imatges de (222x242)*3
            nn.MaxPool2d((3, 3)), # 30 imatges de 74x80

            nn.Conv2d(30, 120, (5, 5)), # 120 imatges de 72x78
            nn.MaxPool2d((3, 3)), # 120 imatges de 14x16

            nn.Conv2d(120, 250, (5, 5)), # 250 imatges de 12x14
            nn.MaxPool2d((3, 3)), # 250 imatges de 4x4
            
            nn.Flatten(),

            nn.Linear(250*4*4, 1000), 
            nn.ReLU(),
            nn.Linear(1000, 100),
            nn.ReLU(),
            nn.Linear(100, 2),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x) # Crea las capas
        return logits
    
# Device
#if torch.backends.mps.is_available():
    #device = "mps"
#else:
device = "cpu"

model = NeuralNetwork().to(device) # Otiene los valores predichos

### Train y test
Definimos las funciones para train y test.

In [361]:
import torch

batch_size=64

def train_loop(train_dataloader, model, loss_fn, optimizer):
    size = len(train_dataloader.dataset)

    for batch, (X, Y) in enumerate(train_dataloader):
        X = X.to(device)
        Y = nn.functional.one_hot(Y, num_classes=2) # One hot
        Y = Y.to(device).to(int)
        
        pred = model(X) # Forward, ya ha calculado todos los gradientes
        loss = loss_fn(pred, Y) # Crear la función de costo: error

        loss.backward() # Le pasa el error al gradiente
        optimizer.step() # Actualiza los valores
        optimizer.zero_grad() # Pone el gradiente a 0

        if batch % 100 == 0:
            loss, current = loss.item(), batch * batch_size + len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(test_dataloader, model, loss_fn):
    size = len(test_dataloader.dataset)
    num_batches = len(test_dataloader)
    
    test_loss, correct = 0, 0

    # No calcula el gradiente automaticamente
    with torch.no_grad():
        for X, Y in test_dataloader:
            X = X.to(device)
            Y = Y.to(device)
            
            pred = model(X) # Forward
            test_loss += loss_fn(pred, Y).item() # Error
            correct += (pred.argmax(1) == Y).type(torch.float).sum().item() # Accuracy

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

### Llamar a train y test
En cada epoca hacer un train y un test

In [362]:
learning_rate = 0.01
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


AttributeError: '_OpNamespace' 'image' object has no attribute 'read_file'

### Ejemplo de error

In [None]:
import torch 
import torchvision 
from torchvision.io import read_image 
import torchvision.transforms as T 

# read the png image 
pic = read_image('/Users/cynthia/Developer/Git/Formación/Dev Git ITIC/Artificial Inteligence/Neural Network/Skin-cancer/skin-cancer-dataset/test/Benign/6299.jpg') 
  
# convert this torch tensor to PIL image  
PIL_img = T.ToPILImage()(pic) 
  
# display result 
PIL_img.show()

AttributeError: '_OpNamespace' 'image' object has no attribute 'read_file'