In [59]:
from os import listdir
from tqdm import tqdm
import numpy as np
from PIL import Image
import pandas as pd
import re
import glob
import os
import warnings
from torch.utils.data import Dataset
import torchvision
import cv2
from torchvision import datasets,transforms
from torchvision.transforms import ToTensor
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.io import read_image
import torch
from torch import nn
warnings.filterwarnings("ignore")

In [49]:
file = "archive\Brain Tumor\Brain Tumor"
df = pd.read_csv("archive\Brain Tumor.csv")
df["Image"] = df["Image"].apply(lambda x: x+".jpg")

In [60]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, height, width, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)[["Class"]]
        self.img_name =  pd.read_csv(annotations_file)[["Image"]].apply(lambda x: x+".jpg")
        self.img_name = self.img_name
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.height = height  # Debug
        self.width = width

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_name.iloc[idx, 0])
        image = Image.open(img_path)
        # resize = transforms.Resize([self.width, self.height])  # Debug: resize img to let all images in same size.
        # image = resize(image)
        label = self.img_labels.iloc[idx, 0]  # Debug: 要修改成[idx, 0]才是取數值，不會把欄位名稱一起誤抓        
        # print("label:", label)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [82]:
dataset = CustomImageDataset(annotations_file="archive\Brain Tumor.csv",
                             img_dir="archive\Brain Tumor\Brain Tumor", height = 32, width=32,
                             transform = transforms.Compose([
                                transforms.Resize((32,32)),  # 將圖片從原先大小28x28改成LeNet可以接受的輸入大小32x32
                                transforms.ToTensor(),  # 轉換成tensor並且將像素範圍(range)從[0, 255]改到[0,1]
                                transforms.Normalize(mean = (0.1307,), std = (0.3081,))]))

# pick 1000th img data in dataset
first_data = dataset[1000]
features, labels = first_data
print(features.shape, labels)
print(len(dataset))

torch.Size([3, 32, 32]) 0
3762


In [116]:
val_size, test_size = 0.1, 0.1  # train:val:test=0.8:0.1:0.1
shuffle_dataset = True
random_seed= 42
# Define relevant variables for the ML task
batch_size = 64  # 每個batch有64張圖片
num_classes = 2  # 圖片共分成10種類別
learning_rate = 0.001  # 學習率
num_epochs = 30  #　訓練總共要跑的回合數，一回合(epoch)即將所有訓練數據都掃過一遍

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Create data indices for train & validatin spilt
# Set seed and shuffle, then spilt data to train, validation, test
dataset_size = len(dataset)
indices = list(range(dataset_size))
val_spilt = int(np.floor(val_size * dataset_size))
test_spilt = val_spilt + int(np.floor(test_size * dataset_size))

if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)
    
train_indices, val_indices, test_indices = indices[test_spilt:], indices[:val_spilt], indices[val_spilt:test_spilt]

In [117]:
# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)


train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                           sampler=train_sampler)
val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                sampler=val_sampler)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                           sampler=test_sampler)

In [118]:
#Defining the convolutional neural network
class LeNet5(nn.Module):
    def __init__(self, num_classes):
        super(LeNet5, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.fc = nn.Linear(400, 120)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(120, 84)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(84, num_classes)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.relu(out)
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return out

In [119]:
model = LeNet5(num_classes).to(device)
cost = nn.CrossEntropyLoss()  # 交叉墒損失函數，適用多分類任務的損失函數
#Setting the optimizer with the model parameters and learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # Adam優化器
#this is defined to print how many steps are remaining when training
total_step = len(train_loader)

In [120]:
total_step = len(train_loader)
for epoch in range(num_epochs):  # 總共進行共num_epochs個回合的訓練
    for i, (images, labels) in enumerate(train_loader):  
        images = images.to(device)  # 將tensor移動到GPU或CPU上訓練
        labels = labels.to(device)
        
        # 前向傳播(Forward pass)
        outputs = model(images)
        loss = cost(outputs, labels)
        	
        # 反向傳播(Backward pass) and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        		
        if (i+1) % 48 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/30], Step [48/48], Loss: 0.2751
Epoch [2/30], Step [48/48], Loss: 0.0371
Epoch [3/30], Step [48/48], Loss: 0.4480
Epoch [4/30], Step [48/48], Loss: 0.2909
Epoch [5/30], Step [48/48], Loss: 0.9025
Epoch [6/30], Step [48/48], Loss: 0.0319
Epoch [7/30], Step [48/48], Loss: 0.0054
Epoch [8/30], Step [48/48], Loss: 0.0406
Epoch [9/30], Step [48/48], Loss: 2.4273
Epoch [10/30], Step [48/48], Loss: 0.0385
Epoch [11/30], Step [48/48], Loss: 0.0381
Epoch [12/30], Step [48/48], Loss: 1.2239
Epoch [13/30], Step [48/48], Loss: 0.2187
Epoch [14/30], Step [48/48], Loss: 0.1091
Epoch [15/30], Step [48/48], Loss: 0.0088
Epoch [16/30], Step [48/48], Loss: 0.0001
Epoch [17/30], Step [48/48], Loss: 0.0064
Epoch [18/30], Step [48/48], Loss: 0.0378
Epoch [19/30], Step [48/48], Loss: 0.0004
Epoch [20/30], Step [48/48], Loss: 0.2371
Epoch [21/30], Step [48/48], Loss: 0.0083
Epoch [22/30], Step [48/48], Loss: 0.0102
Epoch [23/30], Step [48/48], Loss: 0.0108
Epoch [24/30], Step [48/48], Loss: 0.0776
E

In [121]:
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)  # 總共預測的圖片張數
        correct += (predicted == labels).sum().item()  # 統計預測正確的圖片張數

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))  # 模型在測試資料的預測準確率(accuracy)
	 

Accuracy of the network on the 10000 test images: 95.47872340425532 %


In [127]:
from torchsummary import summary
summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
       BatchNorm2d-2            [-1, 6, 28, 28]              12
              ReLU-3            [-1, 6, 28, 28]               0
         MaxPool2d-4            [-1, 6, 14, 14]               0
            Conv2d-5           [-1, 16, 10, 10]           2,416
       BatchNorm2d-6           [-1, 16, 10, 10]              32
              ReLU-7           [-1, 16, 10, 10]               0
         MaxPool2d-8             [-1, 16, 5, 5]               0
            Linear-9                  [-1, 120]          48,120
             ReLU-10                  [-1, 120]               0
           Linear-11                   [-1, 84]          10,164
             ReLU-12                   [-1, 84]               0
           Linear-13                    [-1, 2]             170
Total params: 61,370
Trainable params: 