<a href="https://colab.research.google.com/github/YuannongMao01/CxC_Goldspot/blob/main/Goldspot_colab_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CxC Goldspot chellenge
### In this project used 3D Convolutional neural networks (3D CNN) based on PyTorch framework

# Util

In [2]:
from pathlib import Path
from random import shuffle
import pandas as pd
import torch
import numpy as np
# import torch.utils.data.Subset 
from PIL import Image
from torchvision.transforms import transforms
from torch.utils.data.dataset import Dataset

In [3]:
class IamgesWithLabelsDataset(Dataset):
    """
    Given each of image the corresponding 
    labels into three class(Fractured, Intact, Shattered)
    """

    def __init__(self, root, transform=None) -> None:
        super().__init__()
        self.label2index = {'Fractured': 0, 'Intact': 1, 'Shattered': 2}  

        self.transform = transforms.Compose([
            transforms.ToTensor(),  
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])  # Standardization
        ])

        root = '/content/drive/MyDrive/GoldSpot_Challenge_Waterloo_Dataset/'

        self.root = root

        images_path = Path(root + 'images/')

        images_list = list(
            images_path.glob('*.png'))  # Get the path of each of images
        images_list_str = [str(x) for x in images_list]  
        images_list_str.sort(
            key=lambda item: int(item[len(root + 'images/') - 0:-4]))
        self.images = images_list_str


    def __getitem__(self, item):
        """
        Arg: # of image
        Return:
            frames:shape->[patches_size,2*step,channel,width,height]
            label:shape->[patches_size,....]
        """
        image_path = self.images[item]

        labels_path = self.root + 'labels/' + str(item) + '.csv'   
        data = pd.read_csv(labels_path, header=0)
        id = data.loc[0, 'image_id']  
        label = data.loc[:, 'class']  
        for i in range(len(label)):
            label[i] = self.label2index[label[i]]   #convert strings in class to number
        label = torch.tensor(label)

        # Segmentation
        start_pixel = data.loc[:, 'start_pixel']
        end_pixel = data.loc[:, 'end_pixel']

        image = Image.open(image_path)  # Read RGB images
        width, height = image.size

        # Segmentation and resize the images
        image_patches = [
            image.crop(box=(start, 0, end, height)).resize((128, 171)) 
            for start, end in zip(start_pixel, end_pixel)   
        ]

        image_patches = [
            self.transform(_image_patch) for _image_patch in image_patches   
        ]  

        frames = []

        step = 1
        image_patches = [image_patches[0]] * step + image_patches + [image_patches[-1]] * step 

        for i in range(step, len(image_patches) - step):
            frame = image_patches[i - step:i + step + 1]   #There are 2*step+1 element in list
            frame = np.array([item.numpy() for item in frame])
            frames.append(frame)  
        frames = np.array(frames)

        return frames, label

    def __len__(self):
        return len(self.images)


In [4]:
def onehot2label(onehot):
    table = {
        0: 'Fractured',
        1: 'Intact',
        2: 'Shattered',
    }

    # onehot = onehot[0]
    index = onehot.item()
    return table[index]

In [5]:
class Data(Dataset):
    """
    Converting to Iterative ZIP file
    """

    def __init__(self, data) -> None:
        super().__init__()
        self.data = list(data)
        self.data = [list(item) for item in self.data]

    def add(self, frame, label):
        self.data.append((frame, label))

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)



In [6]:
def split_train_test(data):
    dataset_frame = []
    dataset_label = []
    for frames, labels in data:
        for frame, label in zip(frames, labels):
            dataset_frame.append(frame)
            dataset_label.append(label)

    # dataset_frame = np.array(dataset_frame)
    # dataset_label = np.array(dataset_label)
    # dataset = np.concatenate([dataset_frame, dataset_frame], axis=1)
    train_size = int(0.7 * len(dataset_frame))

    # Create train/test dataset
    train = list(zip(dataset_frame[:train_size], dataset_label[:train_size]))
    test = Data(zip(dataset_frame[train_size:], dataset_label[train_size:]))

    fractured_num = 0
    shattered_num = 0
    for i in dataset_label[:train_size]:
        if (i == 0): fractured_num += 1
        elif (i == 2): shattered_num += 1

    fractured_weight = train_size / fractured_num
    shattered_weight = train_size / shattered_num
    weights = [fractured_weight, shattered_weight]
    looptrain = [item for item in train]  
    for (frame, label) in looptrain:
        for _ in range(
                1,
                int(weights[0 if 'Fractured' == onehot2label(label) else 1])):
            train.append((frame, label))
            
    # random shuffle
    shuffle(train)
    train = Data(train)

    return train, test

# 3D-CNN Model

In [7]:
import torch.nn as nn
import torch.nn.functional as F


class C3D(nn.Module):
    """
    The C3D network.
    """

    def __init__(self, num_classes):
        super(C3D, self).__init__()

        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        # self.relu = nn.ReLU()
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.conv2 = nn.Conv3d(64,
                               128,
                               kernel_size=(3, 3, 3),
                               padding=(1, 1, 1))
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv3a = nn.Conv3d(128,
                                256,
                                kernel_size=(3, 3, 3),
                                padding=(1, 1, 1))
        self.conv3b = nn.Conv3d(256,
                                256,
                                kernel_size=(3, 3, 3),
                                padding=(1, 1, 1))
        self.pool3 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(2, 2, 2))

        self.conv4a = nn.Conv3d(256,
                                512,
                                kernel_size=(3, 3, 3),
                                padding=(1, 1, 1))
        self.conv4b = nn.Conv3d(512,
                                512,
                                kernel_size=(3, 3, 3),
                                padding=(1, 1, 1))
        self.pool4 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(2, 2, 2))

        self.conv5a = nn.Conv3d(512,
                                512,
                                kernel_size=(2, 3, 3),
                                padding=(1, 1, 1))
        self.conv5b = nn.Conv3d(512,
                                512,
                                kernel_size=(3, 3, 3),
                                padding=(1, 1, 1))
        self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2),
                                  stride=(2, 2, 2),
                                  padding=(0, 1, 1))

        self.fc6 = nn.Linear(512 * 6 * 5, 512)
        self.fc7 = nn.Linear(512, 100)
        self.fc8 = nn.Linear(100, num_classes)

        self.dropout = nn.Dropout(p=0.5)

        self.relu = nn.ReLU(inplace=True)  

        # self.sigmoid = nn.Sigmoid()

    def forward(self, x):

        x = self.relu(self.conv1(x))  

        x = self.pool1(x)  


        x = self.relu(self.conv2(x))  

        x = self.pool2(x)  


        x = self.relu(self.conv3a(x)) 

        x = self.relu(self.conv3b(x))  

        x = self.pool3(x)  


        x = self.relu(self.conv4a(x)) 
     
        x = self.relu(self.conv4b(x))  

        x = self.pool4(x)  
       

        x = self.relu(self.conv5a(x)) 
  
        x = self.relu(self.conv5b(x))  

        x = self.pool5(x)  

        x = x.view(x.shape[0], 512 * 6 * 5)  

        x = self.relu(self.fc6(x))

        x = self.dropout(x)
        x = self.relu(self.fc7(x))
        x = self.dropout(x)

        logits = self.fc8(x)

        return logits


# Training the Model

In [8]:
from tqdm import tqdm
import torch.optim as optim
from torch.utils.data.dataloader import DataLoader

In [10]:
# Enables benchmark mode in cudnn
# torch.backends.cudnn.benchmark = t = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
is_relearn = False
VERSION = 2.0   

root = '/content/drive/MyDrive/GoldSpot_Challenge_Waterloo_Dataset/'

dataset = IamgesWithLabelsDataset(root)   
train_dataset, test_dataset = split_train_test(dataset)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [11]:
loader_dataset = DataLoader(train_dataset,
                            batch_size=10,   
                            shuffle=True,
                            num_workers=0)

In [13]:
model = C3D(3).to(device)

accumulate_steps = 2  
lr = 0.003
momentum = 0.9
EPOCH = 12
EPOCH_START = 0
average_loss = []
if is_relearn:
    EPOCH_START = 4
    PATH = f'/content/drive/MyDrive/GoldSpot_Challenge_Waterloo_Dataset/{VERSION}-{EPOCH_START}-c3d.pt'
    lr = 0.0003
    m_state_dict = torch.load(PATH)
    model.load_state_dict(m_state_dict)

optimizer = optim.SGD(model.parameters(),
                      lr=lr,
                      momentum=momentum,
                      weight_decay=5e-4)  

crossEntropyLoss = nn.CrossEntropyLoss()

losses = pd.DataFrame([],
                      columns=['Train Epoch', 'lr', 'loss', 'average_loss'])
lables = []

In [14]:
# training the model
for epoch in range(EPOCH_START, EPOCH):
    if (epoch + 1) % 4 == 0: lr /= 10
    pbar = tqdm(loader_dataset, total=len(loader_dataset))
    for cnt, batch in enumerate(pbar):
        frame, label = batch
        frame = frame.permute(0, 2, 1, 3, 4).to(device)
        prd = model(frame)
        # Expected floating point type for target with class probabilities, got Long
        loss = crossEntropyLoss(prd.to('cpu').float(), label)
        loss.backward()
        average_loss.append(loss.item())
        losses = losses.append(
            {
                'Train Epoch': f'{epoch + 1 } / {EPOCH}',
                'lr': lr,
                'loss': loss.item(),
                'average_loss': round(np.mean(average_loss), 4)
            },
            ignore_index=True)
        pbar.set_description(
            f'Train Epoch:{epoch + 1}/{EPOCH} lr:{lr} train_loss:{loss.item()} average_train_loss:{round(np.mean(average_loss), 4)}'
        )
        
        #To solve CUDA error: out of memory
        if (cnt + 1) % accumulate_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        del loss
    if (epoch + 1) % 4 == 0:
        # The model are saved every four epochs
        PATH = f'/content/drive/MyDrive/GoldSpot_Challenge_Waterloo_Dataset/{VERSION}-{epoch + 1}-c3d.pt'
        torch.save(model.state_dict(), PATH)
        losses.to_csv(
            f'/content/drive/MyDrive/GoldSpot_Challenge_Waterloo_Dataset/{VERSION}-losses-lr-{lr} momentum-{momentum} epoch-{epoch}.csv'
        )
        del losses
        losses = pd.DataFrame(
            [], columns=['Train Epoch', 'lr', 'loss', 'average_loss'])
        del average_loss
        average_loss = []

Train Epoch:1/12 lr:0.003 train_loss:0.747889518737793 average_train_loss:0.8658: 100%|██████████| 255/255 [00:54<00:00,  4.65it/s]
Train Epoch:2/12 lr:0.003 train_loss:0.7267696857452393 average_train_loss:0.7972: 100%|██████████| 255/255 [00:57<00:00,  4.46it/s]
Train Epoch:3/12 lr:0.003 train_loss:0.7111523151397705 average_train_loss:0.7668: 100%|██████████| 255/255 [00:57<00:00,  4.47it/s]
Train Epoch:4/12 lr:0.00030000000000000003 train_loss:0.6944613456726074 average_train_loss:0.7513: 100%|██████████| 255/255 [00:57<00:00,  4.45it/s]
Train Epoch:5/12 lr:0.00030000000000000003 train_loss:0.686608076095581 average_train_loss:0.6996: 100%|██████████| 255/255 [00:57<00:00,  4.45it/s]
Train Epoch:6/12 lr:0.00030000000000000003 train_loss:0.6827861666679382 average_train_loss:0.701: 100%|██████████| 255/255 [00:57<00:00,  4.47it/s]
Train Epoch:7/12 lr:0.00030000000000000003 train_loss:0.6751766800880432 average_train_loss:0.7011: 100%|██████████| 255/255 [00:57<00:00,  4.46it/s]
Trai

# Testing the model

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# EPOCH:12 VERSION:2.0——Accuracy is 36.8%
# EPOCH:16 VERSION:2.0—— Accuracy is 18%
# EPOCH:8 VERSION:3.0——Accuracy is 19%    
EPOCH = 12
VERSION = 2.0

dataset = IamgesWithLabelsDataset('/content/drive/MyDrive/GoldSpot_Challenge_Waterloo_Dataset/')
train_dataset, test_dataset = split_train_test(dataset)
loader_dataset = DataLoader(test_dataset,
                            batch_size=1,
                            shuffle=True,
                            num_workers=0)

model = C3D(3)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [19]:
# Loading the model
PATH = f'/content/drive/MyDrive/GoldSpot_Challenge_Waterloo_Dataset/{VERSION}-{EPOCH}-c3d.pt'
m_state_dict = torch.load(PATH)
model.load_state_dict(m_state_dict)

pbar = tqdm(loader_dataset, total=len(loader_dataset))

T = 0
A = 0
for cnt, batch in enumerate(pbar):
    frame, label = batch
    frame = frame.permute(0, 2, 1, 3, 4)
    prd = F.softmax(model(frame))
    target = onehot2label(label[0])
    source = onehot2label(torch.argmax(prd[0]))
    if (target == source): T += 1
    A += 1
    pbar.set_description(f'True:{target},Predict:{source},Accuracy:{T*100/A}%')


  del sys.path[0]
True:Fractured,Predict:Fractured,Accuracy:67.24436741767764%: 100%|██████████| 577/577 [02:20<00:00,  4.11it/s]
