In [1]:
!pip install imageio
!pip install torchmetrics
!pip install -U ray[data,train,tune,serve]
!pip install -U torchvision



In [2]:
import os
import torch
import imageio.v3 as iio
import numpy as np
import matplotlib.pyplot as plt
import torchmetrics 
import os 
from torch import nn, optim
from torchvision.io import read_image
from sys import getsizeof
from torch.utils.data import random_split,Dataset,DataLoader 
from torchvision import transforms, tv_tensors
from torchvision.transforms import v2
import random 
import torch.nn.functional as F
import PIL.Image

In [3]:
train_location = "./Project Data/Dataset_Student/train/"
val_location = "./Project Data/Dataset_Student/val/"

In [4]:
class Semantic_Segmentation_Dataset(Dataset):
    def __init__(self, mask_list,input_transform = None,target_transform = None):
        self.mask_list = mask_list
        self.input_transform = input_transform 
        self.target_transform = target_transform 

    def __len__(self):
        return len(self.mask_list)
    

    def transform(self, image,mask):
        if self.input_transform:
            image = self.input_transform(image)
        if self.target_transform:
            mask = self.target_transform(mask)

        identical_transform = v2.Compose([
            v2.ToImage(),
            v2.RandomCrop(size=(160,240),padding=(40,60),padding_mode='edge'),
            v2.RandomHorizontalFlip(p=.5),
            v2.RandomVerticalFlip(p=.5),
            v2.RandomRotation(degrees=45),
        ])

        new_img,new_msk = identical_transform((image,mask)
                                      )
        return new_img,new_msk
    
    def __getitem__(self, idx):
        image = self.mask_list[idx][0]
        target = self.mask_list[idx][1]

        if self.input_transform:
            image = self.input_transform(image)
        if self.target_transform:
            target = self.target_transform(target)

        image,target = self.transform(image,target)

        return image, target

**Loading Data**

In [5]:
def split_video_mask(video_folder):
    '''split_video_mask Creats a list of tuples (image,mask)

    :param video_folder: directory to video folder
    :type video_folder: path
    :return: list of tuples of form (pytorch tensor, pytorch tensor) (size [3, 160, 240],[160, 240])
    '''
    directory = video_folder
    
    img_mask_dict = {}
    list_of_segments = []

    for file in os.listdir(directory):
        filename = os.fsdecode(file)
        if filename.endswith(".png"):
            index = str(os.path.basename(file)[6:-4])
            img = tv_tensors.Image(PIL.Image.open(os.path.join(directory,file)))
            img_mask_dict[index] = img.to(torch.get_default_dtype())
 

        if filename.endswith(".npy"):
            #print(os.path.join(directory, filename))
            mask = torch.from_numpy(np.load(os.path.join(directory,file))).to(torch.get_default_dtype())
            mask = tv_tensors.Image(mask)
            

    for key,val in img_mask_dict.items():
        list_of_segments.append((val,tv_tensors.Image(mask[int(key)])))

    
    return list_of_segments

In [6]:
def load_data(video_folder):
    train_folder = video_folder
    
    subdir_list = os.listdir(train_folder)
    mask_list = []

    for subdir in subdir_list:
        train_mask_list = split_video_mask(os.path.join(train_folder,subdir))
        mask_list = mask_list + train_mask_list


    data_sample = random.sample(mask_list,400)
    input_mean = torch.mean(torch.Tensor(data_sample[0][0]),dim=[1,2])
    input_std = torch.std(torch.Tensor(data_sample[0][0]),dim=[1,2])


    height = mask_list[0][0].shape[1]
    width = mask_list[0][0].shape[2]

    input_transform = v2.Normalize(input_mean,input_std)


    ss_dataset = Semantic_Segmentation_Dataset(mask_list[:63], 
                                            input_transform=input_transform,
                                            target_transform=None
                                            )

    generator = torch.Generator().manual_seed(10)

    train_set, test_set = random_split(ss_dataset,[.7, .3],generator=generator)

    return train_set, test_set

In [7]:
class PracticeNet(nn.Module):
    def __init__(self, chan_1=8):
        super(PracticeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, chan_1, kernel_size=7,padding=3)
        self.conv2 = nn.Conv2d(chan_1, 49, kernel_size=5,padding=2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        return x.view(-1,49,160,240)

In [12]:
def train_practice(hyperparameters, video_folder=None):
    #net = PracticeNet(hyperparameters["chan_1"])
    net = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', weights=None)
    net.classifier._modules['4'] = torch.nn.Conv2d(256, 49, kernel_size=(1, 1), stride=(1, 1))
    optimizer = optim.Adam(net.parameters(),lr=hyperparameters["lr"])
    
    if os.path.isfile("./practice_stop.pth") and os.path.getsize("./practice_stop.pth") > 0:
        checkpoint = torch.load("./practice_stop.pth")
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']
        running_loss = checkpoint['loss']
        best_val_jac = checkpoint['best_val_jac']
        print("Resuming training from epoch: ",start_epoch)
    else: 
        best_val_jac = 0
        start_epoch = 0 
        running_loss = 0 

    jaccard = torchmetrics.JaccardIndex(task="multiclass", num_classes=49)
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)

    net.to(device)

    criterion = jaccard
    
    
    train_subset, val_subset = load_data(video_folder)
    height = train_subset[0][1].shape[0]
    width = train_subset[0][1].shape[1]
    print("data loaded")
    trainloader = torch.utils.data.DataLoader(
        train_subset, batch_size=int(hyperparameters["batch_size"]), shuffle=True, drop_last=True,
    )
    valloader = torch.utils.data.DataLoader(
        val_subset, batch_size=len(val_subset), 
    )

    for epoch in range(start_epoch,hyperparameters["max_epochs"]):
        print(epoch)
        net.train()
        batch_loss = 0
        count = 1
        for batch in trainloader:
            count += 1
            data = batch[0].to(device)
            labels = batch[1].to(device).squeeze()
            optimizer.zero_grad()

            predicted = torch.nn.functional.softmax(net(data)['out'],dim=1)
            loss = jaccard(predicted,labels).requires_grad_(True)
            batch_loss += batch_loss + loss.item()
            #loss.requires_grad =  True
            loss.backward()
            optimizer.step
            
            torch.save({'epoch': epoch+1,
            'model_state_dict': net.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': running_loss,
            'best_val_jac':best_val_jac,
            }, "practice_stop.pth")


        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        batch_cnt = 1

        for batch in valloader:
            with torch.no_grad():
                data = batch[0].to(device)
                labels = batch[1].to(device).squeeze()

      
                predicted = F.softmax(net(data)['out'],dim=1)
                predicted = torch.argmax(predicted,dim=1)       #(Shape N,H,W)
     
                loss = criterion(predicted, labels)
                val_jac = 100 * jaccard(predicted,labels)
                # Save the best model
                if val_jac > best_val_jac:
                    best_val_jac = val_jac
                    torch.save({'epoch': epoch+1,
                    'model_state_dict': net.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': running_loss,
                    'best_val_jac':best_val_jac,
                    }, "practice_best.pth")

                val_loss += loss.cpu().numpy()
                val_steps += 1
                batch_size = predicted.shape[0]
                correct = 0 
                
                for k in range(batch_size):
                    for i in range(height):
                        for j in range(width):
                            if predicted[k][i][j].item() == labels[k][i][j].item():
                                correct += 1  
                    
            val_jac = 100 * jaccard(predicted,labels)
            print(f"Epoch: {epoch + 1}, Validation Accuracy (Jaccard): {val_jac:.2f}%")

           
            batch_cnt += 1
            total = batch_cnt * height * width
            print("epoch, pixel acc = ",100*correct/total)


            if epoch == 5:
                print("exiting at epoch ",epoch)
                raise SystemExit(1)
    print(running_loss)
    print("Finished Training")

In [11]:
hyperparamters = {"chan_1":16,"batch_size":2,"max_epochs":20,"lr":0.001}
data_dir = train_location
train_practice(hyperparamters,video_folder=data_dir)

Using cache found in C:\Users\Miles/.cache\torch\hub\pytorch_vision_v0.10.0


data loaded
0
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
Epoch: 1, Validation Accuracy (Jaccard): 0.07%
epoch, pixel acc =  11.875
1
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size([2, 49, 160, 240])
torch.Size

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [13]:
train_practice(hyperparamters,video_folder=data_dir)

Using cache found in C:\Users\Miles/.cache\torch\hub\pytorch_vision_v0.10.0


Resuming training from epoch:  6
data loaded
6
Epoch: 7, Validation Accuracy (Jaccard): 0.08%
epoch, pixel acc =  8.4375
7
Epoch: 8, Validation Accuracy (Jaccard): 0.06%
epoch, pixel acc =  2.1875
8
Epoch: 9, Validation Accuracy (Jaccard): 0.10%
epoch, pixel acc =  5.0
9
Epoch: 10, Validation Accuracy (Jaccard): 0.08%
epoch, pixel acc =  1.5625
10
Epoch: 11, Validation Accuracy (Jaccard): 0.05%
epoch, pixel acc =  6.5625
11
Epoch: 12, Validation Accuracy (Jaccard): 0.10%
epoch, pixel acc =  8.125
12
Epoch: 13, Validation Accuracy (Jaccard): 0.04%
epoch, pixel acc =  3.4375
13
Epoch: 14, Validation Accuracy (Jaccard): 0.10%
epoch, pixel acc =  6.875
14
Epoch: 15, Validation Accuracy (Jaccard): 0.05%
epoch, pixel acc =  7.1875
15
Epoch: 16, Validation Accuracy (Jaccard): 0.06%
epoch, pixel acc =  1.875
16
Epoch: 17, Validation Accuracy (Jaccard): 0.04%
epoch, pixel acc =  1.25
17
Epoch: 18, Validation Accuracy (Jaccard): 0.07%
epoch, pixel acc =  12.5
18
Epoch: 19, Validation Accuracy (J