In [1]:
# Imports 
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import cv2
import os
import json
import math
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import DataLoader
from torchmetrics import F1Score,JaccardIndex
from torch_poly_lr_decay import PolynomialLRDecay
# Set seed for randomize functions (Ez reproduction of results)
random.seed(100)

# Import TuSimple loader
import sys
sys.path.insert(0,'../resources/')
from tusimple import TuSimple
from mask_transformer import MaskTransformer
from vit import ViT
import utils
from linear import DecoderLinear
from mlp_decoder import DecoderMLP

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# ROOT DIRECTORIES
root_dir = os.path.dirname(os.getcwd())
annotated_dir = os.path.join(root_dir,'datasets/tusimple/train_set/annotations')
clips_dir = os.path.join(root_dir,'datasets/tusimple/train_set/')
annotated = os.listdir(annotated_dir)

# Get path directories for clips and annotations for the TUSimple dataset + ground truth dictionary
annotations = list()
for gt_file in annotated:
    path = os.path.join(annotated_dir,gt_file)
    json_gt = [json.loads(line) for line in open(path)]
    annotations.append(json_gt)
    
annotations = [a for f in annotations for a in f]

In [3]:
dataset = TuSimple(train_annotations = annotations, train_img_dir = clips_dir, resize_to = (640,640), subset_size = 0.001, val_size= 0.2)

# Create train and validation splits / Always use del dataset to free memory after this
train_set, validation_set = dataset.train_val_split()
del dataset


In [4]:
# Segmenter pipeline class for training with Dice loss and lightning wrapper
class Segmenter(nn.Module):
    def __init__(self,encoder, decoder, image_size = (640,640), output_act = nn.Sigmoid()):
        super().__init__()
        self.patch_size = encoder.patch_size
        self.encoder = encoder
        self.decoder = decoder
        self.image_size = image_size
        self.lane_threshold = 0.5
        self.output_act = output_act
        
        
    # Forward pass of the pipeline
    def forward(self, im):
        H, W = self.image_size
        
        # Pass through the pre-trained vit backbone
        x = self.encoder(im, return_features=True)
        
        # Pass through the masks transformer
        masks = self.decoder(x)
        
        print(masks)
        
        # Interpolate patch level class annotatations to pixel level and transform to original image size
        masks = F.interpolate(masks, size=(H, W), mode="bilinear")
        
        # Training time
        if self.training:
            class_prob_masks = self.output_act(masks)
            # predictions = torch.where(class_prob_masks > self.lane_threshold, torch.ones_like(class_prob_masks), torch.zeros_like(class_prob_masks)).requires_grad_()
            return class_prob_masks
        # Evaluation time
        else:
            act = self.output_act
            class_prob_masks = act(masks)
            predictions = torch.where(class_prob_masks > self.lane_threshold, torch.ones_like(class_prob_masks), torch.zeros_like(class_prob_masks))
            return predictions
        
    # Count pipeline trainable parameters
    def count_parameters(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)
    
    # Load trained model
    def load_segmenter(self):
        self.load_state_dict(torch.load("path/to/save/model.pth"))
    


In [5]:
import pytorch_lightning as pl

class LightningSegmenter(pl.LightningModule):
    def __init__(self, encoder, decoder, loss_fn, lr):
        super().__init__()
        self.model = Segmenter(encoder, decoder)
        self.loss_fn = loss_fn
        self.lr = lr
        self.f1 = F1Score(task="binary")
        self.iou_score = JaccardIndex(task= 'binary')

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_fn(y_hat, y)
        f1_train = self.f1(y_hat,y)
        iou_train = self.iou_score(y_hat,y)
        self.log('train_loss', loss)
        self.log('f1_train', f1_train)
        self.log('iou_train', iou_train)
        return {'loss': loss, 'f1_train': f1_train, 'iou_train': iou_train}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_fn(y_hat, y)
        f1_val = self.f1(y_hat,y)
        iou_val = self.iou_score(y_hat,y)
        self.log('f1_val', f1_val)
        self.log('iou_val', iou_val)
        self.log('val_loss', loss)
        
    def training_epoch_end(self, outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        avg_f1 = torch.stack([x['f1_train'] for x in outputs]).mean()
        avg_iou = torch.stack([x['iou_train'] for x in outputs]).mean()
        self.log('avg_train_loss', avg_loss, prog_bar=True)
        self.log('avg_f1_train', avg_f1, prog_bar=True)
        self.log('avg_iou_train', avg_iou, prog_bar=True)

        
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

train_loader = DataLoader(train_set, batch_size=2, drop_last=True)
val_loader = DataLoader(validation_set, batch_size=2,drop_last=True)

encoder = ViT(image_size=640, patch_size=16, num_classes=1, dim=768, depth=12, heads=12, 
            mlp_dim=3072, dropout=0.1, load_pre=True, pre_trained_path='../pre-trained/jx_vit_base_p16_224-80ecf9dd.pth')
encoder.freeze_all_but_some([])
decoder = DecoderLinear(n_classes=1, d_encoder=768)

model = LightningSegmenter(encoder, decoder, loss_fn=utils.dice_loss, lr=0.001)

trainer = pl.Trainer(max_epochs=4, gpus=None)# set gpus to None if you're not using a GPU
trainer.fit(model, train_loader, val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | Segmenter          | 86.9 M
1 | f1        | BinaryF1Score      | 0     
2 | iou_score | BinaryJaccardIndex | 0     
-------------------------------------------------
769       Trainable params
86.9 M    Non-trainable params
86.9 M    Total params
347.505   Total estimated model params size (MB)


Succesfully created ViT with pre-trained weights...!


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  return self._grad


None
True
tensor([[[[-1.0540, -0.6615, -1.2019,  ..., -1.2406, -1.0157, -1.1614],
          [-1.5740, -1.0123, -0.7280,  ..., -0.9680, -0.9230, -1.1349],
          [-0.9501, -0.5610, -0.9685,  ..., -1.2770, -0.3816, -1.2686],
          ...,
          [-0.9270, -0.9515, -1.2219,  ..., -1.0449, -1.0171, -1.2321],
          [-0.7034, -0.9283, -1.0665,  ..., -1.3287, -0.9953, -1.3737],
          [-1.1453, -0.4749, -0.9549,  ..., -0.9806, -0.6495, -0.9634]]],


        [[[-1.1662, -1.1435, -0.9589,  ..., -1.0355, -1.1787, -1.1938],
          [-1.0234, -0.8492, -1.0747,  ..., -0.5613, -1.2337, -1.5813],
          [-1.2061, -1.0188, -0.4910,  ..., -1.0029, -1.1019, -0.9828],
          ...,
          [-1.2234, -1.0625, -1.0508,  ..., -1.1040, -0.8804, -1.1368],
          [-0.5823, -1.0939, -1.0164,  ..., -0.9710, -1.0670, -0.9950],
          [-1.2920, -1.2104, -0.2918,  ..., -1.2418, -0.8027, -0.6721]]]],
       grad_fn=<ReshapeAliasBackward0>)
tensor([[[[-1.0540, -1.0540, -1.0540,  ..., -1.16

AttributeError: 'int' object has no attribute 'sum'