<a href="https://colab.research.google.com/github/anh1811/trajectory-prediction/blob/main/Transformer_encoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import torch
from torch.utils.data import TensorDataset, DataLoader,  SequentialSampler
from sklearn.model_selection import train_test_split
from torch import nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm.notebook import tqdm
from torch.nn import functional as F
from torch.utils.tensorboard import SummaryWriter
from time import time
from prettytable import PrettyTable

In [None]:
# Mount Google Drive into the Colab VM at /content/drive so later cells can
# read the dataset from it (this import only exists inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Dataset

In [None]:
# Make the Drive root the working directory: the relative 'tracking' and
# './output' paths used in later cells are resolved from here.
# %cd content
# %cd /content/drive/MyDrive/viettle/miniproject/sequencer
%cd /content/drive/MyDrive

/content/drive/MyDrive


In [None]:
# Load the COCO-style annotation files for the two splits stored under
# 'tracking/'. Both objects are kept because later cells look annotation ids
# up in the train set first and fall back to the val set.
from pycocotools.coco import COCO
import os
root_dir = 'tracking'
coco_annotation_file_path = os.path.join(root_dir, 'train.json')
coco_test_annotation_file_path = os.path.join(root_dir, 'val.json')
coco_annotation = COCO(annotation_file=coco_annotation_file_path)
coco_test_annotation = COCO(annotation_file=coco_test_annotation_file_path)

loading annotations into memory...
Done (t=5.99s)
creating index...
index created!
loading annotations into memory...
Done (t=1.44s)
creating index...
index created!


In [None]:
def xywh2xyxy(bbox):
  """Convert a box from (x, y, w, h) to corner form (x1, y1, x2, y2).

  :param bbox: indexable with at least four numeric entries (x, y, w, h).
  :return: tuple (x, y, w + x, h + y).
  """
  left, top, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
  right = width + left
  bottom = height + top
  return left, top, right, bottom


def xywh2cxcy(bbox):
  """Convert a box from (x, y, w, h) to centre form (cx, cy, w, h).

  :param bbox: indexable with at least four numeric entries (x, y, w, h).
  :return: tuple (x + w/2, y + h/2, w, h).
  """
  left, top, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
  center_x = left + width/2.0
  center_y = top + height/2.0
  return center_x, center_y, width, height

def Euclipe_dis(bbox1, bbox2):
  """Euclidean distance between the first two coordinates of two boxes.

  Only entries 0 and 1 of each argument are read; any further entries are
  ignored.
  """
  delta_x = bbox1[0] - bbox2[0]
  delta_y = bbox1[1] - bbox2[1]
  return np.sqrt(delta_x**2 + delta_y**2)

def scale(bbox, h_pic = 1920, w_pic = 2560):
  """Normalise pixel coordinates by the picture size.

  Even-indexed entries are divided by ``w_pic`` and odd-indexed entries by
  ``h_pic``.

  :param bbox: a 2-entry point, or a box with at least 4 entries.
  :param h_pic: picture height used as the divisor for entries 1 and 3.
  :param w_pic: picture width used as the divisor for entries 0 and 2.
  :return: a 2-tuple when ``len(bbox) == 2``, otherwise a 4-tuple.
  """
  is_point = len(bbox) == 2
  x_norm = bbox[0]/w_pic
  y_norm = bbox[1]/h_pic
  if is_point:
    return x_norm, y_norm
  return x_norm, y_norm, bbox[2]/w_pic, bbox[3]/h_pic

def checknois(bboxes):
  """Flag a sequence as noisy when it contains two or more large jumps.

  A "jump" is a pair of consecutive entries whose ``Euclipe_dis`` distance
  exceeds 0.1.

  :param bboxes: sequence of boxes/points in visiting order.
  :return: True if at least two consecutive pairs jump, False otherwise.
  """
  jumps = sum(
      1
      for current, following in zip(bboxes[:-1], bboxes[1:])
      if Euclipe_dis(current, following) > 0.1
  )
  return jumps >= 2

def xywh2cxcyah(bbox):
  """Convert a box from (x, y, w, h) to (cx, cy, a, h).

  The third output is computed as ``h / w`` (height divided by width).

  :param bbox: indexable with at least four numeric entries (x, y, w, h).
  :return: tuple (x + w/2, y + h/2, h/w, h).
  """
  left, top, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
  center_x = left + width/2.0
  center_y = top + height/2.0
  ratio = height/width
  return center_x, center_y, ratio, height

 
def batch_xyxy2cxcy(bboxes):
  """Return the centre point of every box in a batch of xyxy boxes.

  :param bboxes: tensor of shape (N, >=4) whose first four columns are
                 (x1, y1, x2, y2).
  :return: float32 tensor of shape (N, 2) holding (cx, cy), on the same
           device as ``bboxes``.
  """
  center_x = (bboxes[:, 0] + bboxes[:, 2]) / 2.
  center_y = (bboxes[:, 1] + bboxes[:, 3]) / 2.
  # Stacking keeps the result on the input's device; the previous
  # torch.empty(...) buffer was always allocated on the CPU, which silently
  # pulled CUDA predictions back to the host.
  return torch.stack((center_x, center_y), dim=1).to(torch.float32)


def count_parameters(model):
    """Return the total number of elements in the model's trainable parameters.

    Parameters whose ``requires_grad`` is False are not counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

def generalized_iou(gt_bboxes, pr_bboxes, reduction='mean'):
    """
    Intersection-over-union between paired boxes.

    Despite its name this returns the plain IoU: no enclosing-box (GIoU)
    penalty is applied.

    gt_bboxes: tensor (-1, 4) xyxy
    pr_bboxes: tensor (-1, 4) xyxy
    reduction: 'mean' or 'sum' reduce over the batch; any other value
               (e.g. 'none') returns the per-pair IoU tensor unchanged.
    """
    gt_area = (gt_bboxes[:, 2]-gt_bboxes[:, 0])*(gt_bboxes[:, 3]-gt_bboxes[:, 1])
    pr_area = (pr_bboxes[:, 2]-pr_bboxes[:, 0])*(pr_bboxes[:, 3]-pr_bboxes[:, 1])

    # Intersection rectangle; a negative extent means the boxes do not overlap.
    lt = torch.max(gt_bboxes[:, :2], pr_bboxes[:, :2])
    rb = torch.min(gt_bboxes[:, 2:], pr_bboxes[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, 0] * wh[:, 1]
    union = gt_area + pr_area - inter

    # Two zero-area boxes give union == 0; dividing would yield NaN and poison
    # the batch mean, so such pairs are scored as IoU 0 instead.
    empty = union == 0
    safe_union = union.masked_fill(empty, 1)
    iou = (inter / safe_union).masked_fill(empty, 0)

    if reduction == 'mean':
        iou = iou.mean()
    elif reduction == 'sum':
        iou = iou.sum()
    return iou



In [None]:
# h = int(coco_annotation.loadImgs(1)[0]['height'])
# w = int(coco_annotation.loadImgs(1)[0]['width'])
# print(h)
# print(w)

In [None]:
def vid_path(x):
  """Sort-key helper: return the element at index 1 of ``x``."""
  second_item = x[1]
  return second_item

In [None]:
def create_list_ID(coco_annotation, coco_test_annotation):
  """Group every annotation id of both annotation sets by video and track.

  Annotation ids from the two sets are merged and visited in ascending order.
  Each id is looked up in ``coco_annotation`` first and in
  ``coco_test_annotation`` if it is missing there.

  :param coco_annotation: COCO-like object (``getAnnIds``, ``loadAnns``,
                          ``loadImgs``) for the first set.
  :param coco_test_annotation: COCO-like object for the second set.
  :return: dict mapping ``"<video dir>_<track id>"`` to a list of
           ``[annotation id, image file name]`` pairs.
  :raises KeyError: if an id is present in neither set.
  """
  sources = (coco_annotation, coco_test_annotation)

  def _load_annotation(ann_id):
    # Returns the annotation plus the index of the set that owns it.
    # Only a missing id (KeyError) triggers the fallback; any other error is
    # a real problem and is allowed to propagate.
    for owner, coco in enumerate(sources):
      try:
        return coco.loadAnns([ann_id])[0], owner
      except KeyError:
        continue
    raise KeyError(ann_id)

  def _load_image(image_id, owner):
    # Prefer the set the annotation came from: image ids may be reused across
    # the two sets, and the other set would then return the wrong image.
    for coco in (sources[owner], sources[1 - owner]):
      try:
        return coco.loadImgs([image_id])[0]
      except KeyError:
        continue
    raise KeyError(image_id)

  all_ann_ids = sorted(coco_annotation.getAnnIds() + coco_test_annotation.getAnnIds())

  list_ID = dict()
  for ann_id in all_ann_ids:
    bbox_info, owner = _load_annotation(ann_id)
    track_id = bbox_info['attributes']['track_id']
    img = _load_image(bbox_info['image_id'], owner)
    # file_name is expected to be exactly "<video dir>/<image file>".
    video_dir, img_path = img["file_name"].split('/')
    label = video_dir + "_{}".format(track_id)
    list_ID.setdefault(label, list()).append([ann_id, img_path])
  return list_ID

In [None]:
def create_train_list(list_ID, len_seq):
  """Cut every track into all of its sliding windows of ``len_seq`` items.

  :param list_ID: dict mapping a track label to its ordered list of items.
  :param len_seq: window length (must be >= 1).
  :return: list of windows (each a new list of ``len_seq`` consecutive
           items), in track order and then start-index order. Tracks shorter
           than ``len_seq`` contribute nothing.
  :raises ValueError: if ``len_seq`` is smaller than 1.
  """
  if len_seq < 1:
    raise ValueError("len_seq must be >= 1, got {}".format(len_seq))

  train_list = []
  for track in list_ID.values():
    # A track of n items has n - len_seq + 1 windows. The previous slice
    # bound (-len_seq - 1) skipped the window just before the last one.
    for start in range(len(track) - len_seq + 1):
      train_list.append(list(track[start:start + len_seq]))
  return train_list

In [None]:
def create_dataset(train_list, coco_annotation, coco_test_annotation, type_box = 'xyxy', transformer = False):
  """Turn windows of annotation ids into normalised input/label arrays.

  For each window, all ids but the last form the input sequence and the last
  id is the target. A window is kept only if the variance of its scaled
  centre-form inputs exceeds 1e-5 and ``checknois`` does not flag it.

  :param train_list: iterable of windows; each window is a sequence of
                     annotation ids.
  :param coco_annotation: COCO-like object searched first for every id.
  :param coco_test_annotation: COCO-like object used when the id is missing
                               from ``coco_annotation``.
  :param type_box: 'xyxy' (corner form) or 'cxcy' (centre form).
  :param transformer: only used with 'xyxy'. When True the label is the input
                      sequence shifted by one step with the target appended,
                      instead of the target box alone.
  :return: ``(datas, labels)`` as float32 numpy arrays.
  :raises ValueError: if ``type_box`` is not one of the supported values.
  """
  if type_box not in ('xyxy', 'cxcy'):
    # Previously an unknown value silently produced an empty dataset.
    raise ValueError("type_box must be 'xyxy' or 'cxcy', got {!r}".format(type_box))

  def _load_bbox(ann_id):
    # Only a missing id (KeyError) triggers the fallback to the second set;
    # a bare except here used to hide every other failure as well.
    try:
      annotation = coco_annotation.loadAnns([ann_id])[0]
    except KeyError:
      annotation = coco_test_annotation.loadAnns([ann_id])[0]
    return annotation['bbox']

  datas = list()
  labels = list()
  for seq in train_list:
    history = [_load_bbox(ann_id) for ann_id in seq[:-1]]
    bbox_cxcy = [scale(xywh2cxcy(box)) for box in history]
    # Drop near-static sequences and sequences flagged as noisy.
    if not (np.var(bbox_cxcy) > 1e-5) or checknois(bbox_cxcy):
      continue

    target = _load_bbox(seq[-1])
    if type_box == 'xyxy':
      bbox_seq = [scale(xywh2xyxy(box)) for box in history]
      datas.append(bbox_seq)
      if transformer:
        labels.append(bbox_seq[1:] + [scale(xywh2xyxy(target))])
      else:
        labels.append(scale(xywh2xyxy(target)))
    else:
      datas.append(bbox_cxcy)
      labels.append(scale(xywh2cxcy(target)))

  return np.array(datas, dtype='float32'), np.array(labels, dtype='float32')

In [None]:
def dataloader(X, y, batch_size = 128):
  """Wrap arrays ``X`` and ``y`` in a DataLoader that iterates in order.

  :param X: array-like of inputs, converted with ``torch.tensor``.
  :param y: array-like of labels, converted with ``torch.tensor``.
  :param batch_size: number of samples per batch.
  :return: DataLoader over a TensorDataset, using a SequentialSampler.
  """
  dataset = TensorDataset(torch.tensor(X), torch.tensor(y))
  return DataLoader(
      dataset,
      sampler=SequentialSampler(dataset),
      batch_size=batch_size,
  )

# Transformer


In [None]:
def d(tensor=None):
    """
    Pick a device string.

    :param tensor: optional tensor; when given, its own placement decides.
    :return: 'cuda' or 'cpu' - the tensor's device kind if a tensor is
             passed, otherwise 'cuda' whenever CUDA is available.
    """
    use_cuda = torch.cuda.is_available() if tensor is None else tensor.is_cuda
    return 'cuda' if use_cuda else 'cpu'
class CTransformer(nn.Module):
    """
    Transformer encoder that maps a sequence of feature vectors to a single
    4-value output squashed into (0, 1) by a sigmoid.
    """

    def __init__(self, emb, heads, depth, seq_length, dropout=0.1):
        """
        :param emb: Feature dimension of every time step (also the model width).
        :param heads: nr. of attention heads
        :param depth: Number of transformer encoder layers
        :param seq_length: Maximum sequence length (size of the position table)
        :param dropout: Dropout applied after the position/feature fusion
        """
        super().__init__()

        self.pos_embedding = nn.Embedding(embedding_dim=emb, num_embeddings=seq_length)

        self.unify_embeddings = nn.Linear(2 * emb, emb)
        # forward() feeds (batch, time, emb) tensors, so the encoder must be
        # batch-first. With the default layout it treated the batch axis as
        # the sequence axis and attended across unrelated samples.
        # The layer stays registered under `encoderblock` (TransformerEncoder
        # works on its own copies) so state_dict keys do not change.
        self.encoderblock = nn.TransformerEncoderLayer(emb, heads, batch_first=True)
        self.Encoder =nn.TransformerEncoder(self.encoderblock, depth, nn.LayerNorm(emb))


        self.fc = nn.Linear(emb, 4)

        self.do = nn.Dropout(dropout)

    def forward(self, x):
        """
        :param x: Float tensor of shape (batch, time, emb) with time <= seq_length.
        :return: Tensor of shape (batch, 4) with values in (0, 1).
        """
        b, t, e = x.size()

        # Build the positions on the input's own device so the model also
        # works on CPU when CUDA happens to be available (and vice versa).
        positions = self.pos_embedding(torch.arange(t, device=x.device))[None, :, :].expand(b, t, e)
        x = self.unify_embeddings(torch.cat((x, positions), dim=2))
        x = self.do(x)
        x = self.Encoder(x)
        # Average over the time axis. An explicit dim keeps the batch axis
        # even for a batch of one (a bare squeeze() used to drop it).
        x = x.mean(dim=1)
        x = self.fc(x)

        return torch.sigmoid(x)

In [None]:
import math
import warnings
def ciou_loss(pred, target, eps=1e-6, reduction='mean'):
    """Complete-IoU loss between paired boxes.

    Follows `Enhancing Geometric Factors into Model Learning and Inference
    for Object Detection and Instance Segmentation
    <https://arxiv.org/abs/2005.03572>`_; code adapted from
    https://github.com/Zzh-tju/CIoU.

    Args:
        pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
            shape (n, 4).
        target (Tensor): Corresponding gt bboxes, shape (n, 4).
        eps (float): Small constant added to the union, the squared
            enclosing diagonal and the box heights to avoid division by zero.
        reduction (str): 'mean' or 'sum' reduce over the n pairs; any other
            value returns the per-pair loss of shape (n,).
    Return:
        Tensor: Loss tensor.
    """
    p_x1, p_y1, p_x2, p_y2 = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
    t_x1, t_y1, t_x2, t_y2 = target[:, 0], target[:, 1], target[:, 2], target[:, 3]

    # Intersection area (zero when the boxes do not overlap).
    inter_lt = torch.max(pred[:, :2], target[:, :2])
    inter_rb = torch.min(pred[:, 2:], target[:, 2:])
    inter_wh = (inter_rb - inter_lt).clamp(min=0)
    overlap = inter_wh[:, 0] * inter_wh[:, 1]

    # IoU.
    area_pred = (p_x2 - p_x1) * (p_y2 - p_y1)
    area_target = (t_x2 - t_x1) * (t_y2 - t_y1)
    union = area_pred + area_target - overlap + eps
    ious = overlap / union

    # Squared diagonal of the smallest box enclosing both boxes.
    hull_lt = torch.min(pred[:, :2], target[:, :2])
    hull_rb = torch.max(pred[:, 2:], target[:, 2:])
    hull_wh = (hull_rb - hull_lt).clamp(min=0)
    c2 = hull_wh[:, 0]**2 + hull_wh[:, 1]**2 + eps

    # Squared distance between the two box centres.
    dx2 = ((t_x1 + t_x2) - (p_x1 + p_x2))**2 / 4
    dy2 = ((t_y1 + t_y2) - (p_y1 + p_y2))**2 / 4
    rho2 = dx2 + dy2

    # Aspect-ratio consistency term.
    w1, h1 = p_x2 - p_x1, p_y2 - p_y1 + eps
    w2, h2 = t_x2 - t_x1, t_y2 - t_y1 + eps
    factor = 4 / math.pi**2
    v = factor * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)

    # The trade-off weight is treated as a constant (no gradient) and is only
    # active for pairs whose IoU exceeds 0.5.
    with torch.no_grad():
        alpha = (ious > 0.5).float() * v / (1 - ious + v)

    cious = ious - (rho2 / c2 + alpha * v)
    loss = 1 - cious.clamp(min=-1.0, max=1.0)

    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss


class CIoULoss(nn.Module):
    """Module wrapper around ``ciou_loss`` with a fixed eps, reduction and scale.

    ``weight`` only affects the all-zero early exit and a shape check; it is
    not forwarded to ``ciou_loss``. ``avg_factor`` and extra keyword arguments
    are accepted but unused.
    """

    def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):
        super().__init__()
        self.eps = eps
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None,
                **kwargs):
        """Return ``loss_weight * ciou_loss(pred, target)``.

        :param pred: predicted boxes, passed through to ``ciou_loss``.
        :param target: ground-truth boxes, passed through to ``ciou_loss``.
        :param weight: optional tensor; if no entry is positive the method
                       returns ``(pred * weight).sum()`` instead of a loss.
        :param reduction_override: None, 'none', 'mean' or 'sum'; a truthy
                                   value replaces ``self.reduction``.
        """
        has_weight = weight is not None

        # No positive weight at all: short-circuit with a zero-valued sum.
        if has_weight and not torch.any(weight > 0):
            if pred.dim() == weight.dim() + 1:
                weight = weight.unsqueeze(1)
            return (pred * weight).sum()  # 0

        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction

        if has_weight and weight.dim() > 1:
            # TODO: remove this in the future
            # collapse a (n, 4) weight to (n,) to match the per-box loss shape
            assert weight.shape == pred.shape
            weight = weight.mean(-1)

        return self.loss_weight * ciou_loss(
            pred,
            target,
            eps=self.eps,
            reduction=reduction,
        )



class LossMSE_YOLOv1(nn.Module):
  """Sum of two MSE terms: one on columns 0:2, one on the square roots of columns 2:4."""

  def __init__(self, reduction = 'mean'):
    super().__init__()
    self.mse = nn.MSELoss(reduction = reduction)

  def forward(self, prediction, target):
    """Return MSE(pred[:, :2], tgt[:, :2]) + MSE(sqrt(pred[:, 2:4]), sqrt(tgt[:, 2:4])) as float32."""
    first_term = self.mse(prediction[:,:2], target[:,:2])
    # sqrt is applied to both sides before comparing columns 2:4.
    root_term = self.mse(torch.sqrt(prediction[:,2:4]), torch.sqrt(target[:,2:4]))
    return (first_term + root_term).float()

In [None]:
def train(model, X_train, y_train, X_val, y_val, lr, epochs, writer, path_save, loss_type = 'ciou', patience = 12, transformer = False, early_stop_patience = 25):
  """Train ``model`` with Adam, LR-on-plateau scheduling and early stopping.

  The weights with the lowest validation loss are saved to ``path_save``.
  The model returned is the one from the last executed epoch, not
  necessarily the saved best one.

  :param model: module to train; batches are moved to the device of its
                parameters.
  :param X_train, y_train: training arrays, fed to ``dataloader``.
  :param X_val, y_val: validation arrays, fed to ``dataloader``.
  :param lr: initial Adam learning rate.
  :param epochs: maximum number of epochs.
  :param writer: object with ``add_scalars`` (e.g. a SummaryWriter).
  :param path_save: checkpoint path for the best model.
  :param loss_type: 'ciou' or 'mse'.
  :param patience: patience of the ReduceLROnPlateau scheduler.
  :param transformer: if True, output and label are flattened to
                      (-1, num_features) before the loss is computed.
  :param early_stop_patience: epochs without validation improvement before
                              training stops (previously hard-coded to 25).
  :return: the trained model.
  :raises ValueError: if ``loss_type`` is not supported.
  """
  if loss_type == 'ciou':
    criterion = CIoULoss()
  elif loss_type == 'mse':
    criterion = LossMSE_YOLOv1()
  else:
    # Previously this fell through and failed later with a NameError.
    raise ValueError("loss_type must be 'ciou' or 'mse', got {!r}".format(loss_type))

  train_dataloader = dataloader(X_train, y_train)
  val_dataloader = dataloader(X_val, y_val)

  # Follow the model's device instead of assuming CUDA.
  device = next(model.parameters()).device

  opt = torch.optim.Adam(model.parameters(), lr=lr)
  sched = ReduceLROnPlateau(opt, threshold=1e-4, min_lr=1e-7, patience = patience)

  def _batch_loss(inputs, targets):
    # Forward pass plus loss for one batch, shared by training and validation.
    out = model(inputs)
    if transformer:
      num_fea = out.size(2)
      out = out.view(-1, num_fea)
      targets = targets.view(-1, num_fea)
    return criterion(out, targets)

  trials = 0
  val_loss_min = float('inf')

  for epoch in range(epochs):
    print('======== Epoch {:} ========'.format(epoch + 1))

    total_loss = 0
    model.train()

    # Iterating the loader directly lets tqdm know the total batch count.
    for x_batch, y_batch in tqdm(train_dataloader):
      x_batch = x_batch.to(device)
      y_batch = y_batch.to(device)
      opt.zero_grad()
      loss = _batch_loss(x_batch, y_batch)
      total_loss += loss.item()

      loss.backward()
      opt.step()

    train_loss = total_loss / len(train_dataloader)

    model.eval()
    total_val_loss = 0

    with torch.no_grad():
      for x_batch, y_batch in tqdm(val_dataloader):
        loss = _batch_loss(x_batch.to(device), y_batch.to(device))
        total_val_loss += loss.item()

    val_loss = total_val_loss/len(val_dataloader)
    sched.step(val_loss)

    # Log before the early-stopping check so the final epoch is recorded too.
    print('Epoch[{}/{}]: train_loss: {:.10f}, val_loss:{:.10f}'.format(epoch, epochs, train_loss, val_loss))
    # add to tensorboard
    writer.add_scalars('loss traing', {
        'train': train_loss,
        'val': val_loss,
    }, epoch)

    if val_loss < val_loss_min:
      trials = 0
      torch.save({
          'epoch': epoch,
          'model_state_dict': model.state_dict(),
      }, path_save)
      val_loss_min = val_loss
    else:
      trials += 1
      if trials >= early_stop_patience:
        print(f'Early stopping on epoch {epoch + 1}')
        break

  return model

In [None]:
# Build the dataset
# Window length: each window holds num_frames_input annotation ids; inside
# create_dataset the last id is the target and the preceding ones the input.
num_frames_input = 6
list_ID = create_list_ID(coco_annotation, coco_test_annotation)
for key in list_ID.keys():
  # Each entry is [annotation id, image file name]; vid_path returns element 1,
  # so this orders every track by image file name.
  list_ID[key].sort(key=vid_path)
  # Keep only the annotation ids once the order is fixed.
  b = [a[0] for a in list_ID[key]]
  list_ID[key] = b
train_list = create_train_list(list_ID, num_frames_input)
x_scale, y_scale = create_dataset(train_list, coco_annotation, coco_test_annotation)

In [None]:
# Hold out 20% of the samples for testing, then 20% of the remainder for
# validation (64% train / 16% val / 20% test), with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(x_scale, y_scale, test_size=0.2, shuffle=True, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, shuffle = True, random_state=42)

In [None]:
from torch.utils.tensorboard import SummaryWriter
# TensorBoard log directory and best-weights checkpoint path, both keyed by
# the window length.
writer = SummaryWriter('./output/Transformer{}seq'.format(num_frames_input))
path_save = './output/Transformer.bestweight{}'.format(num_frames_input)

input_dim = 4 
hidden_dim = 256
output_dim = 4

lr = 0.0007
n_epochs = 100
model = CTransformer(input_dim, heads= 4, depth = 6, seq_length= 80, dropout = 0.1)
model = model.cuda()

# Resume from the best checkpoint only when one exists. Loading it
# unconditionally made the very first run (no checkpoint yet) crash.
if os.path.exists(path_save):
  checkpoint = torch.load(path_save)
  model.load_state_dict(checkpoint['model_state_dict'])

model = train(model, x_train, y_train, x_val, y_val,lr = lr, epochs = n_epochs, writer = writer, path_save = path_save, patience = 10, transformer = False)



0it [00:00, ?it/s]



  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[0/100]: train_loss: 0.7299819966, val_loss:0.6375883207


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[1/100]: train_loss: 0.7274466045, val_loss:0.6489398168


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[2/100]: train_loss: 0.7296283953, val_loss:0.6404769035


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[3/100]: train_loss: 0.7274629695, val_loss:0.6382736028


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[4/100]: train_loss: 0.7286755859, val_loss:0.6432364441


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[5/100]: train_loss: 0.7266891587, val_loss:0.6398571111


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[6/100]: train_loss: 0.7267554685, val_loss:0.6385781782


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[7/100]: train_loss: 0.7255381019, val_loss:0.6471919065


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[8/100]: train_loss: 0.7257197940, val_loss:0.6456220761


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[9/100]: train_loss: 0.7253074225, val_loss:0.6429522409


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[10/100]: train_loss: 0.7244675293, val_loss:0.6345072925


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[11/100]: train_loss: 0.7258821318, val_loss:0.6400638809


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[12/100]: train_loss: 0.7259089118, val_loss:0.6555740933


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[13/100]: train_loss: 0.7232994902, val_loss:0.6358063483


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[14/100]: train_loss: 0.7237830459, val_loss:0.6321289095


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[15/100]: train_loss: 0.7247412619, val_loss:0.6572629406


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[16/100]: train_loss: 0.7240878671, val_loss:0.6353930256


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[17/100]: train_loss: 0.7230229425, val_loss:0.6418944697


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[18/100]: train_loss: 0.7235863596, val_loss:0.6337491877


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[19/100]: train_loss: 0.7244065105, val_loss:0.6322061560


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[20/100]: train_loss: 0.7236588634, val_loss:0.6496510418


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[21/100]: train_loss: 0.7224310720, val_loss:0.6573202906


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[22/100]: train_loss: 0.7219587083, val_loss:0.6238108390


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[23/100]: train_loss: 0.7220702308, val_loss:0.6313495010


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[24/100]: train_loss: 0.7233153116, val_loss:0.6422689087


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[25/100]: train_loss: 0.7232780752, val_loss:0.6356802701


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[26/100]: train_loss: 0.7370968552, val_loss:0.6434452611


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[27/100]: train_loss: 0.7223669070, val_loss:0.6363919134


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[28/100]: train_loss: 0.7222259389, val_loss:0.6295193602


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[29/100]: train_loss: 0.7241977807, val_loss:0.6364056940


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[30/100]: train_loss: 0.7215941579, val_loss:0.6361237792


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[31/100]: train_loss: 0.7210891957, val_loss:0.6340715008


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[32/100]: train_loss: 0.7200449285, val_loss:0.6437142276


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[33/100]: train_loss: 0.7223637271, val_loss:0.6242899925


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[34/100]: train_loss: 0.7062065989, val_loss:0.6136165186


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[35/100]: train_loss: 0.7041708735, val_loss:0.6116989529


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[36/100]: train_loss: 0.7037676812, val_loss:0.6159051306


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[37/100]: train_loss: 0.7034853985, val_loss:0.6130165754


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[38/100]: train_loss: 0.7032116108, val_loss:0.6123728094


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[39/100]: train_loss: 0.7035020762, val_loss:0.6081595336


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[40/100]: train_loss: 0.7031252545, val_loss:0.6107960411


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[41/100]: train_loss: 0.7031821321, val_loss:0.6084371099


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[42/100]: train_loss: 0.7033069280, val_loss:0.6094890154


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[43/100]: train_loss: 0.7023958979, val_loss:0.6071217416


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[44/100]: train_loss: 0.7025881489, val_loss:0.6140388280


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[45/100]: train_loss: 0.7025686261, val_loss:0.6143254733


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[46/100]: train_loss: 0.7020173465, val_loss:0.6066126773


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[47/100]: train_loss: 0.7023169556, val_loss:0.6109762062


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[48/100]: train_loss: 0.7022361339, val_loss:0.6161088441


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[49/100]: train_loss: 0.7015147774, val_loss:0.6144280139


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[50/100]: train_loss: 0.7026160749, val_loss:0.6143861786


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[51/100]: train_loss: 0.7014782099, val_loss:0.6123998482


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[52/100]: train_loss: 0.7013844378, val_loss:0.6086371391


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[53/100]: train_loss: 0.7021894021, val_loss:0.6107828964


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[54/100]: train_loss: 0.7021200948, val_loss:0.6130096764


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[55/100]: train_loss: 0.7010954097, val_loss:0.6093976771


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[56/100]: train_loss: 0.7009249204, val_loss:0.6140111567


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[57/100]: train_loss: 0.7013599628, val_loss:0.6125621023


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[58/100]: train_loss: 0.7001816709, val_loss:0.6084233980


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[59/100]: train_loss: 0.6994330336, val_loss:0.6071707061


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[60/100]: train_loss: 0.6987576119, val_loss:0.6074974661


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[61/100]: train_loss: 0.6991311360, val_loss:0.6079405829


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[62/100]: train_loss: 0.6989541782, val_loss:0.6068457929


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[63/100]: train_loss: 0.6992568841, val_loss:0.6077686876


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[64/100]: train_loss: 0.6984914125, val_loss:0.6076628877


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[65/100]: train_loss: 0.6989605561, val_loss:0.6071855525


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[66/100]: train_loss: 0.6986427401, val_loss:0.6077948299


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[67/100]: train_loss: 0.6991559642, val_loss:0.6067738589


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[68/100]: train_loss: 0.6994046561, val_loss:0.6079103101


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[69/100]: train_loss: 0.6992823623, val_loss:0.6078143547


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Epoch[70/100]: train_loss: 0.7001910212, val_loss:0.6070805923


0it [00:00, ?it/s]

  0%|          | 0/318 [00:00<?, ?it/s]

Early stopping on epoch 72


In [None]:
# Scratch check: build a standalone 6-layer encoder with d_model=4 and show the
# output size for the first 10 training sequences.
# NOTE(review): batch_first is left at its default here - confirm which axis of
# x_train the encoder treats as the sequence axis.
encoder_layer =nn.TransformerEncoderLayer(d_model = 4, nhead = 4)
transfer_encode = nn.TransformerEncoder(encoder_layer, 6, norm = nn.LayerNorm(4))
transfer_encode = transfer_encode.cuda()
transfer_encode(torch.as_tensor(x_train[:10]).cuda()).size()


In [None]:
# Evaluate on the held-out test split in a single forward pass (the whole
# x_test array is sent to the GPU at once).
model.eval()
with torch.no_grad():
  pred = model(torch.as_tensor(x_test).cuda())
  y_true = torch.as_tensor(y_test).cuda()
  loss = nn.MSELoss()
  # MSE between the box centres of predictions and ground truth.
  print('mseLoss = {}'.format(loss(batch_xyxy2cxcy(pred), batch_xyxy2cxcy(y_true))))
  # generalized_iou has its enclosure term commented out, so this is the mean plain IoU.
  print('IOU = {}'.format(generalized_iou(y_true, pred)))

In [None]:
from prettytable import PrettyTable

def count_parameters(model):
    """Print a per-parameter table of trainable sizes and return their total.

    Parameters with ``requires_grad`` set to False are left out of both the
    table and the total. Note: this redefines the ``count_parameters`` declared
    earlier in the notebook.
    """
    trainable = [
        (name, parameter.numel())
        for name, parameter in model.named_parameters()
        if parameter.requires_grad
    ]

    table = PrettyTable(["Modules", "Parameters"])
    for name, params in trainable:
        table.add_row([name, params])
    total_params = sum(params for _, params in trainable)

    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
# Report the parameter breakdown of the standalone scratch encoder
# (transfer_encode), not of the trained `model`.
count_parameters(transfer_encode)