In [0]:
# Mount Google Drive at /content/drive/ so that the archives and checkpoints
# referenced below under "/content/drive/My Drive/" are reachable from this
# Colab runtime.
import zipfile
from google.colab import drive
drive.mount('/content/drive/')

In [0]:
import zipfile

# Unpack the two archives stored on Drive into the current working directory.
# Each archive is opened in a `with` block so it is closed even when
# extraction raises part-way through.
for archive_path in (
    "/content/drive/My Drive/retinanet.zip",
    "/content/drive/My Drive/data.zip",  # train and val annotation files
):
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall()

In [0]:
#download dataset
!wget http://detrac-db.rit.albany.edu/Data/DETRAC-train-data.zip

In [0]:
# Extract the downloaded DETRAC archive into the working directory; the
# `with` block closes the archive even if extraction raises.
with zipfile.ZipFile("DETRAC-train-data.zip", 'r') as zip_ref:
    zip_ref.extractall()

In [0]:
import argparse
import collections

import numpy as np
import warnings
warnings.simplefilter("ignore", UserWarning)
import torch
import torch.optim as optim
from torchvision import transforms

from retinanet import model
from retinanet.dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, \
    Normalizer
from torch.utils.data import DataLoader

from retinanet import coco_eval
from retinanet import csv_eval

# Version guard: fails unless the major version of the installed PyTorch is 1.
assert torch.__version__.split('.')[0] == '1'

print('CUDA available: {}'.format(torch.cuda.is_available()))


# Settings read by main() below.
csv_train = "./data/retina_train.csv"    # training annotations (CSVDataset train_file)
csv_classes = "./data/class.csv"         # class list (CSVDataset class_list)
csv_val = "./data/retina_test.csv"       # validation annotations; None disables the validation dataset
depth = 50                               # backbone depth; main() accepts 18, 34, 50, 101 or 152
batch_size = 4                           # batch size handed to AspectRatioBasedSampler
def main(args=None):
    """Resume RetinaNet training from a saved checkpoint.

    Reads the module-level settings ``csv_train``, ``csv_classes``, ``depth``
    and ``batch_size``. The model and Adam optimizer states are restored from
    ``retina_ckpt_<start_iter>.pt`` and training runs for iterations
    ``start_iter + 1`` through ``total_iter`` inclusive.

    Every 1000 iterations the mean loss since the previous report is printed.
    Every 4000 iterations, and at the final iteration, three files are written
    to Drive: the model/optimizer state dicts, the pickled bare module, and
    the per-iteration log lines collected since the previous save.

    Validation is not run here; it lives in the separate evaluation cell.

    Args:
        args: Unused; kept so existing callers can still pass it.

    Raises:
        ValueError: If ``csv_train`` or ``csv_classes`` is ``None``, or if
            ``depth`` is not one of 18, 34, 50, 101, 152.
    """
    if csv_train is None:
        raise ValueError('Must provide csv_train when training on a CSV dataset.')

    if csv_classes is None:
        raise ValueError('Must provide csv_classes when training on a CSV dataset.')

    dataset_train = CSVDataset(train_file=csv_train, class_list=csv_classes,
                               transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=4, collate_fn=collater, batch_sampler=sampler)

    # Create the model
    if depth not in (18, 34, 50, 101, 152):
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_model = getattr(model, 'resnet%d' % depth)
    retinanet = build_model(num_classes=dataset_train.num_classes(), pretrained=True)
    retinanet = retinanet.cuda()

    # Restore model and optimizer state from the checkpoint being resumed.
    start_iter = 99000
    total_iter = 100000
    checkpoint_dir = "/content/drive/My Drive/retina_checkpoints"
    load_checkpoints = torch.load("%s/retina_ckpt_%d.pt" % (checkpoint_dir, start_iter))

    # The checkpoint was saved from a DataParallel-wrapped model (see the
    # save code below), so wrap before loading to get matching key names.
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.load_state_dict(load_checkpoints['model'])

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    optimizer.load_state_dict(load_checkpoints['optimizer_state_dict'])

    print('Num training images: {}'.format(len(dataset_train)))

    train_iterator = iter(dataloader_train)

    # Plain Python floats: accumulating the loss tensor itself would keep
    # autograd history alive across iterations.
    running_loss = 0.0
    running_count = 0
    log_lines = []

    # Inclusive upper bound so the final iteration (and its report/save) runs.
    for iter_num in range(start_iter + 1, total_iter + 1):

        retinanet.train()
        retinanet.module.freeze_bn()
        try:
            data = next(train_iterator)
        except StopIteration:
            # Data loader exhausted: start another pass over the dataset.
            train_iterator = iter(dataloader_train)
            data = next(train_iterator)

        optimizer.zero_grad()

        classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])

        classification_loss = classification_loss.mean()
        regression_loss = regression_loss.mean()

        loss = classification_loss + regression_loss

        # A zero loss gives nothing to optimise, so the update is skipped, but
        # the periodic reporting and checkpointing below must still run.
        if bool(loss != 0):
            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            loss_value = float(loss)
            running_loss += loss_value
            running_count += 1
            log_lines.append(
                'Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    iter_num, float(classification_loss), float(regression_loss), loss_value))

        del classification_loss
        del regression_loss
        del loss

        if iter_num % 1000 == 0:
            if running_count:
                print("iter_num: ", iter_num, " ", running_loss / running_count)
            running_loss = 0.0
            running_count = 0

        if iter_num % 4000 == 0 or iter_num == total_iter:
            print("saving...")
            saving_file = {
                'model': retinanet.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }
            torch.save(saving_file, "%s/retina_ckpt_%d.pt" % (checkpoint_dir, iter_num))
            # Full pickled module (unwrapped from DataParallel); the inference
            # cells load this file directly with torch.load.
            torch.save(retinanet.module, "%s/retina_%d.pt" % (checkpoint_dir, iter_num))
            with open("/content/drive/My Drive/retina_message%d.txt" % iter_num, "w") as th_file:
                th_file.write("".join(line + "\n" for line in log_lines))
            log_lines = []


if __name__ == '__main__':
    main()

In [0]:
#eval script
from __future__ import print_function

import numpy as np
import json
import os

import torch

import argparse
import collections
import zipfile
import numpy as np
import warnings
warnings.simplefilter("ignore", UserWarning)
import torch
import torch.optim as optim
from torchvision import transforms

from retinanet import model
from retinanet.dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, \
    Normalizer
from torch.utils.data import DataLoader

from retinanet import coco_eval
from retinanet import csv_eval

# Version guard: fails unless the major version of the installed PyTorch is 1.
assert torch.__version__.split('.')[0] == '1'

print('CUDA available: {}'.format(torch.cuda.is_available()))

# Settings for the evaluation main() below.
dataset = "csv"                          # not read anywhere in this cell
csv_train = "./data/retina_train.csv"    # not read by the evaluation main()
csv_classes = "./data/class.csv"         # class list (CSVDataset class_list)
csv_val = "./data/retina_test.csv"       # annotations that are evaluated
depth = 50                               # backbone depth of the checkpointed model

def main(args=None):
    """Evaluate the iteration-99000 checkpoint on the validation CSV.

    Builds the validation dataset from the module-level ``csv_val`` and
    ``csv_classes``, creates a ResNet-backed RetinaNet of the configured
    ``depth``, loads the saved state dict, and prints the four values returned
    by ``csv_eval.evaluate``.

    Args:
        args: Unused; kept so existing callers can still pass it.

    Raises:
        ValueError: If ``depth`` is not one of 18, 34, 50, 101, 152.
    """
    dataset_val = CSVDataset(train_file=csv_val, class_list=csv_classes,
                             transform=transforms.Compose([Normalizer(), Resizer()]))

    # Fail with a clear message instead of leaving `retinanet` unbound when an
    # unsupported depth is configured.
    if depth not in (18, 34, 50, 101, 152):
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_model = getattr(model, 'resnet%d' % depth)
    retinanet = build_model(num_classes=dataset_val.num_classes(), pretrained=True)
    retinanet = retinanet.cuda()

    path = "/content/drive/My Drive/retina_checkpoints/retina_ckpt_%d.pt" % 99000
    load_checkpoints = torch.load(path)

    # The training cell saves the state dict of a DataParallel-wrapped model,
    # so wrap before loading to get matching key names.
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.load_state_dict(load_checkpoints['model'])
    retinanet.eval()

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        mAP, r, p, f1 = csv_eval.evaluate(dataset_val, retinanet)

    print("val")
    print("map: ", mAP)
    print("recall: ", r)
    print("precision: ", p)
    print("f1_score: ", f1)


if __name__ == '__main__':
    main()

In [0]:
#detect test image

import numpy as np
import torchvision
import time
import os
import copy
import pdb
import time
import argparse
import skimage.io
import skimage.transform
import skimage.color
import skimage

from PIL import Image
import sys
import cv2

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms

from retinanet.dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, \
	UnNormalizer, Normalizer

import zipfile


from torch.autograd import Variable
# Version guard: fails unless the major version of the installed PyTorch is 1.
assert torch.__version__.split('.')[0] == '1'
from google.colab.patches import cv2_imshow
print('CUDA available: {}'.format(torch.cuda.is_available()))
# `time` is used by main() below to time the forward pass.
import time

def main(args=None):
    """Run the saved detector on one DETRAC frame and save the result as ``1.jpg``.

    Loads the pickled model ``retina_99000.pt`` from Drive, runs it on
    ``MVI_20061/img00300.jpg``, draws a red rectangle for every detection whose
    score exceeds 0.5, and writes the annotated image to the working directory.
    The input tensor shape and the forward-pass wall time are printed.

    Args:
        args: Unused; kept so existing callers can still pass it.

    Raises:
        FileNotFoundError: If OpenCV cannot read the test image.
    """
    image_path = "./Insight-MVT_Annotation_Train/MVI_20061/img00300.jpg"
    score_threshold = 0.5

    # NOTE: torch.load unpickles a whole module object; only load checkpoint
    # files from a trusted source.
    path = "/content/drive/My Drive/retina_checkpoints/retina_%d.pt" % 99000
    retinanet = torch.load(path)
    retinanet = retinanet.cuda()
    retinanet.eval()

    # Network input: image scaled to [0, 1] and laid out as (1, C, H, W).
    # NOTE(review): the training cell applies Normalizer() to its inputs while
    # this path only divides by 255 — confirm that mismatch is intended.
    im = skimage.io.imread(image_path)
    if len(im.shape) == 2:
        im = skimage.color.gray2rgb(im)
    im = torch.from_numpy(im.astype(np.float32) / 255.0)
    im = im.permute(2, 0, 1).unsqueeze(0)
    print(im.shape)

    # Separate OpenCV copy of the same file, used only for drawing and saving.
    main_im = cv2.imread(image_path, 1)
    if main_im is None:
        raise FileNotFoundError(image_path)

    with torch.no_grad():
        st = time.time()
        scores, classification, transformed_anchors = retinanet(im.cuda().float())
        print("diff time ", time.time() - st)

    # Keep only the detections scoring above the threshold.
    keep = np.where(scores.cpu().numpy() > score_threshold)[0]
    boxes = transformed_anchors.cpu().numpy()[keep]

    for box in boxes:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(main_im, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

    cv2.imwrite(str(1) + ".jpg", main_im)


if __name__ == '__main__':
    main()

CUDA available: True
torch.Size([1, 3, 540, 960])
diff time  0.10545849800109863


In [0]:
# Install filterpy — presumably a dependency of the local `sort` module
# imported on the next line; TODO confirm.
!pip install filterpy
# Star import; the tracking cells below repeat it and then instantiate `Sort()`.
from sort import *


In [0]:
#tracking script

import numpy as np
import torchvision
import time
import os
import copy
import pdb
import time
import argparse
from sort import *
import sys
import cv2
from skimage import io
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms

from retinanet.dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, \
	UnNormalizer, Normalizer

import zipfile

from google.colab.patches import cv2_imshow
print('CUDA available: {}'.format(torch.cuda.is_available()))

import shutil

# Start from an empty output directory so frames left over from an earlier
# run never end up in the generated video.
VIDEO_SAVE_DIR = "traffic_result"
if os.path.exists(VIDEO_SAVE_DIR):
    shutil.rmtree(VIDEO_SAVE_DIR)
os.mkdir(VIDEO_SAVE_DIR)

FRAME_DIR = "/content/Insight-MVT_Annotation_Train/MVI_40131"
INPUT_SIZE = (1056, 608)  # size tuple handed to cv2.resize

# NOTE: torch.load unpickles a whole module object; only load checkpoint files
# from a trusted source.
path = "/content/drive/My Drive/retina_checkpoints/retina_%d.pt" % 99000

retinanet = torch.load(path)

retinanet = retinanet.cuda()
retinanet.eval()

unnormalize = UnNormalizer()

from skimage import io

import skimage
frames = []
frame_count = 0
mot_tracker = Sort()

# Process the image sequence in file-name order.
frame_path = sorted(os.path.join(FRAME_DIR, name) for name in os.listdir(FRAME_DIR))

for frame_file in frame_path:
    frame = cv2.imread(frame_file)
    if frame is None:
        # cv2.imread returns None for anything it cannot decode (for example a
        # stray non-image file in the directory); skip it instead of crashing.
        continue

    frame = cv2.resize(frame, INPUT_SIZE)
    frame2 = frame.copy()  # drawing canvas; `frame` itself feeds the network

    # Network input: image scaled to [0, 1] and laid out as (1, C, H, W).
    # NOTE(review): frames are used exactly as cv2.imread returns them and are
    # not passed through Normalizer(), unlike the training inputs — confirm
    # this matches how the model was trained.
    img = torch.from_numpy(frame.astype(np.float32) / 255.0).float()
    img = img.permute(2, 0, 1).unsqueeze(0)

    # Inference only: skip autograd bookkeeping for every frame.
    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(img.cuda().float())
    boxes = transformed_anchors.cpu().numpy()
    scores_ = scores.cpu().numpy()
    s = scores_.reshape(scores_.shape[0], 1)
    # One row per detection: the box columns followed by the score.
    result = np.concatenate((boxes, s), axis=1)

    frame_count += 1

    # NOTE(review): every detection is handed to the tracker here, whereas the
    # tracking-accuracy cell keeps only scores above 0.5 — confirm which
    # behaviour is wanted for the video.
    if result.size != 0:
        trackers = mot_tracker.update(result)

        for d in trackers:
            # The first four columns are drawn as the box corners and the
            # fifth is drawn as the text label.
            xmin, ymin, xmax, ymax, label = (int(v) for v in d[:5])
            cv2.rectangle(frame2, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
            cv2.putText(frame2, str(label), (xmin, ymin),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)

    if frame_count % 100 == 0:
        print(frame_count)

    name = os.path.join(VIDEO_SAVE_DIR, '{}.jpg'.format(frame_count))
    cv2.imwrite(name, frame2)
  


#convert tracked image to video

import glob
import os
import cv2
# Report the frame rate of the reference video. This is informational only:
# the frames written to "traffic_result" come from the image sequence, so the
# output rate is fixed explicitly after the probe.
video = cv2.VideoCapture("/content/drive/My Drive/b_traffic.mp4")
try:
    # Parse only the leading component so version strings with a different
    # number of dot-separated parts do not break the check.
    major_ver = int(cv2.__version__.split('.')[0])

    if major_ver < 3:
        fps = video.get(cv2.cv.CV_CAP_PROP_FPS)
        print("Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps))
    else:
        fps = video.get(cv2.CAP_PROP_FPS)
        print("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps))
finally:
    # Always give the capture handle back, even if the probe raises.
    video.release()

# Frame rate actually used for the output video.
fps = 25

def make_video(outvid, images=None, fps=30, size=None,
               is_color=True, format="FMP4"):
    """Encode a sequence of image files into a video file.

    Args:
        outvid: Path of the video file to write.
        images: Iterable of image file paths, in playback order. ``None`` or
            an empty iterable produces no video.
        fps: Frame rate of the output video.
        size: ``(width, height)`` of the output. Defaults to the size of the
            first image; frames of any other size are resized to it.
        is_color: Passed through to ``cv2.VideoWriter``.
        format: Four-character codec code for ``cv2.VideoWriter_fourcc``.

    Returns:
        The released ``cv2.VideoWriter``, or ``None`` when there were no
        images to write.

    Raises:
        FileNotFoundError: If any path in ``images`` does not exist. All paths
            are checked up front so no partial video is written.
        ValueError: If an existing file cannot be decoded as an image.
    """
    image_paths = [] if images is None else list(images)

    for image in image_paths:
        if not os.path.exists(image):
            raise FileNotFoundError(image)
    if not image_paths:
        return None

    # Imported after the cheap argument checks so those run without OpenCV.
    from cv2 import VideoWriter, VideoWriter_fourcc, imread, resize

    fourcc = VideoWriter_fourcc(*format)
    vid = None
    try:
        for image in image_paths:
            img = imread(image)
            if img is None:
                raise ValueError("could not decode image: %s" % image)
            if vid is None:
                if size is None:
                    size = img.shape[1], img.shape[0]
                vid = VideoWriter(outvid, fourcc, float(fps), size, is_color)
            # Resize when EITHER dimension differs; a frame whose size does
            # not match the writer's frame size must not be written as-is.
            if size[0] != img.shape[1] or size[1] != img.shape[0]:
                img = resize(img, size)
            vid.write(img)
    finally:
        # Finalise the file even when a frame fails half-way through.
        if vid is not None:
            vid.release()
    return vid



def _frame_index(image_path):
    """Return the integer frame number encoded in a file name such as ``12.jpg``."""
    stem, _extension = os.path.splitext(os.path.basename(image_path))
    return int(stem)


# Collect the tracked frames and order them by frame number; a plain string
# sort would place "10.jpg" before "2.jpg".
images = sorted(glob.iglob(os.path.join("traffic_result", '*.*')), key=_frame_index)

outvid = "/content/drive/My Drive/retina_out.mp4"
make_video(outvid, images, fps)
	 	

				

In [0]:
#-------------------------CALCULATE ACCURACY TRACKING-------------------------------

import numpy as np
import torchvision
import time
import os
import copy
import pdb
import time
import argparse

import sys
import cv2
from skimage import io
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
from sort import *

import zipfile

from google.colab.patches import cv2_imshow
print('CUDA available: {}'.format(torch.cuda.is_available()))


# NOTE: torch.load unpickles a whole module object; only load checkpoint files
# from a trusted source.
path = "/content/drive/My Drive/retina_checkpoints/retina_%d.pt" % 99000

retinanet = torch.load(path)

retinanet = retinanet.cuda()
retinanet.eval()

from skimage import io
import skimage
import skimage.color  # gray2rgb is used below
from torch.autograd import Variable

SCORE_THRESHOLD = 0.5  # detections at or below this score are not tracked

frames = []
frame_count = 0
mot_tracker = Sort()

# One image path per line, relative to the dataset root below. Blank lines are
# ignored, and the file is closed as soon as it has been read.
with open('test_tracking.txt') as the_file:
    list_of_test_img = sorted(line.strip() for line in the_file if line.strip())

all_ = []  # one [name, label, x1, y1, x2, y2] row per tracker output per frame
path = "/content/Insight-MVT_Annotation_Train/"
s_t = 0    # running total of rows returned by the tracker

for filename in list_of_test_img:

    frame = skimage.io.imread(path + "/" + str(filename))

    if len(frame.shape) == 2:
        frame = skimage.color.gray2rgb(frame)

    # Network input: image scaled to [0, 1] and laid out as (1, C, H, W).
    frame = torch.from_numpy(frame.astype(np.float32) / 255.0)
    frame = frame.permute(2, 0, 1).unsqueeze(0)

    # Inference only: skip autograd bookkeeping for every frame.
    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(frame.cuda().float())
    boxes = transformed_anchors.cpu().numpy()
    scores_ = scores.cpu().numpy()
    s = scores_.reshape(scores_.shape[0], 1)
    # One row per detection: the box columns followed by the score.
    result = np.concatenate((boxes, s), axis=1)

    # Keep only the confident detections for the tracker.
    result_ = result[np.where(scores_ > SCORE_THRESHOLD)[0]].astype(np.float64)

    frame_count += 1

    # NOTE(review): the guard tests the unfiltered detections, so the tracker
    # can be called with an empty (0, 5) array when nothing passes the
    # threshold — confirm the Sort implementation in use accepts that.
    if result.size != 0:
        trackers = mot_tracker.update(result_)
        s_t += trackers.shape[0]
        for d in trackers:
            # First four columns are stored as x1, y1, x2, y2 and the fifth
            # as the "label" column of the output CSV.
            xmin, ymin, xmax, ymax, label = (int(v) for v in d[:5])
            all_.append([filename, label, xmin, ymin, xmax, ymax])

    if frame_count % 100 == 0:
        print(frame_count, s_t)


import pandas as pd

df = pd.DataFrame(all_, columns=["name", "label", "x1", "y1", 'x2', 'y2'])
df.to_csv('retina_track_out.csv', index=False)
  