### **Real Time Tracking**

Based in the research papers listed below:
1. https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=784637&tag=1
2. https://www.sciencedirect.com/science/article/abs/pii/S1574013718303101?casa_token=i9zjlPDZ3f8AAAAA:nzHOeOqRaRKApweuMYSKDrhepsn8YmcsQwYcpweVGzVNiBPWxZ6opSYPadqOj6WW83ZBtD6z
3. https://hal.archives-ouvertes.fr/hal-00338206/en/

Algorithm implemented on stock free video from https://mixkit.co/free-stock-video/highway-traffic-seen-through-drone-611/

Necessary imports

In [5]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
from scipy import stats as st

Taking note of the params of the video for frame extraction

In [6]:
videoCapture = cv2.VideoCapture(r"final-2.mpg") #Opens a video file in our case for video capturing
videoCapture.set(cv2.CAP_PROP_FRAME_HEIGHT, 120) #Sets height of the frame in the video 
videoCapture.set(cv2.CAP_PROP_FRAME_WIDTH, 176) #Sets width of the frame in the video 
fps  = videoCapture.get(cv2.CAP_PROP_FPS) #returns the frame rate of the video
size = (int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)));
print("\nfps =", fps) #fps = frames per second
print("\nSize =",size) #Size of the video
success, frame = videoCapture.read() #returns the next input frame


fps = 25.0

Size = (162, 288)


Frame Extraction

In [7]:
# Create the frames from the given video
num_frames = 0
while success:
    cv2.imwrite(r"frames/frame%d.jpg" % num_frames, frame) # Saving the frames with num_frames
    num_frames += 1    
    success, frame = videoCapture.read()
print("Total number of frames extracted from video = ", num_frames)

Total number of frames extracted from video =  50


Copying frames for foreground and background

In [8]:
frame_arr_fg = np.zeros((num_frames, size[0], size[1]), dtype = int) # Numpy ndarray for foreground
frame_arr_bg = np.zeros((num_frames, size[0], size[1]), dtype = float) # Numpy ndarray for background
frame_no = 0
while frame_no < num_frames:
    curr_frame = cv2.imread(r"frames/frame{}.jpg".format(frame_no), cv2.IMREAD_GRAYSCALE) #read from frame
    frame_arr_fg[frame_no] = np.copy(curr_frame)
    frame_arr_bg[frame_no] = np.copy(curr_frame)
    frame_no+=1 
print(frame_no) #sanity check

50


Model Hyperparameters

In [9]:
r_result = 255*(np.ones((num_frames, size[0], size[1]), dtype = int))  
threshold = 0.70
num_gaussian = 5 # number of Gaussians for each pixel
alpha = 0.2 #Learning rate for updation

The Main Algorithm - We are iterating through each frame pixel wise i.e. if there are 50 frames and the number of pixels are 162 cross 288, then it will parse through all the fifty frames 162x288 times each frame. Then we initialize all the params of the gaussians in a 5 cross the number of gaussians defined per pixel. We store the mean, variance, distance and the weight arrays as per the research paper. We match the gaussians of those frame pixels who lie within +-2.5 sigma of the gaussians representing the pixel. If there is no such gaussian, then we replace the least weighted gaussian and add this gaussian to the pixel representation. After this process we change the values of the params as per the research paper. In the end we form the frames from the array and convert them to a video to look at the background and foreground extraction.

In [10]:
for i in range(0,size[0]):
    print(i)
    
    for j in range(0,size[1]): # For each pixel(i, j), prepare a GMM with all frames(50) here	   
        fr_no = frame_arr_fg[0:num_frames, i, j]
        params = np.zeros((num_gaussian, 5), dtype = float) # Store the mean, variance, distance and weight array for each pixel
        params[0,0] = np.mean(fr_no) # mean
        params[0,1] = np.var(fr_no) # variance
        params[0,2] = 2.5 * np.sqrt(params[0,1]) # +-2.5 sigma range
        params[0,3] = 1 # weight
        params[0,4] = params[0,3] / np.sqrt(params[0,1]) # distance 
        rho = 0 # second-learning rate
        frame = 0		
        for frame in range(0,num_frames-1):
            temp_frame = frame_arr_fg[frame, i, j] # pixel i,j in num_frames = frame
            for gaussian in range(0, num_gaussian):        
                if ((params[gaussian,3] == 0) or (gaussian == num_gaussian - 1)): 
                    params[gaussian,0] = temp_frame
                    params[gaussian,1] =  params[gaussian,1] * 10
                    params[gaussian,2] = 2.5 * np.sqrt(params[gaussian,1])
                    params[gaussian,3] = params[gaussian,3] / 10
                    params[gaussian,4] = params[gaussian,3] / np.sqrt(params[gaussian,1])
                    
                if ((temp_frame < params[gaussian,0] + params[gaussian,2]) and (temp_frame > params[gaussian,0] - params[gaussian,2])): # checking whether the new frame pixel lies within 2.5 quantile range i.e. within 2.5 sigma or not
                    sd = st.norm.pdf(params[gaussian,0], temp_frame, np.sqrt(params[gaussian,1])) # new gaussian with mean and standard deviation
                    rho = alpha * sd # second-learning rate
                    #updation of parameters on matching of gaussian
                    params[gaussian,0] = (1 - rho) * params[gaussian,0] + rho * (temp_frame)
                    params[gaussian,1] =  (1 - rho) * params[0,1] + rho * (temp_frame - params[gaussian,0]) ** 2
                    params[gaussian,2] = 2.5 * np.sqrt(params[gaussian,1])
                    params[gaussian,3] += alpha 
                    params[gaussian,4] = params[gaussian,3] / np.sqrt(params[gaussian,1]) 
                              
            for gaussian in range(0, num_gaussian): # updating the weights 
                params[gaussian,3] = (1 - alpha) * params[gaussian,3]
                  
            newarray = sorted(params, key=lambda x:x[4]) # sorting the params based on weight
            params = np.vstack(newarray)
            sum = 0.0
            max_gaussian = 0
        
            for gaussian in range(0, num_gaussian): # verify if it is foreground or background
                sum = sum + params[gaussian,3]
                if (sum > threshold):
                    max_gaussian = gaussian
                    break
            
            # verify if it is foreground or background
            sum = 0
            for gaussian in range(max_gaussian,num_gaussian):
                if ((temp_frame < params[gaussian,0] + params[gaussian,2]) and
                    (temp_frame > params[gaussian,0] - params[gaussian,2])):
                    r_result[frame][i][j] = 0
                    
                sum = params[gaussian, 0] * params[gaussian, 4]
                break
            frame_arr_bg[frame][i][j] = sum

0


  params[gaussian,4] = params[gaussian,3] / np.sqrt(params[gaussian,1])
  params[gaussian,2] = 2.5 * np.sqrt(params[gaussian,1])
  params[gaussian,4] = params[gaussian,3] / np.sqrt(params[gaussian,1])


1


  params[0,4] = params[0,3] / np.sqrt(params[0,1]) # distance
  sum = params[gaussian, 0] * params[gaussian, 4]


2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161


Recreate the frames from the resultant matrices

In [11]:
for index in range(r_result.shape[0]):
    cv2.imwrite("final/new_file"+str(index)+".jpeg",r_result[index])
    
for index in range(frame_arr_bg.shape[0]):
    cv2.imwrite("final/new_frame"+str(index)+".jpeg",frame_arr_bg[index])

Forming the video for foreground and background respectively. The moving cars are the foreground since they are the ones which account for the maximum variance which in turn leads to matching distributions and this helps to detect the moving object since its gaussians change faster over time. The background is relatively simple since not much changes so variance is less so it remains there. This accounts for the Real-Time aspect of the algorithm. 

In [12]:
fps = 20

frame_array = [] # storing the frames produced

for filename in range(0,50):
    img = cv2.imread(r"final/new_file{}.jpeg".format(filename)) #reading the frame
    height, width, layers = img.shape
    size = (width,height)
    frame_array.append(img) #appending the frames one after the other
bg = cv2.VideoWriter("final/fg.avi",cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
for i in range(len(frame_array)):
    bg.write(frame_array[i]) #adding the frames for video
        
frame_array = [] # storing the frames produced

for filename in range(0,50):
    img = cv2.imread(r"final/new_frame{}.jpeg".format(filename)) #reading each frames
    
    height, width, layers = img.shape
    size = (width,height)
    frame_array.append(img) #appending the frames one after the other
fg = cv2.VideoWriter("final/bg.avi",cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
for i in range(len(frame_array)):
    fg.write(frame_array[i]) #adding the frames for video
    
bg.release()                  
fg.release()