<a href="https://colab.research.google.com/github/Jeremy26/optical_flow_course/blob/main/RAFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RAFT

Let's run the RAFT optical flow algorithm.


In [None]:
# Clone the princeton-vl RAFT repository into ./RAFT.
!git clone https://github.com/princeton-vl/RAFT.git

In [None]:
# List the contents of the cloned RAFT directory.
!ls RAFT

In [None]:
# Run the model download script that ships with the repository.
# NOTE(review): presumably this is what creates models/raft-kitti.pth, which load_model is given later - confirm.
!./RAFT/download_models.sh

In [None]:
# Download the course archive, extract it, then delete the zip.
# `unzip` takes the archive name as its first argument; a tar-style `xf` would be read as the archive to open.
# -o overwrites existing files without prompting, so the cell can be re-run.
!wget https://thinkautonomous-raft.s3.eu-west-3.amazonaws.com/raft_data.zip && unzip -o raft_data.zip && rm raft_data.zip

In [None]:
# Move the raft.py shipped in raft_data over the repository's core/raft.py.
!mv raft_data/raft.py RAFT/core/raft.py 

In [None]:
# Move the update.py shipped in raft_data over the repository's core/update.py.
!mv raft_data/update.py RAFT/core/update.py

# Run RAFT on 2 images

In [None]:
import sys
sys.path.append('RAFT/core')
from raft import RAFT
from utils import flow_viz
from utils.utils import InputPadder
from collections import OrderedDict
import os
import cv2
import numpy as np
import torch
import matplotlib.pyplot as plt
from utils import flow_viz

In [None]:
def bgr2rgb(img):
    """Return ``img`` converted with OpenCV's ``COLOR_BGR2RGB`` code.

    The conversion swaps the first and third channels, so applying it to an
    RGB image yields BGR again.
    """
    conversion = cv2.COLOR_BGR2RGB
    return cv2.cvtColor(img, conversion)

In [None]:
def frame_preprocess(frame, device):
    """Turn an image array into a batched float tensor on ``device``.

    Args:
        frame: NumPy array indexed as (height, width, channels).
        device: Target passed to ``Tensor.to`` (e.g. ``"cpu"`` or ``"cuda"``).

    Returns:
        A float32 tensor of shape (1, channels, height, width).
    """
    tensor = torch.from_numpy(frame)
    # Channels-last -> channels-first, then cast to float32.
    channels_first = tensor.permute(2, 0, 1).float()
    # Add the leading batch dimension and move to the requested device.
    return channels_first.unsqueeze(0).to(device)

In [None]:
def get_cpu_model(model):
    """Return a copy of a state dict with the leading ``"module."`` removed from its keys.

    ``load_model`` wraps the network in ``torch.nn.DataParallel`` on GPU and
    calls this function on the CPU path, where the model is not wrapped and
    the 7-character ``"module."`` prefix has to go.

    Keys that do not start with ``"module."`` are kept unchanged instead of
    losing their first seven characters.

    Args:
        model: Mapping of parameter name -> value (a state dict).

    Returns:
        A new ``OrderedDict`` with the same values, in the same order.
    """
    prefix = "module."
    new_model = OrderedDict()
    for name, weights in model.items():
        # Only strip the prefix when it is really there.
        new_name = name[len(prefix):] if name.startswith(prefix) else name
        new_model[new_name] = weights
    return new_model

In [None]:
def load_model(weights_path):
    """Build a RAFT network and load the checkpoint stored at ``weights_path``.

    The checkpoint is always deserialised onto the CPU first. With CUDA
    available the network is wrapped in ``torch.nn.DataParallel``, the
    checkpoint is loaded as-is and the wrapper is moved to the GPU. Without
    CUDA the checkpoint keys are first rewritten by ``get_cpu_model`` and
    loaded into the bare network.

    Args:
        weights_path: Path to the checkpoint file.

    Returns:
        The network with weights loaded (``DataParallel``-wrapped on GPU).
    """
    net = RAFT()
    state_dict = torch.load(weights_path, map_location=torch.device("cpu"))

    if not torch.cuda.is_available():
        # CPU runtime: key names are rewritten before loading.
        net.load_state_dict(get_cpu_model(state_dict))
        return net

    # GPU runtime: parallelise across the available GPUs, then load.
    net = torch.nn.DataParallel(net)
    net.load_state_dict(state_dict)
    net.to("cuda")
    return net

In [None]:
def inference_imgs(model, frame_1, frame_2):
    """Estimate optical flow between two image arrays.

    The model is switched to evaluation mode. Both frames go through
    ``frame_preprocess``, are padded with ``InputPadder(..., mode="kitti")``
    and fed to the model for 12 iterations in test mode. The flow is returned
    at the padded resolution; no unpadding is applied here.

    Args:
        model: Callable network, invoked as
            ``model(img1, img2, iters=12, test_mode=True)``.
        frame_1: First image as a (height, width, channels) NumPy array.
        frame_2: Second image, same layout.

    Returns:
        ``(flow_up, flo)``: the second output of the model, and the image
        produced by ``flow_viz.flow_to_image`` from its first batch item.
    """
    # Evaluation mode, inputs on the GPU when one is present.
    model.eval()
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    with torch.no_grad():
        tensor_1 = frame_preprocess(frame_1, device)
        tensor_2 = frame_preprocess(frame_2, device)

        # Pad both inputs with the same padder.
        padder = InputPadder(tensor_1.shape, mode="kitti")
        tensor_1, tensor_2 = padder.pad(tensor_1, tensor_2)

        # Only the second output is used.
        _, flow_up = model(tensor_1, tensor_2, iters=12, test_mode=True)

        # Channels-last NumPy array -> colour rendering.
        flow_hwc = flow_up[0].permute(1, 2, 0).cpu().numpy()
        flo = flow_viz.flow_to_image(flow_hwc)

    return flow_up, flo

In [None]:
# Load two frames from raft_data with OpenCV (they are passed through bgr2rgb before being shown).
img_1 = cv2.imread("raft_data/0000000148.png")
img_2 = cv2.imread("raft_data/0000000149.png")

# Build the network from the checkpoint at models/raft-kitti.pth.
model = load_model("models/raft-kitti.pth")

# flow_up is the flow tensor returned by the model, flo its colour rendering.
flow_up, flo = inference_imgs(model, img_1, img_2)

# Show the first frame next to the flow rendering.
f, (ax0, ax1) = plt.subplots(1,2, figsize=(20,10))
ax0.imshow(bgr2rgb(img_1))
ax1.imshow(flo)
plt.show()

In [None]:
# Resize the flow rendering to the size of the input frame so the two can be
# blended. cv2.resize takes (width, height); the size is read from img_1
# instead of being hard-coded, so other image sizes work too.
solved_flow = cv2.resize(flo, (img_1.shape[1], img_1.shape[0]))
# Blend: weight 0.9 on the flow colours, 0.1 on the original frame.
added_image = cv2.addWeighted(solved_flow,0.9,bgr2rgb(img_1),0.1,0)

In [None]:
# Disabled alternative: stack the overlay above the resized flow image.
#concatenated = cv2.vconcat([added_image, solved_flow])

# Display the blended overlay on a large figure, without interpolation smoothing.
plt.figure(figsize = (30,30))
plt.imshow(added_image,  interpolation='nearest')
plt.show()

In [None]:
# Display the blended overlay again on a taller figure.
plt.figure(figsize = (20,40))
plt.imshow(added_image,  interpolation='nearest')
# plt.show must be called; the bare name only evaluates to the function object.
plt.show()

# Understand the Output

In [None]:
# Inspect the raw flow returned by the model: its overall shape, then the
# first channel of the first batch item (read as the `u` component by
# add_arrow_to_box further down) and that channel's shape.
print(flow_up.shape)
print(flow_up[0][0])
print(flow_up[0][0].shape)

# Run an Object Detection algorithm to identify individual objects

In [None]:
# Install the pinned yolov4 package; it provides yolov4.tf.YOLOv4, imported right below.
!python3 -m pip install yolov4==2.0.2 # After Checking, YOLO 2.0.2 works without modifying anything. Otherwise keep 1.2.1
from yolov4.tf import YOLOv4
import tensorflow as tf
import time

In [None]:
# Module-level detector used by run_obstacle_detection: the tiny YOLOv4
# variant, with class names and weights taken from raft_data.
yolo = YOLOv4(tiny=True)
yolo.classes = "raft_data/coco.names"
yolo.make_model()
yolo.load_weights("raft_data/yolov4-tiny.weights", weights_type="yolo")

def run_obstacle_detection(img):
    """Detect objects in ``img`` with the module-level ``yolo`` model.

    The image is resized by ``yolo.resize_image``, scaled to 0.0-1.0 and run
    through ``yolo.model``. The outputs are flattened and concatenated once,
    then converted to boxes, filtered and drawn once. (Previously these
    steps sat inside the per-output loop and were recomputed on partial
    candidates for every output.)

    Args:
        img: Image array; callers in this notebook pass an RGB image.

    Returns:
        ``(result, pred_bboxes)``: the image returned by ``yolo.draw_bboxes``
        and the box array rescaled by ``yolo.fit_pred_bboxes_to_original``.
        Rows are expected as (x, y, w, h, class_id, prob), per the yolov4
        package convention - TODO confirm against the installed version.
    """
    resized_image = yolo.resize_image(img)
    # 0 ~ 255 to 0.0 ~ 1.0
    resized_image = resized_image / 255.
    # input_data == Dim(1, input_size, input_size, channels)
    input_data = resized_image[np.newaxis, ...].astype(np.float32)

    raw_outputs = yolo.model.predict(input_data)

    # Flatten every output to (1, grid * grid * 3, -1) so they can be joined.
    flattened = []
    for output in raw_outputs:
        grid_size = output.shape[1]
        flattened.append(tf.reshape(output, shape=(1, grid_size * grid_size * 3, -1)))

    # candidates == Dim(batch, candidates, (bbox))
    candidates = np.concatenate(flattened, axis=1)
    # pred_bboxes == Dim(candidates, (x, y, w, h, class_id, prob))
    pred_bboxes = yolo.candidates_to_pred_bboxes(candidates[0], iou_threshold=0.35, score_threshold=0.40)
    # Drop the all-zero rows.
    pred_bboxes = pred_bboxes[~(pred_bboxes == 0).all(1)]
    pred_bboxes = yolo.fit_pred_bboxes_to_original(pred_bboxes, img.shape)
    result = yolo.draw_bboxes(img, pred_bboxes)
    return result, pred_bboxes

In [None]:
# Run the detector on the first frame (converted with bgr2rgb) and show the annotated image.
result, pred_bboxes = run_obstacle_detection(bgr2rgb(img_1))

plt.imshow(result)
plt.show()

# Evaluate the Motion of each obstacle through time

In [None]:
def add_arrow_to_box(result, pred_bboxes, fl_vectors):
    """Draw one motion arrow per detected box, from the mean flow inside it.

    For each box the mean of both flow channels over the box area is taken.
    An arrow is drawn from the box centre to the centre plus twice the
    integer-truncated mean displacement.

    Args:
        result: Image of shape (height, width, channels) to draw on.
        pred_bboxes: Iterable of boxes. Entries 0-3 are read as centre x,
            centre y, width and height, as fractions of the image size.
        fl_vectors: Flow indexed as ``[0][channel][row, col]``, channel 0
            being ``u`` and channel 1 being ``v``. Both torch tensors and
            NumPy arrays are passed by callers in this notebook.

    Returns:
        The annotated image. With no boxes, or only boxes that do not
        overlap the flow field, ``result`` itself is returned rather than an
        empty list.
    """
    h, w, _ = result.shape

    for box in pred_bboxes:
        center_x = int(box[0]*w)
        center_y = int(box[1]*h)
        width_box_2 = int(box[2]*w/2)
        height_box_2 = int(box[3]*h/2)

        # Clamp at 0: a negative index would wrap around to the other side
        # of the flow field and select the wrong (usually empty) region.
        box_x1 = max(center_x - width_box_2, 0)
        box_y1 = max(center_y - height_box_2, 0)
        box_x2 = max(center_x + width_box_2, 0)
        box_y2 = max(center_y + height_box_2, 0)

        flows_u = fl_vectors[0][0][box_y1:box_y2, box_x1:box_x2]
        flows_v = fl_vectors[0][1][box_y1:box_y2, box_x1:box_x2]
        # An empty region has a NaN mean, which int() cannot convert.
        if flows_u.shape[0] == 0 or flows_u.shape[1] == 0:
            continue

        mean_u = flows_u.mean()
        mean_v = flows_v.mean()
        # Colour (255, 0, 0) with thickness 13; red when `result` is RGB.
        result = cv2.arrowedLine(result, (center_x, center_y), (center_x + int(mean_u)*2, center_y + int(mean_v)*2), (255,0,0), 13)
    return result

# Add motion arrows to the detection image of the first frame, using the flow computed earlier, and show it.
image_arr = add_arrow_to_box(result, pred_bboxes, flow_up)
plt.imshow(image_arr)
plt.show()

In [None]:
# Side by side: detections with motion arrows (left) and the flow rendering (right).
f, (ax0, ax1)= plt.subplots(1, 2, figsize=(20,10))
ax0.imshow(image_arr)
ax1.imshow(flo)
plt.show()

In [None]:
# Compare the shape of the flow rendering with the shape of the annotated frame.
print(flo.shape)
print(image_arr.shape)

# Run on a Video 🙌🏼

In [None]:
def inference_video(video_path):
    """Run flow estimation, detection and arrow drawing on every frame pair of a video.

    Consecutive frames are paired (previous, current). For each pair the flow
    is estimated with ``inference_imgs``, objects are detected on the current
    frame with ``run_obstacle_detection``, and arrows are added with
    ``add_arrow_to_box`` when at least one box was found.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        ``(video_frames_arrow, video_frames_flow)``: two lists with one entry
        per frame pair - the annotated frame (channel-swapped once more with
        ``bgr2rgb`` after drawing) and the flow rendering. Both are empty if
        fewer than two frames could be read.
    """
    model = load_model("models/raft-kitti.pth")
    # change model's mode to evaluation
    model.eval()

    video_frames_arrow = []
    video_frames_flow = []

    # capture the video and get the first frame
    cap = cv2.VideoCapture(video_path)
    try:
        ret, cap1 = cap.read()

        with torch.no_grad():
            while True:
                # read the next frame
                ret, cap2 = cap.read()
                if not ret:
                    break
                # Predict the Flow
                flow_up, flo = inference_imgs(model, cap1.copy(), cap2.copy())
                # Run obstacle Detection
                result, pred_bboxes = run_obstacle_detection(bgr2rgb(cap2))
                # Add Motion Prediction (skipped when nothing was detected, so
                # the un-annotated detection image is kept for that frame)
                if len(pred_bboxes) > 0:
                    result = add_arrow_to_box(result, pred_bboxes, flow_up)
                # Swap the channels back after drawing on the bgr2rgb image.
                image_arr = bgr2rgb(result)
                video_frames_arrow.append(image_arr)
                video_frames_flow.append(flo)
                # move forward one frame
                cap1 = cap2
    finally:
        # Always free the capture handle, even if a step above raised.
        cap.release()
    return video_frames_arrow, video_frames_flow

In [None]:
# Process the sample video; both lists hold one entry per consecutive frame pair.
video_frames_arrow, video_frames_flow = inference_video("raft_data/kitti_3.mp4")

In [None]:
# Write the flow renderings to output_flow.mp4 ('mp4v' fourcc, 15 fps).
# The writer's frame size is (width, height), taken from the first frame.
out = cv2.VideoWriter("output_flow.mp4",cv2.VideoWriter_fourcc(*'mp4v'), 15.0, (video_frames_flow[0].shape[1] ,video_frames_flow[0].shape[0]))
for i in range(len(video_frames_flow)):
    # Frames are cast to uint8 before being written.
    out.write(video_frames_flow[i].astype(np.uint8))
# Release the writer once every frame has been written.
out.release()

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Read the rendered video and embed it in the notebook as a base64 data URL.
# The context manager closes the file handle once the bytes are read.
with open('output_flow.mp4', 'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
# Must stay the last expression of the cell so the notebook displays it.
HTML("""
<video width=800 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
# Write the annotated frames to output_arrow.mp4 ('mp4v' fourcc, 15 fps).
# The writer's frame size is (width, height), taken from the first frame.
out = cv2.VideoWriter("output_arrow.mp4",cv2.VideoWriter_fourcc(*'mp4v'), 15.0, (video_frames_arrow[0].shape[1] ,video_frames_arrow[0].shape[0]))
for i in range(len(video_frames_arrow)):
    # Frames are cast to uint8 before being written.
    out.write(video_frames_arrow[i].astype(np.uint8))
# Release the writer once every frame has been written.
out.release()

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Read the rendered video and embed it in the notebook as a base64 data URL.
# The context manager closes the file handle once the bytes are read.
with open('output_arrow.mp4', 'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
# Must stay the last expression of the cell so the notebook displays it.
HTML("""
<video width=800 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

# Legacy

In [None]:
def inference(video_path="kitti_3.mp4", weights_path="models/raft-kitti.pth"):
    """Legacy pipeline: compute the flow for every consecutive frame pair of a video.

    Model construction is delegated to ``load_model`` instead of duplicating
    its logic. The capture is always released, and a video whose first frame
    cannot be read produces an explicit error.

    Args:
        video_path: Video to process. The default is the path this function
            originally hard-coded. NOTE(review): the rest of the notebook
            reads "raft_data/kitti_3.mp4" - confirm which location is right.
        weights_path: Checkpoint handed to ``load_model``.

    Returns:
        ``(result_flows_vectors, result_flows_images, images)``:
        the flow of each frame pair as a NumPy array, its rendering from
        ``flow_viz.flow_to_image``, and every frame read from the video
        converted with ``cv2.COLOR_BGR2RGB``. ``images`` has one more entry
        than the other two lists.

    Raises:
        RuntimeError: If no frame can be read from ``video_path``.
    """
    # Outputs to return
    result_flows_vectors = []
    result_flows_images = []
    images = []

    # Get the RAFT model with pretrained weights, in evaluation mode
    model = load_model(weights_path)
    model.eval()
    # Same device rule as load_model: GPU when available.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # capture the video and get the first frame
    cap = cv2.VideoCapture(video_path)
    try:
        ret, frame_1 = cap.read()
        if not ret:
            raise RuntimeError(
                "Could not read a first frame from {!r}".format(video_path)
            )

        # Save the image
        images.append(cv2.cvtColor(frame_1, cv2.COLOR_BGR2RGB))

        # frame preprocessing
        frame_1 = frame_preprocess(frame_1, device)

        with torch.no_grad():
            while True:
                # read the next frame
                ret, frame_2_b = cap.read()
                if not ret:
                    break
                # save the image
                images.append(cv2.cvtColor(frame_2_b, cv2.COLOR_BGR2RGB))
                # preprocessing
                frame_2_b = frame_preprocess(frame_2_b, device)
                padder = InputPadder(frame_1.shape, mode="kitti")
                padded_1, padded_2 = padder.pad(frame_1, frame_2_b)
                # predict the flow
                _, flow_up = model(padded_1, padded_2, iters=12, test_mode=True)
                # save the flow
                result_flows_vectors.append(flow_up.cpu().detach().numpy())
                # transform to image
                flo = flow_up[0].permute(1,2,0).cpu().numpy()
                flo = flow_viz.flow_to_image(flo)
                # save the image
                result_flows_images.append(flo)
                # move forward one frame; the unpadded tensor is kept so the
                # next iteration pads it again
                frame_1 = frame_2_b
    finally:
        # Always free the capture handle.
        cap.release()
    return result_flows_vectors, result_flows_images, images

In [None]:
# Run the legacy pipeline: per-pair flow arrays, their renderings, and the frames read from the video.
fl_vectors, fl_images, images = inference()

In [None]:
# Build the legacy output video: each frame is the detection image (with
# motion arrows when boxes were found) stacked above the flow rendering.
final_vid = []

# images[0] has no flow of its own: flow entry k belongs to images[k + 1].
for img, fl_vec, fl_img in zip(images[1:], fl_vectors, fl_images):
    # Run an Object Detection Algorithm
    result, pred_bboxes = run_obstacle_detection(img)

    if len(pred_bboxes) > 0:
        # If we have boxes, add arrows from the Optical Flow we ran before
        result = add_arrow_to_box(result, pred_bboxes, fl_vec)

    # Match the flow rendering's size so the two can be stacked
    # (cv2.resize takes (width, height)). Frames without boxes are kept too,
    # instead of re-using a stale or undefined img_final.
    fl_out = cv2.resize(result, (fl_img.shape[1], fl_img.shape[0]))
    img_final = np.concatenate([fl_out, fl_img], axis=0)
    final_vid.append(img_final)

# Write the file once, after all frames are assembled. The writer size comes
# from the stacked frame, since a mismatching size produces an unusable file.
# NOTE(review): these frames are RGB (inference converts them), while
# cv2.VideoWriter expects BGR - confirm whether a channel swap is wanted.
if final_vid:
    out = cv2.VideoWriter("output.mp4",cv2.VideoWriter_fourcc(*'mp4v'), 15.0, (final_vid[0].shape[1], final_vid[0].shape[0]))
    for frame in final_vid:
        out.write(frame.astype(np.uint8))
    out.release()