In [1]:
import os
import cv2
import time
import glob
import torch
import shutil
import random
import subprocess
import numpy as np
import torch.nn as nn
import torchvision.models as models
import albumentations.augmentations.transforms as transforms
from PIL import Image
from predictor import COCODemo
from maskrcnn_benchmark.config import cfg
from albumentations import Compose
from albumentations.pytorch import ToTensor

In [2]:
# device on which both models are run
device = "cuda"

# input videos, trained weights and detector configuration
path_to_videos = "/home/SharedFolder/CurrentDatasets/bdd100k_video_samples"
weights_classification = (
    "/home/SharedFolder/trained_models/night-drive/weather_classifier/without_cloudy/"
    "20190323_ResNet18_without_cloudy_train_A_over/"
    "resnet18_weather_classifierbdd100k_sorted_train_A_over_epoch_39.pth"
)
weights_detection = (
    "/home/SharedFolder/trained_models/night-drive/detector/"
    "20190309_RetinaNet_train_A_over_final_iteration/"
    "model_final.pth"
)
config_file_detection = (
    "/home/SharedFolder/git/csoehnel/maskrcnn-benchmark/configs/retinanet/"
    "retinanet_R-50-FPN_1x_finetune_nightdrive.yaml"
)

# confidence thresholds in [0, 1]: the detection threshold is handed to the
# detector, the classification threshold decides whether the weather label
# is drawn in white (score at or above it) or in grey (below it)
conf_thresh_detection = 0.7
conf_thresh_classification = 0.7

# class index of the weather classifier -> text drawn onto the frames
dict_weather = dict(enumerate(("Weather: Clear", "Weather: Rainy", "Weather: Snowy")))

# sub-folder name that keeps the temporary frame folders of parallel workers apart
worker_name = "worker1"

In [3]:
# Suffix appended to every output video name. It encodes, in this order:
# the training-set tag of the classifier weights, the classifier epoch ("C.."),
# the classification threshold, the detector checkpoint tag ("D..") and the
# detection threshold.
outfile_suffix = "".join((
    "_", weights_classification.rsplit(os.sep, 1)[-1].rpartition("sorted_")[2].partition("_epoch")[0],
    "_C", weights_classification.rpartition("_epoch_")[2].partition(".pth")[0],
    "_", str(conf_thresh_classification),
    "_D", weights_detection.rpartition("model_")[2].partition(".pth")[0],
    "_", str(conf_thresh_detection),
))
print(outfile_suffix)

_train_A_over_C39_0.7_Dfinal_0.7


In [4]:
# glob returns matches in an arbitrary, filesystem-dependent order, so the
# list is sorted first; without that the seeded shuffle below would not
# produce the same processing order on every machine / run
videos = sorted(glob.glob(path_to_videos + "/*.mov"))
random.seed(123)
random.shuffle(videos)

In [None]:
def init_weather_classification(weights_weather, dict_weather, device):
    """Build the ResNet-18 weather classifier and its input transform.

    Args:
        weights_weather: Path to a checkpoint file; its "model_state_dict"
            entry is loaded into the network.
        dict_weather: Mapping from class index to label. Only its length is
            used here, to size the final linear layer.
        device: Device string accepted by torch.device (e.g. "cuda", "cpu").

    Returns:
        Tuple (net_weather, transform_weather): the network on `device` in
        eval mode, and the albumentations pipeline that resizes an image to
        224 x 224 and converts it to a normalized tensor.
    """
    target_device = torch.device(device)
    # init data transform
    transform_weather = Compose([transforms.Resize(height = 224, width = 224),
                                 ToTensor(normalize = {"mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]})])
    # create model; no pretrained weights are requested because every
    # parameter and buffer is replaced by the checkpoint loaded below
    # (load_state_dict is strict by default), so downloading them is wasted work
    net_weather = models.resnet18(pretrained = False)
    # Adaptive Pooling needed for resolutions > 224 x 224
    net_weather.avgpool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), nn.Dropout(p = 0.1))
    net_weather.fc = nn.Linear(net_weather.fc.in_features, len(dict_weather))
    # send model to device
    net_weather.to(target_device)
    # load weights; map_location remaps the stored tensors so that a checkpoint
    # saved on a GPU can also be loaded when `device` is "cpu"
    checkpoint = torch.load(weights_weather, map_location = target_device)
    net_weather.load_state_dict(checkpoint["model_state_dict"])
    # eval mode, disables dropout, etc.
    net_weather.eval()
    return net_weather, transform_weather

In [None]:
def classify_weather(image_bgr, net_weather, dict_weather, transform_weather):
    """Classify the weather shown in a single BGR image.

    Args:
        image_bgr: Image in BGR channel order (it is converted with
            cv2.COLOR_BGR2RGB before being transformed).
        net_weather: Classifier network; the input batch is moved to the
            device its parameters live on.
        dict_weather: Mapping from class index to label.
        transform_weather: Callable invoked as transform(image = ...) whose
            result holds the input tensor under the key "image".

    Returns:
        Tuple (predicted_class, predicted_class_score): the label of the
        highest-scoring class and its softmax probability.
    """
    # Use the model's own device instead of a notebook-level `device` global,
    # so the function works no matter which cells defined what.
    model_device = next(net_weather.parameters()).device
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    with torch.no_grad():
        transformed_image = transform_weather(image = image_rgb)["image"]
        # add the batch dimension expected by the network
        transformed_image = transformed_image.unsqueeze(0).to(model_device)
        prediction_scores = net_weather(transformed_image)
        # exp(log_softmax) turns the raw scores into class probabilities
        prediction_scores = torch.exp(nn.LogSoftmax(dim = 1)(prediction_scores)).cpu().numpy()
    # plain int so the lookup does not depend on a numpy integer as dict key
    predicted_class = dict_weather[int(np.argmax(prediction_scores))]
    predicted_class_score = np.max(prediction_scores)
    return predicted_class, predicted_class_score

In [None]:
def init_detection(weights_detection, config_file_detection, conf_thresh_detection, device):
    """Configure the shared `cfg` object and build the detector from it.

    The config file is merged into `cfg` first, then the weight file and the
    device are applied on top as overrides. Note that `cfg` is the object
    imported at the top of the notebook and is modified in place.

    Returns:
        A COCODemo instance created with min_image_size 800 and the given
        confidence threshold.
    """
    cfg.merge_from_file(config_file_detection)
    overrides = (
        ("MODEL.WEIGHT", weights_detection),
        ("MODEL.DEVICE", device),
    )
    for option, value in overrides:
        cfg.merge_from_list([option, value])
    return COCODemo(cfg, min_image_size = 800, confidence_threshold = conf_thresh_detection)

In [None]:
def detect(image_bgr, coco_detector):
    """Run the detector on one frame and return the detector's output image.

    The input is copied into a fresh numpy array before it is handed to
    coco_detector.run_on_opencv_image, which is called with line_width 3.
    """
    frame = np.array(image_bgr)
    return coco_detector.run_on_opencv_image(frame, line_width = 3)

In [None]:
def process_video(video, net_weather, transform_weather, dict_weather, conf_thresh_classification, coco_detector):
    """Annotate every frame of one video and encode the result as an mp4.

    The frames are extracted with ffmpeg into a temporary per-worker folder,
    each frame is classified (weather) and run through the detector, the
    weather label is drawn onto the detector output, and the annotated frames
    are assembled into "<video dir>/demovideos/<stem><outfile_suffix>.mp4".

    Uses the notebook-level names `worker_name` and `outfile_suffix`.

    Args:
        video: Path to the input video file.
        net_weather: Weather classifier passed on to classify_weather.
        transform_weather: Input transform passed on to classify_weather.
        dict_weather: Mapping from class index to label.
        conf_thresh_classification: Scores at or above this value are drawn
            in white, lower scores in grey.
        coco_detector: Detector passed on to detect.

    Returns:
        Number of processed frames, or 0 if the target file already exists
        (in which case nothing is touched on disk).
    """
    path_name, file_name = os.path.split(video)
    # one consistent stem for both the temp folder and the target file;
    # splitext only strips the final extension, so dots inside the name survive
    stem = os.path.splitext(file_name)[0]
    temp_path = os.path.join(path_name, worker_name, stem)
    temp_pred_path = os.path.join(temp_path, "prediction")
    target_path = os.path.join(path_name, "demovideos")
    target_file = os.path.join(target_path, stem + outfile_suffix + ".mp4")
    # do nothing if file already processed; checked before any folder is
    # created so that skipped videos do not leave empty temp folders behind
    if os.path.exists(target_file):
        return 0
    os.makedirs(target_path, exist_ok = True)
    # (re-) create directories for extracted frames and annotated frames
    if os.path.isdir(temp_path):
        shutil.rmtree(temp_path)
    os.makedirs(temp_pred_path, exist_ok = True)
    try:
        # extract frames from video
        bash_cmd = ["ffmpeg", "-i", video, "-start_number", "0", "-qscale:v", "2", temp_path + "/frame-%d.jpg"]
        subprocess.call(bash_cmd)
        # process frames (order is irrelevant, every output keeps its input name)
        frames = glob.glob(temp_path + "/*.jpg")
        for frame in frames:
            # read as bgr
            img_bgr = cv2.imread(frame)
            # classify weather
            pred_weather_class, pred_weather_score = classify_weather(img_bgr, net_weather, dict_weather, transform_weather)
            # detect
            frame_with_detections_bgr = detect(img_bgr, coco_detector)
            # write weather on detection output image: white when confident, grey otherwise
            if pred_weather_score >= conf_thresh_classification:
                weather_color = (255, 255, 255)
            else:
                weather_color = (80, 80, 80)
            # anchor the label 5 px above the bottom edge, whatever the frame
            # height is (gives y = 715 for 720 px high frames)
            text_origin = (5, frame_with_detections_bgr.shape[0] - 5)
            cv2.putText(frame_with_detections_bgr,
                        f"{pred_weather_class} ({pred_weather_score:.2f})",
                        text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.8,
                        weather_color,
                        2)
            # the image is in bgr order, which is what cv2.imwrite expects
            cv2.imwrite(os.path.join(temp_pred_path, os.path.basename(frame)),
                        frame_with_detections_bgr,
                        [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        # construct video
        bash_cmd = ["ffmpeg", "-r", "30", "-f", "image2", "-i", temp_pred_path + "/frame-%d.jpg", "-vcodec", "libx264", "-crf", "18", target_file]
        subprocess.call(bash_cmd)
    finally:
        # clean-up, also when a frame fails half-way through
        shutil.rmtree(temp_path)
    return len(frames)

In [None]:
# build the weather classifier together with its preprocessing pipeline
net_weather, transform_weather = init_weather_classification(weights_classification, dict_weather, device)

# build the object detector
coco_detector = init_detection(weights_detection, config_file_detection, conf_thresh_detection, device)

# process all videos one after another and report the time spent on each
n_videos = len(videos)
for position, video in enumerate(videos, start = 1):
    print(f"Processing video {position} of {n_videos}", end = "")
    started = time.time()
    n_frames = process_video(video, net_weather, transform_weather, dict_weather, conf_thresh_classification, coco_detector)
    elapsed = time.time() - started
    if n_frames <= 0:
        # process_video reports 0 frames when it did not produce anything new
        print("... skipped. File exists.")
        continue
    print(f"... done in {elapsed:.2f}s ({(elapsed / n_frames):.2f}s / frame)")

Processing video 1 of 1000... done in 331.00s (0.27s / frame)
Processing video 2 of 1000... done in 343.93s (0.28s / frame)
Processing video 3 of 1000... done in 331.56s (0.28s / frame)
Processing video 4 of 1000... done in 170.78s (0.28s / frame)
Processing video 5 of 1000... done in 330.09s (0.27s / frame)
Processing video 6 of 1000... done in 335.08s (0.28s / frame)
Processing video 7 of 1000... done in 341.35s (0.28s / frame)
Processing video 8 of 1000... done in 337.62s (0.28s / frame)
Processing video 9 of 1000... done in 339.35s (0.28s / frame)
Processing video 10 of 1000... done in 334.82s (0.28s / frame)
Processing video 11 of 1000... done in 327.97s (0.27s / frame)
Processing video 12 of 1000... done in 663.37s (0.28s / frame)
Processing video 13 of 1000... done in 336.90s (0.28s / frame)
Processing video 14 of 1000... done in 337.28s (0.28s / frame)
Processing video 15 of 1000... done in 659.28s (0.27s / frame)
Processing video 16 of 1000... done in 340.66s (0.28s / frame)
P