# Data and Label Processing

## 1.1 Extract the zip files

In [1]:
import glob
import zipfile

In [14]:
# Unpack every zip archive found in the current working directory.
for archive_path in glob.glob("*.zip"):
    print(f"Started Extracting {archive_path}")
    try:
        archive = zipfile.ZipFile(archive_path)
        with archive:
            archive.extractall()
        print(f"Ended Extracting {archive_path}")
    except Exception as err:
        # Report the failing archive and carry on with the remaining ones.
        print(f"{err} {archive_path}")

Started Extracting s1.zip

Ended Extracting s1.zip

Started Extracting s2.zip

Ended Extracting s2.zip

Started Extracting s3.zip

Ended Extracting s3.zip

Started Extracting s4 (2).zip

File is not a zip file s4 (2).zip

Started Extracting s5.zip

Ended Extracting s5.zip

Started Extracting s6.zip

Ended Extracting s6.zip


## 1.2 Explore The Labels

In [15]:
import json

In [16]:
# Read the distraction annotations of session s1. The encoding is given
# explicitly so the file is decoded as UTF-8 regardless of the platform default.
with open("s1/gB_9_s1_2019-03-07T16;36;24+01;00_rgb_ann_distraction.json", "r", encoding="utf-8") as f:
    j = json.load(f)

In [17]:
# Inspect the top-level keys of the loaded annotation document.
j.keys()

dict_keys(['vcd'])

In [18]:
j["vcd"]["actions"] # Show every annotated action with its type and frame intervals

{'0': {'name': '',
  'type': 'gaze_on_road/looking_road',
  'frame_intervals': [{'frame_start': 0, 'frame_end': 506},
   {'frame_start': 526, 'frame_end': 537},
   {'frame_start': 566, 'frame_end': 579},
   {'frame_start': 608, 'frame_end': 621},
   {'frame_start': 662, 'frame_end': 694},
   {'frame_start': 719, 'frame_end': 736},
   {'frame_start': 746, 'frame_end': 779},
   {'frame_start': 798, 'frame_end': 810},
   {'frame_start': 822, 'frame_end': 857},
   {'frame_start': 868, 'frame_end': 1307},
   {'frame_start': 1332, 'frame_end': 1464},
   {'frame_start': 1483, 'frame_end': 1835},
   {'frame_start': 1937, 'frame_end': 2029},
   {'frame_start': 2065, 'frame_end': 2079},
   {'frame_start': 2115, 'frame_end': 2164},
   {'frame_start': 2216, 'frame_end': 2265},
   {'frame_start': 2306, 'frame_end': 2312},
   {'frame_start': 2370, 'frame_end': 2472},
   {'frame_start': 2509, 'frame_end': 2938},
   {'frame_start': 2960, 'frame_end': 3008},
   {'frame_start': 3034, 'frame_end': 3174},

In [20]:
# Collect the "type" field of every annotated action.
[entry["type"] for entry in j["vcd"]["actions"].values()]

['gaze_on_road/looking_road',
 'gaze_on_road/not_looking_road',
 'talking/talking',
 'hands_using_wheel/both',
 'hands_using_wheel/only_right',
 'hands_using_wheel/only_left',
 'hand_on_gear/hand_on_gear',
 'driver_actions/safe_drive',
 'driver_actions/radio',
 'driver_actions/drinking',
 'driver_actions/reach_side',
 'driver_actions/talking_to_passenger',
 'driver_actions/change_gear',
 'driver_actions/unclassified']

## 1.3 Convert Videos To Frames For Further Processing

In [1]:
import cv2
import os
import glob

In [2]:
def save_frames(video_file, des_fol, ext=".png"):
    """Decode a video and write every frame into ``des_fol`` as an image file.

    Frames are written in decode order as ``img_0<ext>``, ``img_1<ext>``, ...

    Args:
        video_file: Path of the video, passed to ``cv2.VideoCapture``.
        des_fol: Destination folder. It is created (with parents) if missing.
        ext: File extension of the written images, including the leading dot.
            Defaults to ``".png"``.

    Returns:
        The number of frames written. ``0`` means the capture did not open
        or no frame could be read from it.

    Raises:
        OSError: If ``cv2.imwrite`` reports that a frame was not written.
    """
    # cv2.imwrite signals a failed write through its return value only, so a
    # missing folder would otherwise drop every frame without any error.
    os.makedirs(des_fol, exist_ok=True)
    cap = cv2.VideoCapture(video_file) # Start the video capture
    frame_count = 0
    try:
        while cap.isOpened(): # Loop until no more frames can be read
            ret, frame = cap.read()
            if not ret:
                break
            frame_path = os.path.join(des_fol, f"img_{frame_count}{ext}")
            if not cv2.imwrite(frame_path, frame):
                raise OSError(f"Could not write frame to {frame_path}")
            frame_count += 1
    finally:
        # Release the capture even when reading or writing fails.
        cap.release()
    return frame_count

In [None]:
# Extract the frames of the s1 body-camera video into s1/frames.
video_file = "s1/gB_9_s1_2019-03-07T16;36;24+01;00_rgb_body.mp4"
output_directory = "s1/frames"
save_frames(video_file=video_file, des_fol=output_directory)

## 1.4 Create Tabular Data Mapping Images and Respective Actions

In [1]:
import json
import pandas as pd

In [2]:
# Read the distraction annotations of session s1. The encoding is given
# explicitly so the file is decoded as UTF-8 regardless of the platform default.
with open("s1/gB_9_s1_2019-03-07T16;36;24+01;00_rgb_ann_distraction.json", "r", encoding="utf-8") as f:
    j = json.load(f)

In [3]:
# Collect the "type" field of every annotated action.
actions = [entry["type"] for entry in j["vcd"]["actions"].values()]
actions  # available labels

['gaze_on_road/looking_road',
 'gaze_on_road/not_looking_road',
 'talking/talking',
 'hands_using_wheel/both',
 'hands_using_wheel/only_right',
 'hands_using_wheel/only_left',
 'hand_on_gear/hand_on_gear',
 'driver_actions/safe_drive',
 'driver_actions/radio',
 'driver_actions/drinking',
 'driver_actions/reach_side',
 'driver_actions/talking_to_passenger',
 'driver_actions/change_gear',
 'driver_actions/unclassified']

In [71]:
# Only the labels related to the body camera are kept.
actions_considered = ['driver_actions/safe_drive',
 'driver_actions/radio',
 'driver_actions/drinking',
 'driver_actions/reach_side',
 'driver_actions/talking_to_passenger',
 'driver_actions/change_gear',
 'driver_actions/unclassified']

# Map frame id -> label. A dict keeps the order in which frames are first seen,
# and a frame covered by several considered actions keeps the last one
# processed, exactly as assigning row by row into a DataFrame would.
frame_labels = {}
for act in j["vcd"]["actions"].values(): # For each action type
    type_ = act["type"]
    if type_ not in actions_considered: # Skip actions that are not considered
        continue
    for interval in act["frame_intervals"]:
        # frame_end is treated as inclusive, hence the +1.
        for frame in range(interval["frame_start"], interval["frame_end"] + 1):
            frame_labels[frame] = type_ # Record what the driver does in that frame

# Build the table in one go: enlarging a DataFrame one row at a time with
# df.loc copies the data on every new row and is very slow for thousands of frames.
frame_ids = list(frame_labels)
df = pd.DataFrame(
    {"frameid": frame_ids, "classname": list(frame_labels.values())},
    index=frame_ids,
    columns=["frameid", "classname"],
)
df

Unnamed: 0,frameid,classname
19,19,driver_actions/safe_drive
20,20,driver_actions/safe_drive
21,21,driver_actions/safe_drive
22,22,driver_actions/safe_drive
23,23,driver_actions/safe_drive
...,...,...
6741,6741,driver_actions/unclassified
6742,6742,driver_actions/unclassified
6743,6743,driver_actions/unclassified
6744,6744,driver_actions/unclassified


In [72]:
# Put the rows in ascending frame order.
df = df.sort_values(by="frameid", ascending=True)
df

Unnamed: 0,frameid,classname
19,19,driver_actions/safe_drive
20,20,driver_actions/safe_drive
21,21,driver_actions/safe_drive
22,22,driver_actions/safe_drive
23,23,driver_actions/safe_drive
...,...,...
7131,7131,driver_actions/safe_drive
7132,7132,driver_actions/safe_drive
7133,7133,driver_actions/safe_drive
7134,7134,driver_actions/safe_drive


In [73]:
# Derive the image file name of every frame from its frame id.
# NOTE(review): save_frames in section 1.3 writes the frames with a ".png"
# extension while this column uses ".jpg" - confirm which one is intended.
df["img"] = ["img_" + str(frame_id) + ".jpg" for frame_id in df["frameid"]]
df

Unnamed: 0,frameid,classname,img
19,19,driver_actions/safe_drive,img_19.jpg
20,20,driver_actions/safe_drive,img_20.jpg
21,21,driver_actions/safe_drive,img_21.jpg
22,22,driver_actions/safe_drive,img_22.jpg
23,23,driver_actions/safe_drive,img_23.jpg
...,...,...,...
7131,7131,driver_actions/safe_drive,img_7131.jpg
7132,7132,driver_actions/safe_drive,img_7132.jpg
7133,7133,driver_actions/safe_drive,img_7133.jpg
7134,7134,driver_actions/safe_drive,img_7134.jpg


In [8]:
# Persist the frame/label table; the index is written as the first, unnamed column.
df.to_csv("driver_imgs_list.csv", index=True)