In [3]:
import os, cv2
import numpy as np

def read_anno_file(anno_file):
    """Parse an annotation text file into a list of per-video dicts.

    Each non-empty line is parsed as
    ``vid,[l0,l1,...],startframe,vid_ytb,lighting,weather,ego_involve``.
    Blank lines (for example a trailing newline at the end of the file) are
    skipped.

    Args:
        anno_file: Path to the annotation file.

    Returns:
        A list with one dict per line. ``label`` is a list of ints; ``vid``,
        ``startframe``, ``vid_ytb``, ``lighting``, ``weather`` and
        ``ego_involve`` are kept as the raw strings read from the file.

    Raises:
        AssertionError: If ``anno_file`` does not exist.
        ValueError: If a non-empty line does not match the layout above.
    """
    assert os.path.exists(anno_file), "Annotation file does not exist!" + anno_file
    result = []
    # The context manager closes the file; no explicit close() is needed.
    with open(anno_file, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            # "vid,[labels],others" -> split once around the bracketed label list.
            vid, rest = line.split(',[')[:2]
            labels, others = rest.split('],')[:2]
            items = {}
            items['vid'] = vid
            items['label'] = [int(val) for val in labels.split(',')]
            items['startframe'], items['vid_ytb'], items['lighting'], items['weather'], items['ego_involve'] = others.split(',')
            result.append(items)
    return result


def get_video_frames(video_file, topN=50):
    """Return the first ``topN`` frames of a video read with OpenCV.

    The whole video is still decoded so that the total frame count can be
    printed and checked, but only the first ``topN`` frames are kept in
    memory. The capture handle is always released, even if decoding fails.

    Args:
        video_file: Path passed to ``cv2.VideoCapture``.
        topN: Number of leading frames to return (expected to be >= 0).

    Returns:
        A list with the first ``topN`` frames, in reading order.

    Raises:
        AssertionError: If the video yields fewer than ``topN`` frames.
    """
    cap = cv2.VideoCapture(video_file)
    video_data = []
    n_frames = 0
    try:
        ret, frame = cap.read()
        while ret:
            # Frames past topN are only counted, never stored.
            if n_frames < topN:
                video_data.append(frame)
            n_frames += 1
            ret, frame = cap.read()
    finally:
        # Free the underlying decoder/file handle.
        cap.release()
    print("original # frames: %d"%(n_frames))
    assert n_frames >= topN
    return video_data



In [None]:

""""if __name__ == "__main__":
    anno_file = "../videos/Crash-1500.txt"
    anno_data = read_anno_file(anno_file)

    video_path = "../videos/Crash-1500"
    for anno in anno_data:
        video_file = os.path.join(video_path, anno['vid'] + ".mp4")
        assert os.path.exists(video_file), "video file does not exist!" + video_file
        # read frames
        frames = get_video_frames(video_file, topN=50)
        labels = anno['label']
        print("file: %s, # frames: %d, # labels: %d."%(video_file, len(frames), len(labels)))
        print(len(labels))
        for idx, im in enumerate(frames):
           if labels[idx] == 1:
               cv2.putText(im, 'Accident', (int(im.shape[1] / 2)-60, 60), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 2, cv2.LINE_AA)
           else:
               cv2.putText(im, 'Normal', (int(im.shape[1] / 2)-60, 60), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 2, cv2.LINE_AA)
           cv2.imshow('frame', im)
           cv2.waitKey(100)""""

In [4]:
# Parse the crash annotation file into a list of per-video dicts.
annotation = read_anno_file("Dataset/CarCrash/videos/Crash-1500.txt")

In [5]:
import pandas as pd
# One row per annotated video; the columns are the keys of the annotation dicts.
annotation_df=pd.DataFrame(annotation)

La colonne `label` contient 50 labels binaires (un par frame) : la valeur 1 marque les frames où se situe l'accident.

In [6]:
annotation_df.head()

Unnamed: 0,vid,label,startframe,vid_ytb,lighting,weather,ego_involve
0,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",285,0,Day,Normal,Yes
1,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",646,0,Day,Normal,Yes
2,3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",972,0,Day,Normal,Yes
3,4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1257,0,Day,Normal,Yes
4,5,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",462,10,Day,Snowy,Yes


In [7]:
def get_accident_frame(row):
    """Return the position of the first ``1`` found in ``row["label"]``.

    Whatever ``.index(1)`` raises when no ``1`` is present (``ValueError``
    for a list) is propagated to the caller.
    """
    frame_labels = row["label"]
    first_positive = frame_labels.index(1)
    return first_positive

# Row-wise apply (axis=1): the helper receives each video's row and reads its "label" list.
annotation_df["accident_frame"]=annotation_df.apply(get_accident_frame,axis=1)


In [8]:
annotation_df["accident_frame"].describe()

count    1500.000000
mean       37.189333
std         5.787557
min        30.000000
25%        32.000000
50%        36.000000
75%        42.000000
max        49.000000
Name: accident_frame, dtype: float64

In [9]:
annotation_df.head()

Unnamed: 0,vid,label,startframe,vid_ytb,lighting,weather,ego_involve,accident_frame
0,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",285,0,Day,Normal,Yes,32
1,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",646,0,Day,Normal,Yes,30
2,3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",972,0,Day,Normal,Yes,31
3,4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1257,0,Day,Normal,Yes,41
4,5,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",462,10,Day,Snowy,Yes,30


In [10]:
annotation_df.ego_involve.value_counts()

ego_involve
Yes    801
No     699
Name: count, dtype: int64

On fait le choix de ne garder que les vidéos où la voiture qui filme est elle-même impliquée dans l'accident (`ego_involve == "Yes"`).

In [11]:
# Keep only the rows whose ego_involve flag is "Yes".
ego_mask = annotation_df["ego_involve"] == "Yes"
annotation_df_reduced = annotation_df[ego_mask]

annotation_df_reduced.shape

(801, 8)

In [12]:
# Take an explicit copy of the three needed columns: annotation_df_reduced
# comes from a filtered selection of annotation_df, and adding a column to
# such a selection triggers pandas' SettingWithCopyWarning.
annotation_df_reduced=annotation_df_reduced[["vid","label","accident_frame"]].copy()
# Every row comes from the crash annotation file, so all of them are flagged 1.
annotation_df_reduced["accident"]=1

In [13]:
annotation_df_reduced

Unnamed: 0,vid,label,accident_frame,accident
0,000001,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",32,1
1,000002,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",30,1
2,000003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",31,1
3,000004,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",41,1
4,000005,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",30,1
...,...,...,...,...
1494,001495,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",35,1
1495,001496,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",45,1
1496,001497,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",48,1
1497,001498,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",47,1


In [14]:
def read_features_file(file):
    """Load a ``.npz`` feature file with ``np.load`` and return the loaded object."""
    return np.load(file)
    

- data: Extracted 4096-dim features with shape (50, 20, 4096). It contains frame-level feature with shape (50, 1, 4096) and 19 box-level features with shape (50, 19, 4096).
- det: Detected bounding boxes with shape (50, 19, 6), where the last dim denotes (x1, y1, x2, y2, prob, cls).
- labels: One-hot video labels to indicate whether the video contains an accident, i.e., [0, 1] denotes positive (accident) and [1, 0] denotes negative (normal).
- ID: The video name for current feature file.


In [15]:
# Load one sample from the "positive" folder to inspect what a feature file contains.
np_file=read_features_file("Dataset/CarCrash/vgg16_features/positive/000004.npz")

In [16]:
np_file.files

['data', 'det', 'labels', 'ID']

In [17]:
np_file["labels"].shape

(2,)

In [21]:
# Test split: space-separated "<npz path> <label>" pairs, no header row.
df_test = pd.read_csv(
    "Dataset/CarCrash/vgg16_features/test.txt",
    sep=" ",
    header=None,
    names=["npz_file", "label"],
)
df_test.head()

Unnamed: 0,npz_file,label
0,positive/000353.npz,1
1,positive/000849.npz,1
2,negative/001320.npz,0
3,negative/000210.npz,0
4,negative/002373.npz,0


In [22]:
# Train split: space-separated "<npz path> <label>" pairs, no header row.
df_train = pd.read_csv(
    "Dataset/CarCrash/vgg16_features/train.txt",
    sep=" ",
    header=None,
    names=["npz_file", "label"],
)
df_train.head()


Unnamed: 0,npz_file,label
0,negative/001355.npz,0
1,negative/002192.npz,0
2,negative/002082.npz,0
3,negative/000754.npz,0
4,positive/000713.npz,1


In [37]:
# Show both split sizes: display() for the first, the cell's last expression for the second.
# NOTE(review): the saved output reports 4 columns although both frames are built
# with 2 -- presumably stale kernel state; confirm with Restart & Run All.
display(df_train.shape)
df_test.shape

(3600, 4)

(900, 4)

In [33]:
# Stack the train and test splits into one frame; each split keeps its own row labels (no ignore_index).
df=pd.concat([df_train,df_test])

In [34]:
# Video id = last path component of the .npz path, cut at its first dot
# (e.g. "negative/001355.npz" -> "001355").
df["video_file"] = [
    npz_path.split("/")[-1].split(".")[0] for npz_path in df["npz_file"]
]

df.head()


Unnamed: 0,npz_file,label,video_file,video_path
0,negative/001355.npz,0,1355,Dataset/CarCrash/videos/Normal/001355.mp4
1,negative/002192.npz,0,2192,Dataset/CarCrash/videos/Normal/002192.mp4
2,negative/002082.npz,0,2082,Dataset/CarCrash/videos/Normal/002082.mp4
3,negative/000754.npz,0,754,Dataset/CarCrash/videos/Normal/000754.mp4
4,positive/000713.npz,1,713,Dataset/CarCrash/videos/Crash-1500/000713.mp4


In [36]:
# Rows with label 1 point into Crash-1500/, every other row into Normal/.
df["video_path"] = [
    ("Dataset/CarCrash/videos/Crash-1500/" if lbl == 1 else "Dataset/CarCrash/videos/Normal/") + vid + ".mp4"
    for lbl, vid in zip(df["label"], df["video_file"])
]

df.head()

Unnamed: 0,npz_file,label,video_file,video_path
0,negative/001355.npz,0,1355,Dataset/CarCrash/videos/Normal/001355.mp4
1,negative/002192.npz,0,2192,Dataset/CarCrash/videos/Normal/002192.mp4
2,negative/002082.npz,0,2082,Dataset/CarCrash/videos/Normal/002082.mp4
3,negative/000754.npz,0,754,Dataset/CarCrash/videos/Normal/000754.mp4
4,positive/000713.npz,1,713,Dataset/CarCrash/videos/Crash-1500/000713.mp4


In [54]:
# Build the working frame from the train split. "video_file" is derived here
# (last path component of the .npz path, cut at its first dot) because no
# earlier cell adds that column to df_train; selecting it directly raises
# KeyError on a fresh kernel.
df_tronc=df_train[["npz_file","label"]].copy()
df_tronc["video_file"]=df_tronc["npz_file"].apply(lambda x: x.split("/")[-1].split(".")[0])


# Keep every label-0 video, and only those label-1 (accident) videos whose id
# is listed in annotation_df_reduced, i.e. where the driver is involved.
df_tronc=df_tronc[(df_tronc["video_file"].isin(annotation_df_reduced["vid"]) & (df_tronc["label"]==1)) | (df_tronc["label"]==0)]

df_tronc.head()



Unnamed: 0,npz_file,label,video_file
0,negative/001355.npz,0,1355
1,negative/002192.npz,0,2192
2,negative/002082.npz,0,2082
3,negative/000754.npz,0,754
5,negative/000847.npz,0,847


In [55]:
df_tronc.label.value_counts()

label
0    2400
1     643
Name: count, dtype: int64

In [56]:
# Attach the annotation columns with a left join. "label" is part of the key and
# is matched against "accident", so only label-1 rows can pick up an annotation;
# unmatched rows get NaN. validate="many_to_one" makes pandas raise if a
# (vid, accident) key is duplicated in the annotations, which would otherwise
# silently multiply rows.
df_tronc=df_tronc.merge(annotation_df_reduced, left_on=["video_file","label"],right_on=["vid","accident"], how="left", validate="many_to_one")

In [57]:
# Remove the join key and the two label columns produced by the merge.
df_tronc = df_tronc.drop(columns=["vid", "label_x", "label_y"])

In [60]:
# Rows without an annotation match get accident = 0 and the sentinel
# accident_frame = -1. The fill is done on the frame itself and reassigned:
# calling fillna(inplace=True) on a column reached through attribute access is
# chained assignment, which pandas deprecates and which does not update the
# frame under Copy-on-Write.
df_tronc=df_tronc.fillna({"accident":0,"accident_frame":-1})
df_tronc.head()

Unnamed: 0,npz_file,video_file,accident_frame,accident
0,negative/001355.npz,1355,-1.0,0.0
1,negative/002192.npz,2192,-1.0,0.0
2,negative/002082.npz,2082,-1.0,0.0
3,negative/000754.npz,754,-1.0,0.0
4,negative/000847.npz,847,-1.0,0.0


In [63]:
df_tronc.accident.value_counts()

accident
0.0    2400
1.0     643
Name: count, dtype: int64