In [1]:
import json

In [2]:
import os
# Fail fast unless the kernel is running inside the "rekall" conda environment.
active_env = os.environ['CONDA_DEFAULT_ENV']
print(active_env)
assert active_env == "rekall"

rekall


# Read metadata

We read in the video metadata.

In [3]:
# Load the per-video metadata records from disk and preview the first one.
metadata_path = '/share/pi/cleemess/danfu/2019-05-good-mp4s-metadata.json'
with open(metadata_path) as metadata_file:
    video_metadata = list(json.load(metadata_file))
video_metadata[0]

{'id': 0,
 'filename': 'stanford/2019/5/46392/DA0036RI.IMG/0036RI01.mp4',
 'num_frames': '3827',
 'fps': 59.94005994005994,
 'width': 320,
 'height': 240}

In [4]:
print(f'There are {len(video_metadata)} videos with metadata')

There are 423 videos with metadata


# Filter videos

In [5]:
# from vgrid import VGridSpec, VideoMetadata, VideoBlockFormat, SpatialType_Caption
# from vgrid_jupyter import VGridWidget
# from rekall import Interval, IntervalSet, IntervalSetMapping, Bounds3D
from glob import glob
import eeghdf

Flag the videos whose annotation file we can't find (they are skipped later, when we build the visualization):

In [6]:
# For every video, derive the path of its annotation (.h5) file and store it
# under vm["annotation_filename"]; videos whose file is missing get None.
ANNOTATION_ROOT = "/share/pi/cleemess/file-conversion-pipeline"

annotation_exists_count = 0
for vm in video_metadata:
    # NOTE: an fps override (vm["fps"] = 29.97) was left disabled here.
    # Remove "stanford/" from the stored filename (str.replace drops every
    # occurrence). This mutates the record in place; re-running is harmless.
    vm["filename"] = vm["filename"].replace("stanford/", "")
    path_parts = vm["filename"].split("/")

    # Example: "2019/5/46392/DA0036RI.IMG/0036RI01.mp4"
    #   clip_num   -> 1               (the two characters before ".mp4")
    #   img_folder -> "DA0036RI"      (directory name minus its last 4 chars)
    #   parent     -> "2019/5/46392"  (everything above the .IMG directory)
    clip_num = int(path_parts[-1][-6:-4])
    img_folder = path_parts[-2][:-4]
    parent = "/".join(path_parts[:-2])

    annotation_file = f"{img_folder}_1-{clip_num + 1}+.h5"
    full_fn = os.path.join(ANNOTATION_ROOT, parent, annotation_file)

    # Query the filesystem once so the stored flag and the counter always agree.
    annotation_exists = os.path.exists(full_fn)
    vm["annotation_filename"] = full_fn if annotation_exists else None
    annotation_exists_count += int(annotation_exists)
print(f"We found annotations for {annotation_exists_count} of the {len(video_metadata)} videos.")

We found annotations for 221 of the 423 videos.


Now flag the videos that are broken up into multiple pieces, so that we can later keep only the videos that are not split.

In [7]:
# Mark each video with vm["only_video"]: True when exactly one .mp4 in its
# directory shares its filename prefix (i.e. the video has no sibling clips).
# NOTE(review): presumably relies on the "stanford/" prefix having already been
# removed from vm["filename"] by the annotation-lookup cell — confirm.
only_video_count = 0
for vm in video_metadata:
    dir_part, _slash, file_part = vm["filename"].rpartition("/")
    parent = os.path.join("/share/pi/cleemess/file-conversion-pipeline", dir_part)
    # Drop the last 6 characters (e.g. "01.mp4") to get the prefix shared by
    # all clips of one recording, e.g. "0036RI01.mp4" -> "0036RI".
    sibling_pattern = os.path.join(parent, file_part[:-6]) + "*.mp4"
    is_only_video = len(glob(sibling_pattern)) == 1
    vm["only_video"] = is_only_video
    only_video_count += int(is_only_video)
print(f"{only_video_count} of the {len(video_metadata)} videos are the only video in their directory (so this video is not a part of a sequence of broken up clips).")

62 of the 423 videos are the only video in their directory (so this video is not a part of a sequence of broken up clips).


# Visualize videos with annotations

In [8]:
from vgrid import VGridSpec, VideoMetadata, VideoBlockFormat, SpatialType_Caption
from vgrid_jupyter import VGridWidget
from rekall import Interval, IntervalSet, IntervalSetMapping, Bounds3D

We wrap the video metadata in a special object for use in VGrid.

In [9]:
# Wrap every metadata record in a VGrid VideoMetadata object.
# - The id passed to VGrid is the record's position in the list (as a string),
#   not the record's own "id" field.
# - "num_frames" is stored as a string in the metadata, hence the int() cast.
video_metadata_wrapper = [
    VideoMetadata(
        record["filename"],
        id=str(position),
        fps=record["fps"],
        num_frames=int(record["num_frames"]),
        width=record["width"],
        height=record["height"],
    )
    for position, record in enumerate(video_metadata)
]

Finally, we create a function that selects the videos that have at least one annotation containing the keyword, and shows each selected video with all of its annotations as captions.

In [10]:
def find_clips_for_keyword(keyword, clip_duration=5, video_endpoint='http://localhost:8080'):
    """Build a VGrid widget of the videos whose annotations mention `keyword`.

    Only videos that have an annotation file and are flagged ``only_video``
    are considered. When any annotation text of such a video contains
    `keyword` (case-insensitive substring match), *all* annotations of that
    video are shown as captions, not only the matching ones.

    Args:
        keyword: Substring to search for in the annotation texts. A falsy
            value (``None`` or ``""``) selects every eligible video.
        clip_duration: Length, in seconds, of the interval during which each
            caption is shown, starting at the annotation's start time.
        video_endpoint: Base URL handed to VGrid as the video endpoint.

    Returns:
        A ``VGridWidget`` built from the compressed VGrid spec.
    """
    # Lower-case the keyword once instead of once per annotation text.
    needle = keyword.lower() if keyword else None

    ism = {}
    for vm in video_metadata:
        if not vm["annotation_filename"] or not vm["only_video"]:
            continue
        h5 = eeghdf.Eeghdf(vm["annotation_filename"])
        # Judging by the attribute name, start times are stored in 100 ns
        # ticks; dividing by 10**7 converts them to seconds.
        starts = [start / 10**7 for start in h5._annotation_start100ns]
        texts = h5._annotation_text

        if needle is None or any(needle in text.lower() for text in texts):
            # One caption interval per annotation, keyed by the record's "id".
            ism[vm["id"]] = IntervalSet([
                Interval(
                    Bounds3D(start, start + clip_duration),
                    {
                        'spatial_type': SpatialType_Caption(text + "\n"),
                        'metadata': {}
                    }
                )
                for start, text in zip(starts, texts)
            ])

    vgrid_spec = VGridSpec(
        video_meta=video_metadata_wrapper,
        vis_format=VideoBlockFormat(imaps=[('bboxes', ism)]),
        video_endpoint=video_endpoint
    )
    return VGridWidget(vgrid_spec=vgrid_spec.to_json_compressed())

Then we use the function to visualize clips with seizures.

In [11]:
# Display every eligible video that has at least one annotation mentioning "seizure".
find_clips_for_keyword(keyword="seizure")

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xed]ko\x1c\xb7\x92\xfd+\x86>-\xb0\x9b\x11\xdf\x8f|…

## Observations

Some of the videos are not as good, but the first video clearly shows a seizure at the annotation's time stamp. 

The final video also shows a seizure at the 9:40 mark, though not where the annotation's start-time marker is.

The other videos don't seem like seizures to me.