In [1]:
VIDEO_FRAMERATE = 30 #nr of frames per second
#it takes my pc 3 min 15 s with framerate 0.5. For 25 fps, estimated is 2 h and 45 mins
PATH_TO_VIDEO = '2022-06-14_16-38-43_S04_eye-tracking-video-world_frame.mp4'
PATH_TO_INFO_AND_CROSS = "2022-06-14_16-38-43_streamLog_actionNet-wearables_S04.hdf5"
VIDEO_ID = "S0401"
FRAMES_SAVE_PATH = "actionNet/" + VIDEO_ID
CALIBRATION_SHIFT = 14*60 + 21 # the first part of the video is usless
PIKLE_PATH = "actionNet/S0401"

VIDEO_SAMPLING_RATE = 1/VIDEO_FRAMERATE
import h5py, numpy as np
import cv2
import os
from datetime import datetime
import pandas as pd

#seeding uuid 
import uuid
import random
rd = random.Random()

## Video transformation into frames

In [6]:
directory = FRAMES_SAVE_PATH

# Check if the directory exists
if not os.path.exists(directory):
    # If it doesn't exist, create it
    os.makedirs(directory)

In [None]:
#you need to have ffmpeg exe somewhere
%%bash

FFMPEG_PATH="C:/Users/NonAv/Desktop/ffmpeg-6.0-essentials_build/bin/ffmpeg.exe"
PATH_TO_VIDEO="./2022-06-14_16-38-43_S04_eye-tracking-video-world_frame.mp4"
FRAMES_SAVE_PATH="actionNet/S04_01"

$FFMPEG_PATH -ss 00:14:21 -i $PATH_TO_VIDEO $FRAMES_SAVE_PATH/img_%010d.jpg -vf "scale=456:256" 

In [19]:
vidcap = cv2.VideoCapture(PATH_TO_VIDEO)
video_fps = vidcap.get(cv2.CAP_PROP_FPS)
framecount = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
"number of frames expected: " + str(round(framecount - video_fps* CALIBRATION_SHIFT))

'number of frames expected: 83225'

In [23]:
import os
files = os.listdir(FRAMES_SAVE_PATH)
i = 1

for file in files:
    new_file_num = int(file.split("_")[1].split(".")[0])-1
    new_file_name = "img_"+f'{new_file_num:010d}'+".jpg"
    os.rename(os.path.join(FRAMES_SAVE_PATH, file), os.path.join(FRAMES_SAVE_PATH,new_file_name))

    #i = i+1

## Labels and dataframe (records)

In [2]:



h5_file = h5py.File(PATH_TO_INFO_AND_CROSS)

device_name = 'experiment-activities'
stream_name = 'activities'

# Get the timestamped label data.
# As described in the HDF5 metadata, each row has entries for ['Activity', 'Start/Stop', 'Valid', 'Notes'].
activity_datas = h5_file[device_name][stream_name]['data']
activity_times_s = h5_file[device_name][stream_name]['time_s']
activity_times_s = np.squeeze(np.array(activity_times_s))  # squeeze (optional) converts from a list of single-element lists to a 1D list
# Convert to strings for convenience.
activity_datas = [[x.decode('utf-8') for x in datas] for datas in activity_datas]

# Combine start/stop rows to single activity entries with start/stop times.
#   Each row is either the start or stop of the label.
#   The notes and ratings fields are the same for the start/stop rows of the label, so only need to check one.
exclude_bad_labels = True # some activities may have been marked as 'Bad' or 'Maybe' by the experimenter; submitted notes with the activity typically give more information
activities_labels = []
activities_start_times_s = []
activities_end_times_s = []
activities_ratings = []
activities_notes = []
for (row_index, time_s) in enumerate(activity_times_s):
  label    = activity_datas[row_index][0]
  is_start = activity_datas[row_index][1] == 'Start'
  is_stop  = activity_datas[row_index][1] == 'Stop'
  rating   = activity_datas[row_index][2]
  notes    = activity_datas[row_index][3]
  if exclude_bad_labels and rating in ['Bad', 'Maybe']:
    continue
  # Record the start of a new activity.
  if is_start:
    activities_labels.append(label)
    activities_start_times_s.append(time_s)
    activities_ratings.append(rating)
    activities_notes.append(notes)
  # Record the end of the previous activity.
  if is_stop:
    activities_end_times_s.append(time_s)

In [3]:
action_net = pd.read_pickle("./action-net/ActionNet_train.pkl")
action_dict = {}
for i,row in action_net[["description", "labels"]].iterrows():
    desc = row[0]
    label = row[1]
    action_dict[desc] = label

In [4]:
labels_dict = {}
i=0
for label in set(action_net["labels"]):
    labels_dict[label] = i
    i+=1

{'Clean',
 'Clear',
 'Get/Put',
 'Load',
 'Open/Close',
 'Peel',
 'Pour',
 'Set',
 'Slice',
 'Spread',
 'Stack',
 'Unload'}

In [39]:
#TODO decide if we want to merge the same actions into one
rd.seed(9341)

#"start_timestamp", "stop_timestamp"
records = []
shift = datetime.utcfromtimestamp(min(activities_start_times_s))
for i, label in enumerate(activities_labels) :
    #activities_end_times_s
    #start_timestamp = datetime.timestamp(datetime.utcfromtimestamp(activities_start_times_s[i]) - shift)
    #stop_timestamp = datetime.timestamp(datetime.utcfromtimestamp(activities_end_times_s[i]) - shift)
    start_frame = int((datetime.utcfromtimestamp(activities_start_times_s[i]) - shift).total_seconds() * VIDEO_FRAMERATE) +1 
    stop_frame = int((datetime.utcfromtimestamp(activities_end_times_s[i]) - shift).total_seconds() * VIDEO_FRAMERATE) +1 
    narration = label
    verb = action_dict[label]
    verb_class = labels_dict[verb]
    uuid = i
    #TODO remove asap
    records.append([uuid, VIDEO_ID, start_frame,stop_frame, narration, verb, verb_class])

records_pd = pd.DataFrame(records,columns=["uid", "video_id","start_frame", "stop_frame", "narration", "verb", "verb_class"])

In [40]:
records_pd

Unnamed: 0,uid,video_id,start_frame,stop_frame,narration,verb,verb_class
0,0,S0401,1,2288,Get/replace items from refrigerator/cabinets/d...,Get/Put,6
1,1,S0401,2710,4158,Peel a cucumber,Peel,5
2,2,S0401,4683,5780,Peel a cucumber,Peel,5
3,3,S0401,6109,7495,Peel a cucumber,Peel,5
4,4,S0401,7927,9184,Clear cutting board,Clear,0
5,5,S0401,9501,10957,Slice a cucumber,Slice,0
6,6,S0401,11489,12868,Slice a cucumber,Slice,0
7,7,S0401,13229,14385,Slice a cucumber,Slice,0
8,8,S0401,14944,16294,Clear cutting board,Clear,0
9,9,S0401,16596,18702,Get/replace items from refrigerator/cabinets/d...,Get/Put,6


In [41]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(records_pd, test_size=0.2, random_state=9341)

In [42]:
train_final = train.reset_index(drop=True)
test_final = test.reset_index(drop=True)
train_final

Unnamed: 0,uid,video_id,start_frame,stop_frame,narration,verb,verb_class
0,45,S0401,53457,53685,Clean a plate with a towel,Clean,4
1,16,S0401,27819,28550,Slice a potato,Slice,0
2,56,S0401,65066,66442,"Stack on table: 3 each large/small plates, bowls",Stack,3
3,23,S0401,35506,37480,Get/replace items from refrigerator/cabinets/d...,Get/Put,6
4,22,S0401,34113,34937,Clear cutting board,Clear,0
5,9,S0401,16596,18702,Get/replace items from refrigerator/cabinets/d...,Get/Put,6
6,11,S0401,21532,22759,Peel a potato,Peel,5
7,47,S0401,54146,54332,Clean a pan with a sponge,Clean,4
8,55,S0401,61560,64264,"Set table: 3 each large/small plates, bowls, m...",Set,2
9,51,S0401,55194,55374,Clean a pan with a towel,Clean,4


In [43]:
train_final.to_pickle(PIKLE_PATH + "_train.pkl", protocol=4)
test.to_pickle(PIKLE_PATH + "_test.pkl", protocol=4)

In [44]:
pd.read_pickle(PIKLE_PATH + "_train.pkl")

Unnamed: 0,uid,video_id,start_frame,stop_frame,narration,verb,verb_class
0,45,S0401,53457,53685,Clean a plate with a towel,Clean,4
1,16,S0401,27819,28550,Slice a potato,Slice,0
2,56,S0401,65066,66442,"Stack on table: 3 each large/small plates, bowls",Stack,3
3,23,S0401,35506,37480,Get/replace items from refrigerator/cabinets/d...,Get/Put,6
4,22,S0401,34113,34937,Clear cutting board,Clear,0
5,9,S0401,16596,18702,Get/replace items from refrigerator/cabinets/d...,Get/Put,6
6,11,S0401,21532,22759,Peel a potato,Peel,5
7,47,S0401,54146,54332,Clean a pan with a sponge,Clean,4
8,55,S0401,61560,64264,"Set table: 3 each large/small plates, bowls, m...",Set,2
9,51,S0401,55194,55374,Clean a pan with a towel,Clean,4
