In [3]:
import os
import sys
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import pandas as pd

sys.path.append(str(Path(Path.cwd()).parent))
from src.utils import FPHA, DATA_DIR, IMG

# Analyze actions

In [2]:
# Load the FPHA action/object table (whitespace-separated columns, header row first).
fpha_action_file = Path(DATA_DIR)/'First_Person_Action_Benchmark/action_object_info.txt'
# `sep=r'\s+'` is the documented replacement for the deprecated
# `delim_whitespace=True`; the parsing is the same.
fpha_data = pd.read_csv(fpha_action_file, sep=r'\s+')
print(fpha_data)

# Unique object names in order of first appearance, underscores shown as spaces.
fpha_nouns = [name.replace('_', ' ') for name in fpha_data.object_name.unique()]

# The verb is the first '_'-separated token of each action name;
# dict.fromkeys removes duplicates while keeping first-appearance order.
fpha_verbs = list(dict.fromkeys(name.split('_')[0] for name in fpha_data.action_name))

# One human-readable label per table row (underscores replaced by spaces).
fpha_action = [name.replace('_', ' ') for name in fpha_data.action_name]

    action_id                 action_name    object_name  object_pose scenario
0           1           open_juice_bottle          juice            1  kitchen
1           2          close_juice_bottle          juice            1  kitchen
2           3           pour_juice_bottle          juice            1  kitchen
3           4          open_peanut_butter  peanut_butter            0  kitchen
4           5         close_peanut_butter  peanut_butter            0  kitchen
5           6                       prick           fork            0  kitchen
6           7                    sprinkle          spoon            0  kitchen
7           8                 scoop_spoon          spoon            0  kitchen
8           9                   put_sugar          spoon            0  kitchen
9          10                        stir          spoon            0  kitchen
10         11                   open_milk           milk            1  kitchen
11         12                  close_milk           

In [3]:
# List every action label next to its zero-based position in `fpha_action`.
print('Actions')
for action_pos, action_label in enumerate(fpha_action):
    print(f'{action_pos} {action_label}')

Actions
0 open juice bottle
1 close juice bottle
2 pour juice bottle
3 open peanut butter
4 close peanut butter
5 prick
6 sprinkle
7 scoop spoon
8 put sugar
9 stir
10 open milk
11 close milk
12 pour milk
13 drink mug
14 put tea bag
15 put salt
16 open liquid soap
17 close liquid soap
18 pour liquid soap
19 wash sponge
20 flip sponge
21 scratch sponge
22 squeeze sponge
23 open soda can
24 use flash
25 write
26 tear paper
27 squeeze paper
28 open letter
29 take letter from enveloppe
30 read letter
31 flip pages
32 use calculator
33 light candle
34 charge cell phone
35 unfold glasses
36 clean glasses
37 open wallet
38 give coin
39 receive coin
40 give card
41 pour wine
42 toast wine
43 handshake
44 high five


In [4]:
# List every noun next to its zero-based position in `fpha_nouns`.
print('Nouns')
for noun_pos, noun_label in enumerate(fpha_nouns):
    print(f'{noun_pos} {noun_label}')

Nouns
0 juice
1 peanut butter
2 fork
3 spoon
4 milk
5 mug
6 tea bag
7 salt
8 liquid soap
9 sponge
10 soda can
11 spray
12 pen
13 paper
14 letter
15 book
16 calculator
17 match
18 cell charger
19 glasses
20 wallet
21 coin
22 card
23 wine bottle
24 wine glass
25 hand


In [5]:
# List every verb next to its zero-based position in `fpha_verbs`.
print('Verbs')
for verb_pos, verb_label in enumerate(fpha_verbs):
    print(f'{verb_pos} {verb_label}')

Verbs
0 open
1 close
2 pour
3 prick
4 sprinkle
5 scoop
6 put
7 stir
8 drink
9 wash
10 flip
11 scratch
12 squeeze
13 use
14 write
15 tear
16 take
17 read
18 light
19 charge
20 unfold
21 clean
22 give
23 receive
24 toast
25 handshake
26 high


In [6]:
# Root folder of the FPHA dataset on disk.
IMG_DIR = Path(DATA_DIR) / 'First_Person_Action_Benchmark'

# Fetch the 'color' train/test split.
# NOTE(review): presumably file names plus per-frame xyz joint annotations —
# confirm against FPHA.get_train_test_pairs.
(train_file_name, test_file_name,
 train_xyz_gt, test_xyz_gt) = FPHA.get_train_test_pairs('color', IMG_DIR)

# Merge both splits (train first, then test) so one position addresses a frame
# in every `all_*` container below.
all_file_name = train_file_name + test_file_name
all_xyz_gt = np.concatenate([train_xyz_gt, test_xyz_gt])
all_uvd_gt = FPHA.xyz2uvd_color(all_xyz_gt)
all_index = range(len(all_file_name))

100%|██████████| 1177/1177 [05:45<00:00,  3.40it/s] 


In [8]:
def get_all_vid_idx(img_list):
    """Collect positions in the global ``all_file_name`` that match the given sequences.

    A file name matches when the sequence string occurs anywhere inside it
    (plain substring test).

    Args:
        img_list: iterable of sequence strings to look for.

    Returns:
        List of integer positions into ``all_file_name``, grouped by the order
        of ``img_list`` and ascending within each group. A file that matches
        several entries is listed once per matching entry.
    """
    return [
        frame_pos
        for seq_key in img_list
        for frame_pos, frame_name in enumerate(all_file_name)
        if seq_key in frame_name
    ]

# Points Outside Image Bounds

In [None]:
# Find frames with at least one annotated point outside the original image.
# Assumes axis 0 of `all_uvd_gt` indexes frames and the last axis starts with
# (u, v) pixel coordinates — TODO confirm against FPHA.xyz2uvd_color.
u_coord = all_uvd_gt[..., 0]
v_coord = all_uvd_gt[..., 1]
out_of_bounds = (
    (u_coord > FPHA.ORI_WIDTH) | (u_coord < 0)
    | (v_coord > FPHA.ORI_HEIGHT) | (v_coord < 0)
)

# Reduce over every non-frame axis so each frame gets a single flag.
# flatnonzero returns sorted, unique integer positions even when nothing is out
# of bounds; the previous np.unique over concatenated lists gave an empty
# float array in that case, which cannot be used for indexing.
per_frame_flag = out_of_bounds.any(axis=tuple(range(1, out_of_bounds.ndim)))
ob_index = np.flatnonzero(per_frame_flag)
print(len(ob_index))

# Bad Image Sequences

In [9]:
# Sequences flagged as unusable; the trailing comment on each entry records why.
bad_seqs = [
    'Subject_1/unfold_glasses/5',  # one finger is really long
    'Subject_4/read_letter/3',     # annotation gets stuck
    'Subject_5/use_flash/6',       # annotation completely wrong, no hand even in image
    'Subject_1/handshake/3',       # blurry image, wrong annotation
    'Subject_1/clean_glasses/2',   # bad annotation
    'Subject_1/clean_glasses/4',   # bad annotation
    'Subject_2/tear_paper/3',      # bad annotation
    'Subject_1/high_five/3',       # bad annotation
    'Subject_6/close_milk/3',      # bad annotation
    'Subject_1/give_card/3',       # bad annotation
    'Subject_1/receive_coin/3',    # bad annotation
    'Subject_1/receive_coin/1',    # bad annotation (bad at the end)
    'Subject_1/open_wallet/3',     # bad annotation (not so bad)
    'Subject_3/open_milk/3',       # bad annotation (blurry for small part)
]

# Expand every flagged sequence into the positions of its frames in `all_file_name`.
bad_index = get_all_vid_idx(bad_seqs)
print(len(bad_index))

1926


# Filter Images

In [11]:
# Frames to exclude. To also drop frames with out-of-bounds points, use instead:
# all_bad_index = np.unique(np.concatenate((bad_index, ob_index)))
all_bad_index = bad_index

# Membership tests against a set are O(1); testing against the list rescanned
# all bad entries for every frame in `all_index`.
bad_lookup = set(all_bad_index)
filter_index = [i for i in all_index if i not in bad_lookup]
print(len(all_bad_index), len(filter_index))

1926 103533


# Visualize Images 

In [None]:
# Page through the filtered frames in num_plt x num_plt grids with the 2D
# annotation drawn on top of each image. Press Enter to move to the next page.
from IPython.display import clear_output
num_plt = 3
idx_list = filter_index
page_size = num_plt * num_plt

for page_start in range(0, len(idx_list), page_size):
    # Slice the page instead of indexing `page_start + offset`: the last page
    # is usually only partially filled and direct indexing raised IndexError.
    page = idx_list[page_start:page_start + page_size]
    # squeeze=False keeps `ax` a 2-D array for any num_plt, including 1.
    fig, ax = plt.subplots(num_plt, num_plt, figsize=(15, 15), squeeze=False)
    # ax.flat walks the grid row by row, i.e. the same order as nested i/j loops.
    for slot, cell_ax in enumerate(ax.flat):
        if slot >= len(page):
            cell_ax.axis('off')  # no frame left for this grid cell
            continue
        index = page[slot]
        # Load the pixels inside `with` so the file handle is released right away.
        with Image.open(os.path.join(IMG_DIR, 'Video_files', all_file_name[index])) as frame_file:
            img = np.asarray(frame_file)
        cell_ax.imshow(img)
        FPHA.visualize_joints_2d(cell_ax, all_uvd_gt[index][FPHA.REORDER_IDX], joint_idxs=False)
        # Title: global frame position plus the file name without its last 21 characters.
        cell_ax.set_title(str(index) + ' ' + all_file_name[index][:-21])
    plt.show()
    input("Press Enter to continue...")

    clear_output()
    plt.close(fig)  # do not accumulate one open figure per page

# Visualize Images (By Sequence)

In [None]:
# Show one num_plt x num_plt grid per sequence: starting at the first visited
# frame of a new sequence, display every `frame_step`-th entry of `idx_list`.
# Press Enter to move on.
from IPython.display import clear_output
num_plt = 3
idx_list = filter_index
frame_step = 10  # spacing, in positions of idx_list, between the frames of one grid
cur_file_name = ''
for idx in range(0, len(idx_list), num_plt*num_plt):
    # File name without its last 21 characters is used as the sequence key.
    # NOTE(review): presumably this strips the per-frame part of the path — confirm.
    seq_name = all_file_name[idx_list[idx]][:-21]
    if seq_name == cur_file_name:
        continue  # this sequence already has a grid
    cur_file_name = seq_name
    # squeeze=False keeps `ax` a 2-D array for any num_plt, including 1.
    fig, ax = plt.subplots(num_plt, num_plt, figsize=(15, 15), squeeze=False)
    for slot, cell_ax in enumerate(ax.flat):
        pos = idx + slot * frame_step
        if pos >= len(idx_list):
            # Near the end of idx_list the strided positions run past the last
            # entry; indexing them directly raised IndexError.
            cell_ax.axis('off')
            continue
        # NOTE(review): strided positions may reach into the following sequence;
        # each title shows which sequence a frame belongs to.
        index = idx_list[pos]
        # Load the pixels inside `with` so the file handle is released right away.
        with Image.open(os.path.join(IMG_DIR, 'Video_files', all_file_name[index])) as frame_file:
            img = np.asarray(frame_file)
        cell_ax.imshow(img)
        FPHA.visualize_joints_2d(cell_ax, all_uvd_gt[index][FPHA.REORDER_IDX], joint_idxs=False)
        cell_ax.set_title(str(index) + ' ' + all_file_name[index][:-21])
    plt.show()
    input("Press Enter to continue...")
    clear_output()
    plt.close(fig)  # do not accumulate one open figure per sequence

# Visualize Video

In [None]:
# Read the action-recognition split file and keep only the first
# whitespace-separated token of every line.
with open(os.path.join(IMG_DIR, "data_split_action_recognition.txt")) as f:
    # Blank lines are skipped: they have no first token and used to raise IndexError.
    lines = [row.split()[0] for row in f if row.strip()]
print(len(lines))

In [None]:
# Render one sequence as a video: every frame is resized, drawn with its 2D
# annotation on a Matplotlib figure, and the rendered canvas is collected as an
# RGB array for moviepy.
from moviepy.editor import ImageSequenceClip
from tqdm import tqdm
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure

seqs = [lines[3]] # FROM data_split_action_recognition.txt
# seqs = ['Subject_1/open_juice_bottle/2']
# seqs = ['Subject_2/stir/3']
frame_idx = get_all_vid_idx(seqs)
frames = []
vid_size = (416, 416)  # (width, height) each frame is resized to before drawing
# print(frame_idx)
for idx in tqdm(frame_idx):
    # Resize inside `with` so the image file handle is released right away.
    with Image.open(os.path.join(IMG_DIR, 'Video_files', all_file_name[idx])) as frame_file:
        img = np.asarray(frame_file.resize(vid_size))
    fig, ax = plt.subplots()
    ax.imshow(img)
    ax.axis('off')
    # Rescale the annotation from the original resolution to the resized frame.
    uvd_gt = IMG.scale_points_WH(all_uvd_gt[idx], (FPHA.ORI_WIDTH, FPHA.ORI_HEIGHT), vid_size)
    FPHA.visualize_joints_2d(ax, uvd_gt[FPHA.REORDER_IDX], joint_idxs=False)
    fig.canvas.draw()
    # buffer_rgba() exposes the rendered canvas as a (height, width, 4) buffer.
    # It replaces tostring_rgb() plus binary-mode np.fromstring, both deprecated.
    # Drop alpha and copy so the frame outlives the figure.
    data = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
    frames.append(data)
    plt.close(fig)  # close exactly this figure, not whichever one is current

segment_clip = ImageSequenceClip(frames, fps=12)
segment_clip.ipython_display()

# Visualize One Image

In [None]:
# Show a single frame at large size with its 2D annotation overlaid.
index = 93973
frame_path = os.path.join(
    DATA_DIR, 'First_Person_Action_Benchmark', 'Video_files', all_file_name[index]
)
img = Image.open(frame_path)
fig, ax = plt.subplots(figsize=(15, 15))
FPHA.visualize_joints_2d(ax, all_uvd_gt[index][FPHA.REORDER_IDX], joint_idxs=False)
ax.imshow(img)

# Save to File

In [12]:
# Write the file name of every excluded frame, one per line.
# NOTE(review): the output is named '..._with_outbounds' but `all_bad_index`
# may hold only the hand-picked bad sequences — confirm which set is intended.
bad_list_path = os.path.join(IMG_DIR, 'bad_imgs_with_outbounds.txt')
print(len(all_bad_index))
print("WRITING BAD IMGS TO FILE")
with open(bad_list_path, 'w') as out_file:
    for index in tqdm(all_bad_index):
        out_file.write(f"{all_file_name[index]}\n")

1926
WRITING BAD IMGS TO FILE


100%|██████████| 1926/1926 [00:00<00:00, 1473322.91it/s]


# Workspace

In [None]:
# Scratch check of FPHA.crop_hand_from_bbox on one frame:
# show the full frame first, then the crop produced for a fixed box.
index = 93973
frame_path = os.path.join(
    DATA_DIR, 'First_Person_Action_Benchmark', 'Video_files', all_file_name[index]
)
img = Image.open(frame_path)
fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(img)

# NOTE(review): the bbox layout expected by crop_hand_from_bbox is not visible
# here — confirm what the four values mean.
bbox = np.asarray([500] * 4)
crop = FPHA.crop_hand_from_bbox(np.asarray(img), bbox)

fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(crop)

In [None]:
# NOTE(review): called without arguments and the value is only shown as the
# cell output; looks like a leftover scratch call — confirm the expected
# signature in src.utils.FPHA or drop the cell.
FPHA.get_bbox_for_normuvd()

In [None]:
# Round-trip check on one frame: xyz -> normalised uvd -> (xyz, uvd) should
# give back the uvd annotation stored in `all_uvd_gt`.
index = 8228
frame_path = os.path.join(
    DATA_DIR, 'First_Person_Action_Benchmark', 'Video_files', all_file_name[index]
)

# First figure: the stored uvd annotation drawn on the frame.
img = Image.open(frame_path)
fig, ax = plt.subplots(figsize=(15, 15))
FPHA.visualize_joints_2d(ax, all_uvd_gt[index][FPHA.REORDER_IDX], joint_idxs=False)
ax.imshow(img)

# Normalise, then invert the normalisation.
normuvd, hand_center_uvd = FPHA.get_normuvd(all_xyz_gt[index])
xyz_ori, uvd_ori = FPHA.normuvd2xyzuvd_color(normuvd, hand_center_uvd)
print(hand_center_uvd)

# Second figure: the reconstructed uvd drawn on the same frame.
img = Image.open(frame_path)
fig, ax = plt.subplots(figsize=(15, 15))
FPHA.visualize_joints_2d(ax, uvd_ori[FPHA.REORDER_IDX], joint_idxs=False)
ax.imshow(img)

# True when the reconstruction agrees with the stored annotation within
# np.allclose tolerances.
print(np.allclose(uvd_ori, all_uvd_gt[index]))

In [None]:
# Map zero-based action id -> action name, parsed from action_object_info.txt.
cn_dict = {}
# Build the path from DATA_DIR (as the pandas load at the top of the notebook
# does) instead of a machine-specific absolute path.
action_info_path = Path(DATA_DIR) / 'First_Person_Action_Benchmark' / 'action_object_info.txt'
with open(action_info_path, 'r') as f:
    next(f, None)  # skip the header row
    # Iterate the file directly under a private name: rebinding the global
    # `lines` here would clobber the split-file list the video cell indexes.
    for row in f:
        # split() without an argument tolerates repeated spaces, tabs and the
        # trailing newline; split(' ') produced empty fields in those cases.
        fields = row.split()
        if not fields:
            continue  # blank line
        # The file's ids start at 1; shift to zero-based.
        cn_dict[int(fields[0]) - 1] = fields[1]

for k, v in cn_dict.items():
    print(k, v)

In [None]:
# Build three lookups from action_object_info.txt:
#   action_to_verb_dict: zero-based action id -> (verb, noun)
#   verb / noun:         name -> integer id, assigned in sorted name order
action_to_verb_dict = {}
verb_tokens = []
noun_tokens = []
# Build the path from DATA_DIR (as the pandas load at the top of the notebook
# does) instead of a machine-specific absolute path.
action_info_path = Path(DATA_DIR) / 'First_Person_Action_Benchmark' / 'action_object_info.txt'
with open(action_info_path, 'r') as f:
    next(f, None)  # skip the header row
    # Iterate the file directly under a private name: rebinding the global
    # `lines` here would clobber the split-file list the video cell indexes.
    for row in f:
        # split() without an argument tolerates repeated spaces and tabs.
        fields = row.split()
        if not fields:
            continue  # blank line
        # The verb is the first '_'-separated token of the action name;
        # the noun is the third column of the file.
        action_verb = fields[1].split('_')[0]
        action_noun = fields[2]
        verb_tokens.append(action_verb)
        noun_tokens.append(action_noun)
        # The file's ids start at 1; shift to zero-based.
        action_to_verb_dict[int(fields[0]) - 1] = (action_verb, action_noun)

# sorted(set(...)) gives the same ordering np.unique gave, with plain-str keys,
# and avoids rebinding `verb`/`noun` from list to array to dict.
verb = {name: pos for pos, name in enumerate(sorted(set(verb_tokens)))}
noun = {name: pos for pos, name in enumerate(sorted(set(noun_tokens)))}

for k, v in verb.items():
    print(k, v)
for k, v in noun.items():
    print(k, v)
for k, v in action_to_verb_dict.items():
    print(k, v)

# Example lookup: the verb id of the action with zero-based id 1.
print(verb[action_to_verb_dict[1][0]])

In [21]:
# Load the training label list: each line holds space-separated fields, of
# which the first three are read as image path, verb id and noun id.
# The path is built from DATA_DIR instead of a machine-specific absolute path.
train_label_path = Path(DATA_DIR) / 'First_Person_Action_Benchmark' / 'train_fpha_hpo_img.txt'
with open(train_label_path, 'r') as f:
    img_labels = f.read().splitlines()

# Split every line once and pick the columns from the result.
# The ids are kept as strings, exactly as they appear in the file.
label_fields = [row.split(' ') for row in img_labels]
img_path = [fields[0] for fields in label_fields]
verb_id = [fields[1] for fields in label_fields]
noun_id = [fields[2] for fields in label_fields]

['9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9',