In [2]:
import os
import pandas as pd
import numpy as np

In [34]:
# Read the annotation file: tab-separated, no header row. Only columns 2, 3
# and 5 are kept and they are named start / end / label.
# Only `sep` is passed; giving both `sep` and `delimiter` (as before) makes
# current pandas raise "Specified a sep and a delimiter".
anno_text = pd.read_csv(
    '../../Annotations/fine_anno_txt/Chen-Zwiebler-R32-LondonOlympics-2012.txt',
    header=None,
    sep='\t',
    usecols=[2, 3, 5],
    names=["start", "end", "label"],
)


In [35]:
# Detect malformed label strings: show the rows whose label is "n " (an "n"
# followed by a trailing space).
anno_text.loc[anno_text["label"].eq("n ")]

Unnamed: 0,start,end,label


In [36]:
# Build the lookup from action-label string to integer id: the id of a label
# is its row position in the first column of labels.txt.
action_labels = pd.read_csv('./labels.txt', header=None, sep='\t')
action_index = dict(zip(action_labels[0], range(len(action_labels))))
action_index

{'bhpb': 6,
 'bhpt': 5,
 'fhpb': 8,
 'fhpt': 7,
 'lbpb': 10,
 'lbpt': 9,
 'n': 0,
 'rtpb': 4,
 'rtpt': 3,
 'smpb': 2,
 'smpt': 1,
 'spb': 12,
 'spt': 11}

In [37]:
# Translate every label string into its numeric id via action_index.
# The ids are stored as float64, so they display as 9.0, 4.0, ...
anno_tmp = np.array([action_index[name] for name in anno_text["label"]], dtype=float)
label_num = pd.Series(anno_tmp, name="label_num")
# Put the numeric labels next to the start / end columns.
anno_num = pd.concat([anno_text[["start", "end"]], label_num], axis=1)
anno_num.head()

Unnamed: 0,start,end,label_num
0,63894,64720,9.0
1,64720,65440,4.0
2,65440,66040,2.0
3,66040,66343,3.0
4,66343,66880,9.0


In [38]:
# The video is 25 fps, so the factor 25 [fps] / 1000 = 1/40 is applied,
# i.e. both columns are divided by 40.
# NOTE(review): presumably start/end are millisecond timestamps and the result
# is a (fractional) frame number — confirm against the annotation format.
start_time, end_time = (anno_num[column] / 40 for column in ("start", "end"))

In [52]:
# Inspect the converted start positions (annotation "start" values divided by 40).
start_time

0         1597.350
1         1618.000
2         1636.000
3         1651.000
4         1658.575
5         1672.000
6         1679.000
7         1688.000
8         1699.000
9         1712.000
10        2108.100
11        2124.000
12        2141.000
13        2152.000
14        2157.000
15        2167.000
16        2175.000
17        2184.000
18        2198.200
19        2209.000
20        2221.100
21        2238.000
22        2247.000
23        2258.000
24        2264.525
25        2283.000
26        2288.175
27        2300.000
28        2310.000
29        2325.000
           ...    
2695    122787.425
2696    122800.300
2697    122809.200
2698    122813.425
2699    122827.475
2700    122835.475
2701    122850.700
2702    122858.475
2703    122874.475
2704    122892.350
2705    122906.800
2706    122917.250
2707    122928.475
2708    122941.050
2709    122954.575
2710    122970.475
2711    122986.500
2712    122993.700
2713    123006.475
2714    123018.450
2715    123032.050
2716    1230

In [41]:
# List every entry of the directory that holds the frame images.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is pointed at the local dataset location.
chen_frames_dir = '/home/daichi/Badminton/badminton_action_recognition_using_pose_estimation/datasets/match_8/chen/'
imlist = os.listdir(chen_frames_dir)

In [42]:
## read frames list
# Split each file name at "_" and "." with a regular expression into
# (prefix, frame number, extension). Names that do not match the pattern yield
# NaN rows, which are dropped.
# The pattern is a raw string so that `\.` is not an invalid string escape.
img_list = pd.Series(imlist).str.extract(r'(.+)_(.+)\.(.+)', expand=True).dropna()
# Convert the frame-number strings to integers first and then sort, so the
# order is numeric even if the file names are not zero-padded to equal width.
# The builtin `int` replaces `np.int`, which was removed in NumPy 1.24.
frames = np.sort(img_list[1].astype(int).values)

In [43]:
# Label every frame: a frame gets the label of the annotation whose half-open
# interval [start_time, end_time) contains it, and NaN when no interval does.
# Plain arrays are extracted once so the loop does not rebuild pandas objects
# on every iteration.
interval_starts = np.asarray(start_time)
interval_ends = np.asarray(end_time)
interval_labels = np.asarray(anno_num["label_num"])

# Start from all-NaN so that unmatched frames need no special handling.
frame_label = np.full(len(frames), np.nan)
for i, frame in enumerate(frames):
    hits = np.flatnonzero((interval_starts <= frame) & (frame < interval_ends))
    if hits.size > 0:
        # Store a scalar. If several intervals overlap this frame the first
        # one wins (assigning the whole selection to one slot would raise).
        frame_label[i] = interval_labels[hits[0]]

In [44]:
# Combine the frame numbers and their labels into one two-column table
# ("Frame", "Label").
pd_img = pd.Series(frames, name="Frame")
pd_label = pd.Series(frame_label, name="Label")
pd_img_label = pd_img.to_frame().join(pd_label)

In [45]:
# Number of frames before rows with a missing label are removed.
pd_img_label.loc[:, "Label"].shape

(125112,)

In [46]:
# Remove NaN: keep only the rows in which every column has a value.
has_all_values = pd_img_label.notnull().all(axis=1)
feature_labels = pd_img_label[has_all_values]

In [47]:
# Number of frames that remain after the NaN rows were dropped.
feature_labels.loc[:, "Label"].shape

(38399,)

In [48]:
# Build the list of image file names to load: each remaining frame number is
# formatted as a zero-padded, six-digit "img_NNNNNN.jpg" name.
listdir = ["img_{0:06d}.jpg".format(num) for num in feature_labels["Frame"]]

In [49]:
# Plain Python list of the labels, in the same order as the image names.
feature_label = [label for label in feature_labels["Label"]]

In [50]:
# Build a DataFrame that pairs each frame's file name with its label.
df_list = pd.Series(listdir, name="Img_name")
df_feat = pd.Series(feature_label, name="Labels")
df_label = df_list.to_frame().join(df_feat)

In [54]:
# Write the image-name / label table to a text file in CSV format. The index
# column is written too, because index=False is not passed.
# The stray comma before `.to_csv` (a syntax error) has been removed.
# NOTE(review): `.iloc[1:]` skips the first row of df_label — confirm that
# dropping the first labelled frame is intentional.
df_label.iloc[1:].to_csv('/home/daichi/Badminton/badminton_action_recognition_using_pose_estimation/datasets/match_8/chen_feature_images.txt')

In [51]:
# Preview the first five rows of the image-name / label table.
df_label.iloc[:5]

Unnamed: 0,Img_name,Labels
0,img_001598.jpg,9.0
1,img_001599.jpg,9.0
2,img_001600.jpg,9.0
3,img_001601.jpg,9.0
4,img_001602.jpg,9.0
