In [1]:
import os
import pandas as pd
import numpy as np

In [46]:
# Read the annotation file: tab-separated, no header row.
# Only columns 2, 3 and 5 are kept and named start / end / label.
# A single separator is given: passing both `sep` and `delimiter` is rejected
# by current pandas ("Specified a sep and a delimiter; you can only specify one").
anno_text = pd.read_csv(
    '../../Annotations/fine_anno_txt/Nguyen-Parupalli-GrpD-LondonOlympics-2012.txt',
    header=None,
    sep='\t',
    usecols=[2, 3, 5],
    names=["start", "end", "label"],
)


In [47]:
# Detect anomalous label strings: show the annotation rows whose label is
# "n " (an "n" followed by a trailing space).
anno_text.loc[anno_text["label"] == "n "]

Unnamed: 0,start,end,label


In [48]:
# Build the action-label dictionary: every entry in the first column of
# labels.txt is mapped to its zero-based row position.
action_labels = pd.read_csv('./labels.txt', sep='\t', header=None)
action_index = dict(zip(action_labels[0], range(len(action_labels))))
action_index

{'bhpb': 6,
 'bhpt': 5,
 'fhpb': 8,
 'fhpt': 7,
 'lbpb': 10,
 'lbpt': 9,
 'n': 0,
 'rtpb': 4,
 'rtpt': 3,
 'smpb': 2,
 'smpt': 1,
 'spb': 12,
 'spt': 11}

In [49]:
# Convert the string action labels to their numeric codes via action_index.
# The lookup is vectorised with Series.map, so it no longer depends on the
# annotation frame having a default 0..n-1 index.
label_codes = anno_text["label"].map(action_index)

# Fail loudly, naming the offending values, if a label has no code
# (map() would otherwise leave silent NaNs behind).
unknown_labels = anno_text.loc[label_codes.isnull(), "label"].unique()
if len(unknown_labels) > 0:
    raise KeyError("labels missing from action_index: {}".format(list(unknown_labels)))

# Codes are kept as floats, matching the values this cell has always produced.
label_num = label_codes.astype(float).rename("label_num")
anno_num = pd.concat([anno_text.loc[:, ["start", "end"]], label_num], axis=1)
anno_num.head()

Unnamed: 0,start,end,label_num
0,111937,112080,11.0
1,112080,112440,4.0
2,112440,112800,8.0
3,112800,113360,3.0
4,113360,113680,9.0


In [50]:
# Integer-divide the annotation start / end values by 40.
# NOTE(review): presumably this turns millisecond timestamps into frame
# numbers at 25 fps (40 ms per frame) — confirm against the annotation format.
start_time = anno_num["start"].floordiv(40)
end_time = anno_num["end"].floordiv(40)

In [51]:
# List every entry found in the frame directory.
frame_dir = '/home/daichi/Badminton/badminton_action_recognition_using_pose_estimation/datasets/match_5/nguyen/'
imlist = os.listdir(frame_dir)

In [52]:
## read frames list
# Use a regular expression to split each file name around "_" and "." into
# three captured parts; names that do not match the pattern are dropped.
# dtype=object keeps the .str accessor usable even when the listing is empty.
img_list = pd.Series(imlist, dtype=object).str.extract(r'(.+)_(.+)\.(.+)', expand=True).dropna()
# Convert the middle part (the frame number) to integers first and sort
# afterwards, so the order is numeric rather than lexicographic.
# The builtin `int` replaces the `np.int` alias, which NumPy 1.24 removed.
frames = np.sort(img_list[1].astype(int).values)

In [53]:
# Label every frame with the action whose [start_time, end_time) interval
# contains it. Frames covered by no interval keep the value NaN.
# The interval bounds and labels are converted to plain arrays once, outside
# the loop, so matches are looked up by position.
interval_starts = np.asarray(start_time)
interval_ends = np.asarray(end_time)
interval_labels = np.asarray(anno_num["label_num"])

frame_label = np.full(len(frames), np.nan)
for i, frame in enumerate(frames):
    # positions of the annotation rows whose interval covers this frame
    idx = np.flatnonzero((interval_starts <= frame) & (frame < interval_ends))
    if idx.size > 0:
        # store a single scalar; if intervals overlap, the first matching row wins
        frame_label[i] = interval_labels[idx[0]]

In [54]:
# Put the frame numbers and their labels side by side in one DataFrame
# with the columns "Frame" and "Label".
pd_img = pd.Series(frames, name="Frame")
pd_label = pd.Series(frame_label, name="Label")
pd_img_label = pd_img.to_frame().join(pd_label)

In [55]:
# Shape of the Label column, i.e. how many frames are in the table.
pd_img_label.loc[:, "Label"].shape

(65648,)

In [56]:
# Remove NaN: keep only the rows in which no value is missing.
feature_labels = pd_img_label.dropna(axis=0, how="any")

In [57]:
# Shape of the Label column once rows with missing values are gone.
feature_labels.loc[:, "Label"].shape

(20941,)

In [58]:
# Create the list of image file names to load: one name per remaining frame,
# with the frame number zero-padded to six digits.
listdir = ["img_{0:06d}.jpg".format(frame_no) for frame_no in feature_labels["Frame"]]

In [59]:
# Copy the Label column into a plain Python list.
feature_label = [value for value in feature_labels["Label"]]

In [60]:
# Build a DataFrame that pairs each frame's file name with its label.
# Both Series are built from plain lists, so they share a fresh 0..n-1 index.
df_list = pd.Series(listdir, name="Img_name")
df_feat = pd.Series(feature_label, name="Labels")
df_label = pd.concat((df_list, df_feat), axis="columns")

In [61]:
# Write the file-name / label table out as a text file (CSV formatted).
output_path = '/home/daichi/Badminton/badminton_action_recognition_using_pose_estimation/datasets/match_5/nguyen_feacture_lable.txt'
df_label.to_csv(output_path)

In [62]:
# Show the last five rows of the table.
df_label.tail(n=5)

Unnamed: 0,Img_name,Labels
20936,img_060540.jpg,0.0
20937,img_060541.jpg,0.0
20938,img_060542.jpg,0.0
20939,img_060543.jpg,0.0
20940,img_060544.jpg,0.0
