In [30]:
import pandas as pd
from pathlib import Path

gifs = sorted(Path("../data/clips-masked-gif").glob("*.gif"))
data = []
for gif in gifs:
    category, num, name, _ = gif.stem.split("_")
    data.append([category, int(num), name, str(gif.name)])
info_df = pd.DataFrame(data, columns=["category", "num", "name", "path"])
info_df = info_df[["category", "num", "name"]]
info_df.to_csv("../data/gif-info.csv", index=False)
info_df

Unnamed: 0,category,num,name
0,ashi-waza,0,de-ashi-harai
1,ashi-waza,1,hiza-guruma
2,ashi-waza,2,sasae-tsurikomi-ashi
3,ashi-waza,3,o-soto-gari
4,ashi-waza,4,o-uchi-gari
...,...,...,...
106,yoko-sutemi-waza,11,uchi-mata-makikomi
107,yoko-sutemi-waza,12,harai-makikomi
108,yoko-sutemi-waza,13,ko-uchi-makikomi
109,yoko-sutemi-waza,14,kani-basami


In [31]:
info_df = pd.read_csv("../data/waza-info-translation.csv")
info_df.head()

Unnamed: 0,category,num,name,translated_name
0,ashi-waza,0,de-ashi-harai,Forward Foot Sweep
1,ashi-waza,1,hiza-guruma,Knee Wheel
2,ashi-waza,2,sasae-tsurikomi-ashi,Propping-Lifting Pulling Ankle Throw
3,ashi-waza,3,o-soto-gari,Major Outer Reaping
4,ashi-waza,4,o-uchi-gari,Major Inner Reaping


In [38]:
nagewaza_df = pd.read_csv("../data/nagewaza.csv")
nagewaza_df.head()

Unnamed: 0,technique,group,group_index
0,Deashi Hari,Dai Ikkyo (1st),1
1,Hiza Guruma,Dai Ikkyo (1st),2
2,Sasae Tsurikomi Ashi,Dai Ikkyo (1st),3
3,Uki Goshi,Dai Ikkyo (1st),4
4,Osoto Gari,Dai Ikkyo (1st),5


In [39]:
# let's encode the group names as integers, based on the order in which they appear in the list
mapping = {v: k + 1 for k, v in enumerate(nagewaza_df.group.unique())}
mapping["Unclassified"] = 0
mapping
nagewaza_df["group_id"] = nagewaza_df.group.map(mapping)
nagewaza_df

Unnamed: 0,technique,group,group_index,group_id
0,Deashi Hari,Dai Ikkyo (1st),1,1
1,Hiza Guruma,Dai Ikkyo (1st),2,1
2,Sasae Tsurikomi Ashi,Dai Ikkyo (1st),3,1
3,Uki Goshi,Dai Ikkyo (1st),4,1
4,Osoto Gari,Dai Ikkyo (1st),5,1
5,O Goshi,Dai Ikkyo (1st),6,1
6,Ouchi Gari,Dai Ikkyo (1st),7,1
7,Seoi Nage,Dai Ikkyo (1st),8,1
8,Kosoto Gari,Dai Nikyo (2nd),1,2
9,Kouchi Gari,Dai Nikyo (2nd),2,2


In [40]:
import editdistance


def closest_item(item, items):
    return min(items, key=lambda x: editdistance.eval(item, x))


closest_item("Deashi Hari", info_df.name)

'de-ashi-harai'

In [41]:
nagewaza_df["technique_id"] = nagewaza_df.technique.apply(
    lambda x: closest_item(x, info_df.name)
)
nagewaza_df

Unnamed: 0,technique,group,group_index,group_id,technique_id
0,Deashi Hari,Dai Ikkyo (1st),1,1,de-ashi-harai
1,Hiza Guruma,Dai Ikkyo (1st),2,1,hiza-guruma
2,Sasae Tsurikomi Ashi,Dai Ikkyo (1st),3,1,sasae-tsurikomi-ashi
3,Uki Goshi,Dai Ikkyo (1st),4,1,uki-goshi
4,Osoto Gari,Dai Ikkyo (1st),5,1,o-soto-gari
5,O Goshi,Dai Ikkyo (1st),6,1,o-goshi
6,Ouchi Gari,Dai Ikkyo (1st),7,1,o-uchi-gari
7,Seoi Nage,Dai Ikkyo (1st),8,1,seoi-nage
8,Kosoto Gari,Dai Nikyo (2nd),1,2,o-soto-gari
9,Kouchi Gari,Dai Nikyo (2nd),2,2,o-uchi-gari


In [48]:
df = info_df.merge(nagewaza_df, left_on="name", right_on="technique_id", how="left")
df = df[
    ["category", "num", "name", "translated_name", "group", "group_index", "group_id"]
]
df["group_index"] = df["group_index"].fillna(0).astype(int)
df["group_id"] = df["group_id"].fillna(0).astype(int)
df["group"] = df["group"].fillna("Uncategorized")
df

Unnamed: 0,category,num,name,translated_name,group,group_index,group_id
0,ashi-waza,0,de-ashi-harai,Forward Foot Sweep,Dai Ikkyo (1st),1,1
1,ashi-waza,1,hiza-guruma,Knee Wheel,Dai Ikkyo (1st),2,1
2,ashi-waza,2,sasae-tsurikomi-ashi,Propping-Lifting Pulling Ankle Throw,Dai Ikkyo (1st),3,1
3,ashi-waza,3,o-soto-gari,Major Outer Reaping,Dai Ikkyo (1st),5,1
4,ashi-waza,3,o-soto-gari,Major Outer Reaping,Dai Nikyo (2nd),1,2
...,...,...,...,...,...,...,...
112,yoko-sutemi-waza,11,uchi-mata-makikomi,Inner Thigh Wraparound,Shinmeisho No Waza,17,6
113,yoko-sutemi-waza,12,harai-makikomi,Sweeping Wraparound,Shinmeisho No Waza,16,6
114,yoko-sutemi-waza,13,ko-uchi-makikomi,Minor Inner Wraparound,Uncategorized,0,0
115,yoko-sutemi-waza,14,kani-basami,Crab Claw Scissors,Shinmeisho No Waza,13,6


In [None]:
df.to_csv("../data/meta/waza.csv", index=False)
df.to_json("../data/meta/waza.json", orient="records")