- https://chat.openai.com/share/f0416ebd-c077-433d-927b-33db4c47bd07
- https://chat.openai.com/share/773255ce-2ce3-4382-8afb-4a31b4c21dae

In [1]:
import pandas as pd
from pathlib import Path

gifs = sorted(Path("../data/clips-masked-gif").glob("*.gif"))
data = []
for gif in gifs:
    category, num, name, _ = gif.stem.split("_")
    data.append([category, int(num), name, str(gif.name)])
info_df = pd.DataFrame(data, columns=["category", "num", "name", "path"])
info_df = info_df[["category", "num", "name"]]
info_df.to_csv("../data/gif-info.csv", index=False)
info_df

Unnamed: 0,category,num,name
0,ashi-waza,0,de-ashi-harai
1,ashi-waza,1,hiza-guruma
2,ashi-waza,2,sasae-tsurikomi-ashi
3,ashi-waza,3,o-soto-gari
4,ashi-waza,4,o-uchi-gari
...,...,...,...
106,yoko-sutemi-waza,11,uchi-mata-makikomi
107,yoko-sutemi-waza,12,harai-makikomi
108,yoko-sutemi-waza,13,ko-uchi-makikomi
109,yoko-sutemi-waza,14,kani-basami


In [2]:
info_df = pd.read_csv("../data/waza-info-translation.csv")
info_df.head()

Unnamed: 0,category,num,name,translated_name
0,ashi-waza,0,de-ashi-harai,Forward Foot Sweep
1,ashi-waza,1,hiza-guruma,Knee Wheel
2,ashi-waza,2,sasae-tsurikomi-ashi,Propping-Lifting Pulling Ankle Throw
3,ashi-waza,3,o-soto-gari,Major Outer Reaping
4,ashi-waza,4,o-uchi-gari,Major Inner Reaping


In [9]:
info_df.category.unique()

array(['ashi-waza', 'kansetsu-waza', 'koshi-waza', 'ma-sutemi-waza',
       'osaekomi-waza', 'shime-waza', 'te-waza', 'yoko-sutemi-waza'],
      dtype=object)

In [17]:
category_mapping = {
    v: k + 1
    for k, v in enumerate(
        [
            "te-waza",
            "koshi-waza",
            "ashi-waza",
            "osaekomi-waza",
            "ma-sutemi-waza",
            "yoko-sutemi-waza",
            "shime-waza",
            "kansetsu-waza",
        ]
    )
}
info_df["category_id"] = info_df["category"].map(category_mapping)
info_df.sample(10)

Unnamed: 0,category,num,name,translated_name,category_id
15,ashi-waza,15,tsubame-gaeshi,Swallow Counter,3
54,osaekomi-waza,7,kata-gatame:escapes,Shoulder Hold Escapes,4
49,osaekomi-waza,2,kuzure-kesa-gatame,Modified Scarf Hold,4
32,koshi-waza,0,uki-goshi,Floating Hip,2
16,ashi-waza,16,o-soto-gaeshi,Major Outer Reversal,3
65,osaekomi-waza,18,ura-gatame,Rear Hold,4
92,te-waza,13,kibisu-gaeshi,Heel Trip Reversal,1
94,te-waza,15,ko-uchi-gaeshi,Minor Inner Reap Counter,1
60,osaekomi-waza,13,yoko-shiho-gatame:escapes,Sideways Four-Quarters Hold Escapes,4
79,te-waza,0,seoi-nage,Shoulder Throw,1


In [32]:
nagewaza_df = pd.read_csv("../data/nagewaza.csv")
nagewaza_df.head()

Unnamed: 0,technique,group,group_id,group_index
0,Deashi Hari,Dai Ikkyo,1,1
1,Hiza Guruma,Dai Ikkyo,1,2
2,Sasae Tsurikomi Ashi,Dai Ikkyo,1,3
3,Uki Goshi,Dai Ikkyo,1,4
4,Osoto Gari,Dai Ikkyo,1,5


In [33]:
import editdistance


def closest_item(item, items):
    return min(items, key=lambda x: editdistance.eval(item, x))


closest_item("Deashi Hari", info_df.name)

'de-ashi-harai'

In [34]:
nagewaza_df["technique_id"] = nagewaza_df.technique.apply(
    lambda x: closest_item(x, info_df.name)
)
nagewaza_df

Unnamed: 0,technique,group,group_id,group_index,technique_id
0,Deashi Hari,Dai Ikkyo,1,1,de-ashi-harai
1,Hiza Guruma,Dai Ikkyo,1,2,hiza-guruma
2,Sasae Tsurikomi Ashi,Dai Ikkyo,1,3,sasae-tsurikomi-ashi
3,Uki Goshi,Dai Ikkyo,1,4,uki-goshi
4,Osoto Gari,Dai Ikkyo,1,5,o-soto-gari
...,...,...,...,...,...
62,Kawazu Gake,Shinmeisho No Waza,7,15,kawazu-gake
63,Harai Makikomi,Shinmeisho No Waza,7,16,harai-makikomi
64,Uchi Mata Makikomi,Shinmeisho No Waza,7,17,uchi-mata-makikomi
65,Sode Tsurikomi Goshi,Shinmeisho No Waza,7,18,sode-tsurikomi-goshi


In [38]:
df = info_df.merge(nagewaza_df, left_on="name", right_on="technique_id", how="left")
df = (
    df[
        [
            "name",
            "translated_name",
            "category",
            "category_id",
            "num",
            "group",
            "group_id",
            "group_index",
        ]
    ]
    .rename({"num": "category_index"}, axis=1)
    .sort_values(["category_id", "category_index"])
    .reset_index(drop=True)
)
df["group_index"] = df["group_index"].fillna(0).astype(int)
df["group_id"] = df["group_id"].fillna(8).astype(int)
df["group"] = df["group"].fillna("Uncategorized")
df

Unnamed: 0,name,translated_name,category,category_id,category_index,group,group_id,group_index
0,seoi-nage,Shoulder Throw,te-waza,1,0,Dai Ikkyo,1,8
1,ippon-seoi-nage,One-arm Shoulder Throw,te-waza,1,1,Shinmeisho No Waza,7,19
2,seoi-otoshi,Shoulder Drop,te-waza,1,2,Habukareta Waza,6,2
3,tai-otoshi,Body Drop,te-waza,1,3,Dai Nikyo,2,6
4,kata-guruma,Shoulder Wheel,te-waza,1,4,Sankyo,3,8
...,...,...,...,...,...,...,...,...
112,ude-hishigi-hara-gatame,Arm Lock with Stomach,kansetsu-waza,8,6,Uncategorized,8,0
113,ude-hishigi-ashi-gatame,Arm Lock with Leg,kansetsu-waza,8,7,Uncategorized,8,0
114,ude-hishigi-te-gatame,Arm Lock with Hand,kansetsu-waza,8,8,Uncategorized,8,0
115,ude-hishigi-sankaku-gatame,Triangle Arm Lock,kansetsu-waza,8,9,Uncategorized,8,0


In [39]:
df.to_csv("../data/meta/waza.csv", index=False)
df.to_json("../data/meta/waza.json", orient="records")