- https://chat.openai.com/share/f0416ebd-c077-433d-927b-33db4c47bd07
- https://chat.openai.com/share/773255ce-2ce3-4382-8afb-4a31b4c21dae

In [99]:
import pandas as pd
from pathlib import Path

def parse_gif_path(gif_path):
    """Split a clip path into the fields of one ``info_df`` row.

    The stem must consist of exactly four ``_``-separated parts, e.g.
    ``judo_1-00_te-waza_seoi-nage``: a prefix (ignored), a
    ``<category_id>-<category_index>`` pair, the category and the technique
    name. Any other shape raises ``ValueError`` from the tuple unpacking, and
    non-numeric ids raise ``ValueError`` from ``int``.

    Returns ``[category, category_id, category_index, name, filename]`` with
    both ids converted to ``int``.
    """
    _, clip_id, category, name = gif_path.stem.split("_")
    category_id, category_index = clip_id.split("-")
    return [category, int(category_id), int(category_index), name, gif_path.name]


# Parsing lives in a function so its temporaries stay out of the notebook's
# global namespace; in particular nothing rebinds the builtin `id`, which
# would otherwise break any later cell that calls `id(...)`.
gifs = sorted(Path("../data/clips/waza-masked-gif").glob("*.gif"))
data = [parse_gif_path(gif_path) for gif_path in gifs]
info_df = pd.DataFrame(
    data, columns=["category", "category_id", "category_index", "name", "path"]
)
# Keep a CSV copy of the parsed clip metadata.
info_df.to_csv("../data/tmp/gif-info.csv", index=False)
info_df

Unnamed: 0,category,category_id,category_index,name,path
0,te-waza,1,0,seoi-nage,judo_1-00_te-waza_seoi-nage.gif
1,te-waza,1,1,ippon-seoi-nage,judo_1-01_te-waza_ippon-seoi-nage.gif
2,te-waza,1,2,seoi-otoshi,judo_1-02_te-waza_seoi-otoshi.gif
3,te-waza,1,3,tai-otoshi,judo_1-03_te-waza_tai-otoshi.gif
4,te-waza,1,4,kata-guruma,judo_1-04_te-waza_kata-guruma.gif
...,...,...,...,...,...
105,kansetsu-waza,8,5,ude-hishigi-hara-gatame,judo_8-05_kansetsu-waza_ude-hishigi-hara-gatam...
106,kansetsu-waza,8,6,ude-hishigi-ashi-gatame,judo_8-06_kansetsu-waza_ude-hishigi-ashi-gatam...
107,kansetsu-waza,8,7,ude-hishigi-te-gatame,judo_8-07_kansetsu-waza_ude-hishigi-te-gatame.gif
108,kansetsu-waza,8,8,ude-hishigi-sankaku-gatame,judo_8-08_kansetsu-waza_ude-hishigi-sankaku-ga...


In [100]:
# Table with one row per technique: `name` (hyphenated romaji) and
# `translated_name` (English rendering). Used below both as the list of
# canonical spellings and as the source of translations.
translation = pd.read_csv(Path("../data/datasets/waza-translation.csv"))
translation

Unnamed: 0,name,translated_name
0,de-ashi-harai,Forward Foot Sweep
1,hiza-guruma,Knee Wheel
2,sasae-tsurikomi-ashi,Propping-Lifting Pulling Ankle Throw
3,o-soto-gari,Major Outer Reaping
4,o-uchi-gari,Major Inner Reaping
...,...,...
105,uchi-mata-makikomi,Inner Thigh Wraparound
106,harai-makikomi,Sweeping Wraparound
107,ko-uchi-makikomi,Minor Inner Wraparound
108,kani-basami,Crab Claw Scissors


In [102]:
nagewaza_df = pd.read_csv("../data/datasets/nagewaza.csv")
# Bring both text columns to the lower-case, hyphen-separated form used by
# the other tables in this notebook.
for text_column in ("group", "name"):
    nagewaza_df[text_column] = (
        nagewaza_df[text_column].str.lower().str.replace(" ", "-")
    )
nagewaza_df.head()

Unnamed: 0,name,group,group_id,group_index
0,deashi-hari,dai-ikkyo,1,1
1,hiza-guruma,dai-ikkyo,1,2
2,sasae-tsurikomi-ashi,dai-ikkyo,1,3
3,uki-goshi,dai-ikkyo,1,4
4,osoto-gari,dai-ikkyo,1,5


In [103]:
import editdistance


def closest_item(item, items):
    """Return the element of ``items`` closest to ``item`` by edit distance.

    Hyphens are removed from both strings before they are compared, so
    spellings that differ only in hyphenation (``"osoto-gari"`` vs
    ``"o-soto-gari"``) have distance zero.

    Args:
        item: The string to look up.
        items: Iterable of candidate strings.

    Returns:
        The candidate, in its original hyphenated form, with the smallest
        ``editdistance.eval`` score. On a tie the earliest candidate wins,
        which is how ``min`` resolves equal keys.

    Raises:
        ValueError: If ``items`` is empty (raised by ``min``).
    """
    # Normalise the query once instead of once per candidate.
    target = item.replace("-", "")

    def distance_to_target(candidate):
        return editdistance.eval(target, candidate.replace("-", ""))

    return min(items, key=distance_to_target)


canonical_names = translation["name"]


def to_canonical(raw_name):
    """Map one nagewaza spelling to its nearest name in the translation table."""
    return closest_item(raw_name, canonical_names)


# Every row receives *some* canonical name, even when the nearest match is
# poor, because closest_item always returns a candidate.
nagewaza_df["canon_name"] = nagewaza_df["name"].apply(to_canonical)

# Remove daki-age (presumably it has no real counterpart among the canonical
# names, so its fuzzy match would be wrong; TODO confirm).
is_daki_age = nagewaza_df["name"] == "daki-age"
nagewaza_df = nagewaza_df[~is_daki_age]
nagewaza_df

Unnamed: 0,name,group,group_id,group_index,canon_name
0,deashi-hari,dai-ikkyo,1,1,de-ashi-harai
1,hiza-guruma,dai-ikkyo,1,2,hiza-guruma
2,sasae-tsurikomi-ashi,dai-ikkyo,1,3,sasae-tsurikomi-ashi
3,uki-goshi,dai-ikkyo,1,4,uki-goshi
4,osoto-gari,dai-ikkyo,1,5,o-soto-gari
...,...,...,...,...,...
62,kawazu-gake,shinmeisho-no-waza,7,15,kawazu-gake
63,harai-makikomi,shinmeisho-no-waza,7,16,harai-makikomi
64,uchi-mata-makikomi,shinmeisho-no-waza,7,17,uchi-mata-makikomi
65,sode-tsurikomi-goshi,shinmeisho-no-waza,7,18,sode-tsurikomi-goshi


In [104]:
# Sanity check on the fuzzy matching: show any canonical name that was
# assigned to more than one nagewaza row. Only the ten most frequent
# canonical names are inspected.
canon_counts = nagewaza_df.canon_name.groupby(nagewaza_df.canon_name).count()
most_frequent = canon_counts.sort_values(ascending=False).head(10).index

for canon in most_frequent:
    matches = nagewaza_df[nagewaza_df.canon_name == canon]
    if len(matches) <= 1:
        continue
    print(canon)
    display(matches)

In [105]:
# Final column order of the exported table ("path" is renamed to "filename").
output_columns = [
    "name",
    "translated_name",
    "category",
    "category_id",
    "category_index",
    "group",
    "group_id",
    "group_index",
    "path",
]

# The raw nagewaza spelling is dropped so that only the clip's own `name`
# column survives the join; rows are matched through `canon_name`.
nagewaza_groups = nagewaza_df.drop(columns=["name"])
clips_with_groups = info_df.merge(
    nagewaza_groups, left_on="name", right_on="canon_name", how="left"
)
clips_translated = clips_with_groups.merge(translation, on="name", how="left")

df = clips_translated[output_columns].rename(columns={"path": "filename"})
df = df.sort_values(["category_id", "category_index"]).reset_index(drop=True)

# Clips without a nagewaza match come out of the left join with missing group
# fields; they are labelled "uncategorized" with group_id 8 and group_index 0.
df = df.assign(
    group_index=df["group_index"].fillna(0).astype(int),
    group_id=df["group_id"].fillna(8).astype(int),
    group=df["group"].fillna("uncategorized"),
)
df

Unnamed: 0,name,translated_name,category,category_id,category_index,group,group_id,group_index,filename
0,seoi-nage,Shoulder Throw,te-waza,1,0,dai-ikkyo,1,8,judo_1-00_te-waza_seoi-nage.gif
1,ippon-seoi-nage,One-arm Shoulder Throw,te-waza,1,1,shinmeisho-no-waza,7,19,judo_1-01_te-waza_ippon-seoi-nage.gif
2,seoi-otoshi,Shoulder Drop,te-waza,1,2,habukareta-waza,6,2,judo_1-02_te-waza_seoi-otoshi.gif
3,tai-otoshi,Body Drop,te-waza,1,3,dai-nikyo,2,6,judo_1-03_te-waza_tai-otoshi.gif
4,kata-guruma,Shoulder Wheel,te-waza,1,4,sankyo,3,8,judo_1-04_te-waza_kata-guruma.gif
...,...,...,...,...,...,...,...,...,...
105,ude-hishigi-hara-gatame,Arm Lock with Stomach,kansetsu-waza,8,5,uncategorized,8,0,judo_8-05_kansetsu-waza_ude-hishigi-hara-gatam...
106,ude-hishigi-ashi-gatame,Arm Lock with Leg,kansetsu-waza,8,6,uncategorized,8,0,judo_8-06_kansetsu-waza_ude-hishigi-ashi-gatam...
107,ude-hishigi-te-gatame,Arm Lock with Hand,kansetsu-waza,8,7,uncategorized,8,0,judo_8-07_kansetsu-waza_ude-hishigi-te-gatame.gif
108,ude-hishigi-sankaku-gatame,Triangle Arm Lock,kansetsu-waza,8,8,uncategorized,8,0,judo_8-08_kansetsu-waza_ude-hishigi-sankaku-ga...


In [106]:
# Write the combined table in two formats: a CSV without the index column
# and a JSON file holding one record (object) per row.
csv_path = "../data/datasets/waza.csv"
json_path = "../data/datasets/waza.json"
df.to_csv(csv_path, index=False)
df.to_json(json_path, orient="records")