In [285]:
import pandas as pd
import numpy as np
import glob
import os

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [286]:
# Load one sample recording and keep only the three columns used below.
df = pd.read_csv(
    "data/innovaid_hackathon_anima/input/0aa0abPoXao4AYllJqUT.csv"
).loc[:, ["RX", "RY", "SCENE_INDEX"]]
df.head()

Unnamed: 0,RX,RY,SCENE_INDEX
0,0.722431,0.189333,1.0
1,0.720382,0.170944,1.0
2,0.718333,0.152556,1.0
3,0.726667,0.195111,1.0
4,0.735,0.237667,1.0


In [287]:
def coord_mean(coordinates, n_chunks=None):
    """Return the mean of each consecutive chunk of ``coordinates``.

    The values are split with ``np.array_split``, so the input length does not
    have to be divisible by the chunk count. A chunk that ends up empty (fewer
    values than chunks) yields NaN.

    Parameters
    ----------
    coordinates : sequence or pandas.Series of numbers
        Values to summarise, in their original order.
    n_chunks : int, optional
        Number of chunks to split into. When omitted, the notebook-level
        ``size`` variable is used, so the function can still be passed
        directly to ``groupby(...).aggregate``.

    Returns
    -------
    list
        One mean per chunk, in chunk order.
    """
    if n_chunks is None:
        # Fall back to the notebook-wide setting; this raises NameError when
        # the cell that assigns `size` has not been executed yet.
        n_chunks = size
    chunks = np.array_split(coordinates, n_chunks)
    return [np.mean(chunk) for chunk in chunks]

def coord_std(coordinates, n_chunks=None):
    """Return the standard deviation of each consecutive chunk of ``coordinates``.

    The values are split with ``np.array_split`` and each chunk is reduced with
    ``np.std`` using its default arguments.

    Parameters
    ----------
    coordinates : sequence or pandas.Series of numbers
        Values to summarise, in their original order.
    n_chunks : int, optional
        Number of chunks to split into. When omitted, the notebook-level
        ``size`` variable is used, so the function can still be passed
        directly to ``groupby(...).aggregate``.

    Returns
    -------
    list
        One standard deviation per chunk, in chunk order.
    """
    if n_chunks is None:
        # Fall back to the notebook-wide setting; this raises NameError when
        # the cell that assigns `size` has not been executed yet.
        n_chunks = size
    chunks = np.array_split(coordinates, n_chunks)
    return [np.std(chunk) for chunk in chunks]

In [288]:
# Number of chunks per scene; coord_mean / coord_std read this global.
size = 3

# Per scene: chunk-wise mean and std of both coordinate columns. The resulting
# two-level column index is then flattened to names such as "RX_coord_mean".
df = (
    df.groupby(["SCENE_INDEX"])
    .agg({"RX": [coord_mean, coord_std], "RY": [coord_mean, coord_std]})
    .reset_index()
)
df.columns = [f"{top}_{sub}" if sub else top for top, sub in df.columns]
df.head()

Unnamed: 0,SCENE_INDEX,RX_coord_mean,RX_coord_std,RY_coord_mean,RY_coord_std
0,1.0,"[0.3831161492374728, 0.5782952069716775, 0.293...","[0.231742069333647, 0.17752823363344875, 0.083...","[0.6421448801742919, 0.48705991285403055, 0.55...","[0.17325026013249076, 0.09202646812482047, 0.0..."
1,3.0,"[0.5449180555555555, 0.3450249999999999, 0.399...","[0.06477554423083788, 0.18593439410726847, 0.2...","[0.5479466666666666, 0.6368766666666666, 0.653...","[0.03515053159775736, 0.045806988105083477, 0...."
2,5.0,"[0.5027171840958606, 0.6949993191721134, 0.216...","[0.14424024122914558, 0.05509810451549506, 0.1...","[0.18157734204793027, 0.4367298474945534, 0.54...","[0.2612471490240733, 0.04544137747880368, 0.06..."
3,7.0,"[0.42790464743589735, 0.6244580610021787, 0.37...","[0.18550992803407576, 0.04366127980260915, 0.1...","[0.26517307692307696, 0.5659618736383443, 0.63...","[0.35592856046345944, 0.03812493078547487, 0.0..."
4,9.0,"[0.50516359508547, 0.29256127450980385, 0.6697...","[0.09103099433547374, 0.14259226371502595, 0.0...","[0.5497179487179487, 0.5740642701525055, 0.494...","[0.08184984097704151, 0.07532192779752094, 0.0..."


In [289]:
# Unpack the per-scene lists column by column so every chunk gets its own row,
# then renumber the rows from 0.
df = df.apply(pd.Series.explode, axis=0).reset_index(drop=True)
df

Unnamed: 0,SCENE_INDEX,RX_coord_mean,RX_coord_std,RY_coord_mean,RY_coord_std
0,1.0,0.383116,0.231742,0.642145,0.17325
1,1.0,0.578295,0.177528,0.48706,0.092026
2,1.0,0.293778,0.083271,0.55852,0.058772
3,3.0,0.544918,0.064776,0.547947,0.035151
4,3.0,0.345025,0.185934,0.636877,0.045807
...,...,...,...,...,...
145,97.0,0.579729,0.186907,0.475472,0.051116
146,97.0,0.608046,0.034004,0.448942,0.03324
147,99.0,0.530132,0.133821,0.535813,0.22911
148,99.0,0.240055,0.148024,0.508878,0.070544


In [290]:
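# NOTE: disabled batch export of the chunked coordinate statistics.
# Uncommenting this cell writes one CSV per input file to input_coords_{size}/.
# size = 30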
# os.mkdir(f"data/innovaid_hackathon_anima/input_coords_{size}/")
#
# files = glob.glob("data/innovaid_hackathon_anima/input/*.csv")
# for i, csv in enumerate(files):
#     print(f"{i}/{len(files)}: {csv}")
#
#     df = pd.read_csv(csv)
#     df = df[["RX", "RY", "SCENE_INDEX"]]
#     df = df.groupby(["SCENE_INDEX"]).aggregate({"RX": [coord_mean, coord_std],
#                                                 "RY": [coord_mean, coord_std]}).reset_index()
#     df.columns = [f"{col[0]}_{col[1]}" if col[1] else col[0] for col in df.columns]
#     df = df.apply(lambda col: col.explode(), axis=0).reset_index(drop=True)
#
#     df.to_csv(csv.replace("/input/", f"/input_coords_{size}/"), index=False)

In [291]:
def image_pos_to_binary(image_pos):
    """Encode an image-position label as an integer code.

    ``"left"`` maps to 0, ``"right"`` maps to 1 and every other value maps to 2.
    """
    for code, label in enumerate(("left", "right")):
        if image_pos == label:
            return code
    return 2

In [292]:
# Load one sample recording, keep the position label and scene id, and replace
# the label with its integer code (see image_pos_to_binary).
df = (
    pd.read_csv("data/innovaid_hackathon_anima/input/0aa0abPoXao4AYllJqUT.csv")
    .loc[:, ["IMAGE_POSITION", "SCENE_INDEX"]]
    .assign(IMAGE_POSITION=lambda frame: frame["IMAGE_POSITION"].apply(image_pos_to_binary))
)

df.head()

Unnamed: 0,IMAGE_POSITION,SCENE_INDEX
0,1,1.0
1,2,1.0
2,2,1.0
3,1,1.0
4,1,1.0


In [293]:
def agg_pos(image_positions, n_chunks=None):
    """Return the most frequent position code of each consecutive chunk.

    ``image_positions`` is split with ``np.array_split``. For every non-empty
    chunk the code with the highest count is returned (``np.argmax`` picks the
    smallest code on ties). An empty chunk yields 2, the code that
    ``image_pos_to_binary`` assigns to anything other than "left"/"right".

    Parameters
    ----------
    image_positions : sequence or pandas.Series of non-negative ints
        Encoded positions, in their original order.
    n_chunks : int, optional
        Number of chunks to split into. When omitted, the notebook-level
        ``size`` variable is used, so the function can still be passed
        directly to ``groupby(...).aggregate``.

    Returns
    -------
    list
        One position code per chunk, in chunk order.
    """
    if n_chunks is None:
        # Fall back to the notebook-wide setting; this raises NameError when
        # the cell that assigns `size` has not been executed yet.
        n_chunks = size
    chunks = np.array_split(image_positions, n_chunks)
    return [np.argmax(np.bincount(chunk)) if len(chunk) >= 1 else 2 for chunk in chunks]

In [294]:
# Number of chunks per scene; agg_pos reads this global.
size = 3

# One list of per-chunk majority position codes for every scene.
df = df.groupby(["SCENE_INDEX"])["IMAGE_POSITION"].agg(agg_pos).reset_index()
df.head()

Unnamed: 0,SCENE_INDEX,IMAGE_POSITION
0,1.0,"[0, 1, 0]"
1,3.0,"[2, 0, 1]"
2,5.0,"[2, 1, 0]"
3,7.0,"[2, 1, 0]"
4,9.0,"[2, 0, 1]"


In [295]:
# Turn each per-scene list into one row per chunk and renumber the rows from 0.
df = df.apply(pd.Series.explode).reset_index(drop=True)
df

Unnamed: 0,SCENE_INDEX,IMAGE_POSITION
0,1.0,0
1,1.0,1
2,1.0,0
3,3.0,2
4,3.0,0
...,...,...
145,97.0,1
146,97.0,1
147,99.0,1
148,99.0,0


In [296]:
# Number of chunks per scene for the exported files; agg_pos reads this global.
size = 30

# exist_ok=True lets this cell be re-run without failing on the directory that
# a previous run already created.
positions_dir = f"data/innovaid_hackathon_anima/input_positions_{size}/"
os.makedirs(positions_dir, exist_ok=True)

files = glob.glob("data/innovaid_hackathon_anima/input/*.csv")
for i, csv in enumerate(files):
    print(f"{i}/{len(files)}: {csv}")

    df = pd.read_csv(csv)
    df = df[["IMAGE_POSITION", "SCENE_INDEX"]]
    df["IMAGE_POSITION"] = df["IMAGE_POSITION"].apply(image_pos_to_binary)

    # Majority position code for each of the `size` chunks of every scene.
    df = df.groupby(["SCENE_INDEX"]).aggregate({"IMAGE_POSITION": agg_pos}).reset_index()

    # One row per (scene, chunk).
    df = df.apply(lambda col: col.explode(), axis=0).reset_index(drop=True)

    # Build the destination from the file name instead of str.replace on the
    # whole path: if "/input/" does not occur literally (e.g. an OS-specific
    # separator returned by glob), replace() would leave the path unchanged and
    # the input file would be overwritten.
    df.to_csv(os.path.join(positions_dir, os.path.basename(csv)), index=False)

0/3102: data/innovaid_hackathon_anima/input/NIWf76Hey8eftwtH7Mhn.csv
1/3102: data/innovaid_hackathon_anima/input/5wkYPhQUJDJ5wj1V2Nk4.csv
2/3102: data/innovaid_hackathon_anima/input/krJ3q9NcKQ2elEdfUc2b.csv
3/3102: data/innovaid_hackathon_anima/input/ykb8Kau47lOpf94ogXqW.csv
4/3102: data/innovaid_hackathon_anima/input/9kaeHX0tKCwDx3YTqtKa.csv
5/3102: data/innovaid_hackathon_anima/input/UEGF9hSh8QZt8KSYiXWp.csv
6/3102: data/innovaid_hackathon_anima/input/6xXlQ54eY3eQmQK85PWZ.csv
7/3102: data/innovaid_hackathon_anima/input/x4kas0wczT56dyOOT6Qm.csv
8/3102: data/innovaid_hackathon_anima/input/rrU7PfbZjJ1PYPQfpsBC.csv
9/3102: data/innovaid_hackathon_anima/input/8He1aUWtiBw4Ql4pY9AD.csv
10/3102: data/innovaid_hackathon_anima/input/P7tbw1M1GKfYePp4lWeq.csv
11/3102: data/innovaid_hackathon_anima/input/ZfHt08ZCxlw8VKyTDHk2.csv
12/3102: data/innovaid_hackathon_anima/input/mMYeQaHJNjQ1XvvmT7IZ.csv
13/3102: data/innovaid_hackathon_anima/input/sHsuhtf1RpCPKAcc1vTW.csv
14/3102: data/innovaid_hackath

In [297]:
# Reload the sample recording (df was overwritten above), keep the position
# label and scene id, and encode the label with image_pos_to_binary.
df = (
    pd.read_csv("data/innovaid_hackathon_anima/input/0aa0abPoXao4AYllJqUT.csv")
    .loc[:, ["IMAGE_POSITION", "SCENE_INDEX"]]
    .assign(IMAGE_POSITION=lambda frame: frame["IMAGE_POSITION"].apply(image_pos_to_binary))
)

df.head()

Unnamed: 0,IMAGE_POSITION,SCENE_INDEX
0,1,1.0
1,2,1.0
2,2,1.0
3,1,1.0
4,1,1.0


In [301]:
def agg_pos_max(image_positions):
    """Return the most frequent code in ``image_positions``.

    Counts come from ``np.bincount``, so the values must be non-negative
    integers; on ties the smallest code is returned.
    """
    counts = np.bincount(image_positions)
    return counts.argmax()

In [302]:
# Single most frequent position code for every scene.
df = df.groupby(["SCENE_INDEX"])["IMAGE_POSITION"].agg(agg_pos_max).reset_index()
df.head()

Unnamed: 0,SCENE_INDEX,IMAGE_POSITION
0,1.0,0
1,3.0,2
2,5.0,0
3,7.0,1
4,9.0,1


In [303]:
# exist_ok=True lets this cell be re-run without failing on the directory that
# a previous run already created.
max_positions_dir = "data/innovaid_hackathon_anima/input_max_positions/"
os.makedirs(max_positions_dir, exist_ok=True)

files = glob.glob("data/innovaid_hackathon_anima/input/*.csv")
for i, csv in enumerate(files):
    print(f"{i}/{len(files)}: {csv}")

    df = pd.read_csv(csv)
    df = df[["IMAGE_POSITION", "SCENE_INDEX"]]
    df["IMAGE_POSITION"] = df["IMAGE_POSITION"].apply(image_pos_to_binary)

    # Single most frequent position code for every scene.
    df = df.groupby(["SCENE_INDEX"]).aggregate({"IMAGE_POSITION": agg_pos_max}).reset_index()

    # Build the destination from the file name instead of str.replace on the
    # whole path: if "/input/" does not occur literally (e.g. an OS-specific
    # separator returned by glob), replace() would leave the path unchanged and
    # the input file would be overwritten.
    df.to_csv(os.path.join(max_positions_dir, os.path.basename(csv)), index=False)

0/3102: data/innovaid_hackathon_anima/input/NIWf76Hey8eftwtH7Mhn.csv
1/3102: data/innovaid_hackathon_anima/input/5wkYPhQUJDJ5wj1V2Nk4.csv
2/3102: data/innovaid_hackathon_anima/input/krJ3q9NcKQ2elEdfUc2b.csv
3/3102: data/innovaid_hackathon_anima/input/ykb8Kau47lOpf94ogXqW.csv
4/3102: data/innovaid_hackathon_anima/input/9kaeHX0tKCwDx3YTqtKa.csv
5/3102: data/innovaid_hackathon_anima/input/UEGF9hSh8QZt8KSYiXWp.csv
6/3102: data/innovaid_hackathon_anima/input/6xXlQ54eY3eQmQK85PWZ.csv
7/3102: data/innovaid_hackathon_anima/input/x4kas0wczT56dyOOT6Qm.csv
8/3102: data/innovaid_hackathon_anima/input/rrU7PfbZjJ1PYPQfpsBC.csv
9/3102: data/innovaid_hackathon_anima/input/8He1aUWtiBw4Ql4pY9AD.csv
10/3102: data/innovaid_hackathon_anima/input/P7tbw1M1GKfYePp4lWeq.csv
11/3102: data/innovaid_hackathon_anima/input/ZfHt08ZCxlw8VKyTDHk2.csv
12/3102: data/innovaid_hackathon_anima/input/mMYeQaHJNjQ1XvvmT7IZ.csv
13/3102: data/innovaid_hackathon_anima/input/sHsuhtf1RpCPKAcc1vTW.csv
14/3102: data/innovaid_hackath