In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from tqdm.notebook import tqdm
import os
import json

# Switch matplotlib to its "ggplot" style sheet for any figure created after this point.
plt.style.use("ggplot")

In [7]:
# Competition slug; used both for the download call and for every data directory below.
competition = 'asl-signs'

# A non-empty KAGGLE_KERNEL_RUN_TYPE is treated as "running inside a Kaggle kernel".
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
if iskaggle:
    # Kaggle mounts competition data under ../input/<competition slug>, so the
    # slug variable (not the literal word "competition") must be used here.
    path = Path('..') / 'input' / competition
else:
    # Imported lazily: these are only needed for the local download path.
    import zipfile
    import kaggle

    path = Path.home() / '.data' / competition
    if not path.exists():
        # parents=True: ~/.data itself may not exist yet on a fresh machine.
        path.mkdir(parents=True, exist_ok=True)
        kaggle.api.competition_download_cli(competition, path=path)
        # Context manager closes the archive handle once extraction is done.
        with zipfile.ZipFile(path / f'{competition}.zip') as archive:
            archive.extractall(path)

# Labeled Data

In [8]:
# Parse the JSON file that (per its name) maps each sign to a prediction index.
sign_labels = json.loads((path / 'sign_to_prediction_index_map.json').read_text())

In [9]:
# Load the per-sequence metadata and key every row by "<participant_id>_<sequence_id>".
train = (
    pd.read_csv(path / 'train_with_meta.csv')
    .assign(
        idx=lambda df: df['participant_id'].astype(str).str.cat(
            df['sequence_id'].astype(str), sep='_'
        )
    )
    .set_index('idx')
)
train.head()

Unnamed: 0_level_0,path,participant_id,sequence_id,sign,cnt_partial_nulls,cnt_partial_nulls_by_frame,face,left_hand,pose,right_hand,total_frames
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
26734_1000035562,train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow,0.0,0.0,23,0,23,11,23
28656_1000106739,train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait,0.0,0.0,11,0,11,2,11
16069_100015657,train_landmark_files/16069/100015657.parquet,16069,100015657,cloud,0.0,0.0,105,28,105,0,105
25571_1000210073,train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird,0.0,0.0,12,0,12,12,12
62590_1000240708,train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie,0.0,0.0,18,0,18,18,18


# 3D Plotly Viz

In [10]:
# Polylines expressed as landmark indices. A ``None`` entry ends the current
# polyline, so the next index starts a new, unconnected stroke.
_HAND_LINE_INDICES = [
    0, 1, None, 0, 5, None, 0, 17, None,
    1, 2, 3, 4, None,
    5, 6, 7, 8, None,
    5, 9, None,
    9, 10, 11, 12, None,
    9, 13, None,
    13, 14, 15, 16, None,
    13, 17, None,
    17, 18, 19, 20, None,
]

# NOTE(review): indices are assumed to follow the MediaPipe pose numbering
# (7 = left ear, 8 = right ear). Under that numbering the right-eye chain must
# end 6 -> 8; the previous 6 -> 7 -> 8 drew a stroke across the head.
_POSE_LINE_INDICES = [
    0, 1, 2, 3, 7, None,
    0, 4, 5, 6, 8, None,
    11, 13, 15, 17, 19, 15, 21, None,
    11, 12, 14, 16, 18, 20, 16, 22, None,
    11, 23, 25, 27, 31, 29, 27, None,
    12, 24, 26, 28, 30, 32, 28, None,
    23, 24, None,
]


def create_landmark_lines(frame_landmarks, body_part='hand'):
    """Order one frame's landmarks into polylines for line plotting.

    Parameters
    ----------
    frame_landmarks : pandas.DataFrame
        Landmark rows of a single frame. Must contain a ``landmark_index``
        column; it becomes the lookup index.
    body_part : {'hand', 'pose'}, default 'hand'
        Which connection table to use.

    Returns
    -------
    pandas.DataFrame
        One row per polyline vertex, in drawing order. Polylines are separated
        by a row whose ``frame``/``row_id``/``type``/``x``/``y``/``z`` values
        are all ``None``. An empty ``frame_landmarks`` yields a single such
        separator row.

    Raises
    ------
    ValueError
        If ``body_part`` is neither ``'hand'`` nor ``'pose'`` (only checked
        when ``frame_landmarks`` is non-empty).
    KeyError
        If a landmark index required by the connection table is missing.
    """
    separator_row = pd.Series({
        'frame': None, 'row_id': None, 'type': None, 'x': None, 'y': None, 'z': None
    })
    frame_landmarks = frame_landmarks.set_index('landmark_index')

    if frame_landmarks.empty:
        # Nothing to connect: emit a lone separator so callers still get x/y/z columns.
        line_indices = [None]
    elif body_part == 'hand':
        line_indices = _HAND_LINE_INDICES
    elif body_part == 'pose':
        line_indices = _POSE_LINE_INDICES
    else:
        raise ValueError(
            f"unknown body_part {body_part!r}; expected 'hand' or 'pose'"
        )

    lines = pd.DataFrame([
        separator_row if i is None else frame_landmarks.loc[i]
        for i in line_indices
    ])
    return lines

In [16]:
COORD_COLS = ['x', 'y', 'z']

def sample_by_sign(sign, random_state=69, mask=None):
    """Pick one training sequence for ``sign`` and load its landmark rows.

    Reads the notebook-level ``train`` frame and ``path`` data directory.

    Parameters
    ----------
    sign : str
        Value to match against ``train['sign']``.
    random_state : int, default 69
        Seed forwarded to ``DataFrame.sample`` so the pick is repeatable.
    mask : pandas.Series of bool, optional
        Extra row filter on ``train``; combined with the sign filter via ``&``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The single sampled row of ``train`` and the landmarks read from the
        parquet file named in its ``path`` column, with rows whose x, y and z
        are all missing removed.

    Raises
    ------
    ValueError
        If no row of ``train`` satisfies the combined filter.
    """
    sign_mask = train.loc[:, 'sign'] == sign
    # Fixed: the no-mask branch used an undefined name (``sign_misk``) and
    # raised NameError whenever ``mask`` was left at its default.
    selection = sign_mask if mask is None else (sign_mask & mask)
    if not selection.any():
        raise ValueError(
            f"no training sequences match sign={sign!r} with the given mask"
        )
    sample = train.loc[selection, :].sample(n=1, random_state=random_state)

    landmarks = pd.read_parquet(path / sample['path'].values[0])
    # Keep a row as soon as at least one coordinate is present.
    has_coords = landmarks.loc[:, COORD_COLS].notna().any(axis=1)
    landmarks = landmarks.loc[has_coords, :]
    return sample, landmarks

In [26]:
# All 'brother' sequences whose right_hand value is at most 4.
sign_mask = train['sign'].eq('brother')
mask = sign_mask & train['right_hand'].le(4)
sample = train[mask]
sample

Unnamed: 0_level_0,path,participant_id,sequence_id,sign,cnt_partial_nulls,cnt_partial_nulls_by_frame,face,left_hand,pose,right_hand,total_frames
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
55372_1032732873,train_landmark_files/55372/1032732873.parquet,55372,1032732873,brother,0.0,0.0,22,7,22,0,22
22343_1076129682,train_landmark_files/22343/1076129682.parquet,22343,1076129682,brother,0.0,0.0,163,92,163,0,163
37779_1081663,train_landmark_files/37779/1081663.parquet,37779,1081663,brother,0.0,0.0,6,0,6,4,6
37055_1086084178,train_landmark_files/37055/1086084178.parquet,37055,1086084178,brother,0.0,0.0,33,8,33,0,33
55372_1096964393,train_landmark_files/55372/1096964393.parquet,55372,1096964393,brother,0.0,0.0,6,6,6,0,6
...,...,...,...,...,...,...,...,...,...,...,...
32319_915811683,train_landmark_files/32319/915811683.parquet,32319,915811683,brother,0.0,0.0,37,14,37,0,37
55372_964553686,train_landmark_files/55372/964553686.parquet,55372,964553686,brother,0.0,0.0,28,25,28,0,28
25571_967877709,train_landmark_files/25571/967877709.parquet,25571,967877709,brother,0.0,0.0,6,0,6,3,6
16069_98889039,train_landmark_files/16069/98889039.parquet,16069,98889039,brother,0.0,0.0,15,3,15,0,15


In [12]:
def interpolate_values(landmarks, frame_ids=None):
    """Expand landmarks to a full frame x landmark grid and fill gaps per landmark.

    Parameters
    ----------
    landmarks : pandas.DataFrame
        Rows with ``frame``, ``landmark_index``, ``x``, ``y`` and ``z`` columns;
        each (frame, landmark_index) pair must be unique.
    frame_ids : list, optional
        Frames the result must cover. When omitted, the notebook-level
        ``frames`` list is used, so the cell defining ``frames`` has to run
        first. Pass it explicitly to avoid that hidden dependency.

    Returns
    -------
    pandas.DataFrame
        One row per (frame, landmark_index) combination, ordered by frame then
        landmark, with ``x``/``y``/``z`` interpolated along frames within each
        landmark using ``Series.interpolate()`` defaults. Other columns are NaN
        on the rows that were added.
    """
    if frame_ids is None:
        frame_ids = frames  # notebook-level global; kept for existing callers

    full_grid = pd.MultiIndex.from_product(
        [frame_ids, landmarks.landmark_index.unique().tolist()],
        names=['frame', 'landmark_index']
    )

    # Missing (frame, landmark) combinations become all-NaN rows.
    gridded = landmarks.set_index(['frame', 'landmark_index']).reindex(full_grid)

    # Sort landmark-major so each landmark's frames are contiguous and ordered.
    by_landmark = gridded.sort_index(level=[1, 0])
    # transform() returns a result indexed like its input. groupby.apply()
    # prepends the group key to the index on pandas >= 2.0, which no longer
    # lines up with the frame being assigned to.
    filled = by_landmark.assign(**{
        axis: by_landmark.groupby(level='landmark_index')[axis]
                         .transform(lambda g: g.interpolate())
        for axis in ('x', 'y', 'z')
    })
    return filled.sort_index(level=[0, 1]).reset_index()

In [30]:
# Draw one 'brother' sequence whose right_hand value lies in [1, 4].
sample, landmarks = sample_by_sign('brother', 696969, train.right_hand.between(1, 4))

# Keep only the hand and pose rows; every other landmark type is dropped.
pose_mask = landmarks['type'].isin(['right_hand', 'pose', 'left_hand'])
landmarks = landmarks[pose_mask]

# Distinct frame ids in order of first appearance; later cells iterate over these.
frames = landmarks['frame'].unique().tolist()

# Per landmark type: landmark/frame counts and the coordinate extents, rounded for display.
rounding_n = 2
(
    landmarks
    .groupby('type')
    .agg(
        cnt_landmarks=pd.NamedAgg('landmark_index', 'nunique'),
        cnt_frames=pd.NamedAgg('frame', 'nunique'),
        **{
            f'{axis}_{stat}': pd.NamedAgg(axis, stat)
            for axis in ('x', 'y', 'z')
            for stat in ('min', 'max')
        },
    )
    .round(rounding_n)
    .T
)

type,pose,right_hand
cnt_landmarks,33.0,21.0
cnt_frames,6.0,3.0
x_min,-0.16,0.07
x_max,1.02,0.35
y_min,0.32,0.13
y_max,2.36,0.49
z_min,-1.74,-0.12
z_max,2.2,0.02


In [31]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Two 3D scenes side by side: right hand in column 1, pose in column 2.
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "scatter3d"}, {"type": "scatter3d"}]],
)

hand_mask = landmarks.type == 'right_hand'
# Hand rows are expanded to every frame and gap-filled; pose rows are used as loaded.
hand_landmarks = interpolate_values(landmarks.loc[hand_mask, :])
pose_mask = landmarks.type == 'pose'
pose_landmarks = landmarks.loc[pose_mask, :]

# Slider position shown when the figure first renders. Its traces start
# visible; previously every trace started hidden and the figure opened blank.
ACTIVE_STEP = 0


def _add_frame_traces(part_landmarks, body_part, col, draw_lines):
    """Add one trace per entry of ``frames`` (in order) to subplot column ``col``.

    ``draw_lines=False`` adds landmark markers; ``draw_lines=True`` adds the
    polylines produced by ``create_landmark_lines`` for ``body_part``.
    """
    for position, frame in enumerate(frames):
        frame_landmarks = part_landmarks.loc[part_landmarks.frame == frame, :]
        is_visible = position == ACTIVE_STEP
        if draw_lines:
            landmark_lines = create_landmark_lines(frame_landmarks, body_part)
            trace = go.Scatter3d(
                visible=is_visible,
                x=landmark_lines.x, y=landmark_lines.y, z=landmark_lines.z,
                mode="lines",
                hoverinfo='skip',
            )
        else:
            trace = go.Scatter3d(
                visible=is_visible,
                x=frame_landmarks.x, y=frame_landmarks.y, z=frame_landmarks.z,
                text=frame_landmarks.landmark_index,
                marker=dict(size=8, opacity=0.8),
                mode="markers",
            )
        fig.add_trace(trace, row=1, col=col)


# Traces are added group by group, so trace (g * len(frames) + i) belongs to
# group g at frame position i. The slider steps below rely on this layout.
TRACE_GROUPS = [
    (hand_landmarks, 'hand', 1, False),
    (hand_landmarks, 'hand', 1, True),
    (pose_landmarks, 'pose', 2, False),
    (pose_landmarks, 'pose', 2, True),
]
for part_landmarks, body_part, col, draw_lines in TRACE_GROUPS:
    _add_frame_traces(part_landmarks, body_part, col, draw_lines)

n_frames = len(frames)
n_groups = len(TRACE_GROUPS)

steps = []
for i, frame in enumerate(frames):
    # One flag per trace actually in the figure (the old code sized this for
    # six groups although only four are added).
    visible = [False] * (n_frames * n_groups)
    for group in range(n_groups):
        visible[i + n_frames * group] = True  # show frame i's trace in every group
    steps.append(dict(
        method='update',
        args=[{"visible": visible},
              {"title": "Slider switched to step: " + str(frame)}],
        label=str(frame),
        value=i,
    ))

sliders = [dict(
    active=ACTIVE_STEP,
    currentvalue={'prefix': "Frame: "},
    pad={'t': 50},
    steps=steps
)]

fig.update_layout(
    sliders=sliders,
)

fig.show()

In [24]:
# Display the frame currently bound to `landmarks` as the cell output.
landmarks

Unnamed: 0,frame,row_id,type,landmark_index,x,y,z
489,0,0-pose-0,pose,0,0.542829,0.405250,-2.738175
490,0,0-pose-1,pose,1,0.589455,0.361238,-2.628115
491,0,0-pose-2,pose,2,0.615198,0.361651,-2.627928
492,0,0-pose-3,pose,3,0.640987,0.362792,-2.628239
493,0,0-pose-4,pose,4,0.500855,0.364411,-2.640941
...,...,...,...,...,...,...,...
27688,50,50-right_hand-16,right_hand,16,0.465096,0.725446,-0.073096
27689,50,50-right_hand-17,right_hand,17,0.185912,0.682704,-0.066093
27690,50,50-right_hand-18,right_hand,18,0.258927,0.728942,-0.077748
27691,50,50-right_hand-19,right_hand,19,0.303691,0.756792,-0.077782
