In [1]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D 
import pandas as pd
pd.options.mode.chained_assignment = None
import pitch_path.utils.preprocessing as pp

In [2]:
df = pd.read_feather('../data/input/sched_id429650_pitcher669302')
joints = pd.read_csv('../data/joint_ids.csv')

In [3]:
df_with_joints = pd.merge(df, joints, how='inner', on='joint_type_id')

In [4]:
df_with_joints.head()

Unnamed: 0,sched_id,joint_type_id,time,x,y,z,astros_pitch_id,pitcher_id,bats,throws,joint_type,hawkeye
0,429650,1,-1.995,1.394,60.263401,1.3348,17,669302,L,R,left ankle,lAnkle
1,429650,1,-1.992,1.3942,60.267899,1.3326,17,669302,L,R,left ankle,lAnkle
2,429650,1,-1.989,1.3942,60.270199,1.3308,17,669302,L,R,left ankle,lAnkle
3,429650,1,-1.985,1.3932,60.2719,1.3282,17,669302,L,R,left ankle,lAnkle
4,429650,1,-1.982,1.3938,60.271801,1.3262,17,669302,L,R,left ankle,lAnkle


## Pivot DF to Wide Table
This will result in a DF that has one row for each timestamp with all (x,y,z) values for each joint. 

In [5]:
wide_df = pd.pivot_table(df_with_joints,
               values=['x', 'y', 'z'],
               columns=['hawkeye', ],
               index=['astros_pitch_id', 'sched_id', 'pitcher_id', 'bats', 'throws', 'time'])\
            .reset_index()
wide_df.columns = [f"{col[1]}{'_' if col[1].strip() != '' else ''}{col[0]}" for col in wide_df.columns.values]

In [6]:
wide_df.head()

Unnamed: 0,astros_pitch_id,sched_id,pitcher_id,bats,throws,time,lAnkle_x,lEar_x,lElbow_x,lEye_x,...,lShoulder_z,lWrist_z,rAnkle_z,rEar_z,rElbow_z,rEye_z,rHip_z,rKnee_z,rShoulder_z,rWrist_z
0,17,429650,669302,L,R,-1.995,1.394,0.6206,1.0148,0.3326,...,6.2264,4.7556,1.2401,6.8644,5.1287,6.9131,4.3593,2.8157,6.1628,4.7455
1,17,429650,669302,L,R,-1.992,1.3942,0.6219,1.0159,0.3339,...,6.2268,4.7508,1.2396,6.8638,5.1282,6.9125,4.3587,2.8154,6.1624,4.7401
2,17,429650,669302,L,R,-1.989,1.3942,0.6233,1.0179,0.3351,...,6.2268,4.7445,1.2393,6.8624,5.127,6.9113,4.3576,2.8148,6.1608,4.7326
3,17,429650,669302,L,R,-1.985,1.3932,0.6241,1.0223,0.3355,...,6.2268,4.7356,1.2404,6.8622,5.1246,6.9116,4.3579,2.8158,6.1596,4.7229
4,17,429650,669302,L,R,-1.982,1.3938,0.6252,1.0258,0.3357,...,6.2272,4.7278,1.2416,6.8616,5.1231,6.9108,4.3585,2.8171,6.1588,4.7129


## Determine Columns To Keep
We want to keep the wrist, elbow, and shoulder of the throwing arm, as well as the front leg (opposite leg of throwing arm). We will use this to determine the subset of timestamps from leg lift to release, and then to build features off of.

In [7]:
# determine handedness, and front leg
all_handedness = ['l', 'r']
handedness = df_with_joints.throws.unique()[0].lower()
front_leg = [x for x in all_handedness if x != handedness][0]

# get (x,y,z) column names from handedness
joints_to_filter_to = [f"{handedness}Shoulder", f"{handedness}Elbow", f"{handedness}Wrist", f"{front_leg}Knee"]
joint_cols_fmt =  [f"{a}_{b}" for a in joints_to_filter_to for b in ['x', 'y', 'z']]

metadata_cols = ['astros_pitch_id',	'sched_id', 'pitcher_id', 'bats', 'throws', 'time']
columns_to_filter_to = metadata_cols + joint_cols_fmt

In [8]:
pitch_df = wide_df[columns_to_filter_to]
pitch_df['time_key'] = (pitch_df['time'] * 1000).astype(int)
pitch_df.head()

Unnamed: 0,astros_pitch_id,sched_id,pitcher_id,bats,throws,time,rShoulder_x,rShoulder_y,rShoulder_z,rElbow_x,rElbow_y,rElbow_z,rWrist_x,rWrist_y,rWrist_z,lKnee_x,lKnee_y,lKnee_z,time_key
0,17,429650,669302,L,R,-1.995,0.0547,60.6922,6.1628,-0.0243,60.911301,5.1287,-0.2626,60.1217,4.7455,0.8863,59.942902,2.7927,-1994
1,17,429650,669302,L,R,-1.992,0.0566,60.693401,6.1624,-0.0233,60.9123,5.1282,-0.2572,60.1236,4.7401,0.892,59.949699,2.7933,-1992
2,17,429650,669302,L,R,-1.989,0.0588,60.694199,6.1608,-0.0233,60.913601,5.127,-0.2535,60.127201,4.7326,0.8973,59.953602,2.7937,-1988
3,17,429650,669302,L,R,-1.985,0.0611,60.694,6.1596,-0.0197,60.908298,5.1246,-0.2507,60.125801,4.7229,0.9029,59.959301,2.7943,-1984
4,17,429650,669302,L,R,-1.982,0.065,60.694698,6.1588,-0.0169,60.905399,5.1231,-0.2442,60.126301,4.7129,0.907,59.964001,2.7948,-1982


## Add Start and Release Columns

In [9]:
leg_lift_col_name = [x for x in pitch_df.columns if 'Knee' in x and front_leg in x and 'z' in x][0]

In [10]:
pitch_df_w_ll = pp.set_leg_lift_time(pitch_df, leg_lift_col_name)
pitch_df_throw = pp.set_release_point(pitch_df_w_ll)
pitch_df_fil = pp.filter_df_to_start_release(pitch_df_throw)

In [11]:
print(pitch_df_throw.shape)
print(pitch_df_fil['astros_pitch_id'].unique().shape)
print(pitch_df_fil[pitch_df_fil['release'] == 1].shape)
print(pitch_df_fil[pitch_df_fil['start'] == 1].shape)
print(pitch_df_fil.shape)
print(pitch_df_fil[pitch_df_fil['start'] == 1].shape)
print(pitch_df_fil[pitch_df_fil['start'] == 1].shape)
print(pitch_df_fil[pitch_df_fil['start'] == 1].shape)

(86408, 21)
(96,)
(96, 24)
(96, 24)
(32189, 24)
(96, 24)
(96, 24)
(96, 24)


In [12]:
pitch_df_fil.head()

Unnamed: 0,astros_pitch_id,sched_id,pitcher_id,bats,throws,time,rShoulder_x,rShoulder_y,rShoulder_z,rElbow_x,...,rWrist_z,lKnee_x,lKnee_y,lKnee_z,time_key,start,release,time_25,time_5,time_75
0,17,429650,669302,L,R,-1.249,0.5306,60.8559,6.1299,0.3768,...,5.6102,-0.439,59.757801,2.8943,-1249,1.0,0,0,0,0
1,17,429650,669302,L,R,-1.245,0.5311,60.8531,6.1316,0.3667,...,5.6213,-0.4642,59.7668,2.9082,-1244,0.0,0,0,0,0
2,17,429650,669302,L,R,-1.242,0.5331,60.852402,6.1312,0.3557,...,5.6295,-0.4899,59.776402,2.9208,-1242,0.0,0,0,0,0
3,17,429650,669302,L,R,-1.239,0.5365,60.8503,6.1314,0.3486,...,5.633,-0.5154,59.7836,2.9341,-1238,0.0,0,0,0,0
4,17,429650,669302,L,R,-1.235,0.5393,60.849998,6.131,0.3457,...,5.6394,-0.5396,59.7892,2.9484,-1234,0.0,0,0,0,0
