In [424]:
# common imports
import numpy as np
import pandas as pd
import joblib, glob
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# machine learning imports


# display setup
pd.set_option("display.max_columns", None)  # the None parameter displays unlimited columns
plt.style.use('seaborn')  # for plots

# 1. Getting the Data

> Each participant has 3 csv files, one for each alignment state.
>
> Taking a look at one file for each state should help gain insight to which dataframe manipulations are necessary
> before inserting it into a machine learning algorithm.

In [425]:
# read right hand file
handright = pd.read_csv(r"extraFiles/HandRight.csv")

In [426]:
handright.head()

Unnamed: 0,Time,Frame ID,Hand Type,# hands,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,Wrist Pos X,Wrist Pos Y,Wrist Pos Z,Elbow pos X,Elbow Pos Y,Elbow Pos Z,Grab Strenth,Grab Angle,Pinch Strength
0,128.2883,15478,right,1,15.67167,226.2064,20.2647,34.26665,-195.2147,-35.04302,0.270987,-0.017635,-0.2778,46.88831,207.3548,86.23589,203.8337,49.33507,237.4553,0.0,0.206389,0.0
1,128.3048,15480,right,1,16.24119,222.9389,20.11781,36.16443,-180.0939,-4.052799,0.245085,-0.01477,-0.277763,47.54861,205.6568,86.4225,203.9553,47.88272,238.4482,0.0,0.24305,0.0
2,128.3218,15482,right,1,16.72461,220.9709,19.94972,26.35298,-95.08819,-11.15762,0.230796,-0.008779,-0.278792,48.1503,204.5182,86.35149,204.6231,45.35921,236.8544,0.0,0.281134,0.0
3,128.3384,15484,right,1,17.09016,219.7124,19.67679,19.46804,-61.75518,-21.30378,0.217681,-0.010759,-0.279768,48.64175,203.9989,86.16056,204.9939,44.18692,236.0927,0.0,0.277691,0.0
4,128.3551,15486,right,1,17.46017,219.3652,19.13128,21.94964,-8.355943,-35.08179,0.220696,-0.01738,-0.278629,48.93135,203.3777,85.59017,204.5952,42.68003,235.2896,0.0,0.283008,0.0


In [427]:
handright.shape

(4812, 22)

In [428]:
handright.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4812 entries, 0 to 4811
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Time             4812 non-null   float64
 1    Frame ID        4812 non-null   int64  
 2    Hand Type       4812 non-null   object 
 3    # hands         4812 non-null   int64  
 4    Position X      4812 non-null   float64
 5    Position Y      4812 non-null   float64
 6    Position Z      4812 non-null   float64
 7    Velocity X      4812 non-null   float64
 8    Velocity Y      4812 non-null   float64
 9    Velocity Z      4812 non-null   float64
 10   Pitch           4812 non-null   float64
 11   Roll            4812 non-null   float64
 12   Yaw             4812 non-null   float64
 13   Wrist Pos X     4428 non-null   float64
 14   Wrist Pos Y     4428 non-null   float64
 15   Wrist Pos Z     4428 non-null   float64
 16   Elbow pos X     4428 non-null   float64
 17   Elbow Pos Y  

In [429]:
handright.isna().sum()

Time                 0
 Frame ID            0
 Hand Type           0
 # hands             0
 Position X          0
 Position Y          0
 Position Z          0
 Velocity X          0
 Velocity Y          0
 Velocity Z          0
 Pitch               0
 Roll                0
 Yaw                 0
 Wrist Pos X       384
 Wrist Pos Y       384
 Wrist Pos Z       384
 Elbow pos X       384
 Elbow Pos Y       384
 Elbow Pos Z       384
 Grab Strenth        0
 Grab Angle          0
 Pinch Strength      0
dtype: int64

In [430]:
handright[" # hands"].value_counts()

1    4732
2      80
Name:  # hands, dtype: int64

In [431]:
handright[" Hand Type"].value_counts()

right    4738
left       74
Name:  Hand Type, dtype: int64

> Features in the DataFrame:
1. Time: Second and milliseconds of recorded movement
2. Frame ID: Identification number of detected movement
3. Hand Type: Right or left hand
4. Number hands: Hands detected in the frame/movement
5. Position X: Center position of the palm from the leap origin, millimeters
6. Position Y:
7. Position Z:
8. Velocity X: Rate of change
9. Velocity Y:
10. Velocity Z:
11. Pitch:
12. Roll:
13. Yaw:
14. Wrist Pos X:
15. Wrist Pos Y:
16. Wrist Pos Z:
17. Elbow pos X:
18. Elbow pos Y:
19. Elbow pos Z:
20. Grab Strength:
21. Grab Angle:
22. Pinch Strength:

Clearly just by looking at a few files the data needs to be cleaned.

* Alone has instances with 2 hands and right hand in hand type that need to be removed. Likewise, spontaneous
and sync have instances with 1 hand and a mismatch between right and left hands.

In [432]:
def remove_first7(df):
    df.drop(df[df["Time"] < df["Time"].min() + 7].index, inplace=True)
    df.reset_index(drop=True, inplace=True)

In [433]:
handright.isna().sum()

Time                 0
 Frame ID            0
 Hand Type           0
 # hands             0
 Position X          0
 Position Y          0
 Position Z          0
 Velocity X          0
 Velocity Y          0
 Velocity Z          0
 Pitch               0
 Roll                0
 Yaw                 0
 Wrist Pos X       384
 Wrist Pos Y       384
 Wrist Pos Z       384
 Elbow pos X       384
 Elbow Pos Y       384
 Elbow Pos Z       384
 Grab Strenth        0
 Grab Angle          0
 Pinch Strength      0
dtype: int64

In [456]:
print("Before:", handright.shape)
handright_prepared = handright.drop(handright[(handright[" Hand Type"]=="left") | (handright[" # hands"]==2)].index)
handright_prepared.reset_index(drop=True, inplace=True)
print(handright_prepared.shape)
remove_first7(handright_prepared) # right hand file without first 7 seconds
print(handright_prepared.shape)
handright_prepared.dropna(inplace=True)
print("After:", handright_prepared.shape)
print("Null values:", handright_prepared.isna().sum().sum())
handright_prepared.head()

Before: (4812, 22)
(4698, 22)
(4277, 22)
After: (3893, 22)
Null values: 0


Unnamed: 0,Time,Frame ID,Hand Type,# hands,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,Wrist Pos X,Wrist Pos Y,Wrist Pos Z,Elbow pos X,Elbow Pos Y,Elbow Pos Z,Grab Strenth,Grab Angle,Pinch Strength
0,135.3024,16287,right,1,11.93032,199.9861,4.981665,-35.51221,-165.3466,-57.68153,-0.092033,0.108262,-0.906612,75.4574,206.0521,45.38764,284.3946,140.0167,201.9318,0.0,0.561324,0.0
1,135.3194,16289,right,1,11.1919,195.3075,3.661716,-43.58744,-309.8374,-77.18515,-0.111674,0.126925,-0.904311,74.46141,202.4562,44.28679,282.6278,140.9712,203.6773,0.0,0.626321,0.0
2,135.3357,16291,right,1,10.53308,189.0297,2.829388,-30.4036,-363.0033,-43.79815,-0.140004,0.127219,-0.901862,73.6006,197.4387,43.54082,280.6384,142.5401,206.7526,0.0,0.636596,0.0
3,135.3524,16293,right,1,10.243,181.9816,2.06433,-15.424,-423.212,-43.99948,-0.166623,0.122782,-0.901268,73.19537,191.7202,42.73155,279.7357,148.1382,209.933,0.0,0.701644,0.0
4,135.369,16295,right,1,10.02061,174.8592,1.228467,-8.733602,-413.4513,-53.88054,-0.197763,0.127587,-0.897777,72.68671,186.1613,41.98006,278.689,152.5215,212.1112,0.0,0.786548,0.0


In [457]:
# split right hand file for training and testing
train_size = round(len(handright_prepared) * 0.8)

handright_prepared_train = handright_prepared.loc[:train_size].copy()
handright_prepared_test = handright_prepared.loc[train_size:].copy()

print("Right Hand Train\nShape:", handright_prepared_train.shape,
      "\nNull Values:", handright_prepared_train.isna().sum().sum())
print()
print("Right Hand Test\nShape:", handright_prepared_test.shape,
      "\nNull Values:", handright_prepared_test.isna().sum().sum())

Right Hand Train
Shape: (3115, 22) 
Null Values: 0

Right Hand Test
Shape: (779, 22) 
Null Values: 0


> The following functions load the data into a DataFrame:

In [459]:
# function combines alone csv file with the right hand file
def combine_right(df_alone, is_test):
    # remove instances with right hand or 2 hands
    df_alone.drop(df_alone[(df_alone[" Hand Type"]=="right") | (df_alone[" # hands"]==2)].index, inplace=True)
    # drop is True to prevent from adding the old indices as a new column
    df_alone.reset_index(drop=True, inplace=True)

    # copy of right hand according to train / test
    # prevents harming the DataFrame since it is used multiple times
    if is_test:
        right = handright_prepared_test.copy()
    else:
        right = handright_prepared_train.copy()

    # calculate number of copies needed to match length of left hand
    n_copies = round(len(df_alone) / len(handright_prepared_train)) + 1
    copies = [] # list for appending copies
    for i in range(n_copies):
        copies.append(right)
    right = pd.concat(copies, axis=0, ignore_index=True) # concat copies to a DataFrame
    right = right.loc[:len(df_alone)-1] # -1 because loc includes the end index, and dataframe indices start from 0

    # align column values with alone
    right.Time = df_alone.Time
    right[" Frame ID"] = df_alone[" Frame ID"]

    # merge DataFrames, sort by Time column and reset index
    merged = right.merge(df_alone, how="outer").sort_values("Time").reset_index(drop=True)
    merged[" # hands"] = 2 # change number of hands to 2 (2 hand rows were dropped per dataframe before merge)
    return merged

In [451]:
# function returns a DataFrame with right and left hands merged into one row
# column names format is changed

def transform_columns(df):
    # strip removes white spaces in the beginning and end
    # lower changes uppercase letters to lowercase
    df.columns = df.columns.str.strip().str.lower()
    # rename columns without spaces
    df.columns = df.columns.str.replace(" ", "_")
    # change number of hands column name
    df.columns = df.columns.str.replace("#_hands", "n_hands")

    # remove instances with 1 hand detected
    df.drop(df[df["n_hands"] == 1].index, inplace=True)

    # merge right and left hands into one row
    left = df.groupby("hand_type").get_group("left").drop(["hand_type", "n_hands", "frame_id"], axis=1)
    right = df.groupby("hand_type").get_group("right").drop(["hand_type", "n_hands", "frame_id"], axis=1)
    keep_same = {"state", "time"} # shared columns
    left.columns = left.columns.map(lambda x: x if x in keep_same else x + "_left")
    right.columns = right.columns.map(lambda x: x if x in keep_same else x + "_right")
    return left.merge(right, how="outer", on=["state", "time"])

In [463]:
training_data = []

# glob searches all directories and files
for filename in glob.iglob("extraFiles/Training/**/*.csv", recursive=True):
    df = pd.read_csv(filename, index_col=None, header=0)
    remove_first7(df) # removes first 7 seconds from file
    if "Alone" in filename:
        if "Training" in filename:
            df = combine_right(df, False)
        else:
            df = combine_right(df, True)
        df.insert(0, "state", 0)
    elif "Sync" in filename:
        df.insert(0, "state", 1)
    elif "Spontan" in filename:
        df.insert(0, "state", 2)
    df = transform_columns(df)
    training_data.append(df)

In [462]:
len(training_data)

27

In [None]:
frame = pd.concat(training_data, axis=0, ignore_index=True)


def split_train_val(df):



### Notes:

01:37:00

- Organize the data in a pandas dataframe
- Goal: using the hand ... , detect if is alone, sync or spontaneous.
- "spontaneous synchronizing"
- interpersonal space
- training has 9 participants
- validation has different participants
- spontan and sync: if # hands is 1, remove data!
- time series
- every 2 lines is one feature (need to be combined) = 1 frame
- choose how many frames
- position y is similar
- 2 recordings, second is usually better
- can't split train and test as usual (correlation between each following movement). can't shuffle!
for instance, can take first 40 seconds for training and last 5 for testing.
- at least 2 seconds in between train and test
- model for 1 person, try model on second person. train the second person and test the third etc.
- 4 frames per second
-

> #### Resources:
1. Leap Motion Attributes <a href="https://developer-archive.leapmotion.com/documentation/python/api/Leap.Hand.html"
> title="leapmotion">link</a>

In [None]:
r = handright_prepared[:100].copy()
l = alone[:200]
# r.Time = l.Time
# r[" Frame ID"] = l[" Frame ID"]

diff = len(l) - len(r)
diff

# l.drop(l[:diff].index).reset_index(drop=True)
l
# merged = r.merge(l, how="outer").sort_values("Time").reset_index(drop=True)
# merged.groupby("Time")[" # hands"].sum()

In [None]:
def transform_time(df):
    # adds a column with the second
    df["start_time"] = df.time.astype(int) - df.time.min().astype(int)
    start = df.start_time.min() + 7 # remove first 7 seconds
    first_7_seconds = df[df["start_time"] < start].index
    # drop is True to prevent from adding the old indices as a new column
    df_new = df.drop(first_7_seconds).reset_index(drop=True)
    # reset index (drop = False) adds round time as the left column in the DataFrame
    df = df.groupby("start_time").nth([0, 1, 2, 3]).reset_index() # first four instances per second
    df.drop(df[df["start_time"] < df["start_time"].min() + 7].index).reset_index(drop=True)
    df.drop("start_time", axis=1, inplace=True)
    return df

alone["Time"] = alone["Time"] - alone["Time"].min()

    # reset index (drop = False) adds round time as the left column in the DataFrame
    df = df.groupby("start_time").nth([0, 1, 2, 3]).reset_index() # first four instances per second

In [None]:
# adds a column with the second
merged["round_time"] = merged.time.astype(int) - merged.time.min().astype(int)

In [None]:
# reset index (drop = False) adds round time as the left column in the DataFrame
merged = merged.groupby("round_time").nth([0, 1, 2, 3]).reset_index()

In [None]:
# drop is True to prevent from adding the old indices as a new column
merged = merged.drop(merged[merged["round_time"] < merged.round_time.min() + 7].index).reset_index(drop=True)

In [None]:
merged

In [None]:
round_time_groups = merged.groupby("round_time")