In [16]:
from sklearn.datasets import make_classification
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import numpy as np
import boto3
import json
import pandas as pd
from io import StringIO


def select_frame(df, single=True):
    # Take df where each row is a frame in the video and return
    # either 1 still frame or multiple still frames
    #print(df.iloc[843])
    deltas = []
    for i in range(len(df)-100,100,-1):
        # iterate in reverse order as best poses often last
        current_ = df.iloc[i]
        next_ = df.iloc[i-1]
        deltas.append(np.mean(abs(next_ - current_)))
    SD = np.std(deltas)
        
    if single:
        # Single Frame
        for i in range(len(df)-1,0,-1):
            # iterate in reverse order as best poses often last
            current_ = df.iloc[i]
            next_ = df.iloc[i-1]
            d = np.mean(abs(next_ - current_)) 
            if d < 0.05*SD:
                print(f'frame index: {i-20,i}')
                frame = np.asarray(df.ix[i])
                print(frame)
                return frame
    else:
        # Multiple Frames
        ds = deltas < SD*0.5
        return df[:-1][ds]
    
    
def getCSV(filepath):
    # Get csv file from s3 and apply labels
    client = boto3.client('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')

    #client = boto3.client.get_Object('alignedstorage', filepath)
    bucket_name = 'alignedstorage'

    object_key = filepath
    csv_obj = client.get_object(Bucket=bucket_name, Key=object_key)
    body = csv_obj['Body']
    csv_string = body.read().decode('utf-8')

    df = pd.read_csv(StringIO(csv_string), index_col='Unnamed: 0')
    df['arm'] = df['knee1'] = df['knee2'] = df['hips'] = df['torso'] = 0
    if 'arm' in filepath:
        df['arm'] = 1
    elif 'knee1' in filepath:
        df['knee1'] = 1
    elif 'knee2' in filepath:
        df['knee2'] = 1
    elif 'hips' in filepath:
        df['hips'] = 1
    elif 'torso' in filepath:
        df['torso'] = 1
        
    frame = select_frame(df, single=True)
    
    return frame


def iterate_bucket_items(bucket):
    """
    Move through all CSV files, take still frames, and apply correct labels
    """

    #client = boto3.client('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')
    s3 = boto3.resource('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')
    bucket = s3.Bucket('alignedstorage')
    prefix_objs = bucket.objects.filter(Prefix="training_data")
    
    col_names =  list(range(80))
    #print(col_names)
    full_df  = pd.DataFrame(columns = col_names)
    full_df = []
    print(full_df)
    for obj in prefix_objs:
        key = obj.key
        #print(key)
        if 'csv' in key:
            single = getCSV(key)
            print(single)
            if single is None:
                continue
            full_df.append(single)
            #print(full_df)
    
    #final = pd.DataFrame(np.asarray(full_df))
    final = full_df
    return final


def multi_acc(real, preds):
    # accuracy for multi classification model
    correct = 0
    incorrect = 0
    for i,j in zip(real.values.flatten(), preds.values.flatten()):
        if i==j:
            correct += 1
        else:
            incorrect +=1
    return correct / (correct+incorrect)


def multimodel(full_df):
    from sklearn.model_selection import train_test_split
    # Takes DF with each row as best pose from a video
    # Fits multi-label model and returns evaluation metrics
    from sklearn.metrics import accuracy_score, f1_score
    #print(full_df.head())
    
    #X = full_df.drop(columns = ['arm','knee1','knee2','torso','hips'])
    #Y = full_df[['arm','knee1','knee2','torso','hips']]
    X = full_df.drop(columns = [75,76,77,78,79])
    Y = full_df[[75,76,77,78,79]]
    #print(X.head())
    #print(Y.head())
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
    
    # Train Model
    forest = RandomForestClassifier(n_estimators=100, random_state=1)
    multi_target_forest = MultiOutputClassifier(forest, n_jobs=-1)
    multi_target_forest.fit(X_train, Y_train)
    
    # Evaluate Model
    preds = pd.DataFrame(multi_target_forest.predict(X_test))
    print(Y_test)
    print(preds)
    #accuracy = multi_acc(Y_test, preds)
    accuracy = accuracy_score(Y_test.values.flatten(), preds.values.flatten())
    f1 = f1_score(Y_test.values.flatten(), preds.values.flatten())
    return accuracy, f1


In [12]:
getCSV('training_data/warrior2_torso_1.csv')

frame index: (831, 851)
[8.37030e+02 1.55272e+02 8.86390e-01 7.60617e+02 2.00316e+02 8.44171e-01
 6.99914e+02 1.80808e+02 7.36187e-01 5.96102e+02 2.08099e+02 6.63628e-01
 4.96229e+02 2.14120e+02 8.17690e-01 8.21329e+02 2.21890e+02 7.12349e-01
 9.05623e+02 2.23843e+02 7.14066e-01 1.00360e+03 2.10192e+02 8.33860e-01
 7.05820e+02 3.80590e+02 6.80638e-01 6.60744e+02 3.74724e+02 6.03754e-01
 5.56895e+02 4.78584e+02 8.13000e-01 4.51135e+02 5.88316e+02 7.88832e-01
 7.48920e+02 3.82482e+02 6.60882e-01 8.54627e+02 4.58871e+02 8.65143e-01
 8.23267e+02 6.02018e+02 7.83261e-01 8.35119e+02 1.37658e+02 8.56298e-01
 0.00000e+00 0.00000e+00 0.00000e+00 8.01805e+02 1.25844e+02 8.31293e-01
 0.00000e+00 0.00000e+00 0.00000e+00 8.85982e+02 6.33238e+02 6.78908e-01
 8.84030e+02 6.23528e+02 5.49873e-01 8.07739e+02 6.17698e+02 7.52489e-01
 4.29600e+02 6.23551e+02 7.44972e-01 4.19762e+02 6.17620e+02 7.38993e-01
 4.49144e+02 5.98012e+02 5.78027e-01 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 1.00000e+00]


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


array([8.37030e+02, 1.55272e+02, 8.86390e-01, 7.60617e+02, 2.00316e+02,
       8.44171e-01, 6.99914e+02, 1.80808e+02, 7.36187e-01, 5.96102e+02,
       2.08099e+02, 6.63628e-01, 4.96229e+02, 2.14120e+02, 8.17690e-01,
       8.21329e+02, 2.21890e+02, 7.12349e-01, 9.05623e+02, 2.23843e+02,
       7.14066e-01, 1.00360e+03, 2.10192e+02, 8.33860e-01, 7.05820e+02,
       3.80590e+02, 6.80638e-01, 6.60744e+02, 3.74724e+02, 6.03754e-01,
       5.56895e+02, 4.78584e+02, 8.13000e-01, 4.51135e+02, 5.88316e+02,
       7.88832e-01, 7.48920e+02, 3.82482e+02, 6.60882e-01, 8.54627e+02,
       4.58871e+02, 8.65143e-01, 8.23267e+02, 6.02018e+02, 7.83261e-01,
       8.35119e+02, 1.37658e+02, 8.56298e-01, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 8.01805e+02, 1.25844e+02, 8.31293e-01, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 8.85982e+02, 6.33238e+02, 6.78908e-01,
       8.84030e+02, 6.23528e+02, 5.49873e-01, 8.07739e+02, 6.17698e+02,
       7.52489e-01, 4.29600e+02, 6.23551e+02, 7.44972e-01, 4.197

In [17]:
final = iterate_bucket_items('alignedstorage')


[]
frame index: (543, 563)
[6.84239e+02 1.51413e+02 8.82333e-01 6.35286e+02 2.14101e+02 8.72766e-01
 5.72647e+02 2.14081e+02 7.68265e-01 4.78473e+02 2.64975e+02 7.30762e-01
 3.94270e+02 2.98272e+02 7.98845e-01 6.97968e+02 2.14104e+02 7.36754e-01
 7.80252e+02 2.10103e+02 7.74031e-01 8.66424e+02 1.92497e+02 8.04826e-01
 6.35220e+02 3.98209e+02 7.10220e-01 5.88257e+02 3.98209e+02 6.66710e-01
 4.96212e+02 4.98103e+02 7.67321e-01 3.94271e+02 6.07812e+02 7.84619e-01
 6.80291e+02 3.98131e+02 6.87737e-01 7.82176e+02 4.74538e+02 8.07256e-01
 7.89960e+02 6.17566e+02 8.09818e-01 6.78348e+02 1.37642e+02 8.53816e-01
 6.84299e+02 1.43458e+02 1.77438e-01 6.37200e+02 1.37654e+02 8.86686e-01
 0.00000e+00 0.00000e+00 0.00000e+00 8.52731e+02 6.35359e+02 6.36348e-01
 8.50771e+02 6.33288e+02 5.62208e-01 7.76322e+02 6.31323e+02 7.50156e-01
 3.78597e+02 6.48905e+02 8.03136e-01 3.66825e+02 6.37228e+02 7.79926e-01
 3.86445e+02 6.13695e+02 5.72292e-01 1.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


frame index: (404, 424)
[7.35173e+02 1.43505e+02 8.34527e-01 6.82357e+02 1.90566e+02 8.57127e-01
 6.23480e+02 1.78690e+02 7.49006e-01 5.29513e+02 1.41567e+02 7.68220e-01
 4.49143e+02 7.09824e+01 8.05484e-01 7.43073e+02 2.02297e+02 7.73327e-01
 8.15504e+02 2.53215e+02 7.54440e-01 8.97786e+02 2.96325e+02 7.85195e-01
 6.70495e+02 3.86446e+02 6.88920e-01 6.33205e+02 3.86433e+02 6.41960e-01
 5.41148e+02 4.92272e+02 7.82635e-01 4.33500e+02 6.11765e+02 7.30212e-01
 7.11707e+02 3.84517e+02 7.05755e-01 8.23272e+02 4.70713e+02 8.34999e-01
 8.11622e+02 6.17651e+02 7.40659e-01 7.27343e+02 1.29774e+02 9.25819e-01
 7.39103e+02 1.31734e+02 3.45359e-01 6.86171e+02 1.19977e+02 8.96544e-01
 0.00000e+00 0.00000e+00 0.00000e+00 8.78213e+02 6.49018e+02 6.87660e-01
 8.76252e+02 6.43116e+02 5.62482e-01 8.05599e+02 6.33271e+02 8.41860e-01
 4.13898e+02 6.48893e+02 7.77540e-01 4.02169e+02 6.37205e+02 6.90177e-01
 4.31575e+02 6.19576e+02 5.46692e-01 1.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00]
[

frame index: (353, 373)
[6.82204e+02 1.45463e+02 9.43673e-01 6.48878e+02 2.12147e+02 8.95656e-01
 5.99973e+02 2.10185e+02 7.84984e-01 4.94252e+02 2.04245e+02 7.97204e-01
 4.15903e+02 1.72922e+02 8.16574e-01 6.96026e+02 2.14069e+02 7.99705e-01
 7.72384e+02 2.19905e+02 7.38597e-01 8.52654e+02 1.94438e+02 8.78281e-01
 6.27387e+02 3.82459e+02 7.01938e-01 5.90262e+02 3.78592e+02 6.74013e-01
 5.09955e+02 4.88309e+02 7.88606e-01 4.31497e+02 5.86178e+02 8.46928e-01
 6.62628e+02 3.84424e+02 6.98505e-01 7.62594e+02 4.60849e+02 8.66247e-01
 7.89933e+02 5.92190e+02 7.95576e-01 6.68593e+02 1.33646e+02 9.15281e-01
 6.82321e+02 1.33692e+02 6.44223e-01 6.39199e+02 1.39629e+02 8.42136e-01
 0.00000e+00 0.00000e+00 0.00000e+00 8.38981e+02 6.13675e+02 7.49712e-01
 8.36976e+02 6.03979e+02 7.06274e-01 7.76398e+02 6.05890e+02 7.85550e-01
 4.11870e+02 6.13662e+02 7.55642e-01 4.02103e+02 6.03855e+02 7.80220e-01
 4.31519e+02 5.92153e+02 7.06297e-01 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00]
[

frame index: (904, 924)
[0.00000e+00 0.00000e+00 0.00000e+00 6.17618e+02 2.40627e+01 2.79630e-01
 4.27590e+02 3.97082e+01 1.25237e-01 3.06133e+02 4.80468e+02 2.04182e-01
 0.00000e+00 0.00000e+00 0.00000e+00 8.72322e+02 5.35042e+00 2.61538e-01
 8.09617e+02 2.84538e+02 5.11955e-01 7.13578e+02 4.94238e+02 6.25653e-01
 5.76500e+02 3.80577e+02 2.49506e-01 4.80512e+02 3.78611e+02 2.01990e-01
 4.88318e+02 6.80315e+02 2.03693e-01 0.00000e+00 0.00000e+00 0.00000e+00
 6.72458e+02 3.88400e+02 2.34701e-01 6.84228e+02 6.52908e+02 2.74821e-01
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 1.00000e+00 0.00000e+00]
[

frame index: (382, 402)
[7.27228e+02 1.57237e+02 8.68484e-01 6.68604e+02 2.10135e+02 8.63087e-01
 6.09826e+02 2.08205e+02 7.48043e-01 5.19689e+02 2.33619e+02 7.80533e-01
 4.21707e+02 2.33623e+02 7.81767e-01 7.29335e+02 2.12062e+02 7.97279e-01
 8.19409e+02 2.16032e+02 8.21137e-01 9.03694e+02 2.17976e+02 7.82184e-01
 6.37242e+02 4.00178e+02 7.19824e-01 5.98011e+02 4.00117e+02 6.67372e-01
 5.11802e+02 4.92201e+02 7.66916e-01 4.02129e+02 5.99966e+02 7.37471e-01
 6.80315e+02 4.00121e+02 7.39418e-01 7.76264e+02 4.90247e+02 8.59744e-01
 6.98007e+02 6.07806e+02 7.80322e-01 7.13631e+02 1.45509e+02 9.50925e-01
 7.23415e+02 1.47317e+02 1.19389e-01 6.78376e+02 1.43514e+02 8.96668e-01
 0.00000e+00 0.00000e+00 0.00000e+00 7.54773e+02 6.43093e+02 6.41247e-01
 7.50849e+02 6.35327e+02 5.52283e-01 6.82322e+02 6.19523e+02 8.03591e-01
 3.68785e+02 6.37264e+02 7.33033e-01 3.62948e+02 6.33253e+02 6.74301e-01
 4.00158e+02 6.03978e+02 5.50411e-01 0.00000e+00 1.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00]
[

frame index: (499, 519)
[6.82255e+02 1.86652e+02 8.74971e-01 6.33287e+02 2.21835e+02 8.51878e-01
 5.72538e+02 2.19915e+02 7.57295e-01 4.80461e+02 2.61131e+02 8.32924e-01
 3.88411e+02 2.84558e+02 8.14347e-01 6.94033e+02 2.17995e+02 7.34455e-01
 7.78325e+02 2.17984e+02 7.85789e-01 8.68369e+02 2.17935e+02 7.89000e-01
 6.19624e+02 4.13826e+02 7.28440e-01 5.76462e+02 4.13814e+02 6.71715e-01
 4.90235e+02 4.94213e+02 7.69063e-01 3.66862e+02 6.01958e+02 7.62979e-01
 6.64645e+02 4.11893e+02 7.01519e-01 7.72426e+02 4.76579e+02 8.62272e-01
 8.27230e+02 6.03883e+02 7.79428e-01 6.76389e+02 1.74892e+02 8.45314e-01
 6.84273e+02 1.76800e+02 2.50774e-01 6.37189e+02 1.63165e+02 9.21043e-01
 0.00000e+00 0.00000e+00 0.00000e+00 8.89927e+02 6.33291e+02 7.01077e-01
 8.87980e+02 6.23547e+02 5.70904e-01 8.21374e+02 6.19552e+02 8.53858e-01
 3.27643e+02 6.39201e+02 6.44451e-01 3.21793e+02 6.33294e+02 6.68257e-01
 3.64875e+02 6.11744e+02 5.33097e-01 0.00000e+00 0.00000e+00 1.00000e+00
 0.00000e+00 0.00000e+00]
[

frame index: (445, 465)
[8.25311e+02 1.63083e+02 9.15489e-01 7.54738e+02 1.98408e+02 8.24375e-01
 7.01874e+02 1.67040e+02 7.54256e-01 6.01920e+02 1.49457e+02 8.30367e-01
 5.07819e+02 1.16097e+02 8.79312e-01 8.05633e+02 2.29703e+02 7.90050e-01
 8.84072e+02 2.76748e+02 8.58622e-01 9.64346e+02 3.15914e+02 8.49380e-01
 6.94045e+02 3.84478e+02 7.28231e-01 6.51011e+02 3.82469e+02 6.70736e-01
 5.58908e+02 4.90214e+02 7.33451e-01 4.62799e+02 6.05882e+02 8.03935e-01
 7.31245e+02 3.86423e+02 7.08011e-01 8.39055e+02 4.74631e+02 8.54905e-01
 8.37103e+02 6.21511e+02 7.90220e-01 8.21312e+02 1.47417e+02 9.46279e-01
 8.31165e+02 1.51330e+02 3.90991e-01 7.80280e+02 1.33833e+02 8.63637e-01
 0.00000e+00 0.00000e+00 0.00000e+00 8.99753e+02 6.54826e+02 5.88954e-01
 8.99676e+02 6.50878e+02 5.15216e-01 8.25346e+02 6.35308e+02 7.45084e-01
 4.43264e+02 6.46948e+02 7.45338e-01 4.31516e+02 6.35315e+02 7.13728e-01
 4.60884e+02 6.17573e+02 6.42998e-01 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 1.00000e+00]
[

In [19]:
pd.DataFrame(final)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,75,76,77,78,79
0,684.239,151.413,0.882333,635.286,214.101,0.872766,572.647,214.081,0.768265,478.473,...,637.228,0.779926,386.445,613.695,0.572292,1.0,0.0,0.0,0.0,0.0
1,735.173,143.505,0.834527,682.357,190.566,0.857127,623.48,178.69,0.749006,529.513,...,637.205,0.690177,431.575,619.576,0.546692,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,762.601,5.38652,0.081768,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,743.022,492.278,0.380813,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,745.076,137.624,0.901442,721.425,212.043,0.8659,678.339,200.328,0.781237,592.125,...,637.19,0.789742,474.596,617.563,0.64685,1.0,0.0,0.0,0.0,0.0
5,727.247,131.752,0.934218,674.406,182.737,0.807287,609.811,182.721,0.737221,517.725,...,605.909,0.704317,419.778,588.199,0.595281,0.0,0.0,0.0,0.0,0.0
6,682.204,145.463,0.943673,648.878,212.147,0.895656,599.973,210.185,0.784984,494.252,...,603.855,0.78022,431.519,592.153,0.706297,0.0,0.0,0.0,0.0,0.0
7,698.046,139.629,0.866338,651.0,206.234,0.857125,590.179,200.375,0.74813,494.238,...,601.876,0.715069,427.514,586.216,0.669858,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,631.32,57.3247,0.055948,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [21]:
multimodel(pd.DataFrame(final))

     75   76   77   78   79
17  0.0  1.0  0.0  0.0  0.0
5   0.0  0.0  0.0  0.0  0.0
8   0.0  0.0  0.0  0.0  0.0
15  0.0  1.0  0.0  0.0  0.0
4   1.0  0.0  0.0  0.0  0.0
6   0.0  0.0  0.0  0.0  0.0
     0    1    2    3    4
0  0.0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  1.0  0.0
3  0.0  0.0  0.0  0.0  0.0
4  0.0  0.0  0.0  0.0  0.0
5  0.0  0.0  0.0  0.0  0.0


(0.8666666666666667, 0.0)