### This notebook reads the hand data CSV file and rewrites the tendon values to align with the theory of mirror neurons. Using ridge regression, a mapping is learned from the visual features to the tendon values. The tendon values predicted from the visual features are then written to a new file, `hand_data3_mirror2.csv`.

In [103]:
import pandas as pd
from sklearn.metrics import *
from sklearn import linear_model
from sklearn.model_selection import cross_val_score
from collections import defaultdict as dd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from slir import SparseLinearRegression

import operator
import re
from collections import Counter

#from textblob import TextBlob
#from textblob import Word

from IPython.display import HTML, display
from IPython.display import Image

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
%matplotlib  

import pickle

Using matplotlib backend: Qt5Agg


### Read data, remove punctuation, lowercase and tokenize the descriptions (no stemming), and add camera one-hot encoding

In [104]:
def read_raw_data(path='../data/hand_data3_separated.csv'):
    """Load the raw hand-pose CSV and derive token and camera-angle columns.

    Parameters
    ----------
    path : str or file-like, optional
        Anything accepted by ``pd.read_csv``. Defaults to the project's
        ``hand_data3_separated.csv`` so existing zero-argument calls behave
        as before.

    Returns
    -------
    data : pandas.DataFrame
        The CSV contents plus ``desc_list`` (lower-cased, punctuation-free
        tokens of ``description``), ``desc_str`` (those tokens re-joined with
        single spaces) and one 0/1 indicator column per distinct
        ``camera_angle`` value. The indicator columns are placed right after
        the first eight columns of the file.
    words : list of str
        Every token of every description, in row order.
    vocab : list of str
        The distinct tokens (unordered).
    """
    data = pd.read_csv(path, index_col=False)

    # remove punctuation, lower-case, and split on whitespace
    data['desc_list'] = data.description.apply(
        lambda text: re.sub(r'[^\w\s]', '', str(text)).lower().split())
    data['desc_str'] = data.desc_list.apply(' '.join)

    # add one-hot encoding
    # Cast to uint8 explicitly: newer pandas returns bool indicator columns,
    # and bool mixed with float feature columns yields an object array when
    # the frame is later converted to a matrix.
    camera_data = pd.get_dummies(data.camera_angle).astype('uint8')

    # Put the indicator columns right after the leading block of columns
    # instead of assuming there are exactly four camera angles.
    n_leading = 8
    base_cols = data.columns.tolist()
    camera_cols = camera_data.columns.tolist()
    cols = base_cols[:n_leading] + camera_cols + base_cols[n_leading:]
    data = pd.concat([data, camera_data], axis=1)[cols]

    #get words and vocabs
    words = [token for tokens in data.desc_list for token in tokens]
    vocab = list(set(words))
    print('number of unique words in our data:', len(vocab), '\nnumber of word tokens in our data: ', len(words))

    return data, words, vocab

    #OLD CODE
    # stemming seems to make things worse
    #import nltk
    #import nltk.stem.snowball as stem
    #wnl = stem.EnglishStemmer()
    #Select only the straight_on camera angles (lowers accuracy, so don't do it)
    #data = data.loc[data['camera_angle'] == 'straight_on']
    #Fix spelling mistakes (textblob doesn't work well for this)
    #for phrase in data.description:
    #    blob = TextBlob(phrase)
    #    print(blob.correct())

### split off some of the data for test/eval/held out (this should only be done the first time)

### Read in data, stack it, and clean it

In [110]:
def read_in_data():
    """Load the pickled train and test frames.

    Returns
    -------
    tuple
        ``(train_data, test_data)`` as unpickled from the two project files.
    """
    # NOTE: read_pickle executes pickle payloads; only use on trusted files.
    pickle_paths = (
        "../data/train_data3_separated.pkl",
        "../data/test_data3_separated.pkl",
    )
    train_data, test_data = (pd.read_pickle(p) for p in pickle_paths)
    return train_data, test_data

def stack_training_data(mydata):
    """Expand a frame so that each description token gets its own row.

    Parameters
    ----------
    mydata : pandas.DataFrame
        Must contain a ``desc_list`` column whose values are lists of tokens.

    Returns
    -------
    pandas.DataFrame
        ``mydata`` joined (on its index) with a new ``word`` column: every
        input row is repeated once per token in its ``desc_list``. A row with
        an empty ``desc_list`` is kept once with a missing ``word``.
    """
    descriptions = mydata['desc_list']
    # Build the long-form word series directly rather than via a row-wise
    # apply(pd.Series) + stack(): that route pads short descriptions with NaN
    # and relies on stack() silently dropping the padding, which the newer
    # pandas stack implementation no longer does.
    token_counts = [len(tokens) for tokens in descriptions]
    flat_tokens = [token for tokens in descriptions for token in tokens]
    s = pd.Series(flat_tokens,
                  index=mydata.index.repeat(token_counts),
                  name='word',
                  dtype=object)
    return mydata.join(s)

def remove_unwated_words(mydata, vocab, words):
    """Drop rows whose ``word`` is a stop word (or is not a known token).

    Parameters
    ----------
    mydata : pandas.DataFrame
        Long-form frame with a ``word`` column.
    vocab : iterable of str
        Vocabulary to screen; only stop words that occur in ``vocab`` are
        removed from the wanted set.
    words : iterable of str
        All word tokens; their distinct values form the initial wanted set.

    Returns
    -------
    pandas.DataFrame
        The rows of ``mydata`` whose ``word`` is still in the wanted set.
    """
    #unwanted_words = {'hand', 'and', 'the', 'a', 'with', 'is', 'are', 'to', 'of', 'finger', 'fingers', 'thumb'}
    unwanted_words = {'hand', 'and', 'the', 'a', 'with', 'is', 'are', 'to', 'of'}
    # Set arithmetic instead of list.remove(): list.remove raised ValueError
    # when a stop word was in `vocab` but not in `words`, or when `vocab`
    # contained the same stop word twice.
    wanted_words = set(words) - (unwanted_words & set(vocab))
    mydata = mydata.loc[mydata['word'].isin(wanted_words)]
    return mydata

In [111]:
# Compute the token list and vocabulary from the raw CSV (the frame itself is
# discarded here), then load the pickled train/test frames.
_, words, vocab = read_raw_data()
train_data, test_data = read_in_data()
# Preview the first ten training rows.
train_data[:10]

number of unique words in our data: 1109 
number of word tokens in our data:  13617


Unnamed: 0,poseID,camera_angle,description,T1,T2,T3,T4,T5,above,behind,...,f993,f994,f995,f996,f997,f998,f999,f1000,desc_list,desc_str
710,177,left,thumb and finger next to pinky curled,0.7,0.0,0.3,0.7,0.0,0,0,...,1.1e-05,3.93e-05,7.77e-06,8e-06,2.4e-05,1.87e-06,5e-05,0.000819,"[thumb, and, finger, next, to, pinky, curled]",thumb and finger next to pinky curled
1083,27,behind,hand facing away slightly curled,0.0,0.3,0.0,0.0,0.0,0,1,...,3e-06,4.67e-05,9.24e-06,2e-06,1.9e-05,5.69e-07,2.7e-05,5.1e-05,"[hand, facing, away, slightly, curled]",hand facing away slightly curled
1224,63,straight_on,c's up,0.0,0.7,0.3,0.0,0.0,0,0,...,5e-06,5.28e-06,1.13e-06,4e-06,1.4e-05,7.94e-07,9e-06,0.000387,"[cs, up]",cs up
485,121,above,curl,0.3,0.3,0.3,0.3,0.3,1,0,...,2e-05,7.18e-06,1.22e-05,1e-06,1.7e-05,3.44e-06,7.5e-05,2.4e-05,[curl],curl
348,87,straight_on,Open hand; ring finger curled,0.3,0.0,0.0,0.7,0.0,0,0,...,9e-06,5.53e-05,1.24e-06,4e-06,1.7e-05,2.22e-06,1.9e-05,0.000167,"[open, hand, ring, finger, curled]",open hand ring finger curled
898,224,left,OK symbol,0.7,0.7,0.0,0.7,0.7,0,0,...,1.4e-05,6.05e-06,2.21e-06,2e-06,6e-06,1.58e-06,2.7e-05,8.7e-05,"[ok, symbol]",ok symbol
1336,91,straight_on,middle; little fingers slightly bent,0.3,0.0,0.3,0.0,0.3,0,0,...,1.2e-05,3.32e-05,3.05e-06,6e-06,1.7e-05,1.47e-06,1e-05,0.000253,"[middle, little, fingers, slightly, bent]",middle little fingers slightly bent
463,115,behind,looks like bird ostrich,0.3,0.3,0.0,0.7,0.3,0,1,...,5e-06,9.53e-05,1.46e-05,5e-06,4.3e-05,9.31e-07,2.8e-05,0.000276,"[looks, like, bird, ostrich]",looks like bird ostrich
1017,11,above,pointing at something,0.0,0.0,0.3,0.0,0.7,1,0,...,5.1e-05,4.16e-07,7.18e-06,1e-06,5e-06,3.58e-06,0.000174,1.2e-05,"[pointing, at, something]",pointing at something
1188,54,straight_on,grasping but your index finger is bent,0.0,0.7,0.0,0.0,0.0,0,0,...,5e-06,7.67e-06,9.7e-07,5e-06,1.4e-05,7.84e-07,9e-06,0.0004,"[grasping, but, your, index, finger, is, bent]",grasping but your index finger is bent


In [132]:
# Re-read the raw CSV, this time keeping the full frame: `data` is the source
# of the "all rows" matrices built from it in a later cell.
data, words, vocab = read_raw_data()

# Inspect the first fifteen column labels to check the column order.
data.columns[:15]

number of unique words in our data: 1109 
number of word tokens in our data:  13617


Index(['poseID', 'camera_angle', 'description', 'T1', 'T2', 'T3', 'T4', 'T5',
       'above', 'behind', 'left', 'straight_on', 'f1', 'f2', 'f3'],
      dtype='object')

In [183]:
# Label-based column ranges: tendon targets (T1..T5) and visual features
# (camera-angle indicators followed by f1..f1000).
START_COL = 'T1'
END_COL = 'T5'
V_START_COL = 'above'
V_END_COL = 'f1000'


def _column_block(frame, first_col, last_col):
    """Return columns ``first_col``..``last_col`` (inclusive) as a float array.

    Uses ``.loc`` and ``.values`` in place of ``.ix`` and ``.as_matrix()``,
    both of which were removed in pandas 1.0. The float cast leaves an
    already-float result unchanged and prevents an object array if the
    indicator columns arrive as bool.
    """
    return frame.loc[:, first_col:last_col].values.astype(np.float64)


y = _column_block(train_data, START_COL, END_COL)
X = _column_block(train_data, V_START_COL, V_END_COL)
y_test = _column_block(test_data, START_COL, END_COL)
X_test = _column_block(test_data, V_START_COL, V_END_COL)
y_all = _column_block(data, START_COL, END_COL)
X_all = _column_block(data, V_START_COL, V_END_COL)

print("train", X.shape, y.shape)
print("test", X_test.shape, y_test.shape)
print("all", X_all.shape, y_all.shape)

train (1555, 1004) (1555, 5)
test (194, 1004) (194, 5)
all (1944, 1004) (1944, 5)


In [184]:
# NOTE(review): the wildcard import is where `Ridge` comes from in this cell;
# an explicit `from sklearn.linear_model import Ridge` would be clearer if no
# other cell relies on the extra names.
from sklearn.linear_model import *
import numpy as np

# Ridge regression from the visual-feature matrix X to the tendon targets y.
# NOTE(review): `normalize=` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this call needs an older scikit-learn — confirm the pinned version.
# NOTE(review): `random_state=False` is presumably treated as seed 0 — confirm.
model = Ridge(alpha=0.0001, copy_X=True, fit_intercept=True, max_iter=None, normalize=True, 
              random_state=False, solver='auto', tol=0.01)
model.fit(X, y)

Ridge(alpha=0.0001, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=True, random_state=False, solver='auto', tol=0.01)

## Evaluation

In [185]:
from sklearn.metrics import mean_squared_error
from math import sqrt
import math

# Compare the held-out tendon targets with the model's predictions for the
# held-out visual features.
y_actual = y_test
y_predicted = model.predict(X_test) 

# Root-mean-squared error: square root of mean_squared_error over the test targets.
rms = sqrt(mean_squared_error(y_actual, y_predicted))
print(rms)
# Show the first ten target rows next to the predictions rounded to one decimal.
print(y_actual[:10])
print(np.around(y_predicted[:10], decimals=1))


0.07578534546014949
[[0.  0.3 0.7 0.7 0. ]
 [0.  0.7 0.  0.7 0.7]
 [0.7 0.7 0.7 0.7 0. ]
 [0.7 0.7 0.7 0.7 0.7]
 [0.7 0.7 0.7 0.7 0.7]
 [0.  0.3 0.7 0.3 0.3]
 [0.3 0.3 0.3 0.3 0.7]
 [0.3 0.  0.  0.7 0.7]
 [0.7 0.3 0.7 0.3 0.7]
 [0.7 0.  0.3 0.7 0.3]]
[[-0.   0.2  0.7  0.7 -0. ]
 [-0.   0.7 -0.   0.7  0.7]
 [ 0.7  0.7  0.7  0.7  0. ]
 [ 0.7  0.7  0.7  0.7  0.7]
 [ 0.7  0.7  0.7  0.7  0.7]
 [ 0.   0.3  0.7  0.3  0.3]
 [ 0.3  0.3  0.3  0.3  0.7]
 [ 0.3  0.   0.   0.6  0.7]
 [ 0.7  0.3  0.8  0.3  0.8]
 [ 0.7 -0.   0.3  0.7  0.3]]


## Produce mirror .csv

In [186]:
# Predict tendon values for every row of the full data set (X_all).
new_tendons = model.predict(X_all) 
new_tendons.shape # should equal the shape of y_all printed in the earlier cell

(1944, 5)

In [187]:
# new_tendons now needs to replace the columns from START_COL to END_COL.
# Re-read the untouched CSV so the output keeps the file's original columns
# (no desc_list / desc_str / camera indicator columns).
data_old = pd.read_csv('../data/hand_data3_separated.csv',  index_col=False)

In [188]:
# Replace the tendon columns with the ridge predictions.
columns = ["T1", "T2", "T3", "T4", "T5"]

# One prediction row per CSV row and one prediction column per tendon;
# fail loudly here rather than writing a misaligned file.
assert new_tendons.shape == (len(data_old), len(columns)), \
    "predictions do not line up with the rows/tendon columns of data_old"

# Work on an explicit copy so data_old stays untouched, and overwrite each
# tendon column where it already sits instead of dropping it and re-inserting
# it at a hard-coded position.
data_new = data_old.copy()
for i, col in enumerate(columns):
    print(i, col)
    data_new[col] = new_tendons[:, i]

data_new

0 T1
1 T2
2 T3
3 T4
4 T5


Unnamed: 0,poseID,camera_angle,description,T1,T2,T3,T4,T5,f1,f2,...,f991,f992,f993,f994,f995,f996,f997,f998,f999,f1000
0,0,straight_on,front of left hand with all fingers standing u...,-0.001788,-0.009452,0.010359,0.000517,-0.009321,4.950000e-07,0.000049,...,2.850000e-06,1.190000e-05,2.610000e-06,7.220000e-06,5.780000e-07,2.680000e-06,1.190000e-05,9.060000e-07,9.820000e-06,7.085950e-04
1,0,above,pointing,0.004818,0.024178,-0.029647,-0.025938,-0.012471,1.010000e-05,0.000701,...,1.580000e-06,1.960000e-06,2.060000e-05,9.680000e-08,2.620000e-06,2.340000e-07,1.800000e-06,1.200000e-06,3.880000e-05,4.150000e-06
2,0,left,side veiw of hand,-0.001383,-0.000259,-0.002080,0.003072,-0.002982,5.040000e-06,0.000059,...,1.116130e-04,2.350000e-05,1.240000e-05,2.780000e-05,5.840000e-06,6.270000e-06,7.440000e-05,1.690000e-06,3.740000e-05,7.576890e-04
3,0,behind,hand away all fingers slightly bent inwards,0.032711,0.020469,-0.014717,0.013237,-0.014145,8.910000e-07,0.000195,...,2.060000e-06,1.185720e-04,3.760000e-06,3.960000e-05,1.220000e-05,2.200000e-06,1.900000e-05,6.490000e-07,6.760000e-05,5.140000e-05
4,1,straight_on,hand raised; palm facing self; pinky slightly ...,0.006278,0.010864,-0.000519,0.012717,0.304370,5.290000e-07,0.000037,...,2.530000e-06,8.670000e-06,3.880000e-06,8.530000e-06,7.780000e-07,1.780000e-06,1.080000e-05,9.950000e-07,6.590000e-06,6.100050e-04
5,1,above,about to grab someone on the shoulder hand,-0.002930,-0.031468,0.014597,0.050939,0.305722,2.490000e-05,0.001562,...,2.710000e-06,3.780000e-06,3.090000e-05,1.880000e-07,3.800000e-06,4.510000e-07,3.420000e-06,2.410000e-06,9.680000e-05,7.240000e-06
6,1,left,fingers slightly curved; palm facing right,-0.003367,-0.000566,-0.003261,-0.000784,0.310560,4.780000e-06,0.000062,...,9.150000e-05,2.540000e-05,1.730000e-05,2.600000e-05,8.810000e-06,7.690000e-06,7.690000e-05,2.770000e-06,6.110000e-05,7.018750e-04
7,1,behind,back of hand,-0.023814,-0.007751,0.056034,0.031354,0.279768,1.100000e-06,0.000226,...,3.990000e-06,1.176840e-04,4.700000e-06,4.370000e-05,1.120000e-05,3.570000e-06,3.130000e-05,7.370000e-07,8.490000e-05,9.130000e-05
8,2,straight_on,One finger is down while all others are up,-0.005224,-0.012207,-0.007857,-0.011971,0.702546,7.880000e-07,0.000045,...,2.330000e-06,1.550000e-05,5.250000e-06,1.350000e-05,1.470000e-06,2.020000e-06,1.570000e-05,1.800000e-06,8.510000e-06,6.622640e-04
9,2,above,All fingers are bend except one,-0.022735,0.036882,0.010482,-0.039797,0.700640,2.820000e-05,0.001859,...,1.930000e-06,3.790000e-06,2.910000e-05,1.380000e-07,3.260000e-06,4.620000e-07,2.510000e-06,2.480000e-06,9.970000e-05,8.080000e-06


In [189]:
# Write the frame with the new tendon values to a CSV file (row index omitted).
data_new.to_csv(path_or_buf='../data/hand_data3_mirror2.csv', index=False)