In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

In [69]:
def getFrameData(root, iterKey):
    """Collect one entry per element tagged ``iterKey`` anywhere under ``root``.

    Elements are visited in the order produced by ``root.iter(iterKey)``.
    An element that carries XML attributes contributes its attribute
    mapping; an element without attributes contributes its text content
    instead (which is ``None`` when the element has no text).

    Returns a list with one item per matching element.
    """
    return [
        node.attrib if node.attrib else node.text
        for node in root.iter(iterKey)
    ]

def _castToFloat16(frame):
    """Return a copy of ``frame`` with each castable column converted to float16.

    Columns whose values cannot be converted (for example text labels) are
    left exactly as they were instead of aborting the whole conversion.
    """
    converted = frame.copy()
    for column in converted.columns:
        try:
            converted[column] = converted[column].astype(np.float16)
        except (ValueError, TypeError):
            # Not a numeric column; keep the parsed values untouched.
            pass
    return converted


def _pointFrame(root, tag, prefix):
    """Tabulate every ``tag`` element, renaming x/y to ``<prefix>_x``/``<prefix>_y``."""
    points = _castToFloat16(pd.DataFrame(getFrameData(root, tag)))
    points = points.rename(columns={'x': prefix + '_x', 'y': prefix + '_y'})
    return points.reset_index(drop=True)


def _featureFrame(root):
    """Pivot the ``featureLocation`` elements into four columns per feature name."""
    features = pd.DataFrame(getFrameData(root, 'featureLocation'))
    if features.empty or 'name' not in features.columns:
        # No feature data in this file: contribute no columns at all.
        return pd.DataFrame([])

    names = features['name']
    # Only the numeric attributes are cast; the text ``name`` column is kept
    # out of the conversion so it can never break (or be altered by) the cast.
    values = _castToFloat16(
        features[['numberMaximumVotes', 'numberVotes', 'x', 'y']])

    perFeature = []
    for feat in names.unique():
        block = values.loc[names == feat].rename(
            columns={'numberMaximumVotes': feat + '_maxVotes',
                     'numberVotes': feat + '_votes',
                     'x': feat + '_x',
                     'y': feat + '_y'})
        perFeature.append(block.reset_index(drop=True))
    # Concatenate once instead of re-copying a growing frame on every pass.
    return pd.concat(perFeature, axis=1)


def readKinData(file_name, inum):
    """Parse one tracker XML file into a single wide DataFrame.

    Parameters
    ----------
    file_name : anything ``ET.parse`` accepts as its source
        The XML file to read.
    inum : file number
        Printed in the progress message and stored in the ``num`` column.

    Returns
    -------
    pandas.DataFrame
        Side-by-side concatenation (aligned on the 0-based row position) of:
        the ``larvaFrameData`` and ``skeleton`` attributes; the ``midpoint``,
        ``centroid``, ``head`` and ``tail`` attributes with x/y renamed to
        ``mid_*``, ``cent_*``, ``head_*`` and ``tail_*``; per-feature
        ``<name>_maxVotes``, ``<name>_votes``, ``<name>_x``, ``<name>_y``
        columns; the stimulus columns; plus ``num`` (``inum``) and
        ``timeIndex`` (the row index values).
    """
    # Single-argument parenthesised form prints the same text on Python 2 and 3.
    print('Parsing File Number  %s' % (inum,))
    root = ET.parse(file_name).getroot()

    larvaFrameData = pd.DataFrame(getFrameData(root, 'larvaFrameData'))
    skeleton = pd.DataFrame(getFrameData(root, 'skeleton'))
    kinetics = pd.concat([larvaFrameData.reset_index(drop=True),
                          skeleton.reset_index(drop=True)], axis=1)

    # NOTE(review): float16 keeps only about three significant decimal digits;
    # it is retained here so the returned dtypes stay as before -- confirm
    # that this precision is really intended for coordinates.
    midpoint = _pointFrame(root, 'midpoint', 'mid')
    centroid = _pointFrame(root, 'centroid', 'cent')
    head = _pointFrame(root, 'head', 'head')
    tail = _pointFrame(root, 'tail', 'tail')
    all_feature_df = _featureFrame(root)

    stimulusIntensity = pd.DataFrame(getFrameData(root, 'intensityPercentage'))
    ledArray = pd.DataFrame(getFrameData(root, 'ledArrayStimulus'))
    if not ledArray.empty:
        stimulus = pd.concat([ledArray.reset_index(drop=True),
                              stimulusIntensity.reset_index(drop=True)], axis=1)
        stimulus = stimulus.rename(columns={0: 'intensity'})
    else:
        # NOTE(review): in this branch the intensity column keeps its default
        # label 0 rather than 'intensity' (unchanged from the earlier
        # behaviour) -- confirm whether callers expect that.
        stimulus = stimulusIntensity

    kinData = pd.concat([kinetics.reset_index(drop=True), midpoint, centroid,
                         head, tail, all_feature_df,
                         stimulus.reset_index(drop=True)], axis=1)
    kinData['num'] = inum
    kinData['timeIndex'] = kinData.index.values

    return kinData

In [13]:
# Parse the sample tracker file once; `root` is the element tree root that the
# exploratory cells below pass to getFrameData.
file_name = 'sample_new_tracker.xml'
tree = ET.parse(file_name)
root = tree.getroot()
# Full-pipeline call, left disabled in this cell:
# kin_data = readKinData(file_name, 0)

In [14]:
# Step-by-step assembly of the per-frame table from the already parsed `root`,
# keeping every intermediate frame as a notebook variable for inspection.
larvaFrameData = pd.DataFrame(getFrameData(root, 'larvaFrameData'))
skeleton = pd.DataFrame(getFrameData(root, 'skeleton'))
kinetics = pd.concat([larvaFrameData, skeleton], axis=1)

# Point-like elements: x/y are renamed with a per-element prefix so the
# columns stay distinguishable after the frames are placed side by side.
midpoint = pd.DataFrame(getFrameData(root, 'midpoint'), dtype=np.float16).rename(
    columns={'x': 'mid_x', 'y': 'mid_y'})
centroid = pd.DataFrame(getFrameData(root, 'centroid'), dtype=np.float16).rename(
    columns={'x': 'cent_x', 'y': 'cent_y'})
head = pd.DataFrame(getFrameData(root, 'head'), dtype=np.float16).rename(
    columns={'x': 'head_x', 'y': 'head_y'})
tail = pd.DataFrame(getFrameData(root, 'tail'), dtype=np.float16).rename(
    columns={'x': 'tail_x', 'y': 'tail_y'})
featureLocation = pd.DataFrame(getFrameData(root, 'featureLocation'), dtype=np.float16)

stimulusIntensity = pd.DataFrame(getFrameData(root, 'intensityPercentage'))
temp = pd.DataFrame(getFrameData(root, 'ledArrayStimulus'))

if temp.empty:
    # No ledArrayStimulus attributes: the intensity values are the whole stimulus.
    stimulus = stimulusIntensity
else:
    # Column 0 holds the intensityPercentage text values; give it a real name.
    stimulus = pd.concat([temp, stimulusIntensity], axis=1).rename(
        columns={0: 'intensity'})

# featureLocation is built above but is not part of this inline concat.
kinData = pd.concat([kinetics, midpoint, centroid, head, tail, stimulus], axis=1)
kinData['num'] = 0
kinData['timeIndex'] = kinData.index.values

In [16]:
# List the column labels of the inline-assembled kinData frame.
kinData.columns

Index([u'allocentricHeadAngle', u'allocentricHeadAngleSpeed', u'behaviorMode',
       u'bodyAngleSpeed', u'centroidSpeed', u'derivedMaxLength',
       u'headAngleSpeed', u'headSpeed', u'midpointSpeed',
       u'percentageOfMaxLength', u'smoothedAllocentricHeadAngle',
       u'smoothedAllocentricHeadAngleSpeed', u'smoothedBodyAngleSpeed',
       u'smoothedHeadAngleSpeed', u'smoothedTailSpeedDotBodyAngle',
       u'tailSpeed', u'tailSpeedDotBodyAngle', u'timeBackingUp',
       u'timeSinceLastBehaviorModeChange', u'timeStopped', u'captureTime',
       u'headToBodyAngle', u'length', u'tailBearing', u'mid_x', u'mid_y',
       u'cent_x', u'cent_y', u'head_x', u'head_y', u'tail_x', u'tail_y',
       u'color', u'height', u'rotation', u'width', u'x', u'y', u'intensity',
       u'num', u'timeIndex'],
      dtype='object')

In [70]:
# Run the function version of the pipeline on the sample file, tagged as file number 0.
kin_data = readKinData(file_name, 0)

Parsing File Number  0


In [72]:
# Preview the first rows of the frame returned by readKinData.
kin_data.head()

Unnamed: 0,allocentricHeadAngle,allocentricHeadAngleSpeed,behaviorMode,bodyAngleSpeed,centroidSpeed,derivedMaxLength,headAngleSpeed,headSpeed,midpointSpeed,percentageOfMaxLength,...,F16_y,color,height,rotation,width,x,y,intensity,num,timeIndex
0,0.0,0.0,STOP,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,CRM,0.2,0.0,0.2,0.64075,3.87475,0.0,0,0
1,176.84275617225978,29473.79269537663,STOP,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,CRM,0.2,0.0,0.2,0.64075,3.87475,0.0,0,1
2,176.84275617225978,0.0,STOP,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,CRM,0.2,0.0,0.2,0.64075,3.87475,0.0,0,2
3,176.84275617225978,0.0,STOP,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,CRM,0.2,0.0,0.2,0.64075,3.87475,0.0,0,3
4,176.84275617225978,0.0,STOP,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,CRM,0.2,0.0,0.2,0.64075,3.87475,0.0,0,4


In [73]:
# Display the notebook-level `temp` frame (the ledArrayStimulus attributes built
# in the inline assembly cell); readKinData's own `temp` is local to the function.
temp

Unnamed: 0,color,height,rotation,width,x,y
0,CRM,0.2,0.0,0.2,0.64075,3.87475
1,CRM,0.2,0.0,0.2,0.64075,3.87475
2,CRM,0.2,0.0,0.2,0.64075,3.87475
3,CRM,0.2,0.0,0.2,0.64075,3.87475
4,CRM,0.2,0.0,0.2,0.64075,3.87475
5,CRM,0.2,0.0,0.2,0.64075,3.87475
6,CRM,0.2,0.0,0.2,0.64075,3.87475
7,CRM,0.2,0.0,0.2,0.64075,3.87475
8,CRM,0.2,0.0,0.2,0.64075,3.87475
9,CRM,0.2,0.0,0.2,0.64075,3.87475


In [63]:
# Reshape the long featureLocation table into one wide frame holding four
# columns per distinct feature name:
# <name>_maxVotes, <name>_votes, <name>_x, <name>_y.
s = featureLocation.copy()
all_feat = s['name'].unique()
feature_frames = []
for feat in all_feat:
    d = s.loc[s['name'] == feat, ['numberMaximumVotes', 'numberVotes', 'x', 'y']].rename(
        columns={'numberMaximumVotes': feat + '_maxVotes',
                 'numberVotes': feat + '_votes',
                 'x': feat + '_x',
                 'y': feat + '_y'})
    # Restart the row index at 0 so every feature lines up row by row.
    feature_frames.append(d.reset_index(drop=True))
# Concatenate once instead of re-copying a growing frame on every iteration;
# pd.concat rejects an empty list, so fall back to an empty frame.
all_feature_df = pd.concat(feature_frames, axis=1) if feature_frames else pd.DataFrame([])

In [64]:
# Display the wide per-feature frame built in the pivot cell.
all_feature_df

Unnamed: 0,F1_maxVotes,F1_votes,F1_x,F1_y,F2_maxVotes,F2_votes,F2_x,F2_y,F3_maxVotes,F3_votes,...,F14_x,F14_y,F15_maxVotes,F15_votes,F15_x,F15_y,F16_maxVotes,F16_votes,F16_x,F16_y
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
