In [1]:
%matplotlib
%matplotlib notebook

# Import Libraries
import cv2
import numpy as np
import numpy.matlib
import pandas as pd
import os
import struct
import matplotlib.pyplot as plt
import seaborn as sns

# Headers for different tables
meta_data_header = ['FrameNumber', 'Time', 'Stage_x', 'Stage_y', 'Centroid_x', 'Centroid_y',
                    'Midpoint_x', 'Midpoint_y', 'Head_x', 'Head_y', 'Tail_x', 'Tail_y', 'MouthHook_x', 'MouthHook_y',
                    'LeftMHhook_x', 'LeftMHhook_y', 'RightMHhook_x', 'RightMHhook_y',
                    'LeftDorsalOrgan_x', 'LeftDorsalOrgan_y', 'RightDorsalOrgan_x', 'RightDorsalOrgan_y',
                    'CenterBolwigOrgan_x', 'CenterBolwigOrgan_y', 'LeftBolwigOrgan_x', 'LeftBolwigOrgan_y',
                    'RightBolwigOrgan_x', 'RightBolwigOrgan_y', 'V9_x', 'V9_y', 'V10_x', 'V10_y', 'V11_x', 'V11_y',
                    'V12_x', 'V12_y', 'V13_x', 'V13_y', 'V14_x', 'V14_y', 'V15_x', 'V15_y', 'V16_x', 'V16_y',
                    'MouthHook_votes', 'LeftMHhook_votes', 'RightMHhook_votes', 'LeftDorsalOrgan_votes',
                    'RightDorsalOrgan_votes', 'CenterBolwigOrgan_votes', 'LeftBolwigOrgan_votes', 'RightBolwigOrgan_votes',
                    'V9_votes', 'V10_votes', 'V11_votes', 'V12_votes', 'V13_votes', 'V14_votes', 'V15_votes',
                    'V16_votes', 'Num_Key_points']

coordinate_header = ['FrameNumber', 'MouthHook_x', 'MouthHook_y', 'LeftMHhook_x', 'LeftMHhook_y',
                     'RightMHhook_x', 'RightMHhook_y', 'LeftDorsalOrgan_x', 'LeftDorsalOrgan_y',
                     'RightDorsalOrgan_x', 'RightDorsalOrgan_y', 'CenterBolwigOrgan_x', 'CenterBolwigOrgan_y',
                     'LeftBolwigOrgan_x', 'LeftBolwigOrgan_y', 'RightBolwigOrgan_x', 'RightBolwigOrgan_y']

distance_header = ['MouthHook', 'LeftMHhook',
                   'RightMHhook', 'LeftDorsalOrgan', 'RightDorsalOrgan',
                   'CenterBolwigOrgan', 'LeftBolwigOrgan', 'RightBolwigOrgan']

def readSplineData(fileName, nFrames):
    fCount = 0;
    spline = {}
    with open(fileName, "rb") as f:
        while (True) and (fCount < nFrames-1):
            fCount += 1
            gap, frameNumber = struct.unpack('>ii', f.read(struct.calcsize('>ii')))

            nPointsToRead =  struct.unpack('>i', f.read(struct.calcsize('>i')))
            fmt = ">%dH" % (nPointsToRead)
            tempX = struct.unpack(fmt, f.read(struct.calcsize(fmt)))

            nPointsToRead =  struct.unpack('>i', f.read(struct.calcsize('>i')))
            fmt = ">%dH" % (nPointsToRead)
            tempY = struct.unpack(fmt, f.read(struct.calcsize(fmt)))
            
            spline[frameNumber-1] = np.vstack((np.asarray(tempX).T, np.asarray(tempY).T))

    return spline

def readContourData(fileName, nFrames):    
    fCount = 0;
    contour = {}
    with open(fileName, "rb") as f:
        while (True) and (fCount < nFrames-1):
            fCount += 1
            frameNumber = struct.unpack('>i', f.read(struct.calcsize('>i')))

            nPointsToRead = struct.unpack('>i', f.read(struct.calcsize('>i')))            
            fmt = ">%dH" %(nPointsToRead)
            buff = f.read(struct.calcsize(fmt))
            tempX = struct.unpack(fmt, buff)
            
            nPointsToRead = struct.unpack('>i', f.read(struct.calcsize('>i')))
            fmt = ">%dH" %(nPointsToRead)
            buff = f.read(struct.calcsize(fmt))
            tempY = struct.unpack(fmt, buff)
            
            frameNumber = frameNumber[0]
            contour[frameNumber-1] = np.vstack((np.asarray(tempX).T, np.asarray(tempY).T))

    return contour


Using matplotlib backend: MacOSX


In [2]:
root = '../expts/'
all_exp = ['20170317_7c1', '20170318_5c0_test_170317', '20180417_7c0']
all_exp_names = ['Same Day', 'Next Day', 'Next Year']

test_list = [('dataCollectedOn_20170317_grp_1/Rawdata_20170317_233847_20170321_034501/', "Rawdata_20170317_233847"),
             ('dataCollectedOn_20170317_grp_2/Rawdata_20170317_232257_20170318_202320/', "Rawdata_20170317_232257"),
             ('dataCollectedOn_20170317_grp_3/Rawdata_20170317_230844_20170319_205326/', "Rawdata_20170317_230844"),
             ('dataCollectedOn_20170317_grp_4/Rawdata_20170317_230136_20170318_152636/', "Rawdata_20170317_230136"),
             ('dataCollectedOn_20170317_grp_5/Rawdata_20170317_224851_20170319_195454/', "Rawdata_20170317_224851"),
             ('dataCollectedOn_20170317_grp_6/Rawdata_20170317_195538_20170321_013857/', "Rawdata_20170317_195538"),
             ('dataCollectedOn_20170317_grp_7/Rawdata_20170317_194352_20170318_134620/', "Rawdata_20170317_194352")]

for idx_bp, bp in enumerate(distance_header[:-3]):
    for idx_exp, exp in enumerate(all_exp):
        for idx_test, (test, test_string) in enumerate(test_list):
            test_dir = os.path.join(root, 'trainingData_'+ exp, test)

            ## Read tracker metadata and annotation
            for fs in os.listdir(test_dir):
                if 'Metadata' in fs:
                    meta_data = pd.read_csv(os.path.join(test_dir, fs), sep=',', header=0, names=meta_data_header)
                if 'Coordinates' in fs:
                    coordinates = pd.read_csv(os.path.join(test_dir, fs), sep=',', names=coordinate_header)

            if (meta_data.empty is False):
                ## Make the metadata frame umber start from 0 index
                meta_data.loc[:, 'FrameNumber'] = meta_data.loc[:, 'FrameNumber'] - 1
                meta_data.set_index('FrameNumber', inplace=True)

                ## Make the annotation frame umber start from 0 index
                coordinates = coordinates.round(0)
                start_frame = coordinates.loc[0, 'FrameNumber'].copy() - 1
                coordinates.loc[:, 'FrameNumber'] = coordinates.loc[:, 'FrameNumber'].copy() - start_frame
                coordinates.set_index('FrameNumber', inplace=True)
                
                ## Get index of no annotations
                no_anno = np.array(np.where(coordinates.values == -1)).T
                temp_index = coordinates.iloc[no_anno[:, 0], :].index.values
                temp_cols = coordinates.iloc[:,no_anno[:, 1]].columns.values
                no_anno_cols = []
                no_anno_index = []        
                for i in range(0, len(temp_cols), 2):
                    no_anno_cols.append(temp_cols[i][:-2])
                    no_anno_index.append(temp_index[i])
                no_anno_df = pd.DataFrame({'index': no_anno_index, 'col': no_anno_cols})

                ## Inner join to take only the frames intersecting the annotation and tracker metadata
                ## Use when want to see only annotated frames 
                meta_coord = pd.merge(meta_data, coordinates, on='FrameNumber', how='inner', suffixes=('_T', '_A'))

                ## Outer join to take only the frames intersecting the annotation and tracker metadata
                ## Use when want to see all frames 
        #         meta_coord = pd.merge(meta_data, coordinates, on='FrameNumber', how='outer', suffixes=('_T', '_A'))

                ## Calculate distance between annotation and the tracker metadata
                for head in distance_header:
                    temp_x = (meta_coord[head+'_x_T'].values - meta_coord[head+'_x_A'].values)**2
                    temp_y = (meta_coord[head+'_y_T'].values - meta_coord[head+'_y_A'].values)**2
                    meta_coord.loc[:, head+'_dist'] = np.sqrt(temp_x + temp_y)*2.75
                    
#                 ## Replace the no annotation with NA
                for rind, rval in no_anno_df.iterrows():
                    meta_coord.loc[rval['index'], rval['col']+'_dist'] = np.nan
                    
#                 ## Replace NaN values with a high negative number
# #                 meta_coord.fillna(-2, inplace=True)
                meta_coord.dropna(axis=0, how='any', inplace=True)
    
                ## Add the video info
                meta_coord['test'] = test
                meta_coord['test_string'] = test_string

                if idx_test == 0:
                    dist_all_test =  np.array(meta_coord.reset_index().loc[:, bp+'_dist'].values[2:])
                    vid_all_test =  np.array(meta_coord.reset_index().reset_index().loc[:, ['index', 'test', 'test_string']].values[2:])
                else:
                    temp_dist = np.array(meta_coord.reset_index().loc[:, bp+'_dist'].values[2:])
                    temp_vid = np.array(meta_coord.reset_index().reset_index().loc[:, ['index', 'test', 'test_string']].values[2:])
                    dist_all_test = np.concatenate((dist_all_test, temp_dist))
                    vid_all_test = np.concatenate((vid_all_test, temp_vid))                

        if idx_exp == 0:
            dist_all_exp = np.array(dist_all_test)
            vid_all_exp = np.array(vid_all_test)
        else:
            dist_all_exp = np.column_stack((dist_all_exp, dist_all_test))
            vid_all_exp = np.column_stack((vid_all_exp, vid_all_test))
        
    if idx_bp == 0:
        dist_all_bp = np.array(dist_all_exp)
        vid_all_bp = np.array(vid_all_exp)
    else:
        dist_all_bp = np.column_stack((dist_all_bp, np.column_stack((np.ones((dist_all_exp.shape[0], 3))*200, dist_all_exp))))
        vid_all_bp = np.column_stack((vid_all_bp, np.column_stack((np.ones((vid_all_exp.shape[0], 3))*200, vid_all_exp))))        

video_info = pd.DataFrame(vid_all_bp[:, 0:3], columns=['FrameNumber', 'test', 'test_string'])
df = pd.DataFrame(dist_all_bp)
df2 = df.copy()

In [3]:
gap_cols = [3, 4, 5, 9, 10, 11, 15, 16, 17, 21, 22, 23]
df2.iloc[:, gap_cols] = np.nan
fig = plt.figure(figsize=(18, 10))
g = sns.boxplot(data=df2, showfliers=False)
sns.despine()
tickLabels = np.matlib.repmat((all_exp_names + ['','','']), 1, 5)[0][:-3]
g.set_xticks(g.get_xticks()+0.5)
g.set_xticklabels(tickLabels, rotation=45, ha="right", fontsize=10)
plt.ylim([-10, 251])
g.set_yticks(np.arange(0, 251, 50))
g.set_yticklabels(g.get_yticks(), fontsize=10)
plt.tick_params(axis='x', bottom=False, top=False)
plt.ylabel('Error [microns]', fontsize=20)
plt.savefig('2017_boxplot.png', bbox_inches='tight')

In [4]:
# df[(df > 0) & (df <= 25)] = 25
# df[(df > 25) & (df <= 50)] = 50
# df[(df > 50) & (df <= 75)] = 75
# df[(df > 75) & (df <= 100)] = 100
# df[(df > 100) & (df <= 125)] = 125
# df[df > 125] = 150

# df[(df > 0) & (df <= 10)] = 5
# df[(df > 10) & (df <= 20)] = 15
# df[(df > 20) & (df <= 30)] = 25
# df[(df > 30) & (df <= 40)] = 35
# df[(df > 40) & (df <= 50)] = 45
# df[(df > 50) & (df <= 60)] = 55
# df[(df > 60) & (df <= 70)] = 65
# df[(df > 70) & (df <= 80)] = 75
# df[(df > 80) & (df <= 90)] = 85
# df[(df > 90) & (df <= 100)] = 95
# df[(df > 100) & (df <= 125)] = 125
# df[(df > 125)] = 150

df[(df > 200)] = 200

# df = df.sort_values(by=[39, 52])

In [5]:
# # g = sns.clustermap(df, yticklabels=False,  cmap=sns.light_palette("navy", reverse=True))
# # g = sns.clustermap(df, yticklabels=False,  cmap=sns.light_palette((210, 90, 60), input="husl"))

# fig = plt.figure(figsize=(10, 10))
# # g = sns.heatmap(df, yticklabels=False,
# #                 cmap=sns.light_palette('green', 12),
# #                 cbar=False)
# g = sns.heatmap(df, yticklabels=False,
#                 cmap=sns.color_palette("hot", 12),
#                 cbar=True)
# g.set_xticks(np.arange(1, df.shape[1]+1))
# tickLabels = np.matlib.repmat((all_exp_names + ['','','']), 1, 5)[0][:-3]
# g.set_xticklabels(tickLabels, rotation=45, ha="right", fontsize=8)
# plt.tick_params(axis='x', bottom=False, top=False)
# plt.ylabel('')
# plt.savefig('heatmap.png', bbox_inches='tight')
# # plt.colorbar.set_ticks([0, 25, 50, 75, 100])
# # cax = plt.gcf().axes[-1]
# # cax.set_yticks([0, 25, 50, 75, 100, 125])
# # cax.tick_params(labelsize=20)

In [6]:
gc = sns.clustermap(df, yticklabels=False,  cmap=sns.color_palette("hot", 8),
                   robust=True, figsize=(18, 10), col_cluster=False, row_cluster=True,
                  vmin=0, vmax=200, method='ward')
tickLabels = np.matlib.repmat((all_exp_names + ['','','']), 1, 5)[0][:-3]
gc.ax_heatmap.set_xticklabels(tickLabels, rotation=45, ha="right", fontsize=12)
gc.ax_heatmap.tick_params(axis='x', bottom=False, top=False)
plt.ylabel('')
plt.show()

In [7]:
new_ind = gc.dendrogram_row.reordered_ind
df_cluster = df.iloc[new_ind, :].copy()
df_cluster.iloc[:, gap_cols] = np.nan


# video_info_cluster = video_info.iloc[new_ind, :].copy()
# video_info_cluster.reset_index(drop=True, inplace=True)

In [8]:
fig = plt.figure(figsize=(18, 10))
g = sns.heatmap(df_cluster, yticklabels=False,
                cmap=sns.color_palette("hot", 8),
                cbar=True, cbar_kws={"shrink": 0.5, "ticks": np.arange(0, 225, 25)})
g.set_xticks(np.arange(1, df.shape[1]+1))
tickLabels = np.matlib.repmat((all_exp_names + ['','','']), 1, 5)[0][:-3]
g.set_xticklabels(tickLabels, rotation=45, ha="right", fontsize=8)
plt.tick_params(axis='x', bottom=False, top=False)
plt.tick_params(axis='y', left=False, right=False)
plt.tick_params(axis='y', left=False, right=False)
plt.ylabel('')
plt.show()
plt.savefig('2017_heatmap_clustered_sns.png', bbox_inches='tight')

In [9]:
from sklearn.cluster import AgglomerativeClustering
# Affinity = {“euclidean”, “l1”, “l2”, “manhattan”, “cosine”}
# Linkage = {“ward”, “complete”, “average”}
df_agglo = df.copy()
Hclustering = AgglomerativeClustering(n_clusters=3, affinity='euclidean', linkage='ward')
col_for_clustering = df_agglo.columns.values
Hclustering.fit(df_agglo.iloc[:, col_for_clustering].values)
df_agglo['labels'] = Hclustering.labels_
df_agglo.sort_values(['labels'], inplace=True)
df_agglo.iloc[:, gap_cols] = np.nan
lab = df_agglo.labels
df_agglo.drop(columns=['labels'], inplace=True)
df_agglo_clust_col = df_agglo.iloc[:, col_for_clustering]

In [10]:
fig = plt.figure(figsize=(7, 10))
g = sns.heatmap(df_agglo_clust_col, yticklabels=False,
                cmap=sns.color_palette("hot", 8),
                cbar=True, cbar_kws={"shrink": 0.3, "ticks": np.arange(0, 225, 25)})
g.set_xticks(np.arange(1, df_agglo_clust_col.shape[1]+1))
tickLabels = np.matlib.repmat((all_exp_names + ['','','']), 1, 5)[0][:-3]
g.set_xticklabels(tickLabels, rotation=45, ha="right", fontsize=8)
plt.tick_params(axis='x', bottom=False, top=False)
plt.tick_params(axis='y', left=False, right=False)
plt.tick_params(axis='y', left=False, right=False)
plt.ylabel('')
plt.show()
plt.savefig('2017_heatmap_clustered_agglo_imp_cols.png', bbox_inches='tight')

In [11]:
fig = plt.figure(figsize=(18, 10))
g = sns.heatmap(df_agglo, yticklabels=False,
                cmap=sns.color_palette("hot", 8),
                cbar=True, cbar_kws={"shrink": 0.3, "ticks": np.arange(0, 225, 25)})
g.set_xticks(np.arange(1, df_agglo.shape[1]+1))
tickLabels = np.matlib.repmat((all_exp_names + ['','','']), 1, 5)[0][:-3]
g.set_xticklabels(tickLabels, rotation=45, ha="right", fontsize=8)
plt.tick_params(axis='x', bottom=False, top=False)
plt.tick_params(axis='y', left=False, right=False)
plt.tick_params(axis='y', left=False, right=False)
plt.ylabel('')
plt.show()
plt.savefig('2018_heatmap_clustered_agglo.png', bbox_inches='tight')

In [12]:
lab.value_counts()

1    951
2    350
0    269
Name: labels, dtype: int64

In [13]:
fig = plt.figure(figsize=(6, 6))
s = lab.value_counts()
s = s.sort_index()
plt.plot(s.index.values, (s/s.sum())*100)
plt.show()

In [14]:
video_info_cluster = video_info.iloc[lab.index.values, :].copy()
failed_all_df = video_info_cluster.iloc[lab[lab==1].index.values, :]
failed_more_do_df = video_info_cluster.iloc[lab[lab==2].index.values, :]

In [15]:
failed_more_do_df.shape

(350, 3)

In [16]:
video_path = '../expts/videos_20170317/'
failed_all_path = '../expts/failed_all_2017/'
failed_more_do_path = '../expts/failed_more_do_2017/'

for row_index, row in failed_all_df.iterrows():
    video_file = os.path.join(video_path, row['test_string']+"_crop.avi")
    cap = cv2.VideoCapture(video_file)
    if cap.isOpened():
        cap.set(1, row['FrameNumber'])
        ret, originalFrame = cap.read()
        if ret == True:
            name = failed_all_path+"failedAll_%04d.png"%(row_index)
            cv2.imwrite(name, originalFrame)
        else:
            print 'Cannot read Frame: ', row['test_string']
    else:
        print ".......Not Found"
    cap.release()
    cv2.waitKey(200)
    
for row_index, row in failed_more_do_df.iterrows():
    video_file = os.path.join(video_path, row['test_string']+"_crop.avi")
    cap = cv2.VideoCapture(video_file)
    if cap.isOpened():
        cap.set(1, row['FrameNumber'])
        ret, originalFrame = cap.read()
        if ret == True:
            name = failed_more_do_path+"failedMoreInDO_%04d.png"%(row_index)
            cv2.imwrite(name, originalFrame)
        else:
            print 'Cannot read Frame'
    else:
        print ".......Not Found"
    cap.release()
    cv2.waitKey(200)

.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Fou

.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Fou

.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found
.......Not Found


In [17]:
failed_all_df.head()

Unnamed: 0,FrameNumber,test,test_string
673,176,dataCollectedOn_20170317_grp_3/Rawdata_2017031...,Rawdata_20170317_230844
202,204,dataCollectedOn_20170317_grp_1/Rawdata_2017031...,Rawdata_20170317_233847
391,130,dataCollectedOn_20170317_grp_2/Rawdata_2017031...,Rawdata_20170317_232257
676,179,dataCollectedOn_20170317_grp_3/Rawdata_2017031...,Rawdata_20170317_230844
764,71,dataCollectedOn_20170317_grp_4/Rawdata_2017031...,Rawdata_20170317_230136


In [18]:
failed_all_df_ = df_agglo.iloc[lab[lab==1].index.values, :]
failed_all_df_.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
673,7.778175,8.696264,8.696264,,,,16.5,150.749171,8.25,,...,,13.75,12.298374,33.568028,,,,5.5,11.667262,24.902309
202,74.453425,85.294343,60.562468,,,,7.778175,145.438346,28.710843,,...,,63.48868,15.556349,15.556349,,,,200.0,45.769122,50.558135
391,14.022304,13.75,17.608592,,,,13.75,2.75,6.149187,,...,,34.457401,35.855613,59.364341,,,,32.070235,93.338095,20.020302
676,25.943448,27.084359,25.353747,,,,8.25,20.020302,13.75,,...,,11.667262,0.0,12.298374,,,,79.17899,16.035118,19.445436
764,3.889087,16.727597,6.149187,,,,27.223611,34.015621,33.114385,,...,,21.478187,23.49601,19.445436,,,,25.353747,13.75,27.084359


In [19]:
### What are the frames where the error is so high
## Important for validation
# s = np.where(dist_all_bp[:, 39] > 500)
# df2.loc[s[0], :].head()
# video_info.reset_index(drop=True, inplace=True)
# video_info.loc[s[0], :]