In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import pandas as pd

In [3]:
# Directory holding the CSV files with the answer responses.
# The location can be overridden through the EVALUATION_STUDY_DIR environment
# variable so the notebook is not tied to a single machine; the original
# absolute path is kept as the default.
import os

gdrive_path = os.environ.get(
    "EVALUATION_STUDY_DIR",
    "/Users/dorislee/Google Drive/Turn/user_study/evaluation_study/transcription_and_analysis/",
)
# Later cells build file paths with plain string concatenation
# (gdrive_path+filename), so make sure the directory ends with a separator.
gdrive_path = os.path.join(gdrive_path, "")
# User/task assignment table; the functions below merge it into each prediction file.
user_task =  pd.read_csv(gdrive_path+"UserTaskAssignment.csv")

In [4]:
# Ground-truth distributions that user predictions are compared against,
# keyed by the name passed as `gt_task_key` to compute_prediction_distances.
# The values of each entry sum to 100 and follow the order of the attribute
# columns noted on each line.
prediction_ground_truth = {
    'Police1': [55.1,4.2,40.7], # ticket, warn, arrest
    'Police2': [53.43,12.02,34.55], # ticket, warn, arrest
    'Autism1': [17.3,82.7], # NO, YES
    'Autism2': [41,59] # NO, YES
}

In [5]:
def euclidean_dist(x,y):
    """Return the Euclidean (L2) distance between two equally shaped vectors.

    Parameters
    ----------
    x, y : array-like of numbers
        The two vectors. Both are converted to float arrays, so object-dtype
        inputs (e.g. values taken from a mixed-type pandas row) are handled
        numerically.

    Returns
    -------
    numpy.float64
        sqrt(sum((x - y) ** 2)).

    Raises
    ------
    ValueError
        If `x` and `y` do not have the same shape. Without this check NumPy
        would silently broadcast e.g. a length-1 vector against a longer one
        and return a meaningless distance.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError(
            "euclidean_dist expects inputs of identical shape, got %s and %s"
            % (x.shape, y.shape)
        )
    return np.sqrt(np.sum((x-y)**2))

In [6]:
def compute_prediction_distances(filename,gt_task_key,aggregate=True):
    """Score each user's prediction by its Euclidean distance to the ground truth.

    Reads the prediction CSV `filename` from `gdrive_path`, merges it with the
    module-level `user_task` table, and compares each row's attribute values
    with `prediction_ground_truth[gt_task_key]`.

    Parameters
    ----------
    filename : str
        CSV file name, appended to `gdrive_path`. A name containing "Police"
        selects the ticket/warn/arrest columns and the "Task1" assignment
        column; any other name selects the NO/YES columns and "Task2".
    gt_task_key : str
        Key into `prediction_ground_truth`.
    aggregate : bool, default True
        When True, return the per-task mean and standard deviation of the
        distances; when False, return one row per user.

    Returns
    -------
    pandas.DataFrame
        aggregate=False: columns "User", "Task", "Distance".
        aggregate=True: columns "Task", "Distance" (mean), "SD"
        (standard deviation as computed by pandas `std`).

    Raises
    ------
    KeyError
        If `gt_task_key` is not in `prediction_ground_truth`.
    ValueError
        If the ground truth does not have one value per attribute column.
    """
    prediction = pd.read_csv(gdrive_path+filename)
    if "Police" in filename:
        attributes = ["ticket","warn","arrest"]
        task_name = "Task1"
    else:
        attributes = ["NO","YES"]
        task_name = "Task2"
    prediction = prediction.merge(user_task).drop("Note",axis=1)

    # The ground truth is identical for every row, so look it up once
    # instead of once per iteration.
    gt_prediction = prediction_ground_truth[gt_task_key]
    if len(gt_prediction) != len(attributes):
        raise ValueError(
            "ground truth %r has %d values but %r provides %d attribute columns"
            % (gt_task_key, len(gt_prediction), filename, len(attributes))
        )

    # Computing Euclidean Distance: one distance per merged row.
    attribute_rows = prediction[attributes].values
    dist_result = pd.DataFrame(
        {
            "User": prediction["User"].values,
            "Task": prediction[task_name].values,
            "Distance": [euclidean_dist(values,gt_prediction) for values in attribute_rows],
        },
        columns=["User","Task","Distance"],
    )
    if not aggregate:
        return dist_result

    # Mean and standard deviation in a single groupby pass. This avoids
    # assigning the result of a second groupby(...).apply(...) by index
    # alignment, whose return shape is not stable across pandas versions.
    distance_aggregate_result = (
        dist_result.groupby("Task")["Distance"]
        .agg(["mean","std"])
        .reset_index()
        .rename(columns={"mean":"Distance","std":"SD"})
    )
    return distance_aggregate_result

In [7]:
def compute_prediction_aggregate_stats(filename):
    """Load a prediction CSV and attach each user's assignment as "Algorithm".

    The file is read from `gdrive_path` and merged with the module-level
    `user_task` table. The "Note" column and the assignment column of the
    other task are dropped; the assignment column of this file's task is
    renamed to "Algorithm".

    Parameters
    ----------
    filename : str
        CSV file name, appended to `gdrive_path`. A name containing "Police"
        is treated as "Task1", any other name as "Task2".

    Returns
    -------
    pandas.DataFrame
        The merged table, one row per merged record; no aggregation is applied.
    """
    merged = pd.read_csv(gdrive_path+filename)

    # Decide which assignment column belongs to this file and which one to discard.
    if "Police" in filename:
        task_name, not_task_name = "Task1", "Task2"
    else:
        task_name, not_task_name = "Task2", "Task1"

    merged = merged.merge(user_task)
    merged = merged.drop("Note",axis=1)
    merged = merged.drop(not_task_name,axis=1)
    return merged.rename(columns={task_name:"Algorithm"})

In [8]:
import altair as alt

In [9]:
# Load the Prediction1 (Autism) file on its own.
# NOTE(review): neither `filename` nor this top-level `prediction` is read by
# any later cell visible here (the functions above use their own locals) —
# possibly leftover exploration; confirm before removing.
filename = "Prediction1(Autism).csv"
prediction = pd.read_csv(gdrive_path+filename)

In [10]:
# One entry per prediction file: (file name, ground-truth key, Type, Dataset).
# Driving the load from this table replaces four copy-pasted stanzas and the
# throw-away names df/df2/df3/df4.
distance_sources = [
    ("Prediction1(Autism).csv","Autism1","Shallow","Autism"),
    ("Prediction1(Police).csv","Police1","Shallow","Police"),
    ("Prediction2(Police).csv","Police2","Deep","Police"),
    ("Prediction2(Autism).csv","Autism2","Deep","Autism"),
]
# The second character of a task code (e.g. the "2" in "B2") selects the condition.
condition_by_digit = {1:"Storyboard", 2:'Cluster', 3:'BFS'}

distance_frames = []
for source_file, gt_key, prediction_type, dataset in distance_sources:
    frame = compute_prediction_distances(source_file,gt_key,aggregate=False)
    frame["Type"] = prediction_type
    frame["Dataset"] = dataset
    distance_frames.append(frame)

df_Distance= pd.concat(distance_frames)

df_Distance["Condition"] = df_Distance["Task"].apply(lambda x: condition_by_digit[int(x[1])])
df_Distance = df_Distance.drop("Task",axis=1)

In [23]:
import scipy
import scipy.stats

In [50]:
# Keep only the rows whose Type is "Shallow".
is_shallow = df_Distance["Type"]=="Shallow"
df_Distance_shallow = df_Distance[is_shallow]

In [51]:
# Shallow-type distances for the Autism dataset, one list per condition.
autism_distance = df_Distance_shallow[df_Distance_shallow["Dataset"]=="Autism"]
clust = list(autism_distance[autism_distance["Condition"]=="Cluster"]["Distance"])
sbd = list(autism_distance[autism_distance["Condition"]=="Storyboard"]["Distance"])
bfs = list(autism_distance[autism_distance["Condition"]=="BFS"]["Distance"])

In [52]:
# Mann-Whitney U test of BFS, then Cluster, against Storyboard.
# NOTE(review): `alternative` is not passed, so the reported p-value depends on
# the installed SciPy's default for that argument — confirm the intended test.
for other_condition in (bfs, clust):
    print (scipy.stats.mannwhitneyu(other_condition,sbd))

MannwhitneyuResult(statistic=0.0, pvalue=0.002348848744665035)
MannwhitneyuResult(statistic=18.0, pvalue=0.46666704000752796)


In [53]:
# Shallow-type distances for the Police dataset, one list per condition.
police_distance = df_Distance_shallow[df_Distance_shallow["Dataset"]=="Police"]
clust = list(police_distance[police_distance["Condition"]=="Cluster"]["Distance"])
sbd = list(police_distance[police_distance["Condition"]=="Storyboard"]["Distance"])
bfs = list(police_distance[police_distance["Condition"]=="BFS"]["Distance"])

In [54]:
# Mann-Whitney U test of BFS, then Cluster, against Storyboard.
# NOTE(review): `alternative` is not passed, so the reported p-value depends on
# the installed SciPy's default for that argument — confirm the intended test.
for other_condition in (bfs, clust):
    print (scipy.stats.mannwhitneyu(other_condition,sbd))

MannwhitneyuResult(statistic=4.0, pvalue=0.015319493968851633)
MannwhitneyuResult(statistic=1.0, pvalue=0.00411950941286232)


In [49]:

# Point marks: mean Distance per Condition, faceted by Dataset (rows) and Type (columns).
mean_distance_x = alt.X(
    'mean(Distance)',
    scale=alt.Scale(zero=False),
    axis=alt.Axis(title='Distance')
)
points = alt.Chart(df_Distance).mark_point(filled=True).encode(
    mean_distance_x,
    alt.Y('Condition'),
    color=alt.value('black')
).facet(
    alt.Row('Dataset'),
    alt.Column("Type")
)

# Rule marks spanning the ci0..ci1 aggregates of Distance, with the same faceting.
ci_lower_x = alt.X('ci0(Distance)')
ci_upper_x2 = alt.X2("ci1(Distance)")
error_bars = alt.Chart(df_Distance).mark_rule().encode(
    ci_lower_x,
    ci_upper_x2,
    alt.Y('Condition')
).facet(
    alt.Row('Dataset'),
    alt.Column("Type")
)
# NOTE: combining the two charts (error_bars+points, then filtering per Type
# with hconcat and per Dataset with vconcat) was tried here and left disabled;
# the two charts are kept as separate objects.


In [50]:
# points+error_bars

In [51]:
# combined = alt.layer(error_bars, points, data=df)
# # combined.facet(
# #     alt.Row('Dataset'),
# #     alt.Column("Type")
# # )
# combined

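Layering the faceted point chart and the error-bar chart does not work in Altair, so I combined them manually in Vega-Lite. The spec below generates the visualization; it is Vega-Lite JSON rather than Python, so running it as a code cell raises the NameError shown after it.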

In [25]:
{
  "$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json",
  "config": {
    "view": {
      "height": 300,
      "width": 200
    }
  },
  "data": {
    "name": "data-427b73d529a522ea363d890c36599576"
  },
  "datasets": {
    "data-427b73d529a522ea363d890c36599576": [
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 60.38691911333116,
        "Task": "B2",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 47.65899705197331,
        "Task": "B3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Task": "B1",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 43.41635636485402,
        "Task": "B3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Task": "B1",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Task": "B2",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Task": "B2",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 46.24478348960021,
        "Task": "B3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 25.031580054003783,
        "Task": "B1",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 46.24478348960021,
        "Task": "B3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Task": "B1",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Task": "B2",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Task": "B2",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 48.931789258109085,
        "Task": "B3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Task": "B1",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 60.38691911333116,
        "Task": "B3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 10.889444430272833,
        "Task": "B1",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 32.10264786586926,
        "Task": "B2",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 13.120213412898437,
        "Task": "A1",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.491532946846993,
        "Task": "A1",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 58.492221705112215,
        "Task": "A2",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 21.86641260015003,
        "Task": "A2",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 60.10108152105085,
        "Task": "A3",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 36.41620518395622,
        "Task": "A3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 10.979070998950686,
        "Task": "A1",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.792643079096679,
        "Task": "A1",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 18.65851012272952,
        "Task": "A2",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 52.85962542432551,
        "Task": "A2",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 71.77422935845428,
        "Task": "A3",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 81.05640011744909,
        "Task": "A3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 19.751961927869345,
        "Task": "A1",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 0.9899494936611699,
        "Task": "A1",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 42.013569236616874,
        "Task": "A2",
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 31.51095047757208,
        "Task": "A2",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 11.664475984801035,
        "Task": "A3",
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 12.055704044144417,
        "Task": "A3",
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 4.5659391147933635,
        "Task": "A1",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 9.80549845749822,
        "Task": "A1",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 4.5659391147933635,
        "Task": "A2",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Task": "A2",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 16.54532562387335,
        "Task": "A3",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 96.43416303364695,
        "Task": "A3",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 13.095793217671089,
        "Task": "A1",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.7489110232688665,
        "Task": "A1",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 13.577473991873454,
        "Task": "A2",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 8.243045553677353,
        "Task": "A2",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 19.305123672227538,
        "Task": "A3",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 10.521777416387406,
        "Task": "A3",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 15.87916244642645,
        "Task": "A1",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Task": "A1",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 13.577473991873454,
        "Task": "A2",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 18.9485566732667,
        "Task": "A2",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Task": "A3",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 71.28778156177958,
        "Task": "A3",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Task": "B2",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 19.79898987322333,
        "Task": "B3",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Task": "B1",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Task": "B3",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 15.556349186104045,
        "Task": "B1",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 15.556349186104045,
        "Task": "B2",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 36.76955262170047,
        "Task": "B2",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 22.627416997969522,
        "Task": "B3",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Task": "B1",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Task": "B3",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 43.840620433565945,
        "Task": "B1",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Task": "B2",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Task": "B2",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Task": "B3",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 43.840620433565945,
        "Task": "B1",
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Task": "B3",
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 12.020815280171307,
        "Task": "B1",
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Task": "B2",
        "Type": "Deep"
      }
    ]
  },
  "facet": {
    "column": {
      "field": "Type",
      "type": "nominal",
      "sort":["Shallow","Deep"]
    },
    "row": {
      "field": "Dataset",
      "type": "nominal",
      "sort":["Police","Autism"]
    }
  },
  "spec":{
    "layer":[
    {
      "encoding": {
        "x": {
          "aggregate": "ci0",
          "field": "Distance",
          "type": "quantitative"
        },
        "x2": {
          "aggregate": "ci1",
          "field": "Distance",
          "type": "quantitative"
        },
        "y": {
          "field": "Condition",
          "type": "nominal"
        }
      },
      "mark": "rule"
    },
    {
    "encoding": {
      "color": {
        "value": "black"
      },
      "x": {
        "aggregate": "mean",
        "axis": {
          "title": "Distance"
        },
        "field": "Distance",
        "scale": {
          "zero": false
        },
        "type": "quantitative"
      },
      "y": {
        "field": "Condition",
        "type": "nominal"
      }
    },
    "mark": {
      "filled": true,
      "type": "point"
    }
  }
    ]
  } 
  
}

NameError: name 'false' is not defined

In [38]:
# One entry per prediction file: (file name, Type, Dataset).
# Driving the load from this table replaces four copy-pasted stanzas and the
# throw-away names df/df2/df3/df4.
surprise_sources = [
    ("Prediction1(Autism).csv","Shallow","Autism"),
    ("Prediction1(Police).csv","Shallow","Police"),
    ("Prediction2(Police).csv","Deep","Police"),
    ("Prediction2(Autism).csv","Deep","Autism"),
]
# The second character of an algorithm code (e.g. the "2" in "B2") selects the condition.
condition_by_digit = {1:"Storyboard", 2:'Cluster', 3:'BFS'}

surprise_frames = []
for source_file, prediction_type, dataset in surprise_sources:
    frame = compute_prediction_aggregate_stats(source_file)
    frame["Type"] = prediction_type
    frame["Dataset"] = dataset
    surprise_frames.append(frame)

# The Police and Autism files use different attribute columns, so the frames
# are not column-aligned. Passing `sort` explicitly silences the pandas
# FutureWarning about the changing default; column order is irrelevant here
# because only the named columns are kept below.
df_Surprise= pd.concat(surprise_frames, sort=False)

df_Surprise["Condition"] = df_Surprise["Algorithm"].apply(lambda x: condition_by_digit[int(x[1])])

df_Surprise = df_Surprise[["User","Surprisingness","Type","Dataset","Condition"]]

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


In [46]:
# Surprisingness ratings of the "Shallow" type for the Autism dataset,
# one list per condition.
is_shallow_surprise = df_Surprise["Type"]=="Shallow"
df_Surprise_shallow = df_Surprise[is_shallow_surprise]
autism_surprise = df_Surprise_shallow[df_Surprise_shallow["Dataset"]=="Autism"]
clust = list(autism_surprise[autism_surprise["Condition"]=="Cluster"]["Surprisingness"])
sbd = list(autism_surprise[autism_surprise["Condition"]=="Storyboard"]["Surprisingness"])
bfs = list(autism_surprise[autism_surprise["Condition"]=="BFS"]["Surprisingness"])

In [47]:
# Mann-Whitney U test of BFS, then Cluster, against Storyboard.
# NOTE(review): `alternative` is not passed, so the reported p-value depends on
# the installed SciPy's default for that argument — confirm the intended test.
for other_condition in (bfs, clust):
    print (scipy.stats.mannwhitneyu(other_condition,sbd))

MannwhitneyuResult(statistic=1.0, pvalue=0.0037894815045318104)
MannwhitneyuResult(statistic=10.0, pvalue=0.10949411704084216)


In [48]:
# Surprisingness ratings of the "Shallow" type for the Police dataset,
# one list per condition.
police_surprise = df_Surprise_shallow[df_Surprise_shallow["Dataset"]=="Police"]
clust = list(police_surprise[police_surprise["Condition"]=="Cluster"]["Surprisingness"])
sbd = list(police_surprise[police_surprise["Condition"]=="Storyboard"]["Surprisingness"])
bfs = list(police_surprise[police_surprise["Condition"]=="BFS"]["Surprisingness"])

In [49]:
# Mann-Whitney U test of BFS, then Cluster, against Storyboard.
# NOTE(review): `alternative` is not passed, so the reported p-value depends on
# the installed SciPy's default for that argument — confirm the intended test.
for other_condition in (bfs, clust):
    print (scipy.stats.mannwhitneyu(other_condition,sbd))

MannwhitneyuResult(statistic=8.0, pvalue=0.059055634434495546)
MannwhitneyuResult(statistic=6.5, pvalue=0.03726310015681925)


In [39]:
# Point marks: mean Surprisingness per Condition, faceted by Dataset (rows)
# and Type (columns).
points = alt.Chart(df_Surprise).mark_point(filled=True).encode(
    alt.X(
        'mean(Surprisingness)',
        scale=alt.Scale(zero=False),
        # The axis shows Surprisingness; the earlier 'Distance' title was
        # carried over from the distance chart.
        axis=alt.Axis(title='Surprisingness')
    ),
    alt.Y('Condition'),
    color=alt.value('black')
).facet(
    alt.Row('Dataset'),
    alt.Column("Type")
)

# Rule marks spanning the ci0..ci1 aggregates of Surprisingness, same faceting.
error_bars = alt.Chart(df_Surprise).mark_rule().encode(
    alt.X('ci0(Surprisingness)'),
    alt.X2("ci1(Surprisingness)"),
    alt.Y('Condition')
).facet(
    alt.Row('Dataset'),
    alt.Column("Type")
)


In [40]:
# print (points.to_json())

In [55]:
# print (error_bars.to_json())

In [None]:
{
  "$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json",
  "config": {
    "view": {
      "height": 300,
      "width": 200
    }
  },
"data": {
    "name": "data-dc3100d223325835dc00c5242a558b46"
  },
  "datasets": {
    "data-dc3100d223325835dc00c5242a558b46": [
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 10,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 9,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 1,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 4,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 3,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 5,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 1,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 8,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 7,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 10,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 2,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 3,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 2,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 10,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 1,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 9,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 1,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 7,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 4,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 3,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 8,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 8,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 5,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 3,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 8,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 8,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 8,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 4,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 1,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 10,
        "Type": "Shallow"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 7,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Shallow"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Shallow"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 4,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 1,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 9,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 5,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 4,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 1,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 5,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 3,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 5,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Surprisingness": 1,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Surprisingness": 6,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 2,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Surprisingness": 8,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 10,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 7,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 3,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 1,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 6,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 5,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 1,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 3,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 8,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 7,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 7,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 2,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 8,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 8,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 3,
        "Type": "Deep"
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Surprisingness": 5,
        "Type": "Deep"
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Surprisingness": 2,
        "Type": "Deep"
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Surprisingness": 1,
        "Type": "Deep"
      }
    ]
  },
  "facet": {
    "column": {
      "field": "Type",
      "type": "nominal",
      "sort":["Shallow","Deep"]
    },
    "row": {
      "field": "Dataset",
      "type": "nominal",
      "sort":["Police","Autism"]
    }
  },
  "spec":{
    "layer":[
    {
    "encoding": {
      "color": {
        "value": "black"
      },
      "x": {
        "aggregate": "mean",
        "axis": {
          "title": "Surprisingness"
        },
        "field": "Surprisingness",
        "scale": {
          "zero": false
        },
        "type": "quantitative"
      },
      "y": {
        "field": "Condition",
        "type": "nominal"
      }
    },
    "mark": {
      "filled": true,
      "type": "point"
    }
  },
   {
    "encoding": {
      "x": {
        "aggregate": "ci0",
        "field": "Surprisingness",
        "type": "quantitative"
      },
      "x2": {
        "aggregate": "ci1",
        "field": "Surprisingness",
        "type": "quantitative"
      },
      "y": {
        "field": "Condition",
        "type": "nominal"
      }
    },
    "mark": "rule"
  }
    ]
  } 
  
}

In [58]:
# Combine the distance and surprisingness tables into one frame, matching rows
# on the shared key columns (pandas' default inner join).
df_all = pd.merge(
    df_Distance,
    df_Surprise,
    on=["User", "Type", "Dataset", "Condition"],
)

In [59]:
# Mean surprisingness per condition, drawn as filled black points and split
# into one panel per Dataset (rows) x Type (columns).
points = alt.Chart(df_all).mark_point(filled=True).encode(
    alt.X(
        'mean(Surprisingness)',
        scale=alt.Scale(zero=False),
        # The plotted field is Surprisingness; the axis was previously
        # mislabelled as 'Distance'.
        axis=alt.Axis(title='Surprisingness')
    ),
    alt.Y('Condition'),
    color=alt.value('black')
).facet(
    # Keyword arguments keep row/column unambiguous regardless of the
    # positional order of Chart.facet in the installed Altair version.
    row=alt.Row('Dataset'),
    column=alt.Column("Type")
)

# Horizontal rules spanning the ci0..ci1 aggregates of Surprisingness (the
# lower/upper confidence-interval bounds computed by Vega-Lite), using the
# same Dataset x Type faceting as `points`.
error_bars = alt.Chart(df_all).mark_rule().encode(
    alt.X('ci0(Surprisingness)'),
    alt.X2("ci1(Surprisingness)"),
    alt.Y('Condition')
).facet(
    row=alt.Row('Dataset'),
    column=alt.Column("Type")
)


In [62]:
# print (points.to_json())

{
  "$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json",
  "config": {
    "view": {
      "height": 300,
      "width": 200
    }
  },
  "data": {
    "name": "data-a439ee9e67ca478b9930f0457f13b30e"
  },
  "datasets": {
    "data-a439ee9e67ca478b9930f0457f13b30e": [
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 60.38691911333116,
        "Surprisingness": 10,
        "Type": "Shallow",
        "User": 1
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 47.65899705197331,
        "Surprisingness": 9,
        "Type": "Shallow",
        "User": 2
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 3
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 43.41635636485402,
        "Surprisingness": 4,
        "Type": "Shallow",
        "User": 4
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 3,
        "Type": "Shallow",
        "User": 5
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Surprisingness": 5,
        "Type": "Shallow",
        "User": 6
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 7
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 46.24478348960021,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 8
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 25.031580054003783,
        "Surprisingness": 7,
        "Type": "Shallow",
        "User": 9
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 46.24478348960021,
        "Surprisingness": 10,
        "Type": "Shallow",
        "User": 10
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 11
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 3,
        "Type": "Shallow",
        "User": 12
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 13
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 48.931789258109085,
        "Surprisingness": 10,
        "Type": "Shallow",
        "User": 14
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 15
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 60.38691911333116,
        "Surprisingness": 9,
        "Type": "Shallow",
        "User": 16
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 10.889444430272833,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 17
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 32.10264786586926,
        "Surprisingness": 7,
        "Type": "Shallow",
        "User": 18
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 13.120213412898437,
        "Surprisingness": 4,
        "Type": "Shallow",
        "User": 1
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.491532946846993,
        "Surprisingness": 3,
        "Type": "Shallow",
        "User": 2
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 58.492221705112215,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 3
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 21.86641260015003,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 4
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 60.10108152105085,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 5
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 36.41620518395622,
        "Surprisingness": 5,
        "Type": "Shallow",
        "User": 6
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 10.979070998950686,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 7
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.792643079096679,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 8
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 18.65851012272952,
        "Surprisingness": 3,
        "Type": "Shallow",
        "User": 9
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 52.85962542432551,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 10
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 71.77422935845428,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 11
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 81.05640011744909,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 12
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 19.751961927869345,
        "Surprisingness": 4,
        "Type": "Shallow",
        "User": 13
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 0.9899494936611699,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 14
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 42.013569236616874,
        "Surprisingness": 10,
        "Type": "Shallow",
        "User": 15
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 31.51095047757208,
        "Surprisingness": 7,
        "Type": "Shallow",
        "User": 16
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 11.664475984801035,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 17
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 12.055704044144417,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 18
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 4.5659391147933635,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 1
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 9.80549845749822,
        "Surprisingness": 4,
        "Type": "Deep",
        "User": 2
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 4.5659391147933635,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 3
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 4
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 16.54532562387335,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 5
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 96.43416303364695,
        "Surprisingness": 9,
        "Type": "Deep",
        "User": 6
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 13.095793217671089,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 7
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.7489110232688665,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 8
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 13.577473991873454,
        "Surprisingness": 4,
        "Type": "Deep",
        "User": 9
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 8.243045553677353,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 10
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 19.305123672227538,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 11
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 10.521777416387406,
        "Surprisingness": 3,
        "Type": "Deep",
        "User": 12
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 15.87916244642645,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 13
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 14
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 13.577473991873454,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 15
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 18.9485566732667,
        "Surprisingness": 6,
        "Type": "Deep",
        "User": 16
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 17
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 71.28778156177958,
        "Surprisingness": 8,
        "Type": "Deep",
        "User": 18
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Surprisingness": 10,
        "Type": "Deep",
        "User": 1
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 19.79898987322333,
        "Surprisingness": 7,
        "Type": "Deep",
        "User": 2
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Surprisingness": 3,
        "Type": "Deep",
        "User": 3
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 4
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 15.556349186104045,
        "Surprisingness": 6,
        "Type": "Deep",
        "User": 5
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 15.556349186104045,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 6
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 36.76955262170047,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 7
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 22.627416997969522,
        "Surprisingness": 3,
        "Type": "Deep",
        "User": 8
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Surprisingness": 8,
        "Type": "Deep",
        "User": 9
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Surprisingness": 7,
        "Type": "Deep",
        "User": 10
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 43.840620433565945,
        "Surprisingness": 7,
        "Type": "Deep",
        "User": 11
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 12
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Surprisingness": 8,
        "Type": "Deep",
        "User": 13
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Surprisingness": 8,
        "Type": "Deep",
        "User": 14
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 43.840620433565945,
        "Surprisingness": 3,
        "Type": "Deep",
        "User": 15
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 16
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 12.020815280171307,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 17
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 18
      }
    ]
  },
  "repeat": {
    "column": [
      "Distance",
      "Surprisingness"
    ]
  },
  "facet": {
    "column": {
      "field": "Type",
      "type": "nominal",
      "sort":["Shallow","Deep"]
    },
    "row": {
      "field": "Dataset",
      "type": "nominal",
      "sort":["Police","Autism"]
    }
  },
  "spec":{
    "layer":[
    {
    "encoding": {
      "color": {
        "value": "black"
      },
      "x": {
        "aggregate": "mean",
        "axis": {
          "title": "Surprisingness"
        },
        "field": "Surprisingness",
        "scale": {
          "zero": false
        },
        "type": "quantitative"
      },
      "y": {
        "field": "Condition",
        "type": "nominal"
      }
    },
    "mark": {
      "filled": true,
      "type": "point"
    }
  },
   {
    "encoding": {
      "x": {
        "aggregate": "ci0",
        "field": "Surprisingness",
        "type": "quantitative"
      },
      "x2": {
        "aggregate": "ci1",
        "field": "Surprisingness",
        "type": "quantitative"
      },
      "y": {
        "field": "Condition",
        "type": "nominal"
      }
    },
    "mark": "rule"
  }
    ]
  } 
  
}

{
  "$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json",
  "config": {
    "view": {
      "height": 300,
      "width": 200
    }
  },
  "data": {
    "name": "data-a439ee9e67ca478b9930f0457f13b30e"
  },
  "datasets": {
    "data-a439ee9e67ca478b9930f0457f13b30e": [
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 60.38691911333116,
        "Surprisingness": 10,
        "Type": "Shallow",
        "User": 1
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 47.65899705197331,
        "Surprisingness": 9,
        "Type": "Shallow",
        "User": 2
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 3
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 43.41635636485402,
        "Surprisingness": 4,
        "Type": "Shallow",
        "User": 4
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 3,
        "Type": "Shallow",
        "User": 5
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Surprisingness": 5,
        "Type": "Shallow",
        "User": 6
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 7
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 46.24478348960021,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 8
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 25.031580054003783,
        "Surprisingness": 7,
        "Type": "Shallow",
        "User": 9
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 46.24478348960021,
        "Surprisingness": 10,
        "Type": "Shallow",
        "User": 10
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 11
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 3,
        "Type": "Shallow",
        "User": 12
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 3.8183766184073584,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 13
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 48.931789258109085,
        "Surprisingness": 10,
        "Type": "Shallow",
        "User": 14
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 3.252691193458117,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 15
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 60.38691911333116,
        "Surprisingness": 9,
        "Type": "Shallow",
        "User": 16
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 10.889444430272833,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 17
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 32.10264786586926,
        "Surprisingness": 7,
        "Type": "Shallow",
        "User": 18
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 13.120213412898437,
        "Surprisingness": 4,
        "Type": "Shallow",
        "User": 1
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.491532946846993,
        "Surprisingness": 3,
        "Type": "Shallow",
        "User": 2
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 58.492221705112215,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 3
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 21.86641260015003,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 4
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 60.10108152105085,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 5
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 36.41620518395622,
        "Surprisingness": 5,
        "Type": "Shallow",
        "User": 6
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 10.979070998950686,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 7
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.792643079096679,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 8
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 18.65851012272952,
        "Surprisingness": 3,
        "Type": "Shallow",
        "User": 9
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 52.85962542432551,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 10
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 71.77422935845428,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 11
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 81.05640011744909,
        "Surprisingness": 8,
        "Type": "Shallow",
        "User": 12
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 19.751961927869345,
        "Surprisingness": 4,
        "Type": "Shallow",
        "User": 13
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 0.9899494936611699,
        "Surprisingness": 1,
        "Type": "Shallow",
        "User": 14
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 42.013569236616874,
        "Surprisingness": 10,
        "Type": "Shallow",
        "User": 15
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 31.51095047757208,
        "Surprisingness": 7,
        "Type": "Shallow",
        "User": 16
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 11.664475984801035,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 17
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 12.055704044144417,
        "Surprisingness": 2,
        "Type": "Shallow",
        "User": 18
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 4.5659391147933635,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 1
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 9.80549845749822,
        "Surprisingness": 4,
        "Type": "Deep",
        "User": 2
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 4.5659391147933635,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 3
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 4
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 16.54532562387335,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 5
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 96.43416303364695,
        "Surprisingness": 9,
        "Type": "Deep",
        "User": 6
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 13.095793217671089,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 7
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 6.7489110232688665,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 8
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 13.577473991873454,
        "Surprisingness": 4,
        "Type": "Deep",
        "User": 9
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 8.243045553677353,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 10
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 19.305123672227538,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 11
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 10.521777416387406,
        "Surprisingness": 3,
        "Type": "Deep",
        "User": 12
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 15.87916244642645,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 13
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 14
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 13.577473991873454,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 15
      },
      {
        "Condition": "Cluster",
        "Dataset": "Police",
        "Distance": 18.9485566732667,
        "Surprisingness": 6,
        "Type": "Deep",
        "User": 16
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 10.73432811125131,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 17
      },
      {
        "Condition": "BFS",
        "Dataset": "Police",
        "Distance": 71.28778156177958,
        "Surprisingness": 8,
        "Type": "Deep",
        "User": 18
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Surprisingness": 10,
        "Type": "Deep",
        "User": 1
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 19.79898987322333,
        "Surprisingness": 7,
        "Type": "Deep",
        "User": 2
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Surprisingness": 3,
        "Type": "Deep",
        "User": 3
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 4
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 15.556349186104045,
        "Surprisingness": 6,
        "Type": "Deep",
        "User": 5
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 15.556349186104045,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 6
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 36.76955262170047,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 7
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 22.627416997969522,
        "Surprisingness": 3,
        "Type": "Deep",
        "User": 8
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Surprisingness": 8,
        "Type": "Deep",
        "User": 9
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Surprisingness": 7,
        "Type": "Deep",
        "User": 10
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 43.840620433565945,
        "Surprisingness": 7,
        "Type": "Deep",
        "User": 11
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 12
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Surprisingness": 8,
        "Type": "Deep",
        "User": 13
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 29.698484809834994,
        "Surprisingness": 8,
        "Type": "Deep",
        "User": 14
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 43.840620433565945,
        "Surprisingness": 3,
        "Type": "Deep",
        "User": 15
      },
      {
        "Condition": "BFS",
        "Dataset": "Autism",
        "Distance": 12.727922061357855,
        "Surprisingness": 5,
        "Type": "Deep",
        "User": 16
      },
      {
        "Condition": "Storyboard",
        "Dataset": "Autism",
        "Distance": 12.020815280171307,
        "Surprisingness": 2,
        "Type": "Deep",
        "User": 17
      },
      {
        "Condition": "Cluster",
        "Dataset": "Autism",
        "Distance": 1.4142135623730951,
        "Surprisingness": 1,
        "Type": "Deep",
        "User": 18
      }
    ]
  },
  "repeat": {
    "column": [
      "Distance",
      "Surprisingness"
    ]
  },
  "facet": {
    "column": {
      "field": "Type",
      "type": "nominal",
      "sort":["Shallow","Deep"]
    },
    "row": {
      "field": "Dataset",
      "type": "nominal",
      "sort":["Police","Autism"]
    }
  },
  "spec":{
    "layer":[
    {
    "encoding": {
      "color": {
        "value": "black"
      },
      "x": {
        "aggregate": "mean",
        "axis": {
          "title": "Distance"
        },
        "field": "Distance",
        "scale": {
          "zero": false
        },
        "type": "quantitative"
      },
      "y": {
        "field": "Condition",
        "type": "nominal"
      }
    },
    "mark": {
      "filled": true,
      "type": "point"
    }
  },
   {
    "encoding": {
      "x": {
        "aggregate": "ci0",
        "field": "Distance",
        "type": "quantitative"
      },
      "x2": {
        "aggregate": "ci1",
        "field": "Distance",
        "type": "quantitative"
      },
      "y": {
        "field": "Condition",
        "type": "nominal"
      }
    },
    "mark": "rule"
  }
    ]
  } 
  
}

In [86]:
# Shallow-task prediction tables for both datasets, tagged with the task type
# and dataset name.
#
# "Condition" is derived here as well: the groupby("Condition") summary cells
# that follow need the column, and previously it was only created by a cell
# located further down, so the notebook failed on Restart & Run All.
# The second character of each Algorithm value is used as the algorithm
# number (same rule as the later per-condition cells).
algorithm_to_condition = {1: "Storyboard", 2: "Cluster", 3: "BFS"}

df = compute_prediction_aggregate_stats("Prediction1(Autism).csv")
df["Type"] = "Shallow"
df["Dataset"] = "Autism"
df["Condition"] = df["Algorithm"].apply(lambda code: algorithm_to_condition[int(code[1])])

df2 = compute_prediction_aggregate_stats("Prediction1(Police).csv")
df2["Type"] = "Shallow"
df2["Dataset"] = "Police"
df2["Condition"] = df2["Algorithm"].apply(lambda code: algorithm_to_condition[int(code[1])])

In [89]:
# Per-condition mean of the numeric columns only. The string columns
# (Algorithm, Type, Dataset) are excluded explicitly: older pandas dropped them
# silently, while pandas >= 2.0 raises a TypeError when asked to average them.
df.select_dtypes(include="number").groupby(df["Condition"]).mean()

Unnamed: 0_level_0,User,NO,YES,Surprisingness
Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BFS,9.0,51.816667,48.183333,8.333333
Cluster,9.5,28.333333,71.666667,4.666667
Storyboard,10.0,22.5,77.5,2.5


In [92]:
# Number of non-null entries in each column, per condition
# (i.e. how many participants fall into each condition).
df.groupby(by="Condition").count()

Unnamed: 0_level_0,User,NO,YES,Surprisingness,Algorithm,Type,Dataset
Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BFS,6,6,6,6,6,6,6
Cluster,6,6,6,6,6,6,6
Storyboard,6,6,6,6,6,6,6


In [91]:
# Translate each Algorithm value into its condition label; the second character
# of the value is read as the algorithm number. The lookup table is built once
# instead of on every row.
algorithm_to_condition = {1: "Storyboard", 2: 'Cluster', 3: 'BFS'}
df["Condition"] = df["Algorithm"].apply(lambda code: algorithm_to_condition[int(code[1])])

# Per-condition standard deviation of the numeric columns only. Selecting them
# explicitly avoids relying on pandas silently dropping the string columns,
# which raises a TypeError on pandas >= 2.0.
df.select_dtypes(include="number").groupby(df["Condition"]).std()

Unnamed: 0_level_0,User,NO,YES,Surprisingness
Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BFS,5.477226,4.214459,4.214459,2.250926
Cluster,6.024948,18.073922,18.073922,3.386247
Storyboard,5.477226,6.892024,6.892024,2.345208


In [82]:
# Translate each Algorithm value into its condition label; the second character
# of the value is read as the algorithm number. The lookup table is built once
# instead of on every row.
algorithm_to_condition = {1: "Storyboard", 2: 'Cluster', 3: 'BFS'}
df2["Condition"] = df2["Algorithm"].apply(lambda code: algorithm_to_condition[int(code[1])])

# Per-condition standard deviation of the numeric columns only. Selecting them
# explicitly avoids relying on pandas silently dropping the string columns,
# which raises a TypeError on pandas >= 2.0.
df2.select_dtypes(include="number").groupby(df2["Condition"]).std()

Unnamed: 0_level_0,User,ticket,warn,arrest,Surprisingness
Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BFS,5.394442,24.678736,10.801929,33.506442,2.949576
Cluster,5.394442,19.452506,8.336666,26.56125,3.141125
Storyboard,5.394442,6.023426,6.183985,6.400521,1.21106


In [None]:
# Distance samples for the Police dataset, one list per condition, in the
# order Cluster, Storyboard, BFS.
clust, sbd, bfs = (
    list(
        df_Distance_shallow[
            (df_Distance_shallow["Dataset"] == "Police")
            & (df_Distance_shallow["Condition"] == condition_name)
        ]["Distance"]
    )
    for condition_name in ("Cluster", "Storyboard", "BFS")
)

In [None]:
# Imported here so the cell does not depend on scipy.stats having been loaded
# by some other cell.
import scipy.stats

# Mann-Whitney U tests of BFS vs. Storyboard and Cluster vs. Storyboard.
# `alternative` is pinned to "two-sided": the default changed between SciPy
# releases (a deprecated one-sided p-value before 1.7, two-sided afterwards),
# so leaving it implicit makes the reported p-values version-dependent.
print (scipy.stats.mannwhitneyu(bfs, sbd, alternative="two-sided"))
print (scipy.stats.mannwhitneyu(clust, sbd, alternative="two-sided"))