In [1]:
import firebase_admin
from firebase_admin import credentials, db
import json

In [2]:
# Firebase database URLs. `mainDatabase` is handed to the default app as its
# 'databaseURL'; `graphDatabase` is handed to the app registered as "GraphDB".
mainDatabase = "https://intent-system-prolific.firebaseio.com/"
graphDatabase = "https://task-provenance-database.firebaseio.com/"

In [3]:
# Service-account key file; both app instances below authenticate with it.
cred = credentials.Certificate("intent-system-prolific-firebase-adminsdk-2kz20-2819511b33.json")

# initialize_app() raises ValueError when an app with the same name is already
# registered, so creating the apps unconditionally makes this cell fail when it
# is re-run in a live kernel. Look the app up first and only create it when the
# lookup reports that it does not exist yet.
try:
    firebase_admin.get_app()
except ValueError:
    # Default (unnamed) app, bound to the main database.
    firebase_admin.initialize_app(cred, {
        'databaseURL': mainDatabase
    })

try:
    graph_global = firebase_admin.get_app("GraphDB")
except ValueError:
    # Second, named app bound to the graph database.
    graph_global = firebase_admin.initialize_app(cred, {
        'databaseURL': graphDatabase
    }, name="GraphDB")

In [4]:
# Helper functions 
def getFromMainDB(path):
    """Return whatever is stored at ``path``, read through the default Firebase app."""
    main_ref = db.reference(path)
    return main_ref.get()

def getFromGraphDB(path):
    """Return whatever is stored at ``path``, read through the ``graph_global`` app."""
    graph_ref = db.reference(path, graph_global)
    return graph_ref.get()

In [5]:
# Load the exported study results. The encoding is pinned to UTF-8 so that
# decoding does not depend on the platform's locale default.
with open('trrack-data/study_results_outlier.json', encoding='utf-8') as f:
    prov_out = json.load(f)

In [6]:
def getStateSequence(path):
    """Return the chain of graph nodes reached by always following the first child.

    The graph stored at ``path`` is fetched with ``getFromGraphDB``. Starting
    at the node named by its ``'root'`` entry, the walk repeatedly moves to the
    first id listed under the current node's ``'children'`` and stops at the
    first node whose ``'children'`` is missing or empty.

    Returns:
        list: the visited node dicts, root first.
    """
    graph = getFromGraphDB(path)
    root_id = graph['root']
    current = graph['nodes'][root_id]
    sequence = [current]
    while current.get('children'):
        # Only the first child is followed; any sibling branches are ignored.
        next_id = current['children'][0]
        current = graph['nodes'][next_id]
        sequence.append(current)
    return sequence

In [7]:
def cleanStateSequence(nodes):
    """Reduce a list of graph nodes to a list of selection snapshots.

    A node is skipped when its ``'state'`` or that state's ``'plots'`` is
    missing or empty. Of the remaining nodes:

    * a node labelled ``'Add plot: X - Y'`` yields a snapshot with an empty
      selection and no prediction;
    * any other node yields a snapshot only when its first plot has a
      non-empty ``'selectedPoints'``.

    Returns:
        list[dict]: one dict per kept node with the keys ``'timestamp'``
        (taken from ``node['metadata']['createdOn']``), ``'selection'`` and
        ``'turnedPrediction'``, in input order.
    """
    snapshots = []
    for node in nodes:
        node_state = node.get('state')
        if not node_state:
            continue

        plots = node_state.get('plots')
        if not plots:
            continue

        if node['label'] == 'Add plot: X - Y':
            snapshots.append({
                'timestamp': node['metadata']['createdOn'],
                'selection': [],
                'turnedPrediction': None,
            })
            continue

        # Only the first plot is inspected for selected points.
        first_plot = plots[0]
        if first_plot.get('selectedPoints'):
            snapshots.append({
                'timestamp': node['metadata']['createdOn'],
                'selection': first_plot['selectedPoints'],
                'turnedPrediction': node_state.get('turnedPrediction'),
            })
    return snapshots

In [8]:
# Build one record per participant in `prov_out`: the participant id, the list
# of task ids, and a per-task summary that includes the cleaned selection
# sequence derived from the task's graph.
extractedData = []
for participant in prov_out:
    data = {
        'id': participant['data']['participantId'],
        'taskIds': list(participant['data']['tasks'].keys()),
        'tasks': {},
    }
    for task in data['taskIds']:
        taskdata = participant['data']['tasks'][task]
        # 'graph' holds the path handed to getStateSequence for this task.
        path = taskdata['graph']
        data['tasks'][task] = {
            'accuracy': taskdata['accuracy'],
            'dataset': taskdata['dataset'],
            'difficulty': taskdata['difficulty'],
            # Anything other than the literal 'no' counts as a training task.
            'training': taskdata['training'] != 'no',
            'supported': taskdata['user-driven'] == 'supported',
            'autoCompleteUsed': taskdata['interactionDetails']['autoCompleteUsed'],
            'rankOfPredictionUsed': taskdata['interactionDetails']['rankOfPredictionUsed'],
            'selectionSequence': cleanStateSequence(getStateSequence(path)),
        }
    extractedData.append(data)

In [12]:
import numpy as np

In [13]:
# Persist the extracted records as JSON. Plain write mode is sufficient because
# nothing is read back through this handle, and the encoding is pinned so the
# file does not depend on the platform's locale default.
with open('trrack-data/outlier_extracted.json', 'w', encoding='utf-8') as f:
    json.dump(extractedData, f)