In [1]:
# analyze the session logs from the IPE user study
import numpy as np
import codecs
import re
import json
import os
import copy
import difflib

from collections import defaultdict, Counter

%load_ext autoreload
%autoreload 2

In [2]:
participant_project_settings = {
    'P38': {
        'BASIC': 1,
        'QE': 2,
        'CD': 3,
        'IPE': 4
    },
    'P92': {
        'BASIC': 2,
        'QE': 3,
        'CD': 4,
        'IPE': 1
    },
    'P93': {
        'BASIC': 3,
        'QE': 4,
        'CD': 1,
        'IPE': 2 
    },
    'P94': {
        'BASIC': 4,
        'QE': 1,
        'CD': 2,
        'IPE': 3 
    },
    'P81': {
        'BASIC': 1,
        'QE': 2,
        'CD': 4,
        'IPE': 3
    }
}

In [3]:
LOG_DIR='/home/chris/Desktop/Dropbox/projects/handycat_interactive_pe/user_study_logs'

In [4]:

# Now make a map from participants to all of thier log files 
# Because of the way we collect logs, projects may be duplicated across multiple logfiles for a participant
def logdir_to_participant_log_map(logdir):
    task_map = {'A': 1, 'B': 2, 'C': 3, 'D': 4}
    participant_logfiles = defaultdict(list)
    for participant_id in os.listdir(logdir):
        print('id: {}'.format(participant_id))
        for item in os.listdir(os.path.join(logdir, participant_id)):
            if item.endswith('.json'):
                participant_logfiles[participant_id].append(os.path.join(logdir, participant_id, item))
    
    # parse each logfile to extract its project objects
    nested_dict = lambda: defaultdict(nested_dict)
    participant_logs = nested_dict()
    broken_log_groups = defaultdict(list)
    for p_id, logfiles in participant_logfiles.items():
        print('parsing logs for {}'.format(p_id))
        log_objs = [json.loads(open(f).read()) for f in logfiles]
        print('there are {} raw logs'.format(len(logfiles)))
        # now we need to deduplicate any projects that appear in the logs multiple times
        for log_obj in log_objs:
            for docname, log_entries in log_obj['document'].items():
                # extract the setting from the name
                # names are: `<setting>-Task-<id-letter>`
                project_setting, _, project_identifier = docname.split('-')
                print('docname: {}, project_setting: {}'.format(docname, project_setting))
                project_id = task_map[project_identifier]
                # sanity: just assert that any duplicate entries have the same number of entries
                try:
                    if participant_logs[p_id][project_setting]['segments'] == {}:
                        raise KeyError
                        
                    prev_log = json.dumps(participant_logs[p_id][project_setting]['segments'])         
                    current_log = json.dumps(log_entries['segments'])
                    try:
                        assert prev_log == current_log
                    except AssertionError:
                        broken_log_groups[p_id].append((prev_log, current_log))
                        # use the longer of the two
                        print('new len {}, old len {}'.format(len(current_log), len(prev_log)))

                        if len(current_log) > len(prev_log):
                            participant_logs[p_id][project_setting]['segments'] = log_entries['segments']
        
                except KeyError:
                    participant_logs[p_id][project_setting]['segments'] = log_entries['segments']
                
                # note we just overwrite because duplicates are identical anyway
                participant_logs[p_id][project_setting]['project_id'] = project_id

    # assert all project types exist for all participants
    for p_id, projects in participant_logs.items():
        assert len(projects) == 4
        assert set(['BASIC', 'QE', 'CD', 'IPE']) == set(projects.keys())
       
    return participant_logs, broken_log_groups
 

In [5]:
logs, broken_logs = logdir_to_participant_log_map(LOG_DIR)

id: P93
id: P81
id: P38
id: P94
id: P92
parsing logs for P93
there are 5 raw logs
docname: BASIC-Task-C, project_setting: BASIC
docname: QE-Task-D, project_setting: QE
docname: CD-Task-A, project_setting: CD
docname: IPE-Task-B, project_setting: IPE
docname: BASIC-Task-C, project_setting: BASIC
docname: QE-Task-D, project_setting: QE
docname: CD-Task-A, project_setting: CD
docname: BASIC-Task-C, project_setting: BASIC
docname: BASIC-Task-C, project_setting: BASIC
new len 39719, old len 39719
docname: BASIC-Task-C, project_setting: BASIC
docname: QE-Task-D, project_setting: QE
parsing logs for P81
there are 4 raw logs
docname: BASIC-Task-A, project_setting: BASIC
docname: QE-Task-B, project_setting: QE
docname: CD-Task-D, project_setting: CD
docname: IPE-Task-C, project_setting: IPE
docname: BASIC-Task-A, project_setting: BASIC
new len 17384, old len 17384
docname: BASIC-Task-A, project_setting: BASIC
docname: QE-Task-B, project_setting: QE
docname: BASIC-Task-A, project_setting: BASIC


In [6]:
# broken_logs.keys()

In [7]:
# add function to compare the starting value of a segment with the final value
# see the QE score log analysis notebook
def get_before_after_from_segments(segments):
    ordered_segments = sorted([(int(k), v) for k,v in segments.items()], key=lambda x: x[0])
    segment_before_after = []
    for seg_id, events in ordered_segments:
        action_names = [e['action'] for e in events]
        before = u''
        after = u''
        if 'segment-complete' in action_names:
            # first index of 'change-segment'
            end_event_idx = action_names.index('segment-complete')
            before = events[end_event_idx]['data']['previousValue']
            after = events[end_event_idx]['data']['newValue']

        segment_before_after.append((seg_id, before, after))

    return segment_before_after

In [8]:
def flatten_segment_logs(segment_logs, action_namespace=None):
    flat_actions = []
    for segment_log in segment_logs:
        for seg_id, actions in segment_log.items():
            if action_namespace is not None:
                actions = [a for a in actions if a['action'].split('.')[0] == action_namespace]
            flat_actions.extend(actions)
    return flat_actions


In [9]:
def edit_actions_by_type(segment_logs):
    ipe_actions = flatten_segment_logs(segment_logs, action_namespace='ipe')
    action_counts = Counter()
    action_counts.update([a['action'].split('.')[1] for a in ipe_actions])
    return action_counts
    

In [10]:
def edit_actions_by_setting(user_logs):
    ipe_actions = {}
    for setting, setting_data in user_logs.items():
        print(setting)
        ipe_actions[setting] = Counter([a['action'].split('.')[1]
                                        for a in flatten_segment_logs([setting_data['segments']], 
                                                                      action_namespace='ipe')]) 
    return ipe_actions


In [11]:
user_action_counts = {p_id: edit_actions_by_type([data['segments'] for setting, data in d.items()])
                      for p_id, d in logs.items()}

In [12]:
user_actions_by_setting = {p_id: edit_actions_by_setting(data) for p_id, data in logs.items()}

BASIC
QE
CD
IPE
BASIC
QE
CD
IPE
BASIC
QE
CD
IPE
BASIC
QE
CD
IPE
QE
BASIC
CD
IPE


In [13]:
good_users = ['P38', 'P93', 'P81']

In [14]:
for good_user in good_users:
    print('User ID: {}'.format(good_user))
    print(json.dumps(user_actions_by_setting[good_user], indent=2))
    total_actions = sum([c for s, a in user_actions_by_setting[good_user].items() 
                         for n, c in a.items() if 'server' not in n])
    print('Total IPE actions: {}'.format(total_actions))

User ID: P38
{
  "BASIC": {
    "confirm": 5,
    "delete": 7,
    "replace": 1
  },
  "QE": {},
  "CD": {
    "replace": 13,
    "delete": 3
  },
  "IPE": {
    "replace": 10,
    "delete": 3
  }
}
Total IPE actions: 42
User ID: P93
{
  "BASIC": {
    "confirm": 2,
    "delete": 13,
    "replace": 8,
    "insert": 25
  },
  "QE": {
    "qe_server_response": 51,
    "replace": 12,
    "delete": 13,
    "insert": 12
  },
  "CD": {
    "replace": 8,
    "cd_server_response": 6,
    "insert": 11,
    "delete": 5
  },
  "IPE": {
    "qe_server_response": 55,
    "delete": 5,
    "replace": 9,
    "cd_server_response": 1,
    "insert": 8
  }
}
Total IPE actions: 131
User ID: P81
{
  "BASIC": {
    "insert": 2,
    "delete": 6
  },
  "QE": {
    "qe_server_response": 27,
    "delete": 4,
    "replace": 6,
    "insert": 2
  },
  "CD": {
    "insert": 12,
    "delete": 13,
    "cd_server_response": 9,
    "replace": 6
  },
  "IPE": {
    "qe_server_response": 29,
    "replace": 4,
    "delete"

In [15]:
# Key question: did users edit more or less when there was QE feedback available?
# Key question: did users edit the things that QE annotated more than other things?
#      - how to measure this?
#      - check the token annotation -- is it OK or BAD in the original sequence?
#      - check which tokens were (probably) removed (DEL, or SUB)
#      - we are effectively doing sequence alignment in an edit matrix
#      - see what the TER ops are using one of the QE scripts?
#      - we can use get_opcodes() from difflib, or possibly get_matching_blocks()
# https://docs.python.org/2/library/difflib.html

def get_edit_distance(before, after):
    matcher = difflib.SequenceMatcher(isjunk=None, a=before, b=after)
    return 1. - matcher.ratio()

In [35]:
def show_how_seqs_differ(seq1, seq2):
    matcher = difflib.SequenceMatcher(a=seq1, b=seq2, autojunk=False)
    return matcher.get_opcodes()

In [37]:
ttt = get_before_after_from_segments(logs['P93']['BASIC']['segments'])
seq_a, seq_b = ttt[0][1].split(), ttt[0][2].split()
test_diff = show_how_seqs_differ(seq_a, seq_b)
print(list(test_diff))
for tag, i1, i2, j1, j2 in test_diff:
    print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" % 
           (tag, i1, i2, seq_a[i1:i2], j1, j2, seq_b[j1:j2]))

[('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 3), ('equal', 2, 5, 3, 6), ('replace', 5, 6, 6, 7), ('equal', 6, 9, 7, 10), ('replace', 9, 10, 10, 11), ('equal', 10, 14, 11, 15)]
  equal a[0:1] (['In']) b[0:1] (['In'])
replace a[1:2] (['Contribute,']) b[1:3] (['Beitragen', ','])
  equal a[2:5] (['klicken', 'Sie', 'auf']) b[3:6] (['klicken', 'Sie', 'auf'])
replace a[5:6] (['Veröffentlichen,']) b[6:7] (['"Veröffentlichen",'])
  equal a[6:9] (['um', 'die', 'Änderungen']) b[7:10] (['um', 'die', 'Änderungen'])
replace a[9:10] (['in']) b[10:11] (['auf'])
  equal a[10:14] (['der', 'Website', 'zu', 'veröffentlichen.']) b[11:15] (['der', 'Website', 'zu', 'veröffentlichen.'])


NameError: name 'a' is not defined

In [29]:
# TODO: mean edit distance per user
# Remember: we have a lot of segments, but just two users
# Remember: we have the before/after segments for more than just two users
#    - especially in the case of QE, this information could be useful 

In [19]:
ttt[:4]

[(0,
  'In Contribute, klicken Sie auf Veröffentlichen, um die Änderungen in der Website zu veröffentlichen.',
  'In Beitragen , klicken Sie auf "Veröffentlichen", um die Änderungen auf der Website zu veröffentlichen.'),
 (1,
  'Der Zeiger des Innenkreises ist die Begrenzung des Werkzeugs in voller Stärke angezeigt.',
  'Der Innenkreis des Zeigers zeigt die Begrenzung des Werkzeugs bei voller Stärke an .'),
 (2,
  'Sie können auch die Reinigungsläsung malen Sie mit einem Airbrush simulieren.',
  'Sie können auch Sprühfarbe mit einem Airbrush simulieren.'),
 (3,
  'Wählen Sie das Slice-Auswahlwerkzeug aus und klicken Sie auf das Segment im Bild.',
  'Wählen Sie das Ausschnittwerkzeug aus und klicken Sie auf das Segment im Bild.')]

In [None]:
# VISUALIZATIONS

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def barplot_action_types(actions_by_setting):
    plt.rcdefaults()
    fig, ax = plt.subplots()

    settings = ['IPE', 'CD', 'QE', 'BASIC']
    action_types = ['insert', 'delete', 'replace']
    action_colors = {
        'insert': 'green',
        'delete': 'red',
        'replace': 'blue'
    }
    
    ind = np.arange(len(actions_by_setting))
    width = 0.2
    
    bar_groups = defaultdict(list)
    # TODO: group label is setting
    for setting in settings:
        action_counts = actions_by_setting[setting]
        for action in action_types:
            if action in action_counts:
                bar_groups[action].append(action_counts[action])
            else:
                bar_groups[action].append(0)
    
    # TODO: colored legend by action type
    for offset, (action, counts) in enumerate(bar_groups.items()):
        ax.barh(ind + (width*offset), counts, width, 
                color=action_colors[action], 
                edgecolor='black',
                linewidth=1,
                label=action)
    
    # Add the axis labels
    #ax.set_ylabel("Editing Time (seconds)")
    #ax.set_xlabel("Sentence Id (sorted by increasing length)")
    ax.set(yticks=ind+(1*width),
           yticklabels=settings,
           ylim=[3*width - 1, len(actions_by_setting)])
    
    # Add a legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], loc='upper right')
    ax.legend()

barplot_action_types(user_actions_by_setting['P81'])
plt.show()
