In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
np.set_printoptions(precision=2)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('precision', 2)

%matplotlib inline
matplotlib.style.use('ggplot')
matplotlib.rcParams['figure.figsize'] = 25, 15

In [2]:
df = pd.read_csv('dream_table_beers_90447168_2017-01-17_11.22.45.csv')

In [3]:
df.head(n=2)

Unnamed: 0,User,Sim,Time,Index,User or Model?,Event,Item,Action,Laser on status,Wavelength,Width,Concentration,Absorption,Detector location,Ruler location,Table,X axis,Y axis,X axis scale,Y axis scale,Experiment #s included,Notes
0,90447168,beers-law-lab,0.0,0,model,initializing,sim,beersLawLab.sim.simStarted,,,,,,,,,,,,,,
1,90447168,beers-law-lab,0.0,2,model,updating state,,,False,780.0,1.0,0.1,,"{u'y': 2, u'x': 6.3}","{u'y': 3.58, u'x': 3.3}",,,,,,,


In [4]:
#list all the events and items to inform action to family dictionary
print df['Event'].unique()
print df['Item'].unique()

['initializing' 'updating state' 'toggle laser' 'dragStarted' 'dragged'
 'dragEnded' 'editing notes' 'recording data' 'expanding table'
 'expanding graph' 'Adding data to graph' 'Selecting Y-axis'
 'Selecting X-axis' 'Changed concentration' 'Removing data from table'
 'Selecting scale of Y-axis']
['sim' nan 'laser button' 'cuvetteNode' 'notepad' 'wavelengthControls'
 'solutionControls' 'detectorNode' 'table' 'graph' 'trialNumber 1'
 'Y-axis dropdown menu' 'X-axis dropdown menu' 'trialNumber 2'
 'trialNumber 3' 'concentration slider' 'trialNumber 4' 'trialNumber 5'
 'trialNumber 6' 'trialNumber 7' 'trialNumber 8' 'trialNumber 9'
 'trialNumber 20' 'trialNumber 19' 'trialNumber 18' 'trialNumber 17'
 'trialNumber 16' 'trialNumber 15' 'trialNumber 14' 'trialNumber 13'
 'trialNumber 12' 'trialNumber 11' 'trialNumber 10'
 'Y-axis scale dropdown menu']


In [5]:
# Define actions families
# I'm not sure what is 'Changed concentration' - is it using the arrows to change concentration? Yes!
action_to_family = {'N':['editing notes'],
                    'M':['recording data'],
                   'Dt':['Removing data from table'],
                    'G':['Adding data to graph','Selecting Y-axis','Selecting X-axis', 'Selecting scale of Y-axis','Selecting scale of X-axis'],
                   'O':['initializing', 'toggle laser','expanding table','collapsing table','expanding graph','collapsing graph'],
                   'St':['updating state']}
variable_actions = ['dragEnded','dragged','dragStarted','Changed concentration']
#events to add: collapsing table and graph, selecting X, removing from graph, move detector (in dragging now)

In [6]:
# Populate family column based on action_to_family dict above
#First we reverse the dictionary to map events to families.
event_to_family = {}
for family, events in action_to_family.iteritems():
    for event in events:
        event_to_family[event]=family

def map_event_family(row):
    event = row['Event']
    item = row['Item']
    if event in variable_actions:
        return 'V'+item[0]
    else:
        return event_to_family[event]

df['Family'] = df[['Event','Item']].apply(map_event_family, axis=1)

In [7]:
# color dict for plotting action timeline plots
colors =  {
    'N':'#969696',
    'M': '#a50f15',
    'Dt': '#4a1486',
    'G': '#c51b8a',
    'O': '#c51b8a',
    'Vc': '#fb6a4a',
    'Vd': '#fb6a4a',
    'Vs': '#fb6a4a',
    'Vw': '#fb6a4a',
    }
family_order = ['O','G','Dt','N','M','Vd','Vw','Vc','Vs']
family_full_names = {'O':'Other',
                    'G':'Graph',
                    'Dt':'Data Table',
                    'N':'Notes',
                    'M':'Measure',
                    'Vd':'Detector',
                    'Vw':'Wavelength',
                    'Vc':'Cuvette',
                    'Vs':'Solution',
                    }

In [8]:
def plot_barGraph_new(df, studentID, family_plot_order, ignore=['St']):
    '''
    This function plots timeline action plots
    '''
    #get blocks of actions and their time coordinate for broken bar plot
    blocks, time_coords = get_blocks_withTime(df, {studentID}, ignore)
    #create figure and axes
    fig, ax = plt.subplots()
    
    #split sequences so that each block is an element of a list
    splitblocks = re.compile(r'([A-Z]{1}[a-z]{0,3})\1*').split(blocks[studentID])
    splitblocks = [s for s in splitblocks if s != ""]

    #grab time coordinates of each blocks from dict 
    time_coord = time_coords[studentID]
    axis_labels = []
    for i,action in enumerate(family_plot_order):
        axis_labels.append(family_full_names[action])
        out = [(x,y) for x,y in zip(splitblocks,time_coord) if action == x]
        _, time_coord_filtered = map(list,zip(*out))  
        ax.broken_barh(time_coord_filtered,(10*i,9),facecolors=colors[action],linewidth=2,edgecolor='k')

    ax.set_ylim(0, len(axis_labels)*10)
    ax.set_xlim(0, time_coord[-1][0]+time_coord[-1][1]+2)
    ax.set_xlabel('Time (s)', fontsize=25)
    ax.xaxis.set_tick_params(labelsize=20)
    ax.set_yticks(range(5,len(axis_labels)*10+15,10))
    ax.set_yticklabels(axis_labels, fontsize=25)
    ax.grid(True)

In [10]:
plot_barGraph_new(df, '12345678', family_plot_order)
#i made linewidth black and thick so we can see thin bars of some families

NameError: name 'family_plot_order' is not defined