In [10]:
import config
from analytics import operation_builder
from analytics.parser import *
from analytics.visualization import *
import pandas as pd
import os

# Elementary operations collected per pad (keyed by pad name) and a running total.
list_of_elem_ops_per_pad = {}
elemOpsCounter = 0
root_of_dbs = "belgian_experiment/"

# Walk the experiment folder and read every file whose name contains ".db".
for dirpath, dirnames, filenames in os.walk(root_of_dbs):
    for filename in filenames:
        if ".db" not in filename:
            continue
        path_to_db = os.path.join(dirpath, filename)
        list_of_elem_ops_per_main, _ = get_elem_ops_per_pad_from_db(
            path_to_db=path_to_db,
            editor='etherpadSQLite3')
        # The pad name is the part of the path that follows the root folder and
        # stops one character before the first occurrence of "data".
        pad_name = path_to_db[len(root_of_dbs):path_to_db.find("data") - 1]
        # Exactly one entry is expected per database; it is read under the key 'main'.
        assert len(list_of_elem_ops_per_main.keys()) == 1
        list_of_elem_ops_per_pad[pad_name] = list_of_elem_ops_per_main['main']

# Build the pads from the collected elementary operations (see operation_builder).
pads, _, elem_ops_treated = operation_builder.build_operations_from_elem_ops(
    list_of_elem_ops_per_pad,
    config.maximum_time_between_elem_ops)

for pad_name in pads:
    pad = pads[pad_name]
    pad_elem_ops = elem_ops_treated[pad_name]
    elemOpsCounter += len(pad_elem_ops)
    # Step 1: paragraphs are created from the treated elementary operations.
    pad.create_paragraphs_from_ops(pad_elem_ops)
    # Step 2: the operations of the pad are classified.
    pad.classify_operations(length_edit=config.length_edit,
                            length_delete=config.length_delete)
    # Step 3: the context of each operation is built.
    pad.build_operation_context(config.delay_sync,
                                config.time_to_reset_day,
                                config.time_to_reset_break)

print("Loaded %s pads with a total of %s elementary operations" % (len(pads), elemOpsCounter))

Loaded 17 pads with a total of 57147 elementary operations


In [17]:
def compute_overall_op_type(pad, jump=False):
    """
    List the type and author of every operation of a pad, one row per operation.

    :param pad: pad object exposing ``pad_name`` and an iterable ``operations``
        whose items have ``type`` and ``author`` attributes
    :param jump: if False (default), rows whose type is 'jump' are dropped
    :return: DataFrame with the columns 'Pad name', 'Types' and 'Authors'. The
        index is the position of each operation in ``pad.operations``, so it
        has gaps where 'jump' rows were removed.
    """
    # Collect all rows first and create the frame once: enlarging a DataFrame
    # one row at a time with ``df.loc[i] = ...`` copies the data on every
    # insertion, which is quadratic in the number of operations.
    rows = [(pad.pad_name, op.type, op.author) for op in pad.operations]
    df = pd.DataFrame(rows, columns=['Pad name', 'Types', 'Authors'])
    if not jump:
        df = df[df['Types'] != 'jump']
    return df

In [19]:
# One frame of (pad name, operation type, author) rows per pad.
counts = []
for pad in pads:
    pad_op_types = compute_overall_op_type(pads[pad])
    counts.append(pad_op_types)

# Stack the per-pad frames; each keeps its own index labels.
df_counts = pd.concat(counts)
df_counts.head()

Unnamed: 0,Pad name,Types,Authors
0,Group 7_session 1,paste,Etherpad_admin
3,Group 7_session 1,paste,Etherpad_admin
6,Group 7_session 1,paste,Etherpad_admin
9,Group 7_session 1,paste,Etherpad_admin
12,Group 7_session 1,edit,a.hKB2cgaxHJpIurYR


In [76]:
# Number of operations of each type per (pad, author); a type an author never
# produced in a pad counts as 0.
df_types = df_counts.groupby(by=['Pad name', 'Authors', 'Types'])['Types'].count().unstack().fillna(value=0)
# Rows of the 'Etherpad_admin' author are left out of the proportions.
df_types = df_types.drop(labels=['Etherpad_admin'], level='Authors')
df_types['total'] = df_types.sum(axis=1)

# Per-pad totals of every column. ``groupby(level=...).sum()`` replaces
# ``DataFrame.sum(level=...)``, which was deprecated in pandas 1.3 and removed
# in pandas 2.0.
pad_totals = df_types.groupby(level='Pad name').sum()

# After the merge the author-level counts carry the suffix '_x' and the
# per-pad totals the suffix '_y'.
df_types = df_types.merge(pad_totals, left_index=True, right_index=True)

# Share of each author in the pad's operations, per type and overall.
# A pad with no operation of a given type yields 0 / 0 = NaN for that type.
df_prop_types = pd.DataFrame()
for col in ['delete', 'edit', 'paste', 'write', 'total']:
    df_prop_types[col] = df_types[col + '_x'].div(df_types[col + '_y'])
df_prop_types.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,delete,edit,paste,write,total
Pad name,Authors,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Group 10_session 1,a.5jgzYp1oOxBDGwcJ,0.085714,0.193252,0.793651,0.107438,0.236697
Group 10_session 1,a.hDqEuezMoA7CVUDG,0.085714,0.113497,0.031746,0.181818,0.117431
Group 10_session 1,a.nEzl5qtUhUYiw9nz,0.457143,0.220859,0.111111,0.31405,0.244037
Group 10_session 1,a.w8mseKIkEjIt0fVP,0.371429,0.472393,0.063492,0.396694,0.401835
Group 11_session 1,a.1PFVWhCNTiGYVIaY,0.2,0.235849,0.09434,0.311688,0.228883


In [24]:
# Per-pad frames of author participation, concatenated at the end.
props = []
for pad in pads:
    # Proportions per author of this pad, requested with considerate_admin=True
    authors, proportions = pads[pad].author_proportions(considerate_admin=True)

    # One row per author; the pad name is repeated on every row
    columns = {
        'Participation proportion': proportions,
        'Authors': authors,
        'Pad name': pad,
    }
    df = pd.DataFrame(columns)
    props.append(df)

df_props = pd.concat(props)

In [25]:
# Show the participation proportion of every author in every pad.
# The index restarts at 0 for each pad because the per-pad frames were
# concatenated without resetting it.
df_props

Unnamed: 0,Authors,Pad name,Participation proportion
0,Etherpad_admin,Group 7_session 1,0.013654
1,a.hKB2cgaxHJpIurYR,Group 7_session 1,0.129772
2,a.wWrUOeC3T2PzYMDA,Group 7_session 1,0.111665
3,a.vW98tv9WbfStDxTq,Group 7_session 1,0.256439
4,a.LPP4s5Fk5fxHu5eI,Group 7_session 1,0.488470
0,Etherpad_admin,Group 1_session 1,0.012597
1,a.xXHBMVqjjuOxqJnC,Group 1_session 1,0.137887
2,a.2l4IIltwVpyA4sxR,Group 1_session 1,0.000757
3,a.rsAvO60QPs9tT4II,Group 1_session 1,0.423690
4,a.rcMsQlKrM3M22Scp,Group 1_session 1,0.312483
