In [30]:
import pandas as pd
import numpy as np
import json

In [31]:
dataEvents = pd.read_csv('E:/Documentos/PCEO/5/Informatica/TFG/datos/anonamyze_all_data_collection_v2.csv', sep=";")

In [32]:
pd.options.mode.chained_assignment = None  # default='warn'

orderMapping = {'1. One Box': 1, '2. Separated Boxes': 2, '3. Rotate a Pyramid': 3, '4. Match Silhouettes': 4, '5. Removing Objects': 5, '6. Stretch a Ramp': 6, '7. Max 2 Boxes': 7, '8. Combine 2 Ramps': 8, '9. Scaling Round Objects': 9,'Square Cross-Sections': 10, 'Bird Fez': 11, 'Pi Henge': 12, '45-Degree Rotations': 13,  'Pyramids are Strange': 14, 'Boxes Obscure Spheres': 15, 'Object Limits': 16, 'Warm Up': 17, 'Angled Silhouette': 18, 'Sugar Cones': 19,'Stranger Shapes': 20, 'Tall and Small': 21, 'Ramp Up and Can It': 22, 'More Than Meets Your Eye': 23, 'Not Bird': 24, 'Unnecesary': 25, 'Zzz': 26, 'Bull Market': 27, 'Few Clues': 28, 'Orange Dance': 29, 'Bear Market': 30}


def computeAvgTimes(dataEvents):

    dataEvents['group'] = [json.loads(x)['group'] if 'group' in json.loads(x).keys() else '' for x in dataEvents['data']]
    dataEvents['user'] = [json.loads(x)['user'] if 'user' in json.loads(x).keys() else '' for x in dataEvents['data']]

    # removing those rows where we dont have a group and a user that is not guest
    dataEvents = dataEvents[((dataEvents['group'] != '') & (dataEvents['user'] != '') & (dataEvents['user'] != 'guest'))]
    dataEvents['group_user_id'] = dataEvents['group'] + '~' + dataEvents['user']


    # Data Cleaning
    dataEvents['time'] = pd.to_datetime(dataEvents['time'])
    dataEvents = dataEvents.sort_values('time')

    userPuzzleDict = {}
    theresHoldActivity = 60

    for user in dataEvents['group_user_id'].unique():

        #Select rows
        user_events = dataEvents[dataEvents['group_user_id'] == user]
        userPuzzleDict[user] = {}

        # Analyze when a puzzle has been started
        activePuzzle = None
        previousEvent = None
        activeTime = 0
        initialTime = None

        for enum, event in user_events.iterrows():

            if(event['type'] == 'ws-puzzle_started'):

                initialTime = event['time']
                activePuzzle = json.loads(event['data'])['task_id']
                if(activePuzzle not in userPuzzleDict[user].keys()):
                    userPuzzleDict[user][activePuzzle] = {'completed':0,'avg_complete_time':0}


            # If they are not playing a puzzle we do not do anything and continue
            if(activePuzzle is None):
                continue

            # If it is the first event we store the current event and continue
            if(previousEvent is None):
                previousEvent = event
                continue

            # Add new active time
            delta_seconds = (event['time'] - previousEvent['time']).total_seconds()
            if((delta_seconds < theresHoldActivity)):
                activeTime += delta_seconds

            # If event is puzzle complete we always add it
            if(event['type'] == 'ws-puzzle_complete'):
                puzzleName = json.loads(event['data'])['task_id']
                if(puzzleName in userPuzzleDict[user].keys()):
                    if(userPuzzleDict[user][puzzleName]['completed']==0):
                        userPuzzleDict[user][puzzleName]['avg_complete_time'] += round(activeTime/60,2)
                        userPuzzleDict[user][puzzleName]['completed'] = 1


            # Analyze when puzzle is finished or user left
            # Measure time, attempts, completion and actions
            if(event['type'] in ['ws-puzzle_complete', 'ws-exit_to_menu', 'ws-disconnect']):
                # reset counters
                activeTime = 0
                activePuzzle = None

            previousEvent = event


    stats_by_level_player = []
    for user in userPuzzleDict.keys():
        userDf = pd.DataFrame.from_dict(userPuzzleDict[user], orient = 'index')
        userDf['group_user_id'] = user
        key_split = user.split('~')
        userDf['group'] = key_split[0]
        if (userDf.shape != 0):
            stats_by_level_player.append(userDf)
        else:
            continue

    try:
        stats_by_level_player = pd.concat(stats_by_level_player, sort=True)
        stats_by_level_player['puzzle'] = stats_by_level_player.index
        stats_by_level_player['order'] = stats_by_level_player['puzzle'].map(orderMapping)

        avgTimes = round(stats_by_level_player.groupby(['puzzle','order']).agg({
                                                    'avg_complete_time': lambda x: np.mean(x[x!=0]) })
                               .reset_index(),2).sort_values('order')

        return avgTimes,stats_by_level_player

    except ValueError:
        return -1

In [33]:
pruebas,stats = computeAvgTimes(dataEvents)

avg_complete_time:  0.36 TotalTime:  0.31
avg_complete_time:  1.23 TotalTime:  1.0
avg_complete_time:  4.21 TotalTime:  4.11
avg_complete_time:  1.09 TotalTime:  1.01
avg_complete_time:  1.47 TotalTime:  1.26
avg_complete_time:  6.02 TotalTime:  5.87
avg_complete_time:  6.9 TotalTime:  6.77
avg_complete_time:  2.99 TotalTime:  0.12
avg_complete_time:  0.21 TotalTime:  0.2
avg_complete_time:  0.1 TotalTime:  0.09
avg_complete_time:  0.22 TotalTime:  0.19
avg_complete_time:  14.15 TotalTime:  13.29
avg_complete_time:  3.87 TotalTime:  3.75
avg_complete_time:  0.48 TotalTime:  0.48
avg_complete_time:  1.07 TotalTime:  0.95
avg_complete_time:  3.61 TotalTime:  3.47
avg_complete_time:  1.96 TotalTime:  1.85
avg_complete_time:  9.3 TotalTime:  9.23
avg_complete_time:  0.39 TotalTime:  0.36
avg_complete_time:  0.65 TotalTime:  0.45
avg_complete_time:  0.67 TotalTime:  0.47
avg_complete_time:  0.75 TotalTime:  0.6
avg_complete_time:  7.45 TotalTime:  7.35
avg_complete_time:  4.11 TotalTime:  3

In [34]:
pruebas.to_csv("../Outputs/avgTimeByPuzzleOutput.csv", decimal = ".", sep =";", mode='w')

PermissionError: [Errno 13] Permission denied: '../Outputs/avgTimeByPuzzleOutput2.csv'

In [35]:
stats

Unnamed: 0,TotalTime,avg_complete_time,completed,group,group_user_id,puzzle,order
Sandbox,0.00,0.00,0.0,cb71040b5bd1341a34afc24961536ebd,cb71040b5bd1341a34afc24961536ebd~098f6bcd4621d...,Sandbox,
1. One Box,0.31,0.36,1.0,cb71040b5bd1341a34afc24961536ebd,cb71040b5bd1341a34afc24961536ebd~56ccce25ead83...,1. One Box,1.0
2. Separated Boxes,1.00,1.23,1.0,cb71040b5bd1341a34afc24961536ebd,cb71040b5bd1341a34afc24961536ebd~56ccce25ead83...,2. Separated Boxes,2.0
3. Rotate a Pyramid,4.11,4.21,1.0,cb71040b5bd1341a34afc24961536ebd,cb71040b5bd1341a34afc24961536ebd~56ccce25ead83...,3. Rotate a Pyramid,3.0
4. Match Silhouettes,1.01,1.09,1.0,cb71040b5bd1341a34afc24961536ebd,cb71040b5bd1341a34afc24961536ebd~56ccce25ead83...,4. Match Silhouettes,4.0
...,...,...,...,...,...,...,...
5. Removing Objects,1.64,1.71,1.0,e21640b4aea9349ad77d86d6017cb061,e21640b4aea9349ad77d86d6017cb061~fb504c1409250...,5. Removing Objects,5.0
6. Stretch a Ramp,3.53,3.62,1.0,e21640b4aea9349ad77d86d6017cb061,e21640b4aea9349ad77d86d6017cb061~fb504c1409250...,6. Stretch a Ramp,6.0
7. Max 2 Boxes,0.00,0.00,0.0,e21640b4aea9349ad77d86d6017cb061,e21640b4aea9349ad77d86d6017cb061~fb504c1409250...,7. Max 2 Boxes,7.0
1. One Box,0.47,0.47,1.0,e21640b4aea9349ad77d86d6017cb061,e21640b4aea9349ad77d86d6017cb061~5cce64bf55190...,1. One Box,1.0
