In [1]:
from __future__ import annotations
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict, Counter
from collections.abc import Callable
from queue import PriorityQueue
from io import StringIO
import seaborn as sns
from dataclasses import dataclass, field
from typing import Any
# Notebook-wide pandas display setting for the width of string cells in rendered frames.
# NOTE(review): pandas documents `None` as the "unlimited" value for
# display.max_colwidth; presumably 0 is meant to disable truncation here --
# confirm it behaves as intended on the installed pandas version.
pd.set_option("display.max_colwidth", 0)

In [2]:
# Load the exported play sessions and keep only rows that have a user id
# and were recorded with app version "1.0.3".
raw_data = pd.read_csv("play_sessions.csv")
has_user_id = raw_data["user_id"].notna()
is_target_version = raw_data["version"] == "1.0.3"
# reset_index() (without drop=True) renumbers the rows from 0 and keeps the
# previous row labels in a new "index" column.
raw_data = raw_data[has_user_id & is_target_version].reset_index()
len(raw_data)

619

In [3]:
class Episode():
    """One episode (a single program run) within a play session.

    Attributes:
        passing: Whether the episode was marked as passing.
        programming_interface: Frames emitted by the programming interface
            that are associated with this episode.
        episode_data: Frames emitted by the episode itself.
        program: Serialized representation of the program that was run.
        challenge_name: Name of the challenge the episode belongs to.
    """

    def __init__(self, pi: pd.DataFrame | None = None, ed: pd.DataFrame | None = None,
                 passing: bool = False, program_rep: str = "", challenge_name: str = ""):
        """Create an episode; every argument is optional.

        Python keeps only the last ``__init__`` defined in a class body, so the
        former no-argument and five-argument constructors are merged into one
        whose defaults reproduce the no-argument variant.

        Args:
            pi: Programming-interface frames; an empty DataFrame when omitted.
            ed: Episode-data frames; an empty DataFrame when omitted.
            passing: Whether the episode passed.
            program_rep: Serialized program for this episode.
            challenge_name: Name of the challenge.
        """
        self.passing = passing
        # Build a fresh empty frame per instance instead of sharing a default object.
        self.programming_interface = pd.DataFrame() if pi is None else pi
        self.episode_data = pd.DataFrame() if ed is None else ed
        self.program = program_rep
        self.challenge_name = challenge_name

    def __str__(self):
        """Return the string rendering of this episode's ``episode_data`` frame."""
        # Previously returned str(ed): `ed` is not defined in this scope, so it
        # raised NameError (or silently picked up an unrelated global).
        return str(self.episode_data)
        

In [4]:
def parse_raw_data_frames(row:int) -> pd.DataFrame:
    """Decode the ``frames`` cell of one ``raw_data`` row into a DataFrame.

    The cell holds a JSON document whose elements are themselves JSON-encoded
    strings, so it is decoded in two passes: once for the outer container and
    once for each element.

    Args:
        row: Label of the row in ``raw_data`` whose ``frames`` value is parsed.

    Returns:
        A DataFrame built from the decoded elements.
    """
    encoded_frames = json.loads(raw_data.frames[row])
    decoded_frames = [json.loads(encoded) for encoded in encoded_frames]
    return pd.DataFrame(decoded_frames)

# Cache of parsed session frames, keyed by raw_data row label.
memo_frames = {}

def iter_session_frames():
    """Yield the parsed frames of every ``raw_data`` row, one DataFrame per row.

    Each row is parsed at most once and stored in ``memo_frames``; callers
    receive a copy, so mutating a yielded frame does not affect the cache.
    """
    for row_label in raw_data.index:
        try:
            cached = memo_frames[row_label]
        except KeyError:
            cached = memo_frames[row_label] = parse_raw_data_frames(row_label)
        yield cached.copy()

def iter_enum_session_frames(): #TODO: uh make this not a weird copy
    """Yield ``(row_label, frames)`` for every row of ``raw_data``.

    Unlike ``iter_session_frames`` this does not consult ``memo_frames``:
    every row is parsed afresh on each iteration.
    """
    yield from (
        (row_label, parse_raw_data_frames(row_label))
        for row_label in raw_data.index
    )

In [5]:
# Split every play session into Episode objects, grouped per user.
#
# organized_sessions maps user_id -> list of sessions, where each session is
# the list of Episodes found in one raw_data row.  Frames whose actor is
# neither "episode_data" nor "programming_interface" are gathered into
# other_actors.
#
# Episode frames are accumulated as lists of row Series and turned into
# DataFrames directly.  The previous version concatenated CSV text and passed
# a StringIO to pd.DataFrame(), which does not parse CSV: it yields a
# one-column frame holding the raw text lines (and the trailing appends even
# omitted the header line used inside the loop).
organized_sessions = defaultdict(list)
other_actor_rows = []

for session_idx, all_frames in iter_enum_session_frames():
    if len(all_frames) == 0:
        continue
    user_id = raw_data.user_id[session_idx]
    frame_columns = all_frames.columns

    episode_list = []
    passing = False
    state = ""
    challenge_name = ""
    curr_prog_rows = []      # programming_interface frames since the latest episode start
    prev_prog_rows = []      # programming_interface frames that preceded the latest episode start
    curr_episode_rows = []   # episode_data frames of the episode in progress

    # Use a throwaway name for the row label so the session index is not shadowed.
    for _, frame in all_frames.iterrows():
        if frame.actor == "episode_data":
            if frame.object_name == "challenge_pass":
                passing = True
            if frame.verb == "episode_started":
                # A new episode begins: flush what was collected so far.
                # NOTE(review): challenge_name is updated before the flush, so
                # the flushed episode carries the *new* frame's object_name, and
                # the first "episode_started" of a session flushes an episode
                # with no episode_data rows.  Both are kept as in the original
                # because downstream metrics are computed from this list.
                challenge_name = frame.object_name
                episode_list.append(Episode(
                    passing=passing,
                    pi=pd.DataFrame(prev_prog_rows, columns=frame_columns),
                    ed=pd.DataFrame(curr_episode_rows, columns=frame_columns),
                    program_rep=state, challenge_name=challenge_name))
                # Canonicalize the program JSON so equal programs compare equal as strings.
                state = json.dumps(json.loads(frame.state_info["program"]), sort_keys=True)
                passing = False
                prev_prog_rows = curr_prog_rows
                curr_prog_rows = []
                curr_episode_rows = []

            curr_episode_rows.append(frame)
        elif frame.actor == "programming_interface":
            curr_prog_rows.append(frame)
        else:
            other_actor_rows.append(frame)

    # record last episode and last program changes
    episode_list.append(Episode(
        passing=passing,
        pi=pd.DataFrame(prev_prog_rows, columns=frame_columns),
        ed=pd.DataFrame(curr_episode_rows, columns=frame_columns),
        program_rep=state, challenge_name=challenge_name))
    if curr_prog_rows:
        # Program edits made after the last episode started, with no run following them.
        episode_list.append(Episode(
            passing=False,
            pi=pd.DataFrame(curr_prog_rows, columns=frame_columns),
            ed=pd.DataFrame(columns=frame_columns),
            program_rep="", challenge_name=challenge_name))

    organized_sessions[user_id].append(episode_list)

# Build other_actors once instead of re-concatenating inside the loop
# (row-by-row concat copies the whole frame on every append).
if other_actor_rows:
    other_actors = pd.concat(
        [row.to_frame(1).T for row in other_actor_rows],
        ignore_index=True, sort=True)
else:
    other_actors = pd.DataFrame()
    

In [6]:
# Empty placeholder frames declaring the intended schema of the two metric tables.
# The names must be passed via `columns=`: handing the list to pd.DataFrame()
# positionally treats it as data and yields a single-column frame whose rows
# are the names themselves.
# "activity_name" is included so this schema agrees with the rows that the
# later cell uses to rebuild session_metrics.
session_metrics = pd.DataFrame(columns=[
    "user_id", "session_index_for_user", "activity_type", "activity_name",
    "num_episodes", "passed", "num_episodes_before_passing_or_quitting"])
activity_metrics = pd.DataFrame(columns=[
    "activity_name", "activity_type", "activity_instructions",
    "number_of_sessions", "number_of_passing_sessions",
    "average_episodes_per_session", "median_episodes_per_session",
    "min_episodes_per_session", "max_episodes_per_session"])

In [10]:
def failing_attempt_counter(list_of_episodes) -> tuple[int, bool]:
    """Count the episodes that come before the first passing one.

    Args:
        list_of_episodes: Iterable of objects exposing a truthy/falsy
            ``passing`` attribute, in chronological order.

    Returns:
        ``(count, passed)`` where ``count`` is the number of episodes seen
        before the first passing episode (or the total number of episodes if
        none passes) and ``passed`` tells whether a passing episode was found.
    """
    # The annotation used to be `(int, bool)`, which is a tuple literal rather
    # than a type; `tuple[int, bool]` states the same intent validly.
    failures = 0
    for episode in list_of_episodes:
        if episode.passing:
            return failures, True
        failures += 1
    return failures, False

# Build one metrics row per (user, session): the activity type inferred from
# the challenge name, the number of episodes, whether the session reached a
# passing episode, and how many episodes preceded it.

# Substrings that identify an activity type, checked in this order.
# "mini_challenge" has to precede "challenge" because the latter is a
# substring of the former.
ACTIVITY_TYPE_MARKERS = ("try_it", "direct_instruction", "mini_challenge", "challenge")

session_metric_rows = []

for uid, sessions in organized_sessions.items():
    for i, session in enumerate(sessions):
        num_episodes_before_passing, passing = failing_attempt_counter(session)
        if session:
            # The session is labelled by the challenge name of its first episode.
            name = session[0].challenge_name
            challengetype = next(
                (marker for marker in ACTIVITY_TYPE_MARKERS if marker in name),
                "other")
        else:
            name = "unknown"
            challengetype = "unknown"
        session_metric_rows.append(
            [uid, i, challengetype, name, len(session), passing, num_episodes_before_passing])

session_metrics = pd.DataFrame(
    session_metric_rows,
    columns=["user_id", "session_index_for_user", "activity_type", "activity_name",
             "num_episodes", "passed", "num_episodes_before_passing_or_quitting"])
        
        

In [11]:
session_metrics

Unnamed: 0,user_id,session_index_for_user,activity_type,activity_name,num_episodes,passed,num_episodes_before_passing_or_quitting
0,1923583.0,0,other,spike_curric_turning_in_place_curriculum,2,True,1
1,1923583.0,1,try_it,spike_curric_turning_in_place_left_turn_try_it,2,True,1
2,1923583.0,2,try_it,spike_curric_90_degree_turn_try_it,2,True,1
3,1923583.0,3,other,,1,False,1
4,1923583.0,4,other,,1,False,1
...,...,...,...,...,...,...,...
577,1947126.0,38,try_it,spike_curric_other_turns_sharp_turn_try_it,2,True,1
578,1947126.0,39,mini_challenge,spike_curric_steer_around_the_crater_mini_challenge,21,True,18
579,1947126.0,40,try_it,spike_curric_arm_movement_smaller_movements_try_it,11,True,10
580,1947126.0,41,try_it,spike_curric_arm_movement_getting_stuck_try_it,2,True,1
