# Length Limit Analysis

Question: How often students hit the length limit without having a working solution? Find the tasks where this situation happens most often and diagnose them.

In [8]:
# Settings and imports.
%matplotlib inline
from collections import OrderedDict
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import parallel_coordinates
import seaborn as sns
import data

sns.set()
pd.options.display.float_format = '{:.2f}'.format

In [54]:
tasks = data.load('robomission-2018-03-10/tasks.csv')
snapshots = data.load('robomission-2018-03-10/program_snapshots.csv')
task_sessions = data.load('robomission-2018-03-10/task_sessions.csv')

# Only consider sessions with an interaction.
task_sessions = task_sessions[task_sessions.time_spent > 0]

In [56]:
def extract_length(setting_string):
    setting = json.loads(setting_string)
    return setting.get('length', None)

tasks['limit'] = tasks.setting.apply(extract_length)

In [57]:
def compute_length(program):
    if not program or type(program) != str:
        return 0
    length = 0
    test = False
    for c in program:
        length += int(not test and c in 'flrsWRI')
        if c in 'WI':
            test = True
        elif c == '{':
            test = False
    return length
        
snapshots['length'] = snapshots.program.map(compute_length)

In [82]:
def get_task_sessions_with_failed_at_limit():
    max_lengths = snapshots.groupby('task_session').length.max()
    ts = task_sessions.assign(max_length=max_lengths)
    ts = ts.fillna(0)
    ts = ts.merge(tasks, left_on='task', right_index=True)
    ts = ts[['task', 'solved', 'max_length', 'limit']]
    ts = ts[~ts.limit.isna()]
    ts['reached_limit'] = ts.max_length >= ts.limit
    ts['failed_at_limit'] = ts.reached_limit & ~ts.solved
    return ts

ts = get_task_sessions_with_failed_at_limit()
ts.head()

Unnamed: 0_level_0,task,solved,max_length,limit,reached_limit,failed_at_limit
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
28,41,True,4,4.0,True,False
99,41,False,4,4.0,True,True
304,41,True,4,4.0,True,False
404,41,True,4,4.0,True,False
418,41,True,4,4.0,True,False


Sort tasks based on the failed-at-limit rate:

In [86]:
ts_rate = ts.groupby('task').failed_at_limit.mean()
ts_rate.sort_values(ascending=False)

task
81   0.55
77   0.55
65   0.53
15   0.50
59   0.50
82   0.48
9    0.44
35   0.44
4    0.43
62   0.42
85   0.41
74   0.40
27   0.39
33   0.38
78   0.37
40   0.35
5    0.34
60   0.34
64   0.33
16   0.33
83   0.30
28   0.30
32   0.29
48   0.26
55   0.25
43   0.25
42   0.23
63   0.21
54   0.20
46   0.20
     ... 
37   0.16
21   0.14
38   0.14
80   0.12
24   0.12
58   0.12
68   0.11
3    0.10
34   0.10
72   0.10
7    0.10
1    0.09
57   0.09
45   0.08
73   0.08
17   0.08
41   0.08
50   0.08
29   0.08
20   0.07
13   0.07
52   0.06
76   0.05
84   0.05
11   0.05
70   0.04
53   0.02
56   0.02
30   0.00
39   0.00
Name: failed_at_limit, Length: 65, dtype: float64

In [88]:
tasks.iloc[81]

name                                           yellow-squares
level                                                      if
setting     {"length": 4, "fields": [[["b", []], ["b", []]...
solution                                          W!b{fIy{s}}
limit                                                    4.00
Name: 78, dtype: object

In [89]:
tasks.iloc[77]

name                                                 letter-d
level                                         final-challenge
setting     {"length": 6, "fields": [[["b", []], ["b", []]...
solution                                W!b{fIx>3{Iy{r}/{l}}}
limit                                                    6.00
Name: 77, dtype: object

**Result:** Failing at the limit happens a lot. However, from this analysis we are not able to say if the failure happens because of the limit (we don't know if the student intended to add any more blocks).