# Observations on the data

## Remaining questions
- Are the test stats correct?  For example:
    - Do the summary stats agree with the detailed data for each test? NO !!!
    - Is all_correct calculated correctly 'within' the summary stats?
- Are there duplicates?
- What is the structure of a question?

## The data:
- Overall structure
    + exam
        + question
            + result
- Contains 1 object per test administered per student
- Each object contains metadata about the test and the results

## Key Fields
- Exam ID
- Exam_name = string
- Certification = boolean
- ['sequences']['counts'] = 7 fields that provide summary of test outcomes for the student, for example: number of questions, number correct, number wrong




In [239]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import SVG

import json

In [240]:
with open('./assessment-attempts-20180128-121051-nested.json') as json_file:
    data = json.load(json_file)

In [241]:
len(data)

3280

In [242]:
data[0]

{'keen_timestamp': '1516717442.735266',
 'max_attempts': '1.0',
 'started_at': '2018-01-23T14:23:19.082Z',
 'base_exam_id': '37f0a30a-7464-11e6-aa92-a8667f27e5dc',
 'user_exam_id': '6d4089e4-bde5-4a22-b65f-18bce9ab79c8',
 'sequences': {'questions': [{'user_incomplete': True,
    'user_correct': False,
    'options': [{'checked': True,
      'at': '2018-01-23T14:23:24.670Z',
      'id': '49c574b4-5c82-4ffd-9bd1-c3358faf850d',
      'submitted': 1,
      'correct': True},
     {'checked': True,
      'at': '2018-01-23T14:23:25.914Z',
      'id': 'f2528210-35c3-4320-acf3-9056567ea19f',
      'submitted': 1,
      'correct': True},
     {'checked': False,
      'correct': True,
      'id': 'd1bf026f-554f-4543-bdd2-54dcf105b826'}],
    'user_submitted': True,
    'id': '7a2ed6d3-f492-49b3-b8aa-d080a8aad986',
    'user_result': 'missed_some'},
   {'user_incomplete': False,
    'user_correct': False,
    'options': [{'checked': True,
      'at': '2018-01-23T14:23:30.116Z',
      'id': 'a35d0e

In [243]:
# already verified that all have sequence field
len_sequences = []
len_questions = []

for item in data:
    len_sequences.append(len(item['sequences']))
    len_questions.append(len(item['sequences']['questions']))

print('# sequences range from', min(len_sequences), 'to', max(len_sequences))
print('# questions per sequence range from', min(len_questions), 'to', max(len_questions))
print('# questions total', sum(len_questions))

# sequences range from 3 to 4
# questions per sequence range from 1 to 20
# questions total 14717


In [244]:
for item in data[0]['sequences']:
    print(item)

questions
attempt
id
counts


In [245]:
# determine number of assessments that have fewer than 4 sequence entries
outliers = []
for item in data:
    if len(item['sequences']) < 4:
        outliers.append(item)

print(len(outliers), 'assessments have different number of sequences out of total of', len(len_sequences))

5 assessments have different number of sequences out of total of 3280


In [246]:
# this is to examine the assessments that have fewer than 4 sequence entries, trying to understand the difference
for item in outliers:
    print(item, '\n')

{'keen_timestamp': '1515770320.8660779', 'max_attempts': '1.0', 'started_at': '2018-01-12T15:18:36.145Z', 'base_exam_id': 'example-id', 'user_exam_id': '2c6eab33-a584-464e-b020-d5822653c46f', 'sequences': {'attempt': 1, 'id': '9b2f7400-91fd-4e7c-b19e-0ce20d2bc675', 'questions': [{'options': [{'checked': False, 'correct': True, 'id': 'e3b27b55-3818-4721-abe3-0fd4aee9c0ab'}, {'checked': False, 'id': '8001e149-88c1-43ef-970a-0c6108035e94'}, {'checked': True, 'at': '2018-01-12T15:18:39.167Z', 'id': '66dcb9d1-3692-4327-b62b-167c72f52949'}], 'id': '69e7297c-1433-4e9f-8585-5f09c78ddbad'}, {'options': [{'checked': False, 'id': '55eed25e-8cd8-46fc-9ea4-0b7377ce425d'}, {'checked': False, 'correct': True, 'id': '4a329ac6-052e-45db-a0a5-b2941a62d5cf'}], 'id': '3686549f-5f1d-4875-a42b-663c402fc9ed'}, {'options': [{'checked': False, 'id': '53455293-2275-4c16-8fed-da19d47cfd18'}, {'checked': False, 'correct': True, 'id': '94d7c6c3-1212-45da-b1ce-4ea1c91df3dc'}], 'id': 'f47b04b5-5f44-45a7-9605-635461e

All 5 had the same description "Example Exam For Development and Testing oh yeahsdf".
This implies these 5 are not for production and should therefore be removed from the dataset.

In [247]:
# checking for other outliers
others = []
for item in data:
    if item['exam_name'].find('yeahsdf') > -1:
        others.append(item)
    elif item['exam_name'].find('Example Exam') > -1:
        others.append(item)
    elif item['base_exam_id'].find('example') > -1:
        others.append(item)

print(len(others))

5


So based on this there are only 5 "development" assessments to ignore

In [330]:
df_questions = pd.DataFrame()
assessment_ids = []
exam_ids = []
exam_started = []
exam_names = []
questions = []
for item in data:
    if item['exam_name'].find('yeahsdf') > -1:
        continue
    for question in item['sequences']['questions']:
        assessment_ids.append(item['keen_id'])
        exam_ids.append(item['base_exam_id'])
        exam_started.append(item['started_at'])
        exam_names.append(item['exam_name'])
#        exams.append(item['exam_name'])
        questions.append(question)

In [331]:
df_questions = pd.DataFrame()
df_questions['assessment_id'] = assessment_ids
df_questions['exam_id'] = exam_ids
df_questions['started_at'] = exam_started
df_questions['name'] = exam_names
df_questions['question'] = questions

In [332]:
df_questions

Unnamed: 0,assessment_id,exam_id,started_at,name,question
0,5a6745820eb8ab00016be1f1,37f0a30a-7464-11e6-aa92-a8667f27e5dc,2018-01-23T14:23:19.082Z,Normal Forms and All That Jazz Master Class,"{'user_incomplete': True, 'user_correct': Fals..."
1,5a6745820eb8ab00016be1f1,37f0a30a-7464-11e6-aa92-a8667f27e5dc,2018-01-23T14:23:19.082Z,Normal Forms and All That Jazz Master Class,"{'user_incomplete': False, 'user_correct': Fal..."
2,5a6745820eb8ab00016be1f1,37f0a30a-7464-11e6-aa92-a8667f27e5dc,2018-01-23T14:23:19.082Z,Normal Forms and All That Jazz Master Class,"{'user_incomplete': False, 'user_correct': Tru..."
3,5a6745820eb8ab00016be1f1,37f0a30a-7464-11e6-aa92-a8667f27e5dc,2018-01-23T14:23:19.082Z,Normal Forms and All That Jazz Master Class,"{'user_incomplete': False, 'user_correct': Tru..."
4,5a674541ab6b0a0001c6e723,37f0a30a-7464-11e6-aa92-a8667f27e5dc,2018-01-23T14:21:47.505Z,Normal Forms and All That Jazz Master Class,"{'user_incomplete': False, 'user_correct': Tru..."
...,...,...,...,...,...
14697,5a3a3df2448eb200012a2efd,a62e5d35-75e9-11e6-8197-9801a7c3b233,2017-12-20T10:38:26.490Z,Learning Linux System Administration,"{'user_incomplete': False, 'user_correct': Tru..."
14698,5a3a3df2448eb200012a2efd,a62e5d35-75e9-11e6-8197-9801a7c3b233,2017-12-20T10:38:26.490Z,Learning Linux System Administration,"{'user_incomplete': False, 'user_correct': Tru..."
14699,5a3a3df2448eb200012a2efd,a62e5d35-75e9-11e6-8197-9801a7c3b233,2017-12-20T10:38:26.490Z,Learning Linux System Administration,"{'user_incomplete': False, 'user_correct': Tru..."
14700,5a3a3df2448eb200012a2efd,a62e5d35-75e9-11e6-8197-9801a7c3b233,2017-12-20T10:38:26.490Z,Learning Linux System Administration,"{'user_incomplete': False, 'user_correct': Fal..."


In [251]:
#plt.hist(df_questions['started_at'])

In [252]:
df_questions[110:120]['name']

110                Learning iPython Notebook
111                   Introduction to Python
112                   Introduction to Python
113                   Introduction to Python
114                   Introduction to Python
115                   Introduction to Python
116    Beginning Programming with JavaScript
117    Beginning Programming with JavaScript
118    Beginning Programming with JavaScript
119    Beginning Programming with JavaScript
Name: name, dtype: object

In [253]:
data[5]['sequences']['attempt']

1

In [254]:
len_counts = []

for item in data:
    if item['exam_name'].find('yeahsdf') > -1:
        continue

    len_counts.append(len(item['sequences']['counts']))

print('# counts range from', min(len_counts), 'to', max(len_counts))


# counts range from 7 to 7


In [255]:
data[0]['sequences']['counts']

{'incomplete': 1,
 'submitted': 4,
 'incorrect': 1,
 'all_correct': False,
 'correct': 2,
 'total': 4,
 'unanswered': 0}

In [260]:
df_exams = pd.DataFrame()
exam_ids = []
assessment_ids = []
exam_started = []
exam_names = []
certs = []
attempts = []
counts = []
questions = []
for item in data:
    if item['exam_name'].find('yeahsdf') > -1:
        continue
    exam_ids.append(item['base_exam_id'])
    assessment_ids.append(item['keen_id'])
    exam_started.append(item['started_at'])
    exam_names.append(item['exam_name'])
    certs.append(item['certification'])
    attempts.append(item['sequences']['attempt'])
    counts.append(item['sequences']['counts'])
    questions.append(item['sequences']['questions'])
    

In [261]:
print('# attempts per exam range from', min(attempts), 'to', max(attempts))

# attempts per exam range from 1 to 1


In [262]:
df_exams = pd.DataFrame()
df_exams['assessment_id'] = assessment_ids
df_exams['exam_id'] = exam_ids
df_exams['started_at'] = exam_started
df_exams['name'] = exam_names
df_exams['certification'] = certs
df_exams['attempts'] = attempts

df_counts = pd.DataFrame(counts)
df_exams = df_exams.join(df_counts)

In [263]:
df_exams.tail()

Unnamed: 0,assessment_id,exam_id,started_at,name,certification,attempts,incomplete,submitted,incorrect,all_correct,correct,total,unanswered
3270,5a3a4a5e63cb1c0001c01a0a,8b4488de-43a5-4ffa-bf82-af1e19ee1b64,2017-12-20T11:32:23.262Z,Learning Git,False,1,0,5,0,True,5,5,0
3271,5a3a4a0eee88b10001cfb0a3,8b4488de-43a5-4ffa-bf82-af1e19ee1b64,2017-12-20T11:29:09.015Z,Learning Git,False,1,0,5,2,False,3,5,0
3272,5a3a4a3ef646fe0001f73283,8b4488de-43a5-4ffa-bf82-af1e19ee1b64,2017-12-20T11:31:46.949Z,Learning Git,False,1,0,5,1,False,4,5,0
3273,5a3a3f6b8c7fa00001d2a03e,f80366d9-db60-41c3-a1c4-6c7789b478f8,2017-12-20T10:44:09.162Z,"I'm a Software Architect, Now What?",False,1,1,4,0,False,3,4,0
3274,5a3a3df2448eb200012a2efd,a62e5d35-75e9-11e6-8197-9801a7c3b233,2017-12-20T10:38:26.490Z,Learning Linux System Administration,False,1,0,8,2,False,6,8,0


# Other

In [124]:
df_questions = pd.DataFrame(questions)

In [125]:
df_questions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,"{'user_incomplete': True, 'user_correct': Fals...","{'user_incomplete': False, 'user_correct': Fal...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...",,,,,,,,,,,,,,,,
1,"{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': True, 'user_correct': Fals...","{'user_incomplete': False, 'user_correct': Fal...","{'user_incomplete': True, 'user_correct': Fals...",,,,,,,,,,,,,,,,
2,"{'user_incomplete': False, 'user_correct': Fal...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...",,,,,,,,,,,,,,,,
3,"{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': True, 'user_correct': Fals...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': True, 'user_correct': Fals...",,,,,,,,,,,,,,,,
4,"{'user_incomplete': False, 'user_correct': Fal...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...",,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3270,"{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...",,,,,,,,,,,,,,,
3271,"{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Fal...","{'user_incomplete': False, 'user_correct': Fal...","{'user_incomplete': False, 'user_correct': Tru...",,,,,,,,,,,,,,,
3272,"{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Fal...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...",,,,,,,,,,,,,,,
3273,"{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': False, 'user_correct': Tru...","{'user_incomplete': True, 'user_correct': Fals...","{'user_incomplete': False, 'user_correct': Tru...",,,,,,,,,,,,,,,,


In [134]:
len(df_exams[df_exams['name'].str.contains('Learning Git')==True])

394

In [168]:
# show fields for each question
for blah in questions[0][0]:
    print(blah)

user_incomplete
user_correct
options
user_submitted
id
user_result


In [169]:
len(questions)

3275

In [166]:
for blah in questions[0]:
    for blahblah in blah:
        print(blahblah, blah[blahblah])
    for opt in blah['options']:
        print(opt)
    print('\n')

user_incomplete True
user_correct False
options [{'checked': True, 'at': '2018-01-23T14:23:24.670Z', 'id': '49c574b4-5c82-4ffd-9bd1-c3358faf850d', 'submitted': 1, 'correct': True}, {'checked': True, 'at': '2018-01-23T14:23:25.914Z', 'id': 'f2528210-35c3-4320-acf3-9056567ea19f', 'submitted': 1, 'correct': True}, {'checked': False, 'correct': True, 'id': 'd1bf026f-554f-4543-bdd2-54dcf105b826'}]
user_submitted True
id 7a2ed6d3-f492-49b3-b8aa-d080a8aad986
user_result missed_some
{'checked': True, 'at': '2018-01-23T14:23:24.670Z', 'id': '49c574b4-5c82-4ffd-9bd1-c3358faf850d', 'submitted': 1, 'correct': True}
{'checked': True, 'at': '2018-01-23T14:23:25.914Z', 'id': 'f2528210-35c3-4320-acf3-9056567ea19f', 'submitted': 1, 'correct': True}
{'checked': False, 'correct': True, 'id': 'd1bf026f-554f-4543-bdd2-54dcf105b826'}


user_incomplete False
user_correct False
options [{'checked': True, 'at': '2018-01-23T14:23:30.116Z', 'id': 'a35d0e80-8c49-415d-b8cb-c21a02627e2b', 'submitted': 1}, {'checked

In [237]:
total_correct = 0
total_wrong = 0
idx = 1
cnt_questions = len(questions[idx])
for exam in questions[idx]:
    cnt_correct = 0
    cnt_wrong = 0
    for option in exam['options']:
        if ('submitted' not in option):
            continue
        if ('correct' in option):
            if option['correct']:
                cnt_correct += 1
            else:
                cnt_wrong += 1
        else:
            cnt_wrong += 1
        print(option, cnt_correct)
    print('\n')
    if (cnt_correct > 0):
        total_correct += 1
    if (cnt_wrong > 0):
        total_wrong += 1

print('Detail\tQuestions = ', cnt_questions, 'Correct = ', total_correct, '\tIncorrect = ', total_wrong)
print('Summary\tQuestions = ', counts[idx]['total'], 'Correct = ', counts[idx]['correct'], '\tIncorrect = ', counts[idx]['incorrect'])

{'checked': True, 'at': '2018-01-23T14:22:04.301Z', 'id': '7f13df9c-fcbe-4424-914f-2206f106765c', 'submitted': 1, 'correct': True} 1


{'checked': True, 'at': '2018-01-23T14:22:24.525Z', 'id': '7e0b639a-2ef8-4604-b7eb-5018bd81a91b', 'submitted': 1, 'correct': True} 1


{'checked': True, 'at': '2018-01-23T14:22:39.396Z', 'id': 'c185ecdb-48fb-4edb-ae4e-0204ac7a0909', 'submitted': 1, 'correct': True} 1
{'checked': True, 'at': '2018-01-23T14:22:35.081Z', 'id': '77a66c83-d001-45cd-9a5a-6bba8eb7389e', 'submitted': 1, 'correct': True} 2
{'checked': True, 'at': '2018-01-23T14:22:48.197Z', 'id': 'a9333679-de9d-41ff-bb3d-b239d6b95732', 'submitted': 1} 2


{'checked': True, 'at': '2018-01-23T14:22:55.494Z', 'id': '49c574b4-5c82-4ffd-9bd1-c3358faf850d', 'submitted': 1, 'correct': True} 1


Detail	Questions =  4 Correct =  4 	Incorrect =  1
Summary	Questions =  4 Correct =  1 	Incorrect =  1


In [235]:
questions[idx]

[{'user_incomplete': False,
  'user_correct': True,
  'options': [{'checked': False, 'id': '62feee6e-9b76-4123-bd9e-c0b35126b1f1'},
   {'checked': True,
    'at': '2018-01-23T14:22:04.301Z',
    'id': '7f13df9c-fcbe-4424-914f-2206f106765c',
    'submitted': 1,
    'correct': True},
   {'checked': False, 'id': '2c29e8e8-d4a8-406e-9cdf-de28ec5890fe'},
   {'checked': False, 'id': '59b9fc4b-f239-4850-b1f9-912d1fd3ca13'}],
  'user_submitted': True,
  'id': '95194331-ac43-454e-83de-ea8913067055',
  'user_result': 'correct'},
 {'user_incomplete': True,
  'user_correct': False,
  'options': [{'checked': True,
    'at': '2018-01-23T14:22:24.525Z',
    'id': '7e0b639a-2ef8-4604-b7eb-5018bd81a91b',
    'submitted': 1,
    'correct': True},
   {'checked': False,
    'correct': True,
    'id': 'bccd6e2e-2cef-4c72-8bfa-317db0ac48bb'},
   {'checked': False, 'id': 'a35d0e80-8c49-415d-b8cb-c21a02627e2b'}],
  'user_submitted': True,
  'id': 'bbed4358-999d-4462-9596-bad5173a6ecb',
  'user_result': 'misse

# Checking for duplicates

In [266]:
df_exams['assessment_id'].duplicated().value_counts()

False    3237
True       38
Name: assessment_id, dtype: int64

In [343]:
dup_ids = df_exams['assessment_id'][df_exams['assessment_id'].duplicated(keep=False)]
df_dupes = pd.DataFrame()
for dupe in dup_ids:
    df_dupes = df_dupes.append(df_exams[df_exams['assessment_id']==dupe])

In [344]:
df_dupes

Unnamed: 0,assessment_id,exam_id,started_at,name,certification,attempts,incomplete,submitted,incorrect,all_correct,correct,total,unanswered
461,5a4106a38149bb0001436a3d,8b4488de-43a5-4ffa-bf82-af1e19ee1b64,2017-12-25T14:08:26.602Z,Learning Git,false,1,0,5,0,True,5,5,0
1053,5a4106a38149bb0001436a3d,8b4488de-43a5-4ffa-bf82-af1e19ee1b64,2017-12-25T14:08:26.602Z,Learning Git,false,1,0,5,0,True,5,5,0
3118,5a4106a38149bb0001436a3d,8b4488de-43a5-4ffa-bf82-af1e19ee1b64,2017-12-25T14:08:26.602Z,Learning Git,false,1,0,5,0,True,5,5,0
463,5a413176f21cc20001f47b56,1a233da8-e6e5-48a6-8c3c-806e312cce12,2017-12-25T17:11:51.136Z,Intermediate Python Programming,false,1,1,4,1,False,2,4,0
3120,5a413176f21cc20001f47b56,1a233da8-e6e5-48a6-8c3c-806e312cce12,2017-12-25T17:11:51.136Z,Intermediate Python Programming,false,1,1,4,1,False,2,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
828,5a40a0cbdd34dc00014d8d64,94b741b2-fc67-4db4-adc2-aafae130848f,2017-12-25T06:54:21.690Z,Intermediate C# Programming,false,1,0,4,0,True,4,4,0
1032,5a40a0cbdd34dc00014d8d64,94b741b2-fc67-4db4-adc2-aafae130848f,2017-12-25T06:54:21.690Z,Intermediate C# Programming,false,1,0,4,0,True,4,4,0
3119,5a40a0cbdd34dc00014d8d64,94b741b2-fc67-4db4-adc2-aafae130848f,2017-12-25T06:54:21.690Z,Intermediate C# Programming,false,1,0,4,0,True,4,4,0
463,5a413176f21cc20001f47b56,1a233da8-e6e5-48a6-8c3c-806e312cce12,2017-12-25T17:11:51.136Z,Intermediate Python Programming,false,1,1,4,1,False,2,4,0


In [345]:
df_dupes.duplicated(keep=False).value_counts()

True    170
dtype: int64

In [348]:
df_diff_dupes = df_dupes[df_dupes.duplicated(keep=False)==False]

In [349]:
df_diff_dupes

Unnamed: 0,assessment_id,exam_id,started_at,name,certification,attempts,incomplete,submitted,incorrect,all_correct,correct,total,unanswered


In [354]:
import json
for id in dup_ids[-1:]:
    print(df_questions[df_questions['assessment_id']==id]['question'].values, '\n\n')

[{'user_incomplete': False, 'user_correct': True, 'options': [{'checked': False, 'id': '7daed693-eaa0-4299-ac31-072fb9d4cd86'}, {'checked': False, 'at': '2017-12-25T17:10:23.841Z', 'id': 'b19c8abe-e2d1-4af1-824a-d2c10be41446'}, {'checked': False, 'id': '338cf4a1-e8ab-4751-9892-ee1cd0ee52d4'}, {'checked': True, 'at': '2017-12-25T17:10:26.415Z', 'id': 'fc820a68-189f-48c3-a5f6-f72775ae1985', 'submitted': 1, 'correct': True}], 'user_submitted': True, 'id': '32fe7d8d-6d89-4db4-a17a-a368c5ea3ca0', 'user_result': 'correct'}
 {'user_incomplete': False, 'user_correct': False, 'options': [{'checked': False, 'id': '1284a320-bf24-4dfe-93d6-078a34731db3'}, {'checked': False, 'id': 'c3e76204-878f-4afb-85b9-1c0e408dd8e5'}, {'checked': False, 'correct': True, 'id': '2da628dc-44bb-4d27-a3f2-9198c793fb51'}, {'checked': True, 'at': '2017-12-25T17:10:35.776Z', 'id': '000db74a-f463-42e0-b07f-219653da78f1', 'submitted': 1}], 'user_submitted': True, 'id': '5c34cf19-8cfd-4f56-91c2-0a109dc990b9', 'user_result'

In [322]:
e1 = data[data[0]['keen_id']==dup_ids.values[0]]
e2 = data[data[0]['keen_id']==dup_ids.values[-2]]

In [329]:
data['keen_id']

TypeError: list indices must be integers or slices, not str

In [323]:
e1 == e2

True

In [324]:
e1

{'keen_timestamp': '1516717442.735266',
 'max_attempts': '1.0',
 'started_at': '2018-01-23T14:23:19.082Z',
 'base_exam_id': '37f0a30a-7464-11e6-aa92-a8667f27e5dc',
 'user_exam_id': '6d4089e4-bde5-4a22-b65f-18bce9ab79c8',
 'sequences': {'questions': [{'user_incomplete': True,
    'user_correct': False,
    'options': [{'checked': True,
      'at': '2018-01-23T14:23:24.670Z',
      'id': '49c574b4-5c82-4ffd-9bd1-c3358faf850d',
      'submitted': 1,
      'correct': True},
     {'checked': True,
      'at': '2018-01-23T14:23:25.914Z',
      'id': 'f2528210-35c3-4320-acf3-9056567ea19f',
      'submitted': 1,
      'correct': True},
     {'checked': False,
      'correct': True,
      'id': 'd1bf026f-554f-4543-bdd2-54dcf105b826'}],
    'user_submitted': True,
    'id': '7a2ed6d3-f492-49b3-b8aa-d080a8aad986',
    'user_result': 'missed_some'},
   {'user_incomplete': False,
    'user_correct': False,
    'options': [{'checked': True,
      'at': '2018-01-23T14:23:30.116Z',
      'id': 'a35d0e

In [325]:
e2

{'keen_timestamp': '1516717442.735266',
 'max_attempts': '1.0',
 'started_at': '2018-01-23T14:23:19.082Z',
 'base_exam_id': '37f0a30a-7464-11e6-aa92-a8667f27e5dc',
 'user_exam_id': '6d4089e4-bde5-4a22-b65f-18bce9ab79c8',
 'sequences': {'questions': [{'user_incomplete': True,
    'user_correct': False,
    'options': [{'checked': True,
      'at': '2018-01-23T14:23:24.670Z',
      'id': '49c574b4-5c82-4ffd-9bd1-c3358faf850d',
      'submitted': 1,
      'correct': True},
     {'checked': True,
      'at': '2018-01-23T14:23:25.914Z',
      'id': 'f2528210-35c3-4320-acf3-9056567ea19f',
      'submitted': 1,
      'correct': True},
     {'checked': False,
      'correct': True,
      'id': 'd1bf026f-554f-4543-bdd2-54dcf105b826'}],
    'user_submitted': True,
    'id': '7a2ed6d3-f492-49b3-b8aa-d080a8aad986',
    'user_result': 'missed_some'},
   {'user_incomplete': False,
    'user_correct': False,
    'options': [{'checked': True,
      'at': '2018-01-23T14:23:30.116Z',
      'id': 'a35d0e

In [326]:
dup_ids

825     5a4092ebdd97a30001a25bbb
827     5a40a188e258670001e8b2aa
829     5a40a15f08a89200018f3e1f
830     5a409fa0ea6e4b0001bc37f9
832     5a4130e6d919850001113a15
836     5a40b8baa87570000139bdb5
1030    5a4092ebdd97a30001a25bbb
1031    5a409fe6e433fd000152ae37
1032    5a40a0cbdd34dc00014d8d64
1033    5a40a15f08a89200018f3e1f
1034    5a409fa0ea6e4b0001bc37f9
1035    5a40a093b8237a000177d566
1036    5a40a00a81caf700019f4c3e
1053    5a4106a38149bb0001436a3d
1934    5a409fe6e433fd000152ae37
1935    5a40a1b45682ed0001fa29f5
1936    5a40a093b8237a000177d566
1937    5a40a244448eb2000193e31a
1938    5a40a20e475ca8000162d446
1939    5a40b8baa87570000139bdb5
1940    5a40a00a81caf700019f4c3e
2142    5a40162fe674e700011b223a
2143    5a4022f356c2d3000137a7cf
2144    5a40230c852c2a00018d4bd9
2145    5a401940fc03f00001313d9f
2146    5a403621a9207c00010fa8f3
2552    5a40162fe674e700011b223a
2553    5a4022f356c2d3000137a7cf
2554    5a40230c852c2a00018d4bd9
2556    5a401940fc03f00001313d9f
2693    5a