# SRFREW-EDA

In [237]:
import os 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import altair as alt

alt.data_transformers.enable("vegafusion");

In [238]:
assignments = pd.read_csv(os.path.join("data", "assignments.csv"))
discussion_topics = pd.read_csv(os.path.join("data", "discussion_topics.csv"))
discussions = pd.read_csv(os.path.join("data", "discussions.csv"))
enrollments = pd.read_csv(os.path.join("data", "enrollments.csv"))
files = pd.read_csv(os.path.join("data", "files.csv"))
gradebook = pd.read_csv(os.path.join("data", "gradebook.csv"))
module_items = pd.read_csv(os.path.join("data", "module_items.csv"))
navigation_events = pd.read_csv(os.path.join("data", "navigation_events.csv"))
pages = pd.read_csv(os.path.join("data", "pages.csv"))

In [239]:

gradebook.loc[:, gradebook.columns != "Student"] = gradebook.loc[:, gradebook.columns != "Student"].apply(pd.to_numeric)

ValueError: Unable to parse string "Manual Posting" at position 0

In [240]:
# gradebook 
gradebook.drop(index=[0,1], inplace=True)
gradebook.loc[:, gradebook.columns != "Student"] = gradebook.loc[:, gradebook.columns != "Student"].apply(pd.to_numeric)

# drop strange/na columns
navigation_events.drop(columns=["ed_app", "type", "action", "course_offering_id", "statement_type", "statement_version", "event__object_type", "event__object_extensions_asset_subtype", "event__object_extensions_entity_id", "event__referrer", "event__extensions_request_url", "event__attachment_type"], inplace=True)

# drop learners that are not enrolled in the course
navigation_events = navigation_events.iloc[np.in1d(navigation_events.actor_id, enrollments.user_id), :]
navigation_events = navigation_events.query("actor_id != 'LEARNER_48'")
                       
navigation_events.event_time = pd.to_datetime(navigation_events.event_time, format='ISO8601').dt.normalize()

ID MAP: 
`object_id` maps to: 
- `discussion_topics.id`
- `discussions.discussion_topic_id`
- `assignments.id`
- `files.id`
- `pages.id`


Does NOT map to: 
- anything in `module_items`
- anything in `gradebook`

In [129]:
object_id_map = pd.concat([discussion_topics.loc[:,["id", "title"]],
           assignments.loc[:,["id", "name"]].rename({"name": "title"}, inplace=True),
           files.loc[:,["id", "filename_masked"]].rename({"filename_masked": "title"}, inplace=True),
           pages.loc[:,["id", "title"]]
          ])

Unnamed: 0,id,title
0,132f3fab56d60839d727b966a76c1b1e,Assignment 1 Discussion forum
1,e1f90c16c123e0f96b2af7d94a1c335c,Introduce yourself
2,7c9e39571ad00fe24282e5b5366d7563,Discussion 2: Reflect on rationales for eLearning
3,c916612763c04832d8e7b9e89deb7d2a,Discussion 1: The meaning of eLearning
4,bb62cd396b9a8a94ecef6b5d1c4f8ad1,Discussion 3: Funding issues
...,...,...
87,972196dd128aeb62a66f5c02b897e7dd,[wikipage] Assignment 2 Group sign up
88,9ee9a2cacefb8be58dde2a381617e0ad,Wikipage Assignment 2 Group Sign Up (Optional)
89,0790ff605875f7393f9fde5c50c3eae4,[wikipage] Assignment 3 Group sign up (optional)
90,47cc5ac28d341bcc8e175c649974a314,Your Instructor


In [140]:
alt.Chart(navigation_events).mark_bar().encode(
    x = alt.X("event_time:T"), 
    y = "count()",
    color = "event__object_extensions_asset_type"
)

In [141]:
alt.Chart(navigation_events).mark_bar().encode(
    x = alt.X("actor_id"), 
    y = "count()",
    color = "event__object_extensions_asset_type"
)

In [253]:
a = pd.DataFrame(navigation_events.groupby(["actor_id", "event__object_extensions_asset_type"]).count()["id"]).unstack()
a.columns = a.columns.droplevel()
a["total_clicks"] = np.sum(a, axis=1)

b = pd.merge(a, gradebook, how="left", right_on="Student", left_index=True)
b.describe()

Unnamed: 0,assignment,attachment,discussion_topic,wiki_page,total_clicks
count,33.0,33.0,33.0,33.0,33.0
mean,83.0,982.575758,179.151515,334.333333,1579.060606
std,41.953099,443.609769,130.637409,163.203031,717.507314
min,26.0,267.0,37.0,56.0,511.0
25%,60.0,680.0,78.0,204.0,1092.0
50%,73.0,898.0,159.0,321.0,1548.0
75%,97.0,1204.0,213.0,444.0,1901.0
max,198.0,2360.0,642.0,748.0,3687.0


In [264]:
alt.Chart(b).mark_point().encode(
    x = alt.X(alt.repeat(), type="quantitative"),
    y = "Current Score"
).properties(width = 150, height = 150).repeat(['assignment', 'attachment', 'discussion_topic', 'wiki_page', 'total_clicks'])

In [241]:
pd.merge(navigation_events.groupby("actor_id").count()["id"],  pd.to_numeric(gradebook.set_index("Student")["Current Score"]), how="left", left_index=True, right_index=True)



Unnamed: 0_level_0,id,Current Score
actor_id,Unnamed: 1_level_1,Unnamed: 2_level_1
LEARNER_1,1132,80.9
LEARNER_10,1096,78.4
LEARNER_11,2188,84.3
LEARNER_12,1901,81.2
LEARNER_13,3687,85.5
LEARNER_14,1548,84.4
LEARNER_15,998,76.2
LEARNER_16,1661,82.0
LEARNER_17,1892,65.7
LEARNER_18,1350,69.7


In [193]:
gradebook.set_index("Student")

Unnamed: 0_level_0,Assignment 1 (c81f04547a95da2a7b88054ef491b7c4),Assignment 2 (a4dc11e7e79361fc5886a9078aac66b8),Assignment 3 (option A) (811d93ea379b5cdd5a19f1b5dbab88cd),Assignment 3 (option B) (df8d1f1ff3f48fdc24a278b40c5f45cc),Assignment 3 (option C) (bb971b36c1578cede00150acda89aa99),Assignment 3 (option D) (f1ad954cd2cddda6e17f6fc225d1aa3e),Participation grade: Formative (not included in final grade) (5965e0b0c712861d0efdd9be54572114),Assignment 1 Current Score,Assignment 2 Current Score,Assignment 3 Current Score,Participation & engagement Current Score,Current Score
Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
LEARNER_4,88,76.0,75.0,,,,10.0,88,76.0,75.0,100.0,82.9
LEARNER_10,88,72.0,74.0,,,,8.5,88,72.0,74.0,85.0,78.4
LEARNER_34,90,78.0,73.0,,,,9.0,90,78.0,73.0,90.0,81.3
LEARNER_16,88,71.0,,,77.0,,10.0,88,71.0,77.0,100.0,82.0
LEARNER_7,92,83.0,,,80.0,,10.0,92,83.0,80.0,100.0,87.3
LEARNER_30,77,89.0,,,74.0,,7.0,77,89.0,74.0,70.0,78.3
LEARNER_15,90,79.0,75.0,,,,6.0,90,79.0,75.0,60.0,76.2
LEARNER_19,90,83.0,,,81.0,,9.0,90,83.0,81.0,90.0,85.2
LEARNER_36,56,84.0,75.0,,,,6.0,56,84.0,75.0,60.0,70.9
LEARNER_8,90,88.0,74.0,,,,8.5,90,88.0,74.0,85.0,83.6


In [189]:
navigation_events.groupby("actor_id").count()["id"]

actor_id
LEARNER_1     1132
LEARNER_10    1096
LEARNER_11    2188
LEARNER_12    1901
LEARNER_13    3687
LEARNER_14    1548
LEARNER_15     998
LEARNER_16    1661
LEARNER_17    1892
LEARNER_18    1350
LEARNER_19    1667
LEARNER_2     1981
LEARNER_20    1352
LEARNER_21     957
LEARNER_23    1868
LEARNER_24    1801
LEARNER_25     540
LEARNER_26     650
LEARNER_27    1355
LEARNER_29    2222
LEARNER_3     1901
LEARNER_30    1364
LEARNER_31     836
LEARNER_33     511
LEARNER_34     673
LEARNER_35     903
LEARNER_36    1377
LEARNER_37    1092
LEARNER_4     1819
LEARNER_48       1
LEARNER_5     1611
LEARNER_6     2801
LEARNER_7     2497
LEARNER_8     2878
Name: id, dtype: int64