# Explorando Stats

En este documento se parsean y revisan los logs de eol.

In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# Load the exported course structure. The context manager closes the file
# handle as soon as parsing finishes, and the encoding is explicit because the
# default text encoding is platform-dependent.
with open("redes-structure.json", encoding="utf-8") as structure_file:
    course_json = json.load(structure_file)
# Build a frame from the "blocks" entry and transpose it.
# NOTE(review): presumably "blocks" maps block id -> block fields, so the
# transpose yields one row per block -- confirm against the JSON export.
course_df = pd.DataFrame.from_records(course_json["blocks"])
course_struct = course_df.T

In [3]:
def expand_list(df, list_column, new_column):
    """Expand a DataFrame with a column of list values into one row per item.

    Source: https://medium.com/@johnadungan/expanding-lists-in-panda-dataframes-2724803498f8

    Each row of ``df`` is repeated once per element of its ``list_column``
    value, so rows holding an empty list do not appear in the result. The
    input frame is not modified. A frame with no rows is accepted and yields
    an empty result with the expected columns.

    Arguments:
        df {pandas.core.frame.DataFrame} -- DataFrame with a column to expand
        list_column {str} -- name of the column (whose values are lists) to expand
        new_column {str} -- new name of the column
    Returns:
        pandas.core.frame.DataFrame -- the columns of df other than
        list_column, plus ``new_column`` (one list item per row) and
        ``new_column + '_number'`` (1-based position of the item inside its
        original list); the index is reset to 0..n-1.
    """
    list_values = df[list_column]
    # Integer array of list lengths. Building it explicitly (instead of
    # apply(len) followed by max()) keeps the function working when df has no
    # rows, where max() of an empty sequence would raise ValueError.
    lengths = np.fromiter(
        (len(items) for items in list_values),
        dtype=np.intp,
        count=len(list_values),
    )
    # Positional row indices, each repeated once per item of its list.
    row_positions = np.repeat(np.arange(len(df)), lengths)
    # Positional indices of every column except the one being expanded.
    keep_positions = [
        position for position, column in enumerate(df.columns)
        if column != list_column
    ]
    expanded_df = df.iloc[row_positions, keep_positions].copy()
    expanded_df[new_column] = [
        item for items in list_values for item in items
    ]
    # 1-based position of each item within its own list.
    expanded_df[new_column + '_number'] = [
        number
        for items in list_values
        for number, _ in enumerate(items, start=1)
    ]
    return expanded_df.reset_index(drop=True)

In [4]:

# Flatten the block tree into one row per terminal block, carrying the ids,
# display names and 1-based positions of its ancestors.
# NOTE(review): the joins below walk exactly four parent levels, labelled
# course / chapter / sequential / vertical further down; presumably that is the
# depth of this course's tree -- confirm before reusing on other courses.

# One row per (block with children, child) pair.
# `dropna()` is called without `subset`, so a block with a missing value in ANY
# column is excluded here, not only the blocks without children.
# Columns (as noted by the original author):
# 'type', 'block_id', 'student_view_url', 'lms_web_url', 'id', 'display_name', 'child', 'child_number'
course_children = expand_list(course_struct.dropna(), "children", "child")
# Blocks whose 'children' value is missing. reset_index() moves the original
# index into an 'index' column, used as a join key in the last merge below.
# Columns (as noted by the original author):
# 'index', 'type', 'block_id', 'student_view_url', 'lms_web_url','children', 'id', 'display_name'
course_no_children = course_struct[course_struct["children"].isna()].copy().reset_index()
# (id, display_name) of the blocks without children (terminal nodes at any level)
course_no_children_name = course_no_children[['id', "display_name"]]
# One row per parent/child edge: parent id, child id and the parent's display_name
# (expand_list repeats the parent row once per child)
course_child_name = course_children[['id', 'child', 'display_name']].copy()
# Stack both tables; rows coming from course_no_children_name have no 'child' value
block_child_name = pd.concat([course_child_name, course_no_children_name], ignore_index=True, sort=True)
# Same edges read upwards: 'father' is the parent id and 'id' is the child id;
# 'display_name' still holds the parent's name
block_father_name = block_child_name.copy().rename(columns={'id': 'father', 'child': 'id'})
# Descend one level per merge, always joining on 'id': after each merge the
# matched 'id' is renamed to father_<level> and its 'child' becomes the next
# 'id' to look up. The first merge has 'display_name' on both sides, so pandas
# suffixes them as display_name_x / display_name_y; the following levels are
# renamed explicitly (_z, _w). The last merge leaves 'child' and 'display_name'
# columns for the final 'id', which are discarded by the selection below.
merged_course_structure = block_father_name.merge(block_child_name, on='id')\
        .rename(columns={'father': 'father_x', 'id': 'father_y', 'child': 'id'})\
        .merge(block_child_name, on='id')\
        .rename(columns={'id': 'father_z', 'child': 'id', 'display_name': 'display_name_z'})\
        .merge(block_child_name, on='id')\
        .rename(columns={'id': 'father_w', 'child': 'id', 'display_name': 'display_name_w'})\
        .merge(block_child_name, on='id')
# Keep the ancestor ids/names plus the id of the final block
valuable_columns = [
        'father_x', 'display_name_x', 'father_y', 'display_name_y', 'father_z', 'display_name_z',
        'father_w', 'display_name_w', 'id']
# Final column names. 'display_name' is not part of valuable_columns, so its
# entry has no effect on the rename below.
columns_name = {
        'father_x': 'course',
        'father_y': 'chapter',
        'father_z': 'sequential',
        'father_w': 'vertical',
        'display_name_x': 'course_name',
        'display_name_y': 'chapter_name',
        'display_name_z': 'sequential_name',
        'display_name_w': 'vertical_name',
        'display_name': 'name'
    }
merged_course_structure = merged_course_structure[valuable_columns].rename(
        columns=columns_name)
# Attach the 1-based position of each level inside its parent (child_number
# from course_children), one merge per level; the lookup on 'id' keeps the name
# child_number. These merges use the pandas default (inner join), so a row
# without a match at any level is dropped.
# Finally add type and URLs of the terminal blocks from course_no_children,
# matching 'id' against its 'index' column.
# NOTE(review): that last join assumes the index of course_struct holds the
# same values as the 'id' column -- confirm against the JSON export.
course_structure = merged_course_structure.merge(course_children[['child', 'child_number']], left_on='vertical', right_on='child')\
        .drop(columns=['child']).rename(columns={'child_number': 'vertical_number'})\
        .merge(course_children[['child', 'child_number']], left_on='sequential', right_on='child')\
        .drop(columns=['child']).rename(columns={'child_number': 'sequential_number'})\
        .merge(course_children[['child', 'child_number']], left_on='chapter', right_on='child')\
        .drop(columns=['child']).rename(columns={'child_number': 'chapter_number'})\
        .merge(course_children[['child', 'child_number']], left_on='id', right_on='child')\
        .drop(columns=['child'])\
        .astype({'vertical_number': 'int64', 'child_number': 'int64','sequential_number': 'int64','chapter_number': 'int64'})\
        .merge(course_no_children[['index', 'type',"student_view_url","lms_web_url"]], left_on='id', right_on='index')\
        .drop(columns="index")


# Persist the table in the working directory. The file is tab-separated despite
# its .csv extension, so readers must pass the same separator.
course_structure.to_csv('course_structure.csv', sep='\t')
# Show the resulting table as the cell output
course_structure

Unnamed: 0,course,course_name,chapter,chapter_name,sequential,sequential_name,vertical,vertical_name,id,vertical_number,sequential_number,chapter_number,child_number,type,student_view_url,lms_web_url
0,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 1: Introducción - Reglas del juego,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Bienvenida,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Vídeo de bienvenida,block-v1:UChile+LEIT01+2020_T2+type@video+bloc...,1,1,1,1,video,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
1,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 1: Introducción - Reglas del juego,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Programa del curso,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,PDF: Programa,block-v1:UChile+LEIT01+2020_T2+type@html+block...,1,2,1,1,html,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
2,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 1: Introducción - Reglas del juego,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Introducción a Redes,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Vídeo: Introducción. Internet. Infraestructura...,block-v1:UChile+LEIT01+2020_T2+type@video+bloc...,1,3,1,1,video,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
3,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 1: Introducción - Reglas del juego,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Material Docente,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Slides,block-v1:UChile+LEIT01+2020_T2+type@google-doc...,1,4,1,1,google-document,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
4,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 2: Capa de Aplicación,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Semana 2: HTTP,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Material de Apoyo,block-v1:UChile+LEIT01+2020_T2+type@google-doc...,1,1,2,1,google-document,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
5,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 2: Capa de Aplicación,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Semana 2: HTTP,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Clase Online,block-v1:UChile+LEIT01+2020_T2+type@video+bloc...,2,1,2,1,video,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
6,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 2: Capa de Aplicación,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Semana 2: HTTP,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Quiz,block-v1:UChile+LEIT01+2020_T2+type@problem+bl...,3,1,2,1,problem,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
7,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 2: Capa de Aplicación,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Semana 2: HTTP,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Taller,block-v1:UChile+LEIT01+2020_T2+type@html+block...,4,1,2,1,html,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
8,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 2: Capa de Aplicación,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Semana 2: HTTP,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Taller,block-v1:UChile+LEIT01+2020_T2+type@discussion...,4,1,2,2,discussion,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...
9,block-v1:UChile+LEIT01+2020_T2+type@course+blo...,Redes,block-v1:UChile+LEIT01+2020_T2+type@chapter+bl...,Módulo 2: Capa de Aplicación,block-v1:UChile+LEIT01+2020_T2+type@sequential...,Semana 2: HTTP,block-v1:UChile+LEIT01+2020_T2+type@vertical+b...,Evaluación - Probemos el Proxy,block-v1:UChile+LEIT01+2020_T2+type@problem+bl...,5,1,2,1,problem,https://eol.andhael.cl/xblock/block-v1:UChile+...,https://eol.andhael.cl/courses/course-v1:UChil...


In [18]:
def _flatten_record(record):
    """Return a copy of one log record with its "context" dict flattened.

    Every key ``k`` of ``record["context"]`` becomes a top-level key
    ``"context." + k``. A ``"context.module"`` entry is replaced by
    ``"context.display_name"`` (its ``"display_name"`` value). The
    ``"context.asides"`` and ``"context.user_tags"`` entries are dropped when
    present.

    Arguments:
        record {dict} -- one parsed log line; must contain a "context" dict
    Returns:
        dict -- new flat dict; ``record`` itself is not modified
    Raises:
        KeyError -- if ``record`` has no "context" key, or if its module entry
        has no "display_name"
    """
    context = record["context"]
    flat = {key: value for key, value in record.items() if key != "context"}
    for key, value in context.items():
        flat["context." + key] = value
    if "context.module" in flat:
        flat["context.display_name"] = flat["context.module"]["display_name"]
        del flat["context.module"]
    # Remove each optional key independently. The previous try/except with two
    # consecutive `del` statements skipped "context.user_tags" whenever
    # "context.asides" was absent, and its bare `except` hid any other error.
    flat.pop("context.asides", None)
    flat.pop("context.user_tags", None)
    return flat


# Read the raw log file: one JSON document per line.
with open("./back/DCC-CC4303_v1.json", encoding="utf-8") as f:
    l = f.readlines()

# Blank lines (e.g. a trailing newline at the end of the file) are skipped
# instead of crashing json.loads.
records = [json.loads(el) for el in l if el.strip()]
# Clean context and event
expanded = [_flatten_record(r) for r in records]

In [20]:
import pandas as pd
import numpy as np
import re
import os, sys
import glob
import datetime
from datetime import timedelta
from filter_logs import filter_by_log_qty, filter_course_team
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# --- Directories (relative to the parent of the current working directory) ---
BASE_DIR = os.path.dirname(os.getcwd())
DATA_DIR = os.path.join(BASE_DIR, 'eol-stats')
SRC_DIR = os.path.join(BASE_DIR, '')

# --- Time thresholds ---
TIMEOUT = timedelta(minutes=10)
LOWER_LIMIT = timedelta(seconds=15)

# --- Files ---
course_file = os.path.join(DATA_DIR, 'course_structure.csv')

# --- Load full logs from the flattened records ---
logs_full = pd.DataFrame.from_records(expanded)

# First filter: drop the course team plus the extra listed users.
logs_without_team = filter_course_team(
    logs_full,
    other_people=['gap', 'francisco_sereno'],
)
print('Cantidad de logs sin equipo del curso: ', len(logs_without_team))

# Second filter: applied with a minimum of 15 logs.
logs = filter_by_log_qty(logs_without_team, min_logs=15)
print('Cantidad de logs luego de filtrar personas con pocos logs: ', len(logs))

# Cell output: the display names present in the unfiltered logs.
logs_full["context.display_name"].dropna()

Cantidad de logs sin equipo del curso:  16649
Cantidad de logs luego de filtrar personas con pocos logs:  16629


429                          Checkboxes
430                          Checkboxes
436                          Checkboxes
447                          Checkboxes
448                          Checkboxes
                      ...              
23956                            Repaso
23960                            Repaso
23961                            Repaso
24001    Slides IP: Circuitos Virtuales
24024    Slides IP: Circuitos Virtuales
Name: context.display_name, Length: 860, dtype: object

In [7]:
from LogParser import LogParser
# Create the log parser, pointing it at the course-structure file path
# (course_file), load the filtered logs into it and retrieve the parsed logs.
# NOTE(review): LogParser is a project module not visible here; what parsing
# adds to the logs should be confirmed against its source.
parser = LogParser(course_filepath=course_file)
parser.load_logs(logs)
parsed_logs = parser.parse_and_get_logs()

In [16]:
# Inspect the "event_type_vertical" column of the parsed logs. The rendered
# output shows vertical block ids alongside NOT_CLASSIFIED, NO_MATCH and NaN.
parsed_logs["event_type_vertical"]

1000                                                   NaN
1001                                        NOT_CLASSIFIED
1102                                                   NaN
1103                                        NOT_CLASSIFIED
1104                                        NOT_CLASSIFIED
                               ...                        
24124    block-v1:UChile+LEIT01+2020_T2+type@vertical+b...
24125                                       NOT_CLASSIFIED
24126                                             NO_MATCH
24127                                       NOT_CLASSIFIED
24128                                       NOT_CLASSIFIED
Name: event_type_vertical, Length: 16629, dtype: object