# Getting Item Results by Week

## Import Packages

In [220]:
# data analysis and wrangling
import pandas as pd
import numpy as np

# visualization
%matplotlib inline
import matplotlib.pyplot as plt

# wrangling os
import os

## Load Data

In [221]:
# Selecting course name
# Available courses mapped to the number paired with each one in the original
# course list (used below as the number of weeks to process).
course_weeks = {
    'gestion-organizaciones-efectivas': 7,
    'camino-excelencia-gestion-proyectos': 5,
    'gestion-empresarial-pyme': 5,
    'aprendiendo-programar-python': 6,
    'electrones-en-accion': 4,
    'web-semantica': 7,
    'aula-constructivista': 10,
}

course_name = 'gestion-organizaciones-efectivas'
# Derived from the table so the week count can never get out of sync with the
# selected course; an unknown course name fails fast with a KeyError.
number_weeks = course_weeks[course_name]


In [222]:
# Grades & Progress: load the four raw course exports, each indexed by user id
course_dir = '../data/coursera/' + course_name + '/'
user_key = 'ucchile_user_id'

df = pd.read_csv(course_dir + 'course_grades.csv', index_col=user_key)
df_formative_grades = pd.read_csv(course_dir + 'course_formative_quiz_grades.csv', index_col=user_key)
df_items_grades = pd.read_csv(course_dir + 'course_item_grades.csv', index_col=user_key)
df_progress = pd.read_csv(course_dir + 'course_progress.csv', index_col=user_key)

## Data Cleaning and Preparation

In [223]:
def create_folder(directory):
    """Create *directory* (and any missing parents) if it does not exist yet.

    Args:
        directory: Path of the folder to create.

    Raises:
        FileExistsError: If the path already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate existence
    # check, so there is no window between "check" and "create".
    os.makedirs(directory, exist_ok=True)

In [224]:
# Dropping unnecessary columns from every loaded table
df = df.drop(
    ['course_id', 'course_grade_overall_passed_items', 'course_grade_overall', 'course_grade_ts'],
    axis=1,
)
df_formative_grades = df_formative_grades.drop(['course_id'], axis=1)
df_items_grades = df_items_grades.drop(
    ['course_id', 'course_item_grade_overall', 'course_item_grade_pending'],
    axis=1,
)
df_progress = df_progress.drop(['course_id'], axis=1)

In [225]:
# Keep only the formative-grade rows whose item does not already appear in df_items_grades
already_in_items = df_formative_grades.course_item_id.isin(df_items_grades.course_item_id)
df_formative_grades = df_formative_grades[~already_in_items]

In [226]:
# Item types available in course_item_types.csv
item_categories = [
    'lecture',
    'assess_open_single_page',
    'supplement',
    'peer',
    'quiz',
    'exam',
    'others',
    'graded_peer',
    'closed_peer',
    'graded_programming',
    'ungraded_programming',
    'phased_peer',
    'graded_lti',
    'ungraded_lti',
    'slideshow',
    'staff_graded',
    'notebook',
    'graded_discussion_prompt',
    'ungraded_widget',
]

In [227]:
# Creating Folders: one output tree per item category, with a sub-folder per data source
for item_category in item_categories:
    category_dir = '../data/coursera/cooked_data/' + course_name + '/by_week_results/' + item_category
    for subfolder in ('', '/formative_grades', '/items_grades', '/progress'):
        create_folder(category_dir + subfolder)

In [228]:
# Creating Data:
def _pivot_week_items(frame, week, category, value_of, ts_column):
    """Reshape one week's long-format rows into one row per index label.

    For every distinct ``course_item_id`` in *frame* two columns are created:
    ``week_<week>_<category>_<item id>`` holding ``value_of(row)`` and
    ``week_<week>_ts_<category>_<item id>`` holding ``row[ts_column]``.

    Args:
        frame: Rows to reshape; must have a ``course_item_id`` column. Its
            index labels become the index of the result.
        week: Week number used in the generated column names.
        category: Item category used in the generated column names.
        value_of: Callable that receives a row and returns the value to store.
        ts_column: Name of the column holding the timestamp to store.

    Returns:
        A DataFrame indexed by the unique index labels of *frame*. Cells with
        no matching row are left empty (NaN); when several rows target the
        same cell, the last one iterated wins.
    """
    value_prefix = 'week_' + str(week) + '_' + category + '_'
    ts_prefix = 'week_' + str(week) + '_ts_' + category + '_'

    cols = []
    for item_id in frame.course_item_id.unique():
        cols.append(value_prefix + item_id)
        cols.append(ts_prefix + item_id)

    wide = pd.DataFrame(index=frame.index.unique(), columns=cols)
    for user_id, row in frame.iterrows():
        wide.at[user_id, value_prefix + row.course_item_id] = value_of(row)
        wide.at[user_id, ts_prefix + row.course_item_id] = row[ts_column]
    return wide


cooked_root = '../data/coursera/cooked_data/' + course_name

for item_category in item_categories:
    results_dir = cooked_root + '/by_week_results/' + item_category
    for i in range(1, number_weeks + 1):
        week_file = '/week_' + str(i) + '.csv'

        # Read the item metadata for this category and week. Only its index
        # (the course_item_id values) is used below, so no column clean-up is
        # needed on this frame.
        df_raw = pd.read_csv(cooked_root + '/metadata/raw/' + item_category + week_file, index_col='course_item_id')

        # Export the rows of each source that belong to this week's items.
        df_formative = df_formative_grades[df_formative_grades.course_item_id.isin(df_raw.index)]
        df_formative.to_csv(results_dir + '/formative_grades' + week_file)

        df_items = df_items_grades[df_items_grades.course_item_id.isin(df_raw.index)]
        df_items.to_csv(results_dir + '/items_grades' + week_file)

        df_progress_aux = df_progress[df_progress.course_item_id.isin(df_raw.index)]
        df_progress_aux.to_csv(results_dir + '/progress' + week_file)

        # Only the first non-empty source (formative grades, then item grades,
        # then progress) contributes columns for this category and week.
        if not df_formative.empty:
            # Formative quizzes are stored as the fraction of the maximum grade.
            aux = _pivot_week_items(
                df_formative, i, item_category,
                lambda row: row.course_quiz_grade / row.course_quiz_max_grade,
                'course_quiz_grade_ts')
        elif not df_items.empty:
            aux = _pivot_week_items(
                df_items, i, item_category,
                lambda row: row.course_item_grade_verified,
                'course_item_grade_ts')
        elif not df_progress_aux.empty:
            aux = _pivot_week_items(
                df_progress_aux, i, item_category,
                lambda row: row.course_progress_state_type_id,
                'course_progress_ts')
        else:
            # Nothing recorded for this category in this week.
            continue

        # Outer merge keeps users that appear on only one side.
        df = df.merge(aux, left_on='ucchile_user_id', right_on='ucchile_user_id', how='outer', copy=False)



In [229]:
def create_folder(directory):
    """Create *directory* (and any missing parents) if it does not exist yet.

    This notebook also defines the same function in an earlier cell; this
    copy behaves the same way.

    Args:
        directory: Path of the folder to create.

    Raises:
        FileExistsError: If the path already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate existence
    # check, so there is no window between "check" and "create".
    os.makedirs(directory, exist_ok=True)

In [230]:
# Rename the course-level columns to their final export names
renamed_columns = {
    'course_passing_state_id': 'course_passing_state',
    'course_grade_verified_passed_items': 'course_passed_items',
    'course_grade_verified': 'course_grade',
}
df = df.rename(columns=renamed_columns)


## Export Data

In [231]:
# Write the merged per-user table to the course's super_log folder
super_log_dir = '../data/super_log/' + course_name
create_folder(super_log_dir)

df.to_csv(super_log_dir + '/coursera.csv')