#Import management

In [31]:
#we import sys to add the models folder to path, allowing us to use those scripts in visualization
import sys
sys.path.insert(1, '../models/')

import pandas as pd
import numpy as np
import panel as pn
import clean_data as cd

from math import pi
from bokeh.models import ColumnDataSource
from bokeh.transform import cumsum
from bokeh.palettes import Spectral6
from bokeh.palettes import Category20c
from bokeh.plotting import figure, show
from fractions import Fraction



pn.extension('tabulator')

import hvplot.pandas

--Data Management--
Now, we get the .csv files with raw data from the '/data' folder.

In [32]:
data_path = '../data/notes_all_sections.csv'

--File Reading--

We're now going to read the files that we got beforehand.

In [33]:
data = pd.read_csv(data_path)
data.columns

Index(['OrgDefinedId', 'Secciones',
       'Entrega de la mañana D1 Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>',
       'Entrega de la tarde D1 Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>',
       'Entrega de la mañana (D2) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>',
       'Entrega de la tarde (D2) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>',
       'Entrega de la mañana (D3) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>',
       'Entrega de la tarde (D3) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>',
       'Entrega de la mañana (D4) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>',
       'Entrega de la tarde (D4) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>',
       'Semana 1\Week1 Subtotal Numerator',
       'Semana 1\Week1 Subtotal Denominator',
       'Entrega de la tarde (D5) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 2\W

...And now we'll call the data cleaning scripts on this data.

In [34]:
#We rename the columns to make them easier to work with
data.rename(columns={
    #WEEK 1
    'OrgDefinedId':'user',
    'Secciones':'sec',
    'Entrega de la mañana D1 Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>':'1AM',
    'Entrega de la tarde D1 Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>':'1PM',
    'Entrega de la mañana (D2) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>':'2AM',
    'Entrega de la tarde (D2) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>':'2PM',
    'Entrega de la mañana (D3) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>':'3AM',
    'Entrega de la tarde (D3) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>':'3PM',
    'Entrega de la mañana (D4) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>':'4AM',
    'Entrega de la tarde (D4) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 1\Week1>':'4PM',

    'Semana 1\Week1 Subtotal Numerator':'week_1_numerator',
    'Semana 1\Week1 Subtotal Denominator':'week_1_denominator',
    
    #WEEK 2
    'Entrega de la tarde (D5) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 2\Week 2>':'5PM',
    'Entrega de la mañana (D5) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 2\Week 2>':'5AM',
    'Entrega de la mañana (D6) Points Grade <Numérico Puntos máx.:5 Categoría:Semana 2\Week 2>':'6AM',

    'Semana 2\Week 2 Subtotal Numerator':'week_2_numerator',
    'Semana 2\Week 2 Subtotal Denominator':'week_2_denominator',

    'Calculated Final Grade Numerator':'final_numerator',
    'Calculated Final Grade Denominator':'final_denominator'
}, inplace = True)

#drop unnecesary columns
data.drop(columns={'Adjusted Final Grade Numerator', 'Adjusted Final Grade Denominator','End-of-Line Indicator'}, axis=1, inplace=True)
#filter blank students
data = data[data['sec'].notna()]
#fill NaN with 0
data.fillna(0.0, inplace=True)

#change sections to numbers, this will be useful for binding on analytic tables later
data['sec'].replace(['Sección No. 1', 'Sección No. 2', 'Sección No. 3','Sección No. 4','Sección No. 5','Sección No. 6','Sección No. 7',
'Sección No. 8','Sección No. 9','Sección No. 10 - Las morcillitas'], [1,2,3,4,5,6,7,8,9,10], inplace=True)

#we change the type to a numeric float64 from 'object'
#Week 1
data['1AM'] = pd.to_numeric(data['1AM'])
data['1PM'] = pd.to_numeric(data['1PM'])
data['2AM'] = pd.to_numeric(data['2AM'])
data['2PM'] = pd.to_numeric(data['2PM'])
data['3AM'] = pd.to_numeric(data['3AM'])
data['3PM'] = pd.to_numeric(data['3PM'])
data['4AM'] = pd.to_numeric(data['4AM'])
data['4PM'] = pd.to_numeric(data['4PM'])
#Week 2
data['5AM'] = pd.to_numeric(data['5AM'])
data['5PM'] = pd.to_numeric(data['5PM'])
data['6AM'] = pd.to_numeric(data['6AM'])


#we recalculate the numerator and denominator for every student, as the data didn't take NaN into account.
for index in data.index:
        #week 1
        a = (sum([data.loc[index, '1AM'],
        data.loc[index, '1PM'],
        data.loc[index, '2AM'],
        data.loc[index, '2PM'],
        data.loc[index, '3AM'],
        data.loc[index, '3PM'],
        data.loc[index, '4AM'],
        data.loc[index, '4PM']]))
        b = 8.0

        data.loc[index, 'week_1_numerator'] = Fraction(a/b).numerator
        data.loc[index, 'week_1_denominator'] = Fraction(a/b).denominator

        #week 2
        a = (sum([data.loc[index, '5AM'],
                data.loc[index, '5PM'],
                data.loc[index, '6AM']]))
        b = 3.0

        data.loc[index, 'week_2_numerator'] = Fraction(a/b).numerator
        data.loc[index, 'week_2_denominator'] = Fraction(a/b).denominator

        #final values, this operation might come as redundant at first, but it is quite important for final averages. Either way, i might 
        #optimize it in the future.
        a = (sum([data.loc[index, '1AM'],
        data.loc[index, '1PM'],
        data.loc[index, '2AM'],
        data.loc[index, '2PM'],
        data.loc[index, '3AM'],
        data.loc[index, '3PM'],
        data.loc[index, '4AM'],
        data.loc[index, '4PM'],
        data.loc[index, '5AM'],
        data.loc[index, '5PM'],
        data.loc[index, '6AM']]))
        b = 11.0
        
        data.loc[index, 'week_2_numerator'] = Fraction(a/b).numerator
        data.loc[index, 'week_2_denominator'] = Fraction(a/b).denominator


interactive_data = data.interactive()
data    


Unnamed: 0,user,sec,1AM,1PM,2AM,2PM,3AM,3PM,4AM,4PM,week_1_numerator,week_1_denominator,5PM,5AM,6AM,week_2_numerator,week_2_denominator,final_numerator,final_denominator
1,#1000594810,3,0.0,0.0,3.5,5.0,2.5,0.0,2.5,0.0,27.0,16.0,0.0,5.0,0.0,7.574236e+15,4.503600e+15,18.5,25.0
2,#101051396,7,0.0,5.0,3.5,5.0,0.0,5.0,5.0,0.0,47.0,16.0,5.0,0.0,5.0,6.857754e+15,2.251800e+15,33.5,35.0
3,#1010840777,1,5.0,5.0,5.0,5.0,5.0,5.0,5.0,0.0,35.0,8.0,0.0,0.0,0.0,1.791204e+15,5.629500e+14,35.0,35.0
4,#1010961769,4,5.0,5.0,5.0,5.0,3.5,5.0,5.0,0.0,67.0,16.0,5.0,5.0,5.0,4.964195e+15,1.125900e+15,53.5,55.0
5,#1010962618,5,0.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,35.0,8.0,5.0,5.0,5.0,5.117727e+15,1.125900e+15,50.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,#1141715139,2,5.0,5.0,5.0,5.0,5.0,3.5,5.0,5.0,77.0,16.0,3.5,3.5,5.0,5.168904e+15,1.125900e+15,50.5,55.0
113,#1145924881,1,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,0.0,5.117727e+15,1.125900e+15,55.0,55.0
114,#202225959,10,0.0,3.5,5.0,3.5,5.0,5.0,5.0,0.0,27.0,8.0,5.0,5.0,5.0,2.149445e+15,5.629500e+14,42.0,45.0
116,#665101,2,5.0,3.5,5.0,5.0,5.0,3.5,5.0,5.0,37.0,8.0,5.0,5.0,5.0,5.322436e+15,1.125900e+15,52.0,55.0


Once all of that is said and done, it's time to start making some graphs from this information.

Average notes by day

In [35]:
days = ['Day 1','Day 2','Day 3','Day 4','Day 5','Day 6']

day_averages = [(data['1AM'].sum()+data['1PM'].sum())/(len(data)*2),
(data['2AM'].sum()+data['2PM'].sum())/(len(data)*2),
(data['3AM'].sum()+data['3PM'].sum())/(len(data)*2),
(data['4AM'].sum()+data['4PM'].sum())/(len(data)*2),
(data['5AM'].sum()+data['5PM'].sum())/(len(data)*2),
data['6AM'].sum()/(len(data))]


avg_day_notes_source = ColumnDataSource(data=dict(days = days, day_averages = day_averages, color = Spectral6))
day_notes_figure = figure(x_range = days, y_range=(0,5), height=400, title="Average notes per day",toolbar_location=None, tools="")
day_notes_figure.vbar(x='days', top='day_averages', width=0.9, color='color', legend_field="days", source=avg_day_notes_source)

day_notes_figure.xgrid.grid_line_color = None
day_notes_figure.legend.orientation = "horizontal"
day_notes_figure.legend.location = "top_center"

show(day_notes_figure)


Distribution of notes by ranges

In [36]:
note_ranges = {
    '5: Perfect implementation.':0,
    '3.5 - 4.9: Functional implementation with small problems.':0,
    '0.1 - 3.4: Implementation with serious problems (not working)':0,
    "0: Did not submit the work.": 0
}
concept_checks = ['1AM','1PM','2AM','2PM','3AM','3PM','4AM','4PM','5AM','5PM','6AM'] 

for index in data.index:
    for concept in concept_checks:
        grade = data.loc[index, concept]
        if grade == 0:
            note_ranges["0: Did not submit the work."] += 1
        elif grade == 5:
            note_ranges['5: Perfect implementation.'] += 1
        elif grade in np.arange(3.5, 5):
            note_ranges['3.5 - 4.9: Functional implementation with small problems.'] += 1
        else:
            note_ranges['0.1 - 3.4: Implementation with serious problems (not working)'] += 1

note_dist_data = pd.Series(note_ranges).reset_index(name='value').rename(columns={'index': 'range'})
note_dist_data['angle'] = note_dist_data['value']/note_dist_data['value'].sum() * 2*pi
note_dist_data['color'] = Category20c[len(note_ranges)]

pie_chart = figure(height=400, title="Distribution of notes by ranges", toolbar_location=None,
           tools="hover", tooltips="@range: @value", x_range=(-0.5, 1.0))

pie_chart.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='range', source=note_dist_data)

show(pie_chart)
print(note_ranges)

{'5: Perfect implementation.': 631, '3.5 - 4.9: Functional implementation with small problems.': 93, '0.1 - 3.4: Implementation with serious problems (not working)': 40, '0: Did not submit the work.': 336}


Notes per concept check

In [37]:
#reintroduced here to make the cell easier to run on its own.
concept_checks = ['1AM','1PM','2AM','2PM','3AM','3PM','4AM','4PM','5AM','5PM','6AM'] 

concept_averages = []
for concept in concept_checks:
    concept_averages.append(data[concept].sum()/(len(data)))

avg_concept_notes_source = ColumnDataSource(data=dict(concept_checks = concept_checks, concept_averages = concept_averages, color = Category20c[len(concept_checks)]))
concept_notes_figure = figure(x_range = concept_checks, y_range=(0,5), height=400, title="Average notes per concept check",toolbar_location=None, tools="")
concept_notes_figure.vbar(x='concept_checks', top='concept_averages', width=0.45, color='color', legend_field="concept_averages", source=avg_concept_notes_source)

concept_notes_figure.xgrid.grid_line_color = None
concept_notes_figure.legend.orientation = "horizontal"
concept_notes_figure.legend.location = "top_center"

show(concept_notes_figure)

Final note curve graph

Web Server Display Template

In [38]:
#Layout using Template
template = pn.template.FastListTemplate(
    title='CS Bridge - Initial Findings',
    sidebar=[pn.pane.Markdown("CS Bridge -Uniandes")
    
    ], 
    main=[pn.Column(pn.Row(pn.pane.Bokeh(concept_notes_figure),pn.pane.Bokeh(day_notes_figure))), 
        pn.Column(pn.Row(pn.pane.Bokeh(pie_chart)))],
    accent_base_color="#2f3e46",
    header_background="#354f52",
)
template.show()

SyntaxError: invalid syntax (1894955692.py, line 7)