# Performance study on BTI's programming classes

In [1]:
# Importing necessary modules
# Import the ColumnDataSource class from bokeh.plotting
# Import figure from bokeh.plotting
from bokeh.plotting import figure
# Import output_notebook and show from bokeh.io
from bokeh.io import output_notebook, show
# Import gridplot from bokeh.layouts
from bokeh.layouts import gridplot
from sqlalchemy import create_engine
import pandas as pd

## Create engine to get the original dataframe from SQLite database

In [2]:
# Create the database engine for the local SQLite file
engine = create_engine('sqlite:///programming.sqlite')

# Execute query and store every row of the Grade table in DataFrame: df
df = pd.read_sql_query("select * from Grade", engine)

# Drop the unnecessary 'index' column. `axis` is passed by keyword because
# pandas 2.0 no longer accepts it positionally (df.drop('index', 1) raises).
df = df.drop('index', axis=1)

print("Number of students on the database:")
print(len(df['Aluno_ID'].unique()))

# Print head of DataFrame
df.head()


Number of students on the database:
766


Unnamed: 0,Aluno_ID,Período,Disciplina,Situaçăo,Média Final,Unidade I,Unidade II,Unidade III
0,0,2014.1,IMD0012.0 - INTRODUÇĂO ŔS TÉCNICAS DE PROGRAMAÇĂO,APROVADO,7.0,4.9,9.0,7.0
1,1,2014.1,IMD0012.0 - INTRODUÇĂO ŔS TÉCNICAS DE PROGRAMAÇĂO,APROVADO,7.3,8.0,7.0,7.0
2,2,2014.1,IMD0012.0 - INTRODUÇĂO ŔS TÉCNICAS DE PROGRAMAÇĂO,APROVADO,9.3,9.5,8.3,10.0
3,3,2014.1,IMD0012.0 - INTRODUÇĂO ŔS TÉCNICAS DE PROGRAMAÇĂO,APROVADO,7.3,6.5,7.0,8.3
4,4,2014.1,IMD0012.0 - INTRODUÇĂO ŔS TÉCNICAS DE PROGRAMAÇĂO,APROVADO,7.5,5.5,8.0,9.0


## Get semesters

In [3]:
# Fetch every distinct semester ("Período") that appears in the Grade table
semesters_frame = pd.read_sql_query("SELECT DISTINCT Período FROM Grade", engine)

# The query returns a single column; flatten it into a plain Python list
semesters = semesters_frame.iloc[:, 0].tolist()
first_semester = min(semesters)

print("Semesters:")
semesters

Semesters:


[2014.1, 2014.2, 2015.1, 2015.2, 2016.1, 2016.2]

## Create dataframes for the grades of each student

In [4]:
# Highest student ID stored in the Grade table
maxID = pd.read_sql_query("SELECT MAX(Aluno_ID) as max FROM Grade", engine) 
maxID = maxID['max'][0]

# Map each student ID to a DataFrame holding that student's grade rows.
# NOTE(review): assumes IDs start at 0, as the displayed head of df suggests — confirm.
students = dict()
# range() excludes its stop value, so go up to maxID + 1; otherwise the student
# with the highest ID is silently left out of the study.
for i in range(0, maxID + 1):
    student = pd.read_sql_query(
        "SELECT Período, Disciplina, \"Situaçăo\", \"Média Final\" FROM Grade WHERE Aluno_ID = " + str(i), 
        engine)
    students[i] = student

## Create dictionary with the students of IMD0012.0 of each semester

In [5]:
# Course name exactly as it is stored in the 'Disciplina' column
itp_course = 'IMD0012.0 - INTRODUÇĂO ŔS TÉCNICAS DE PROGRAMAÇĂO'

# Start with one empty list of students per semester
studentsBySemester = {semester: [] for semester in semesters}

# File each student under the semester of their first listed IMD0012.0 row;
# students with no IMD0012.0 row are not added to any semester.
for student_grades in students.values():
    took_itp = student_grades['Disciplina'] == itp_course
    itp_periods = student_grades.loc[took_itp, 'Período'].tolist()
    if itp_periods:
        studentsBySemester[itp_periods[0]].append(student_grades)

## Define a function to get a student's grade in IMD0012.0 and their average grade in the other classes

In [24]:
def studentGrades(grades):
    """Summarise one student's record for the ITP (IMD0012.0) scatter plot.

    Parameters
    ----------
    grades : pandas.DataFrame
        The student's rows, with at least the columns 'Disciplina',
        'Situaçăo' and 'Média Final'.

    Returns
    -------
    tuple
        ``(itp, average, color)`` where

        * ``itp`` is the 'Média Final' of the last IMD0012.0 row (0 when the
          student has no such row or the grade is missing);
        * ``average`` is the mean 'Média Final' over all other classes, with
          missing grades counted as 0 (0 when there are no other classes);
        * ``color`` is 'red' if any IMD0012.0 row is marked "REPROVADO",
          'yellow' if the student never failed but ``itp`` is below 7.0,
          and 'green' otherwise.
    """
    itp_course = 'IMD0012.0 - INTRODUÇĂO ŔS TÉCNICAS DE PROGRAMAÇĂO'
    failed_itp = (
        (grades['Situaçăo'] == "REPROVADO") & (grades['Disciplina'] == itp_course)
    ).any()
    color = 'red' if failed_itp else 'green'

    total = 0
    count = 0
    itp = 0
    # Iterating the two columns directly avoids the per-row Series that
    # iterrows() builds.
    for course, grade in zip(grades['Disciplina'], grades['Média Final']):
        if "IMD0012.0" in course:
            # When the class was taken more than once, the last row wins.
            itp = grade
        else:
            # SQL NULLs reach pandas as NaN (or None); `== None` never matches
            # NaN, so use pd.isna to count a missing grade as 0 instead of
            # letting NaN poison the whole average.
            total += 0 if pd.isna(grade) else grade
            count += 1

    average = total / count if count > 0 else 0
    if pd.isna(itp):
        itp = 0
    if color == 'green' and itp < 7.0:
        color = 'yellow'
    return itp, average, color

## Separate the students into red (failed), yellow (did not fail, but grade below 7.0) and green (succeeded) circles

In [25]:
# For each semester (in the iteration order of studentsBySemester) keep three
# [x_values, y_values] pairs, one per colour:
#   x = grade in ITP, y = average grade in the other classes.
l = len(studentsBySemester)
points_red = [[[], []] for _ in range(l)]
points_green = [[[], []] for _ in range(l)]
points_yellow = [[[], []] for _ in range(l)]

for i, semester_students in enumerate(studentsBySemester.values()):
    # Look the bucket up by value. The previous `color is 'green'` compared
    # object identity, which only works when the interpreter happens to intern
    # the strings and triggers a SyntaxWarning on Python 3.8+.
    buckets = {'green': points_green[i], 'yellow': points_yellow[i]}
    for grades in semester_students:
        itp, ave, color = studentGrades(grades)
        # Students without a positive ITP grade are not plotted
        if itp > 0:
            # Any colour other than green/yellow falls into the red bucket
            xs, ys = buckets.get(color, points_red[i])
            xs.append(itp)
            ys.append(ave)

# Scatter plots relating the grades in ITP (IMD0012.0) with the average grades, on each semester

In [34]:
# Shared glyph styling
circleAlpha = 0.52
circleSize = 40
greenColor = '#006611'
redColor = '#EE0000'
yellowColor = '#EEEE00'


def _semester_figure(index):
    """Build the scatter figure for the semester at position `index`.

    Draws the green, yellow and red point sets for that semester, in that
    order, on a single figure and returns it.
    """
    fig = figure(
        # Same complete legend on every figure; the title previously ended in a
        # dangling 'Yellow=' on the left plot and omitted yellow on the right.
        title=str(semesters[index])
        + ' class of ITP: Green=SUCCESS, Red=FAILURE, Yellow=NOT FAILED, GRADE BELOW 7',
        x_axis_label='ITP Grade', y_axis_label='Average grade in other classes')
    for points, fill in ((points_green, greenColor),
                         (points_yellow, yellowColor),
                         (points_red, redColor)):
        fig.circle(points[index][0], points[index][1], fill_color=fill, line_alpha=0.0,
                   size=circleSize, fill_alpha=circleAlpha)
    return fig


# One figure per semester, laid out two per row
figures = [_semester_figure(i) for i in range(l)]
if len(figures) % 2:
    # Odd number of semesters: pad the last row with an empty grid cell rather
    # than indexing past the end of `semesters`.
    figures.append(None)
rows = [figures[i:i + 2] for i in range(0, len(figures), 2)]

layout = gridplot(rows, sizing_mode='scale_width')
output_notebook()
show(layout)