In [None]:
import pandas as pd
import numpy as np

# Visualization
from matplotlib import pyplot as plt
import seaborn as sns
import itertools

### Plot Mid Scores over Assignment

In [None]:
# Read the MID score table, then show it ordered by assignment.
# The sorted frame is only displayed; `mid_scores` itself keeps the file order.
mid_scores = pd.read_csv('./data/mid_scores.csv')
mid_scores.sort_values(by='AssignmentID')

In [None]:
# MID score for every assignment, coloured by SubjectID.
sns.scatterplot(
    x='AssignmentID',
    y='MID_Score',
    hue='SubjectID',
    data=mid_scores,
)

In [None]:
# Assignment score against MID score; the current figure is also written to disk.
sns.scatterplot(
    x='MID_Score',
    y='AssignmentScore',
    data=mid_scores,
)
plt.savefig('./images/AssignmentScore_MidScore.png')

In [None]:
# Display the column labels of the MID score table.
mid_scores.columns

In [None]:
# Scatter every pair of variables against each other on a two-column grid,
# coloured by the 'Incremental' column. 'Unnamed: 0' is excluded from the
# variables.
variables = [name for name in mid_scores.columns if name != 'Unnamed: 0']
pairs = list(itertools.combinations(variables, 2))

N_GRID_COLS = 2
# Round up so that an odd number of pairs still gets a panel for the last
# pair (floor division silently dropped it); always keep at least one row so
# plt.subplots never receives a zero-sized grid.
n_grid_rows = max(1, (len(pairs) + N_GRID_COLS - 1) // N_GRID_COLS)

# squeeze=False keeps `axs` two-dimensional even when the grid has one row.
fig, axs = plt.subplots(n_grid_rows, N_GRID_COLS, figsize=(12, 48), squeeze=False)
panels = axs.ravel()  # row-major order: left-to-right, then top-to-bottom

for (x_var, y_var), ax in zip(pairs, panels):
    sns.scatterplot(data=mid_scores, x=x_var, y=y_var, hue='Incremental', ax=ax)
    ax.set_title(f'{x_var} vs {y_var}')

# Hide any trailing panel that has no pair to show.
for ax in panels[len(pairs):]:
    ax.set_visible(False)

fig.tight_layout()

In [None]:
# Distribution of MID scores within each assignment.
sns.boxplot(
    x='AssignmentID',
    y='MID_Score',
    data=mid_scores,
)

In [None]:
# Number of rows recorded for each assignment.
sns.countplot(
    x='AssignmentID',
    data=mid_scores,
)

#### Mid Score in Relation to Final Grade

In [None]:
# Column names used to index the MID score table.

# Identifier columns
SUBJECT_ID_KEY = 'SubjectID'
ASSIGNMENT_ID_KEY = 'AssignmentID'

# Score columns
MID_SCORE_KEY = 'MID_Score'
ASSIGNMENT_SCORE_KEY = 'AssignmentScore'
FINAL_SCORE_KEY = 'FinalScore'

# Boolean flag column: <=2.5 counts as Incremental, >2.5 as Non-Incremental
# (presumably a threshold on the MID score -- TODO confirm).
INCREMENTAL_KEY = 'Incremental'

In [None]:
# Re-read the CSV and keep the freshly loaded frame aside under its own name;
# `mid_scores` is rebound to an independent copy of it.
mid_scores_unedited = pd.read_csv("data/mid_scores.csv")
mid_scores = mid_scores_unedited.copy(deep=True)

In [None]:
# Build one summary row per student: their final score and the percentage of
# their rows flagged as incremental. Only students with at least
# MIN_ASSIGNMENTS rows are summarised.
MIN_ASSIGNMENTS = 7

students = mid_scores[SUBJECT_ID_KEY].unique()

# Collect plain records and build the frame once, instead of growing a
# DataFrame with pd.concat inside the loop (quadratic, and concatenating onto
# an empty frame is deprecated pandas behaviour). groupby(sort=False) visits
# students in order of first appearance, matching the order of `students`.
score_records = []
for student, student_df in mid_scores.groupby(SUBJECT_ID_KEY, sort=False):
    if len(student_df) < MIN_ASSIGNMENTS:
        continue
    incremental_flags = student_df[INCREMENTAL_KEY]
    score_records.append({
        SUBJECT_ID_KEY: student,
        # First final score seen in this student's rows.
        FINAL_SCORE_KEY: student_df[FINAL_SCORE_KEY].iloc[0],
        # Share of non-missing flags that equal 1, as a percentage.
        'IncrementalPercent': (incremental_flags == 1).sum() / incremental_flags.count() * 100,
    })

# Naming the columns explicitly keeps them present even when no student
# qualifies, so plotting code that reads them still finds them.
score_df = pd.DataFrame(
    score_records,
    columns=[SUBJECT_ID_KEY, FINAL_SCORE_KEY, 'IncrementalPercent'],
).dropna()

# Percentage of incremental development plotted against each student's final score.
ax = sns.scatterplot(
    data=score_df,
    x=FINAL_SCORE_KEY,
    y='IncrementalPercent',
)
ax.set_title('Incremental Development in Relation to Final Score')
ax.set_ylabel("Percentage of Incremental Development Over Course")
ax.set_xlabel("Final Score")