In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, name) for name in names]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# === Read the IU grade distribution table
# (original note listed the years as 96,01,16,11,16,21 -- the first "16" is
#  presumably "06"; TODO confirm against the data set)
df = pd.read_csv("/kaggle/input/iu-math-enrollment-numbers/IU_grade_dist.csv")

# === Prepare df
# Remove small classes: keep only rows whose 'A' entry is not the literal 'NR'.
df = df[df['A'] != 'NR'].copy()

# Change the grade counts to integers. One astype call replaces the
# per-element int() lambdas; int64 is the dtype those lambdas produced.
gpa_grade_cols = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F']
non_gpa_grade_cols = ['W', 'WX', 'I', 'R', 'P', 'S', 'NY', 'NR', 'NC', 'OTHER']
df = df.astype({c: 'int64' for c in gpa_grade_cols + non_gpa_grade_cols})

# Reduce COURSE to its number. The value is turned into text first; the
# original guarded that with `type(s) == 'str'`, which compares a type object
# with a string and is never true, so str() was always applied anyway.
# A first character that sorts after '9' (e.g. a letter prefix) is dropped.
course_text = df['COURSE'].map(str)
df['COURSE'] = course_text.map(lambda s: int(s[1:]) if ord(s[0]) > ord('9') else int(s))

# Derive school year and semester from the first four characters of TERM.
term_code = df['TERM'].map(lambda s: int(str(s)[:4]))
# The code is shifted by 2 before the year/semester digits are read --
# presumably so that a term whose last digit is 8 rolls over into the next
# year, making SchYr the "school year ending in" value used on the plots.
# TODO confirm against the IU term-code convention.
term_shifted = term_code + 2
two_digit_year = (term_shifted % 1000) // 10
# Two-digit years above 50 are placed in the 1900s, all others in the 2000s.
df['SchYr'] = two_digit_year + np.where(two_digit_year > 50, 1900, 2000)
df['Sem'] = term_shifted % 10
# TERM itself keeps the unshifted four-digit integer code.
df['TERM'] = term_code

# Merge GPA grades into A, B, C, and D only (fold the +/- variants in)
for c in ['A', 'B', 'C', 'D']:
    df[c] = df[c] + df[c + '+'] + df[c + '-']
# Merge unusual grades into 'OTHER'
df['OTHER'] = df[['OTHER', 'I', 'R', 'P', 'NY', 'NR', 'NC']].sum(axis=1)

Grades = ['A','B','C','D','F','S','W','WX','OTHER']
Columns = ['TERM', 'SchYr', 'Sem', 'DEPARTMENT', 'COURSE'] + Grades
df = df[Columns]

In [3]:
def grade_dist_plot(dg, columns, title, filename):
    """Plot per-school-year grade shares as a stacked bar chart and save it.

    The rows of ``dg`` are summed per ``SchYr``; each of ``columns`` is then
    divided by the row total taken over ``columns``, so the segments of every
    bar add up to 1 (a year whose total is 0 yields NaN shares).

    Parameters
    ----------
    dg : pandas.DataFrame
        Must contain ``'SchYr'`` and every name in ``columns``. Any other
        column is ignored. The caller's frame is not modified.
    columns : list of str
        Grade columns to include, in stacking order.
    title : str
        Inserted into the figure title:
        ``"Indiana University <title> Grade Distribution"``.
    filename : str
        Inserted into the output path ``"IU_<filename>grade_dist.jpg"``,
        which is written relative to the current working directory.
    """
    columns = list(columns)
    # Aggregate only the requested columns, so extra (possibly non-numeric)
    # columns in dg cannot break or pollute the sum.
    counts = dg.groupby('SchYr')[columns].sum()
    shares = counts.div(counts.sum(axis=1), axis=0)

    ax = shares.plot.bar(stacked=True, figsize=(12, 9))
    ax.set_title("Indiana University " + title + " Grade Distribution")
    ax.set_xlabel('School year ending in')
    ax.set_ylabel('Fraction of grades')

    # How to save a picture: https://www.kaggle.com/questions-and-answers/162231
    # Save through the Axes' own figure instead of pyplot's "current figure".
    ax.figure.savefig("IU_" + filename + 'grade_dist.jpg')

In [4]:
# University-wide plot over every grade category, restricted to courses
# numbered below 500 (labelled "UG" in the title).
columns = ['A','B','C','D','S','F','W','WX','OTHER']
dg = df.loc[df['COURSE'] < 500, ['SchYr'] + columns]
grade_dist_plot(dg, columns, "UG", "UG_")

In [5]:
# MATH-only plot over every grade category, courses numbered below 500.
# Only SchYr and the grade columns are passed on to the plotting helper.
columns = ['A','B','C','D','S','F','W','WX','OTHER']
dg = df.loc[
    df['DEPARTMENT'].eq('MATH') & df['COURSE'].lt(500),
    ['SchYr'] + columns,
]
grade_dist_plot(dg, columns, "MATH UG", "math_ug_")

In [6]:
# University-wide plot limited to the GPA grades A-F, courses numbered
# below 500. DEPARTMENT and COURSE are left out of the frame that is plotted.
columns = ['A','B','C','D','F']
dg = df.loc[df['COURSE'].lt(500), ['SchYr', *columns]]
grade_dist_plot(dg, columns, "UG GPA", "ug_gpa_")

In [7]:
# Plot the GPA-grade (A-F) distribution of MATH courses numbered 300-499
# ("UD" in the title, presumably upper division).
# DEPARTMENT and COURSE are dropped before plotting.
columns = ['A','B','C','D','F']
dg = df.copy()
dg = dg[dg['DEPARTMENT'] == 'MATH']
# Use >= 300 so that course number 300 itself is included: the companion
# plot for the lower range filters on COURSE < 300, and a strict > 300 here
# left course 300 out of both plots.
dg = dg[(dg['COURSE'] >= 300) & (dg['COURSE'] < 500)]
dg = dg[['SchYr'] + columns]
grade_dist_plot(dg, columns, "math UG UD GPA", "math_ug_ud_gpa_")

In [8]:
# GPA-grade (A-F) plot for MATH courses numbered below 300
# ("LD" in the title, presumably lower division).
columns = ['A','B','C','D','F']
dg = df.loc[
    (df['DEPARTMENT'] == 'MATH') & (df['COURSE'] < 300),
    ['SchYr'] + columns,
]
grade_dist_plot(dg, columns, "math UG LD GPA", "math_ug_ld_gpa_")

In [11]:
# Plot every grade category for a single MATH course, selected by number.

course = 119
columns = ['A','B','C','D','S','F','W','WX','OTHER']
dg = df.loc[
    df['DEPARTMENT'].eq('MATH') & df['COURSE'].eq(course),
    ['SchYr'] + columns,
]
grade_dist_plot(dg, columns, f"math {course}", f"math_{course}_")

In [13]:
# University-wide plot of A, B, C, D, F and W only, for courses numbered
# below 500; DEPARTMENT and COURSE are not passed to the plotting helper.
columns = ['A','B','C','D','F','W']
dg = df.loc[df['COURSE'] < 500, ['SchYr', *columns]]
grade_dist_plot(dg, columns, "UG ABCDFW", "ug_abcdfw_")

In [15]:

# MATH-only plot of A, B, C, D, F and W, for courses numbered below 500.
# Only SchYr and the six grade columns reach the plotting helper.
columns = ['A','B','C','D','F','W']
dg = df.loc[
    df['COURSE'].lt(500) & df['DEPARTMENT'].eq('MATH'),
    ['SchYr'] + columns,
]
grade_dist_plot(dg, columns, "Math UG ABCDFW", "math_ug_abcdfw_")