<a href="https://colab.research.google.com/github/bodiman/mathmodeling/blob/main/GradeInflation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from matplotlib import pyplot as plt
import statistics
import numpy as np

In [2]:
# grades = ["A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F"]
# values = [12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

# #These are our assumptions for class difficulties
# min_average = 5
# max_average = 11

# min_stdv = 0
# max_stdv = 5

In [95]:
grades = ["A", "B", "C", "D", "F"]
values = [4, 3, 2, 1, 0]

#These are our assumptions for class difficulties
min_average = 4
max_average = 2

min_stdv = 0
max_stdv = 1.5

In [20]:
"""
Generates mean and standard deviations for each class' grading curve


Parameters:

n_classes: number of classes to generate

min_average: the minimum average class grade

max_average: the maximum average class grade

min_stdv: the minimum class curve standard deviation

max_stdv: the maximum class curve standard deviation


Output:

classes: a list of shape (n_classes, 2) cointaining a mean and standard deviation for each class

"""
def gen_classes(n_classes, min_average, max_average, min_stdv, max_stdv):
  classes = []
  for k in range(n_classes):
    average_grades=np.random.uniform(min_average, max_average)
    standard_deviations=np.random.uniform(min_stdv, max_stdv)
    classes.append([average_grades,standard_deviations])
  return classes

classes = gen_classes(10, min_average, max_average, min_stdv, max_stdv)

In [4]:
"""
Generates an ordered list of student "intrinsic skill levels"

Parameters:

n_students: number of student skill levels to generate
"""
def generate_students(n_students):
  students = np.random.normal(0, 1, n_students).tolist()
  students.sort()
  return students

students = generate_students(100)

In [27]:
"""
Sorts students into classes


Parameters:

students: the sorted list of student skill levels

classes: a list of means and standard deviations for each class

n_classes: the number of classes each student is assigned to


Output:

class_package: a multidimensional list of shape (n_classes, 2, n_students)
                containing the strengths and indices of each student where
                n_students is the number of students in the particular class
"""

def generate_class_package(students, classes, n_classes):
  #for each student, assign n_classes classes from the classes indices
  student_classes = [[idx, students[idx]] + np.random.choice(np.array(list(range(len(classes)))), n_classes, replace = False).tolist() for idx in range(len(students))]

  class_package = [[[], []] for _ in range(len(classes))]

  for student in student_classes:
    for classidx in student[2:]:
      class_package[classidx][0].append(student[0])
      class_package[classidx][1].append(student[1])

  return class_package

In [28]:
class_packages = generate_class_package(students, classes, 5)

In [97]:
"""
Produces letter grades for students in a particular class


Parameters:

strengths: the "intrinsic skill levels" of each student

mean: the average numerical grade for a class

standard_deviation: the standard deviation of a class' grading curve


Output:

grade_performances: letter grades for students in a particular class
"""

def add_noise(strengths, mean, standard_deviation):
  mean_true = sum(strengths)/len(strengths)
  stdv_true = statistics.stdev(strengths)

  normalized_performances = [(score - mean_true)/stdv_true for score in strengths]

  #apply grading distribution
  distribution_performances = [score*standard_deviation + mean for score in normalized_performances]

  #quantize and clip at 0-12
  quantized_performances = [max(min(int(score), values[0]), values[-1]) for score in distribution_performances]
  grade_performances = [grades[values.index(score)] for score in quantized_performances]

  return grade_performances

In [52]:
def grade_class_package(class_package, class_distributions):
  graded_package = []

  for school_class, distribution in zip(class_package, class_distributions):
    idxs = school_class[0]
    scores = school_class[1]

    mean = distribution[0]
    stdv = distribution[1]

    letter_grades = add_noise(scores, mean, stdv)

    graded_package.append([idxs, letter_grades])

  return graded_package

In [59]:
def score_graded_package(graded_package):
  z_score_package = []

  for school_class in graded_package:
    idxs = school_class[0]
    letter_grades = school_class[1]

    z_scores = rank_students(letter_grades)

    z_score_package.append([idxs, z_scores])

  return z_score_package

In [33]:
"""
Takes a list of letter grades and produces z-scores for each class


Parameters:

classgrades: a list of letter grades for a particular class


Outputs:

z_scores: a list of z-scores grades for a particular class

"""

def rank_students(classgrades):
  scores = [values[grades.index(grade)] for grade in classgrades]
  mean = sum(scores)/len(scores)
  stdv = statistics.stdev(scores)

  z_scores = [(score - mean)/stdv for score in scores]

  return z_scores


In [62]:
def compute_total_scores(z_score_package, n_students):
  student_final_scores = [0 for _ in range(n_students)]

  for school_class in z_score_package:
    indices, scores = school_class

    for idx, score in zip(indices, scores):
      student_final_scores[idx] += score

  return student_final_scores

In [93]:
def compute_total_scores_standard(graded_package, n_students):
  student_final_scores = [0 for _ in range(n_students)]

  for school_class in graded_package:
    indices, letter_grades = school_class

    for idx, letter_grade in zip(indices, letter_grades):
      student_final_scores[idx] += values[grades.index(letter_grade)]

  return student_final_scores

In [80]:
def compute_decile_retention(student_scores, n):
  top_scorers = sorted(range(len(student_scores)), key=lambda i: student_scores[i], reverse=True)[:len(student_scores) // n]
  best_students = list(range(len(student_scores)))[-len(student_scores) // n:]

  return len(set(top_scorers).intersection(set(best_students)))/(len(student_scores) // n)


In [98]:
retention_total = 0
num_samples = 0

for _ in range(1000):
  try:
    #generate students and classes
    students = generate_students(100)
    classes = gen_classes(10, min_average, max_average, min_stdv, max_stdv)

    #randomly assign students to classes
    class_packages = generate_class_package(students, classes, 5)

    #grade students
    graded_packages = grade_class_package(class_packages, classes)

    #z-score students for each class

    z_score_package = score_graded_package(graded_packages)

  except ZeroDivisionError:
    continue

  #take the sum of the z-scores
  final_scores = compute_total_scores(z_score_package, 100)

  #see how many students from the top 10% are in the actual top 10%

  retention_total += compute_decile_retention(final_scores, 10)
  num_samples += 1

retention_rate = retention_total/num_samples
print(f"Retention rate across {num_samples} samples: {retention_rate}")

Retention rate across 453 samples: 0.7699779249448131


In [99]:
retention_total = 0
num_samples = 0

for _ in range(1000):
  try:
    #generate students and classes
    students = generate_students(100)
    classes = gen_classes(10, min_average, max_average, min_stdv, max_stdv)

    #randomly assign students to classes
    class_packages = generate_class_package(students, classes, 5)

    #grade students
    graded_packages = grade_class_package(class_packages, classes)

    #score students by gpa
    standard_scored_package = score_graded_package(graded_packages)

  except ZeroDivisionError:
    continue

  #take the sum of the z-scores
  final_scores = compute_total_scores(standard_scored_package, 100)

  #see how many students from the top 10% are in the actual top 10%

  retention_total += compute_decile_retention(final_scores, 10)
  num_samples += 1

retention_rate = retention_total/num_samples
print(f"Retention rate across {num_samples} samples: {retention_rate}")

Retention rate across 469 samples: 0.7744136460554367


In [None]:
rank_students(["A+", "A+", "A", "A", "A-", "B+",])

[1.0614455552060436,
 1.0614455552060436,
 0.0816496580927723,
 0.0816496580927723,
 -0.8981462390204988,
 -1.3880441875771345]

In [None]:
"""
Algorithm:

1. Translate Grades to numbers
2. Find the mean and standard deviation of grades
3. Give each student a score of their z score
4. Rank students by average z score


"""

'\nAlgorithm:\n\n1. Translate Grades to numbers\n2. Find the mean and standard deviation of grades\n3. Give each student a score of their z score\n4. Rank students by average z score\n\n\n'