In [53]:
import pandas as pd
import numpy as np
from collections import defaultdict

In [17]:
import random
import string

def get_random_string(prefix, length):
    letters = string.ascii_lowercase
    result_str = ''.join(random.choice(letters) for i in range(length))
    return prefix + "-" + result_str

# Simulating school data

The idea is that we want to be able to identify statistical anomalies in high school to make interventions and improve student outcomes. Possible anomalies are 
- individual student performance changes
- teacher's performance deviating from average
- curriculum being taught poorly

## Available data
The available data looks like
- individual student
 - list of classes
 - list of test scores in those classes
- individual teacher
 - list of classes
- class
 - list of students in that class
 - teacher who teaches the class
 - time of the class
 - scores of each student in that class
- score
 - class (uid)
 - score (int)
 - student (uid)

In [111]:
# create fake student
class Student:
    def __init__(self, name = ''):
        self.name = name
        self.id = get_random_string('s', 6)
        self.grades = defaultdict(list)
        self.courses = []
    
    def show(self):
        print("--- {} ---".format(self.id))
        print("grades: {}".format(self.grades))
        print("courses: {}".format([ c.id for c in self.courses ]))
        print("\n")
        
    def addCourse(self, course):
        self.courses += [course]
        
    def addGrade(self, courseId, assignmentId, assignmentType, grade, time):
        self.grades[courseId] += [(assignmentId, assignmentType, grade, time)]

In [112]:
# creating a fake course
class Course:
    def __init__(self, name = '', slot = 0 ):
        self.name = name
        self.slot = slot
        self.id = get_random_string('c', 5)
        self.assignments = []
        self.teacher = None
        self.students = []
        
    def setStudents(self, students):
        self.students = students
        for student in students:
            student.addCourse(self)
            
    def setTeacher(self, teacher):
        self.teacher = teacher
        teacher.addCourse(self)
        
    def createAssignment(self, assignmentType, time):
        assignmentId = get_random_string('a', 5)
        self.assignments += [(assignmentId, assignmentType, time)]
        for student in self.students:
            grade = random.randint(0, 100)
            student.addGrade(self.id, assignmentId, assignmentType, grade, time)
    
    def show(self):
        print("--- {} ---".format(self.id))
        print("teacher: {}".format(self.teacher.id))
        print("students: {}".format([s.id for s in self.students]))
        print("assignments: {}".format([a.id for a in self.assignments]))
        print("\n")

In [113]:
class Teacher:
    def __init__(self, name = ''):
        self.name = name
        self.id = get_random_string('t', 5)
        self.courses = []
        
    def addCourse(self, course):
        self.courses += [course]
        
    def show(self):
        print("--- {} ---".format(self.id))
        print("courses: {}".format(self.courses))
        print("total number of students: {}".format(sum([len(course.students) for course in self.courses])))
        print("\n")

In [114]:
students = [Student() for i in range(10)]

In [115]:
history = Course('history')
history.setStudents(students)

In [116]:
students[0].show()

--- s-kjbhgn ---
grades: defaultdict(<class 'list'>, {})
courses: ['c-lcduj']




In [117]:
history.createAssignment('test', 100)

In [118]:
students[0].show()

--- s-kjbhgn ---
grades: defaultdict(<class 'list'>, {'c-lcduj': [('a-cakex', 'test', 93, 100)]})
courses: ['c-lcduj']




## Scaling out

In [119]:
# instantiate all the courses and students at 
# the beginning of the school year (t = 0)

students = [Student() for i in range(100)] # school has 100 students
teachers = [Teacher() for i in range(10)] # school has 10 teachers

courses = []
for teacher in teachers:
    # get number of courses taught by a teacher
    numberOfCourses = random.randint(1, 5)
    
    for i in range(numberOfCourses):
        # get a random number of students in a course
        numberOfStudentsInCourse = random.randint(10, 25)
        # sample students
        studentsInCourse = random.sample(students, numberOfStudentsInCourse)

        # create the course
        c = Course()
        c.setTeacher(teacher)
        c.setStudents(studentsInCourse)
        
        courses += [c]

In [120]:
students[0].show()
teachers[0].show()
courses[0].show()

--- s-yiusbo ---
grades: defaultdict(<class 'list'>, {})
courses: ['c-mimuq', 'c-ebxih', 'c-cjgsk', 'c-hlczn']


--- t-kabgy ---
courses: [<__main__.Course object at 0x11c1f0710>, <__main__.Course object at 0x11c1f0780>, <__main__.Course object at 0x11c1f07f0>, <__main__.Course object at 0x11c1f0860>]
total number of students: 61


--- c-liztz ---
teacher: t-kabgy
students: ['s-fdgxmx', 's-ngocse', 's-xnboam', 's-yksswq', 's-vrvblp', 's-ujmfds', 's-iuvgje', 's-kndyxe', 's-dmvmtv', 's-akwltt', 's-qivxwy', 's-uoaolh', 's-yyngnw', 's-uqpbjc']
assignments: []


