# Test Data Generation: Base-Truth Tables Class

This OEA test data generation class notebook generates the base-truth tables, which are then used to generate test datasets for modules (or other purposes); this notebook is needed to successfully run the base_test_data_gen_demo notebook.

This class notebook primarily leans on the use of the OEA_py class notebook, Faker and random-address python packages to generate 5 base-truth tables:
 1. **Students**,
 2. **Schools**,
 3. **Courses**: general classes offered,
 4. **Sections**: instructor-specific classes offered, and
 5. **Enrollment**: student enrollment assignments in specific sections.

This notebook defines and uses 1 main function and 9 helper methods. The main function is described below:
 - **gen_base_tables(numstudents, numschools, numenroll, hed_numcourses, ed_level)**: Generates the base-truth tables mentioned above. Accepts the user-defined number of students, schools, and courses, and the desired education level of the students (```ed_level``` defaults to ```k12```). Accepted **ed_level** values are:
    * ```hed``` - generates higher education schools (such as colleges), courses, sections, and higher ed. students with class enrollment.
    * ```k12``` - generates K-12 schools (elementary, middle, and high schools), courses, sections, and K-12 students with class enrollment.
    * Under-development:
        * ```prek``` - for generating pre-kindergarten base-truth tables.
 - **Current Function Notes and Constraints**: The gen_base_tables function currently has some additional notes and constraints, as described below.
    * ```k12``` - *numschools*: min of 4. *numenroll*: max of 21.
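    * ```hed``` - *numschools*: max of 3. *numenroll*: max of 21. *hed_numcourses*: max of 21 (max of 20 whenever the School of Business is generated, because its course list has only 20 entries).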

In [1]:
import logging
import random
from tokenize import Ignore, String
from faker import Faker
import pandas as pd
import datetime as dt
from datetime import date
import numpy as np
from pyparsing import nums
from torch import real
import random_address

class TestDataGen_BaseTables():
    def __init__(self, source_folder='test_data'):
        """Create the Faker generator, the run timestamp and five empty base-truth tables.

        ``source_folder`` is not used by this constructor.
        """
        # timestamp of this run; used for the rundate folder
        self.currentDateTime = dt.datetime.now().strftime("%Y-%m-%d %H-%M-%S")

        self.faker = Faker('en_US')

        def empty_table(column_names):
            # zero-row DataFrame with one object-typed column per name
            return pd.DataFrame({name: [] for name in column_names}, dtype=object)

        self.students = empty_table((
            'Gender', 'FirstName', 'MiddleName', 'LastName', 'StudentID',
            'Birthday', 'SchoolName', 'SchoolID', 'SchoolType', 'Grade',
            'Performance', 'HispanicLatino', 'Race', 'Flag', 'Email',
            'Phone', 'Address', 'City', 'State', 'Zipcode',
        ))

        self.schools = empty_table(('SchoolName', 'SchoolID', 'SchoolType'))

        self.courses = empty_table((
            'CourseName', 'CourseID', 'SchoolName', 'SchoolID', 'SchoolType',
            'CourseSubject', 'CourseGradeLevel',
        ))

        self.sections = empty_table((
            'SectionName', 'SectionID', 'CourseName', 'CourseID', 'SchoolName',
            'SchoolID', 'SchoolType', 'SectionSubject', 'SectionGradeLevel',
        ))

        self.enrollment = empty_table((
            'StudentName', 'StudentID', 'SectionName', 'SectionID', 'CourseName',
            'CourseID', 'CourseGradeLevel', 'SchoolName', 'SchoolID',
        ))

    def gen_base_tables(self, numstudents, numschools, numenroll, hed_numcourses, ed_level='k12'):
        """Generate the five base-truth tables and write each of them out as CSV.

        Invalid input is reported through ``logger.info`` and nothing is
        generated or written.

        Args:
            numstudents: number of students, passed to ``_gen_students``.
            numschools: number of schools, passed to ``_gen_schools``. Must be
                at least 4 for 'k12' and between 1 and 3 for 'hed'.
            numenroll: passed to ``_gen_enrollment``.
            hed_numcourses: passed to ``_gen_courses``.
            ed_level: 'k12' (default) or 'hed'.

        NOTE(review): relies on ``logger``, ``spark`` and ``oea`` being defined
        in the notebook session; they are not defined in this class - confirm
        they come from the OEA_py notebook.
        """
        if ed_level not in ('k12', 'hed'):
            logger.info('Unable to generate tables from ed_level input - please choose k12 or hed')
            return
        if ed_level == 'k12' and numschools <= 3:
            # K-12 generation always creates one high, one middle and one
            # elementary school first, and _gen_schools requires at least 4.
            logger.info('Unable to generate less than 4 schools for K-12 generation - please input a number more than 3 for numschools')
            return
        if ed_level == 'hed' and numschools > 3:
            logger.info('Unable to generate more than 3 schools for Higher Ed. generation - please either edit functions or input a number less than 4 for numschools')
            return
        if ed_level == 'hed' and numschools < 1:
            # without any school, _assign_schools has nothing to pick from
            logger.info('Unable to generate less than 1 school for Higher Ed. generation - please input a number between 1 and 3 for numschools')
            return

        self.edlevel = ed_level
        self._gen_students(numstudents)
        self._gen_schools(numschools)
        self._assign_schools()
        self._gen_courses(hed_numcourses)
        self._gen_sections()
        self._gen_enrollment(numenroll)

        # Convert every table before writing any of them, so a failed
        # conversion does not leave a partially written run behind.
        spark_frames = [
            (table_name, spark.createDataFrame(pandas_frame))
            for table_name, pandas_frame in (
                ('base_students', self.students),
                ('base_schools', self.schools),
                ('base_courses', self.courses),
                ('base_sections', self.sections),
                ('base_enrollment', self.enrollment),
            )
        ]
        # NOTE: type of batch data can be updated as needed
        for table_name, spark_frame in spark_frames:
            destination = oea.to_url(
                f'stage1/Transactional/test_data/v0.1/{table_name}/snapshot_batch_data/rundate={self.currentDateTime}'
            )
            spark_frame.coalesce(1).write.save(destination, format='csv', mode='overwrite', header='true', mergeSchema='true')

    # helper methods

    def __get_age(self, born):
        today = date.today()
        return today.year - born.year - ((today.month, today.day) < (born.month, born.day))

    def __gen_student(self, k12_edlevel='NA'):
        gender = random.choices(['M','F','O'],weights=[0.47,0.5,0.03])
        if gender == ['M']:
            firstname = self.faker.first_name_male()
            middlename = self.faker.first_name_male()
        elif gender == ['F']: 
            firstname = self.faker.first_name_female()
            middlename = self.faker.first_name_female()
        elif gender == ['O']:
            firstname = self.faker.first_name_nonbinary()
            middlename = self.faker.first_name_nonbinary()
        lastname = self.faker.last_name()
        studentid = self.faker.uuid4()
        if k12_edlevel == 'high':
            birthday = self.faker.date_of_birth(minimum_age=15,maximum_age=18)
        elif k12_edlevel == 'middle':
            birthday = self.faker.date_of_birth(minimum_age=12,maximum_age=14)
        elif k12_edlevel == 'elementary':
            birthday = self.faker.date_of_birth(minimum_age=6,maximum_age=11)
        elif k12_edlevel == 'prek':
            birthday = self.faker.date_of_birth(minimum_age=2,maximum_age=5)
        elif k12_edlevel == 'NA':
            birthday = self.faker.date_of_birth(minimum_age=18,maximum_age=22)
        # education
        school = ''
        schoolid = ''
        if k12_edlevel == 'high':
            schooltype = 'High School'
        elif k12_edlevel == 'middle':
            schooltype = 'Middle School'
        elif k12_edlevel == 'elementary':
            schooltype = 'Elementary School'
        elif k12_edlevel == 'NA':
            schooltype = 'College'
        # demographics
        if self.edlevel == 'k12':
            if (self.__get_age(birthday) - 6) == 0:
                grade = 'KG'
            else:
                grade = self.__get_age(birthday) - 6
                grade = str(grade)
        elif self.edlevel == 'hed':
            grade = random.choices(['undergraduate: year 1','undergraduate: year 2','undergraduate: year 3', 'undergraduate: year 4', 'graduate: year 1'], weights=[0.3,0.4,0.15,0.1,0.05])
        performance = random.choices(['high','avg','low'], weights=[0.3,0.6,0.1])
        hispaniclatino = random.choices(['True','False'], weights=[0.189,0.811])
        race = random.choices(['white','blackafricanamerican','americanindianalaskanative','asian','nativehawaiianpacificislander','twoormoreraces'], weights=[0.708,0.149,0.013,0.061,0.009,0.06])
        flag = random.choices(['','FreeLunch','ReducedLunch','Homeless'],weights=[0.78,0.1,0.1,0.02])
        # contact
        email = f'{firstname}{lastname}@contoso.edu'
        phone = self.faker.phone_number()
        address = random_address.real_random_address_by_state('CA')
        try:
            city = address['city']
        except KeyError:
            city = 'Quantico'
        state = address['state']
        zipcode = address['postalCode']
        address = address['address1']
        if self.edlevel == 'k12':
            self.students.loc[len(self.students.index)] = [gender[0], firstname, middlename, lastname, studentid, birthday, school, \
            schoolid, schooltype, grade, performance[0], hispaniclatino[0], race[0], flag[0], email, phone, address, city, state, zipcode]
        elif self.edlevel == 'hed':
            self.students.loc[len(self.students.index)] = [gender[0], firstname, middlename, lastname, studentid, birthday, school, \
            schoolid, schooltype, grade[0], performance[0], hispaniclatino[0], race[0], flag[0], email, phone, address, city, state, zipcode]

    def _gen_students(self, numstudents=100):
        if self.edlevel == 'k12':
            while numstudents > 0:
                ran_student_ed_level = random.choices(['high','middle','elementary'], weights=[0.34,0.33,0.33])
                self.__gen_student(k12_edlevel=ran_student_ed_level[0])
                numstudents = numstudents - 1
        else:
            while numstudents > 0:
                self.__gen_student(k12_edlevel='NA')
                numstudents = numstudents - 1

    def __gen_k12_school(self, k12_edlevel='high'):
        if k12_edlevel == 'high':
            schoolname = f'{self.faker.last_name()} High'
            schooltype = 'High School'
        elif k12_edlevel == 'middle':
            schoolname = f'{self.faker.last_name()} Middle'
            schooltype = 'Middle School'
        elif k12_edlevel == 'elementary':
            schoolname = f'{self.faker.last_name()} Elementary'
            schooltype = 'Elementary School'
        schoolid = self.faker.uuid4()
        self.schools.loc[len(self.schools.index)] = [schoolname, schoolid,schooltype]

    def _gen_schools(self, numschools=3):
        if self.edlevel == 'hed':
            # NOTE: The commented-options array contains future options. The uncommented-options array contains options currently built and supported
            #options = ['School of Fine Arts', 'School of Science and Health', 'School of Business', 'School of Music', 'School of Engineering', 'School of Information Technology', 'School of Culinary Arts']
            if numschools == 3:
                schools = ['School of Fine Arts', 'School of Science and Health', 'School of Business']
                for school in schools:
                    schoolname = school
                    schoolid = self.faker.uuid4()
                    schooltype = 'College'
                    self.schools.loc[len(self.schools.index)] = [schoolname,schoolid,schooltype]
            elif numschools < 3:
                options = ['School of Fine Arts', 'School of Science and Health', 'School of Business']
                schools = random.sample(options,numschools)
                for school in schools:
                    schoolname = school
                    schoolid = self.faker.uuid4()
                    schooltype = 'College'
                    self.schools.loc[len(self.schools.index)] = [schoolname,schoolid,schooltype]
            else:
                logger.info('Unable to generate more than 3 different higher ed. schools - either define functions for more schools or choose numschool to be < 4')
        elif (self.edlevel == 'k12') & (numschools < 4):
            logger.info('Unable to generate less than 4 different K-12 schools - either edit functions or choose numschool to be > 3.')
        else:
            # K-12 school gen: start by generating at least one of each elementary, middle, and high school
            self.__gen_k12_school(k12_edlevel='high')
            self.__gen_k12_school(k12_edlevel='middle')
            self.__gen_k12_school(k12_edlevel='elementary')
            numschools = numschools - 3
            while numschools > 0:
                # randomize K-12 school generation for the rest of the number of schools chosen
                ran_schooltype = random.choices(['high','middle','elementary'], weights=[0.34,0.33,0.33])
                self.__gen_k12_school(ran_schooltype[0])
                numschools = numschools - 1

    def _assign_schools(self):
        i = len(self.students.index) - 1
        while i >= 0:
            # if generating K-12 data: subset schools df for specific school/education type the student belongs to
            if self.edlevel == 'k12':
                hs_df = self.schools[self.schools['SchoolType'] == 'High School'].copy()
                ms_df = self.schools[self.schools['SchoolType'] == 'Middle School'].copy()
                es_df = self.schools[self.schools['SchoolType'] == 'Elementary School'].copy()
                hs_df.reset_index(drop=True, inplace=True)
                ms_df.reset_index(drop=True, inplace=True)
                es_df.reset_index(drop=True, inplace=True)
                if self.students.at[i,'SchoolType'] == 'High School':
                    school = random.randint(0, len(hs_df.index) - 1)
                    self.students.at[i, 'SchoolName'] = hs_df.at[school, 'SchoolName']
                    self.students.at[i, 'SchoolID'] = hs_df.at[school, 'SchoolID']
                    i = i - 1
                elif self.students.at[i,'SchoolType'] == 'Middle School':
                    school = random.randint(0, len(ms_df.index) - 1)
                    self.students.at[i, 'SchoolName'] = ms_df.at[school, 'SchoolName']
                    self.students.at[i, 'SchoolID'] = ms_df.at[school, 'SchoolID']
                    i = i - 1
                elif self.students.at[i,'SchoolType'] == 'Elementary School':
                    school = random.randint(0, len(es_df.index) - 1)
                    self.students.at[i, 'SchoolName'] = es_df.at[school, 'SchoolName']
                    self.students.at[i, 'SchoolID'] = es_df.at[school, 'SchoolID']
                    i = i - 1
            else:
                school = random.randint(0, len(self.schools.index) - 1)
                self.students.at[i, 'SchoolName'] = self.schools.at[school, 'SchoolName']
                self.students.at[i, 'SchoolID'] = self.schools.at[school, 'SchoolID']
                i = i - 1
    
    def _gen_courses(self, numcourses=6):
        # high school courses: options, grades, and subjects array
        options_high = [
            'Trigonomotry',
            'Pre-Calculus',
            'Calculus',
            'Choir',
            'Band',
            'Orchestra',
            'Reading',
            'English',
            'World Literature',
            'Astronomy',
            'Biology',
            'Chemistry',
            'Physics',
            'Physical Education',
            'Health',
            'Pottery',
            'Art',
            'Theatre',
            'Computer Science',
            'US History',
            'World History'
        ]
        grades_high = {
            'Trigonomotry':'0',
            'Pre-Calculus':'11',
            'Calculus':'12',
            'Choir':'0',
            'Band':'0',
            'Orchestra':'0',
            'Reading':'0',
            'English':'0',
            'World Literature':'11',
            'Astronomy':'9',
            'Biology':'10',
            'Chemistry':'11',
            'Physics':'12',
            'Physical Education':'0',
            'Health':'0',
            'Pottery':'0',
            'Art':'0',
            'Theatre':'0',
            'Computer Science':'12',
            'US History':'10',
            'World History':'11'
        }
        subjects_high = {
            'Trigonomotry':'Mathematics',
            'Pre-Calculus':'Mathematics',
            'Calculus':'Mathematics',
            'Choir':'Visual and Performing Arts',
            'Band':'Visual and Performing Arts',
            'Orchestra':'Visual and Performing Arts',
            'Reading':'English Language and Literature',
            'English':'English Language and Literature',
            'World Literature':'English Language and Literature',
            'Astronomy':'Life and Physical Sciences',
            'Biology':'Life and Physical Sciences',
            'Chemistry':'Life and Physical Sciences',
            'Physics':'Life and Physical Sciences',
            'Physical Education':'Physical Health and Safety Education',
            'Health':'Physical Health and Safety Education',
            'Pottery':'Visual and Performing Arts',
            'Art':'Visual and Performing Arts',
            'Theatre':'Visual and Performing Arts',
            'Computer Science':'Information Technology',
            'US History':'Social Sciences and History',
            'World History':'Social Sciences and History'
        }
        # middle school courses: options, grades, and subjects array
        options_mid = [
            'Remedial Math',
            'Pre-Algebra',
            'Algebra 1',
            'Geometry',
            'Algebra 2',
            'Choir',
            'Band',
            'Orchestra',
            'ELA',
            'Advanced ELA',
            'World Literature',
            'Physical Science',
            'Earth Science',
            'Engineering Sciences',
            'Computer Science',
            'Physical Education',
            'Health',
            'Technical Education',
            'Home Economics',
            'Art',
            'Theatre',
            'US History',
            'World History',
            'Ancient History'
        ]
        grades_mid = {
            'Remedial Math':'0',
            'Pre-Algebra':'6',
            'Algebra 1':'7',
            'Geometry':'8',
            'Algebra 2':'8',
            'Choir':'0',
            'Band':'0',
            'Orchestra':'0',
            'ELA':'0',
            'Advanced ELA':'0',
            'World Literature':'7',
            'Physical Science':'6',
            'Earth Science':'7',
            'Engineering Sciences':'8',
            'Computer Science':'8',
            'Physical Education':'0',
            'Health':'0',
            'Technical Education':'7',
            'Home Economics':'7',
            'Art':'0',
            'Theatre':'0',
            'US History':'7',
            'World History':'6',
            'Ancient History':'8'
        }
        subjects_mid = {
            'Remedial Math':'Mathematics',
            'Pre-Algebra':'Mathematics',
            'Algebra 1':'Mathematics',
            'Geometry':'Mathematics',
            'Algebra 2':'Mathematics',
            'Choir':'Visual and Performing Arts',
            'Band':'Visual and Performing Arts',
            'Orchestra':'Visual and Performing Arts',
            'ELA':'English Language Arts and Literature',
            'Advanced ELA':'English Language Arts and Literature',
            'World Literature':'English Language Arts and Literature',
            'Physical Science':'Sciences',
            'Earth Science':'Sciences',
            'Engineering Sciences':'Sciences',
            'Computer Science':'Sciences',
            'Physical Education':'Physical Health and Safety Education',
            'Health':'Physical Health and Safety Education',
            'Technical Education':'Industrial Education',
            'Home Economics':'Industrial Education',
            'Art':'Visual and Performing Arts',
            'Theatre':'Visual and Performing Arts',
            'US History':'Social Sciences and History',
            'World History':'Social Sciences and History',
            'Ancient History':'Social Sciences and History'
        }
        # elementary school courses: options, grades, and subjects array
        options_ele = [
            'Kindergarten Class',
            '1st Grade Class',
            '2nd Grade Class',
            '3rd Grade Class',
            '4th Grade Class',
            '5th Grade Class'
        ]
        grades_ele = {
            'Kindergarten Class':'KG',
            '1st Grade Class':'1',
            '2nd Grade Class':'2',
            '3rd Grade Class':'3',
            '4th Grade Class':'4',
            '5th Grade Class':'5'
        }
        subjects_ele = {
            'Kindergarten Class':'General Elementary Education',
            '1st Grade Class':'General Elementary Education',
            '2nd Grade Class':'General Elementary Education',
            '3rd Grade Class':'General Elementary Education',
            '3rd Grade Class':'General Elementary Education',
            '4th Grade Class':'General Elementary Education',
            '5th Grade Class':'General Elementary Education'
        }
        # hed school courses: options, grades, and subjects array
        # art school
        options_hed_art = [
            'Art Appreciation',
            'Dance Appreciation',
            'Drawing Foundations',
            'Painting Media',
            'Intro to Digital Art',
            'Graphic Design',
            'Ceramics',
            'Intro to Sculpture',
            'Intro to Ceramics',
            'Intro to Metalsmithing',
            'Intro to Blacksmithing',
            'Drawing',
            'Photography and Imaging',
            'World Art',
            'Color Theory',
            'Themes of Contemporary Art',
            'Practice of Art Therapy',
            'Documentary Strategies',
            'Portraiture in Photography',
            'Methods in Art Education',
            'Art Seminar'
        ]
        grades_hed_art = {
            'Art Appreciation':'general education',
            'Dance Appreciation':'general education',
            'Drawing Foundations':'undergraduate: year 1',
            'Painting Media':'undergraduate: year 2',
            'Intro to Digital Art':'undergraduate: year 2',
            'Graphic Design':'undergraduate: year 3',
            'Ceramics':'undergraduate: year 3',
            'Intro to Sculpture':'undergraduate: year 2',
            'Intro to Ceramics':'undergraduate: year 2',
            'Intro to Metalsmithing':'undergraduate: year 2',
            'Intro to Blacksmithing':'undergraduate: year 2',
            'Drawing':'undergraduate: year 3',
            'Photography and Imaging':'undergraduate: year 3',
            'World Art':'undergraduate: year 3',
            'Color Theory':'graduate: year 1',
            'Themes of Contemporary Art':'undergraduate: year 4',
            'Practice of Art Therapy':'undergraduate: year 4',
            'Documentary Strategies':'graduate: year 1',
            'Portraiture in Photography':'undergraduate: year 4',
            'Methods in Art Education':'undergraduate: year 4',
            'Art Seminar':'general education'
        }
        subjects_hed_art = {
            'Art Appreciation':'General Art Knowledge',
            'Dance Appreciation':'General Art Knowledge',
            'Drawing Foundations':'Visual Art',
            'Painting Media':'Visual Art',
            'Intro to Digital Art':'Digital Art',
            'Graphic Design':'Digital Art',
            'Ceramics':'Sculpture Art',
            'Intro to Sculpture':'Sculpture Art',
            'Intro to Ceramics':'Sculpture Art',
            'Intro to Metalsmithing':'Sculpture Art',
            'Intro to Blacksmithing':'Sculpture Art',
            'Drawing':'Visual Art',
            'Photography and Imaging':'Photographic Art',
            'World Art':'Theory of Art',
            'Color Theory':'Theory of Art',
            'Themes of Contemporary Art':'Theory of Art',
            'Practice of Art Therapy':'Theory of Art Education',
            'Documentary Strategies':'Performance Art',
            'Portraiture in Photography':'Photographic Art',
            'Methods in Art Education':'Theory of Art Education',
            'Art Seminar':'General Art Knowledge'
        }
        # hed school courses: options, grades, and subjects array
        # science and health school
        options_hed_sah = [
            'Intro to Physics',
            'Intro to Chemistry',
            'Intro to Biology',
            'Foundations of Advanced Math',
            'Logic',
            'Classical Mechanics',
            'Organic Chemistry',
            'Anatomy and Physiology',
            'Differential Equations',
            'Intro to Philosophy',
            'Quantum Physics',
            'Analytical Chemistry',
            'Genetics',
            'Intro to Topology',
            'Western Philosophies',
            'General Relativity',
            'Advanced Physical Chemistry',
            'Neurobiology',
            'Symplectic Manifolds',
            'Metaphysics and Epistemology',
            'Field Research'
        ]
        grades_hed_sah = {
            'Intro to Physics':'undergraduate: year 1',
            'Intro to Chemistry':'undergraduate: year 1',
            'Intro to Biology':'undergraduate: year 1',
            'Foundations of Advanced Math':'undergraduate: year 2',
            'Logic':'undergraduate: year 1',
            'Classical Mechanics':'undergraduate: year 3',
            'Organic Chemistry':'undergraduate: year 3',
            'Anatomy and Physiology':'undergraduate: year 2',
            'Differential Equations':'undergraduate: year 3',
            'Intro to Philosophy':'undergraduate: year 2',
            'Quantum Physics':'undergraduate: year 4',
            'Analytical Chemistry':'undergraduate: year 3',
            'Genetics':'undergraduate: year 4',
            'Intro to Topology':'undergraduate: year 4',
            'Western Philosophies':'undergraduate: year 3',
            'General Relativity':'graduate: year 1',
            'Advanced Physical Chemistry':'undergraduate: year 4',
            'Neurobiology':'graduate: year 1',
            'Symplectic Manifolds':'graduate: year 2',
            'Metaphysics and Epistemology':'graduate: year 1',
            'Field Research':'general education'
        }
        subjects_hed_sah = {
            'Intro to Physics':'Physics',
            'Intro to Chemistry':'Chemistry',
            'Intro to Biology':'Biology',
            'Foundations of Advanced Math':'Mathematics',
            'Logic':'Philosophy',
            'Classical Mechanics':'Physics',
            'Organic Chemistry':'Chemistry',
            'Anatomy and Physiology':'Biology',
            'Differential Equations':'Mathematics',
            'Intro to Philosophy':'Philosophy',
            'Quantum Physics':'Physics',
            'Analytical Chemistry':'Chemistry',
            'Genetics':'Biology',
            'Intro to Topology':'Mathematics',
            'Western Philosophies':'Philosophy',
            'General Relativity':'Advanced Physics',
            'Advanced Physical Chemistry':'Advanced Chemistry',
            'Neurobiology':'Advanced Biology',
            'Symplectic Manifolds':'Advanced Mathematics',
            'Metaphysics and Epistemology':'Advanced Philosophy',
            'Field Research':'General Applied Knowledge'
        }
        # hed school courses: options, grades, and subjects array
        # business school
        options_hed_bus = [
            'Business Communication',
            'Organizational Behavior',
            'Intro to Management',
            'Principles of Sustainable Business',
            'International Management',
            'Human Resource Management',
            'Compensation and Benefits Admin',
            'Production and Operations Management',
            'Management Science',
            'Project Management',
            'Healthcare Analytics Management',
            'Leadership and Team Development',
            'Entrepreneurship',
            'Social Entrepreneurship',
            'Cross-Cultural Management',
            'Business, Labor, and Human Rights',
            'Employee Training and Development',
            'Collective Bargaining',
            'Global Supply Chain Management',
            'Green Operations Management'
        ]
        grades_hed_bus = {
            'Business Communication':'undergraduate: year 1',
            'Organizational Behavior':'undergraduate: year 1',
            'Intro to Management':'undergraduate: year 1',
            'Principles of Sustainable Business':'undergraduate: year 3',
            'International Management':'undergraduate: year 3',
            'Human Resource Management':'undergraduate: year 2',
            'Compensation and Benefits Admin':'undergraduate: year 2',
            'Production and Operations Management':'undergraduate: year 4',
            'Management Science':'undergraduate: year 2',
            'Project Management':'undergraduate: year 3',
            'Healthcare Analytics Management':'undergraduate: year 3',
            'Leadership and Team Development':'undergraduate: year 4',
            'Entrepreneurship':'undergraduate: year 2',
            'Social Entrepreneurship':'undergraduate: year 3',
            'Cross-Cultural Management':'undergraduate: year 4',
            'Business, Labor, and Human Rights':'undergraduate: year 2',
            'Employee Training and Development':'undergraduate: year 4',
            'Collective Bargaining':'graduate: year 1',
            'Global Supply Chain Management':'graduate: year 1',
            'Green Operations Management':'undergraduate: year 3'
        }
        subjects_hed_bus = {
            'Business Communication':'Communication & Behavior',
            'Organizational Behavior':'Communication & Behavior',
            'Intro to Management':'Theory of Management',
            'Principles of Sustainable Business':'Theory of Business',
            'International Management':'Applied Management',
            'Human Resource Management':'Applied Management',
            'Compensation and Benefits Admin':'Theory of Business',
            'Production and Operations Management':'Theory of Management',
            'Management Science':'Theory of Management',
            'Project Management':'Applied Management',
            'Healthcare Analytics Management':'Applied Management',
            'Leadership and Team Development':'Theory of Business',
            'Entrepreneurship':'Theory of Entrepreneuring',
            'Social Entrepreneurship':'Theory of Entrepreneuring',
            'Cross-Cultural Management':'Theory of Management',
            'Business, Labor, and Human Rights':'Theory of Business',
            'Employee Training and Development':'Applied Management',
            'Collective Bargaining':'Theory of Business',
            'Global Supply Chain Management':'Applied Management',
            'Green Operations Management':'Applied Management'
        }
        for index, school in self.schools.iterrows():
            # K-12 courses generation: each course listed is generated to be 
            if self.edlevel == 'k12':
                if school['SchoolType'] == 'High School':
                    courses = options_high
                    grades = grades_high
                    subjects = subjects_high
                elif school['SchoolType'] == 'Middle School':
                    courses = options_mid
                    grades = grades_mid
                    subjects = subjects_mid
                elif school['SchoolType'] == 'Elementary School':
                    courses = options_ele
                    grades = grades_ele
                    subjects = subjects_ele
            elif self.edlevel == 'hed':
                if school['SchoolName'] == 'School of Fine Arts':
                    courses = random.sample(options_hed_art,numcourses)
                    grades = grades_hed_art
                    subjects = subjects_hed_art
                elif school['SchoolName'] == 'School of Science and Health':
                    courses = random.sample(options_hed_sah,numcourses)
                    grades = grades_hed_sah
                    subjects = subjects_hed_sah
                elif school['SchoolName'] == 'School of Business':
                    courses = random.sample(options_hed_bus,numcourses)
                    grades = grades_hed_bus
                    subjects = subjects_hed_bus
                else:
                    logger.info('No defined courses for the higher ed. school generated - either add to _gen_courses function or limit scope of school options')
            else:
                logger.info('ed_level parameter input not recognized - choose to either input high, middle, or hed')
            for course in courses:
                coursename = course
                courseid = self.faker.uuid4()
                schoolname = school['SchoolName']
                schoolid = school['SchoolID'] 
                schooltype = school['SchoolType']
                coursesubject = subjects[course]
                coursegradelevel = grades[course]
                self.courses.loc[len(self.courses.index)] = [coursename,courseid,schoolname,schoolid,schooltype,coursesubject,coursegradelevel]

    def _gen_sections(self):
        for index, course in self.courses.iterrows():
            # if generating sections for an elementary school - instead generate a few more sections per grade
            if course['SchoolType'] == 'Elementary School':
                numsections = random.randint(2,4)
                while numsections > 0:
                    sectionnum = random.randint(1,20)
                    coursename = course['CourseName'] 
                    sectionname = f'{coursename} {sectionnum}'
                    sectionid = self.faker.uuid4()
                    courseid = course['CourseID']
                    schoolname = course['SchoolName']
                    schoolid = course['SchoolID']
                    schooltype = course['SchoolType']
                    sectionsubject = course['CourseSubject']
                    sectiongradelevel = course['CourseGradeLevel']
                    self.sections.loc[len(self.sections.index)] = [sectionname,sectionid,coursename,courseid,schoolname,schoolid,schooltype,sectionsubject,sectiongradelevel]
                    numsections = numsections - 1
            else:
                numsections = random.randint(1,2)
                while numsections > 0:
                    sectionnum = random.randint(100,300)
                    coursename = course['CourseName'] 
                    sectionname = f'{coursename} {sectionnum}'
                    sectionid = self.faker.uuid4()
                    courseid = course['CourseID']
                    schoolname = course['SchoolName']
                    schoolid = course['SchoolID']
                    schooltype = course['SchoolType']
                    sectionsubject = course['CourseSubject']
                    sectiongradelevel = course['CourseGradeLevel']
                    self.sections.loc[len(self.sections.index)] = [sectionname,sectionid,coursename,courseid,schoolname,schoolid,schooltype,sectionsubject,sectiongradelevel]
                    numsections = numsections - 1

    def _gen_enrollment(self,numenroll=6):
        for index, student in self.students.iterrows():
            studentcourses = self.courses[self.courses['SchoolID'] == student['SchoolID']]
            studentgrade = student['Grade']
            # K-12 enrollment assignment
            if self.edlevel == 'k12':
                # only assign elementary students to a single course and section
                if student['SchoolType'] == 'Elementary School':
                    studentgradecourses = studentcourses[studentcourses['CourseGradeLevel'] == studentgrade]
                    coursesample = studentgradecourses.sample(n=1)
                    for index, course in coursesample.iterrows():
                        coursesections = self.sections[self.sections['CourseID'] == course['CourseID']]
                        coursesection = coursesections.sample(n=1)
                        coursesection.reset_index(inplace=True)
                        studentfirstname = student['FirstName']
                        studentlastname = student['LastName']
                        studentname = f'{studentfirstname} {studentlastname}'
                        studentid = student['StudentID']
                        sectionname = coursesection.loc[0,'SectionName']
                        sectionid = coursesection.loc[0,'SectionID']
                        coursename = course['CourseName']
                        courseid = course['CourseID']
                        coursegradelevel = course['CourseGradeLevel']
                        schoolname = course['SchoolName'] 
                        schoolid = course['SchoolID']
                        self.enrollment.loc[len(self.enrollment.index)] = [studentname,studentid,sectionname,sectionid,coursename,courseid,coursegradelevel,schoolname,schoolid]
                # assign high school or middle school students to 6 different course/sections
                else:
                    studentgradecourses = studentcourses[(studentcourses['CourseGradeLevel'] == studentgrade) | (studentcourses['CourseGradeLevel'] == '0')]
                    coursesample = studentgradecourses.sample(n=numenroll)
                    for index, course in coursesample.iterrows():
                        coursesections = self.sections[self.sections['CourseID'] == course['CourseID']]
                        coursesection = coursesections.sample(n=1)
                        coursesection.reset_index(inplace=True)
                        studentfirstname = student['FirstName']
                        studentlastname = student['LastName']
                        studentname = f'{studentfirstname} {studentlastname}'
                        studentid = student['StudentID']
                        sectionname = coursesection.loc[0,'SectionName']
                        sectionid = coursesection.loc[0,'SectionID']
                        coursename = course['CourseName']
                        courseid = course['CourseID']
                        coursegradelevel = course['CourseGradeLevel']
                        schoolname = course['SchoolName'] 
                        schoolid = course['SchoolID']
                        self.enrollment.loc[len(self.enrollment.index)] = [studentname,studentid,sectionname,sectionid,coursename,courseid,coursegradelevel,schoolname,schoolid]
            # Higher Ed. enrollment assignment
            else:
                coursesample = studentcourses.sample(n=numenroll)
                for index, course in coursesample.iterrows():
                    coursesections = self.sections[self.sections['CourseID'] == course['CourseID']]
                    coursesection = coursesections.sample(n=1)
                    coursesection.reset_index(inplace=True)
                    studentfirstname = student['FirstName']
                    studentlastname = student['LastName']
                    studentname = f'{studentfirstname} {studentlastname}'
                    studentid = student['StudentID']
                    sectionname = coursesection.loc[0,'SectionName']
                    sectionid = coursesection.loc[0,'SectionID']
                    coursename = course['CourseName']
                    courseid = course['CourseID']
                    coursegradelevel = course['CourseGradeLevel']
                    schoolname = course['SchoolName'] 
                    schoolid = course['SchoolID']
                    self.enrollment.loc[len(self.enrollment.index)] = [studentname,studentid,sectionname,sectionid,coursename,courseid,coursegradelevel,schoolname,schoolid]