In [1]:
import sys
sys.path.append('../ppda')

import pandas as pd
from datetime import datetime
import re

from db_service import DBService
import config
import os

# Credentials are read from the environment so real passwords never have to be
# saved in the notebook. The fallbacks are the values this cell has always
# used, so it still runs unchanged when the variables are not set.
print("Connecting to ",config.BASE_URL)
conn = DBService()
conn.login(
    os.environ.get('PPDA_USERNAME', 'administrator'),
    os.environ.get('PPDA_PASSWORD', 'admin'),
)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Connecting to  http://localhost:8000


In [2]:
from sis_school_year import SISSchoolYear

from sis_person import SISPerson
from sis_student import SISStudent
from sis_school_class import SISSchoolClass
from sis_school_grade_level import SISSchoolGradeLevel
from sis_family import SISFamily
from sis_family_guardian import SISFamilyGuardian
from sis_family_child import SISFamilyChild

from sis_staff import SISStaff
from sis_timetable import SISTimetable
from sis_timetable_column import SISTimetableColumn
from sis_timetable_day import SISTimetableDay
from sis_timetable_column_row import SISTimetableColumnRow

from sis_course import SISCourse
from sis_course_class import SISCourseClass
from sis_attendance_log_course_class import SISAttendanceLogCourseClass
from sis_attendance_log_school_class import SISAttendanceLogSchoolClass
from sis_class_feed import SISClassFeed
from pp_user import PPUser

def delete_all(cls):
    """Delete every record of ``cls``'s doctype, one page of 500 at a time.

    Args:
        cls: A doctype class exposing ``doctype``, ``find(limit_page_length=...)``
            (returning a table with a ``name`` column) and ``delete_by_id(id)``.

    For 'SIS Person' a failed delete is skipped instead of aborting the run;
    for every other doctype the error propagates to the caller. The loop stops
    as soon as a whole page yields no successful delete, because fetching again
    would return the same undeletable records forever.
    """
    print('Deleting all', cls.doctype)
    while True:
        data = cls.find(limit_page_length=500)
        if len(data) == 0:
            break
        deleted = 0
        for record_id in data['name'].values:
            if cls.doctype == 'SIS Person':
                try:
                    cls.delete_by_id(record_id)
                except Exception:
                    # Best effort for persons: skip the ones that cannot be
                    # deleted. KeyboardInterrupt is no longer swallowed.
                    continue
            else:
                cls.delete_by_id(record_id)
            deleted += 1
        print('Deleted', deleted)
        if deleted == 0:
            # No progress on this page: the next find() would return the same rows.
            print('Stopping:', len(data), 'records could not be deleted')
            break
    print('-'*20)

def reset_db():
    """Empty every imported doctype by running ``delete_all`` on each in turn.

    The sequence below is fixed: attendance logs and class feeds go first and
    persons go last (presumably so that referencing records are removed before
    the records they point to -- confirm against the backend's link rules).
    """
    deletion_order = (
        SISAttendanceLogSchoolClass,
        SISAttendanceLogCourseClass,
        SISClassFeed,
        SISCourseClass,
        SISCourse,
        SISSchoolClass,
        SISFamily,
        SISStudent,
        SISStaff,
        SISPerson,
    )
    for doctype_cls in deletion_order:
        delete_all(doctype_cls)

# reset_db()

In [3]:
# Get current school year
# The id of the school year whose status is "Current"; the import cells below
# pass it as "school_year" on the records they create.
# NOTE(review): assumes find_one always returns a record here -- confirm what
# it returns when no school year is marked "Current".

CUR_SCHOOL_YEAR_ID = SISSchoolYear.find_one(filters={"status": "Current"}).name

### Academic Year Event

In [None]:
from sis_academic_year_event import SISAcademicYearEvent

# Get all academic year events
aca_event_df = pd.read_excel('../input_data/SIS Academic Year Events.xlsx', engine="openpyxl")
# NOTE: this is not the last expression of the cell, so the preview is not displayed.
aca_event_df.head()

# One SIS Academic Year Event per spreadsheet row, attached to the current school year.
for index, row in aca_event_df.iterrows():
    event = SISAcademicYearEvent({
        # Dates are sent as YYYY-MM-DD strings.
        "start_date": datetime.strftime(row['Start Date'], "%Y-%m-%d"),
        "end_date": datetime.strftime(row['End Date'], "%Y-%m-%d"),
        "school_year": CUR_SCHOOL_YEAR_ID,
        # A missing description is stored as an empty string.
        "description": row['Description'] if not pd.isna(row['Description']) else "",
        "title": row['Title'],
    })
    # An event is looked up by (title, school year) before saving.
    event.save_if_not_exists(filters={"title": row['Title'], "school_year": CUR_SCHOOL_YEAR_ID})

### Student, Guardian, School Class, Family

In [None]:
# Load data student, parent, school class
import pandas as pd

df = pd.read_excel('../input_data/FINALIZED_ADMISSION_2324_DATA.xlsx', engine="openpyxl")
# Trim surrounding whitespace from the three name columns before they are split into first/last name.
df['std_full_name'] = df['std_full_name'].str.strip()
df['mother_full_name'] = df['mother_full_name'].str.strip()
df['father_full_name'] = df['father_full_name'].str.strip()
# Force the class label to str because later cells call .split() on it.
df['std_2324_grade'] = df['std_2324_grade'].astype(str)

In [None]:
df.columns

In [None]:
# Show the rows whose student name is a single space-separated word; non-string names count as 0 words and are not shown.
df[df['std_full_name'].apply(lambda x: len(x.split(' ')) if type(x) == str else 0) == 1]

In [None]:
df.info()

In [None]:
# Distinct grade numbers: the second space-separated token of the text before the first '.' in std_2324_grade.
# NOTE(review): a value without a space (e.g. 'nan' from a blank cell) would raise IndexError here -- confirm the column has no blanks.
df['std_2324_grade'].apply(lambda x: x.split('.')[0].split(' ')[1]).unique()

In [None]:
# Validate data
# wssg_std_code must be unique
assert df['wssg_std_code'].is_unique, "wssg_std_code is not unique"

# print duplicate std_full_name
# duplicate_std_full_name = df[df.duplicated('std_full_name')]
# if not duplicate_std_full_name.empty:
#     print("Duplicate std_full_name")
#     print(duplicate_std_full_name)

In [None]:
# Delete all family
# Only one page of at most 3000 families is fetched and deleted here.
families = SISFamily.find(limit_page_length=3000)
# The loop variable is not called `id`: a top-level loop variable stays in the
# kernel namespace and would shadow the builtin id() for every later cell.
for family_id in families['name'].values:
    SISFamily.delete_by_id(family_id)

In [None]:


def split_name(full_name):
    """Split a family-name-first full name into ``(first_name, last_name)``.

    The first whitespace-separated token is the last name and everything after
    it is the first name, e.g. ``"Nguyen Van A"`` -> ``("Van A", "Nguyen")``.

    Args:
        full_name: The full name. Anything that is not a string (None, NaN, ...)
            is treated as unsplittable.

    Returns:
        ``(first_name, last_name)``, or ``(None, None)`` after printing an error
        when the value is not a string or has fewer than two tokens.
    """
    if isinstance(full_name, str):
        # split(None, 1) skips leading whitespace and accepts any whitespace
        # run (spaces, tabs, non-breaking spaces) as the separator, so a stray
        # leading space can no longer produce an empty last name.
        parts = full_name.split(None, 1)
        if len(parts) == 2:
            last_name, first_name = parts
            # With maxsplit the remainder keeps its trailing whitespace.
            return first_name.strip(), last_name
    print(f"ERROR split_name: {full_name}")
    return None, None



# Build (and save if missing) one SIS School Grade Level per grade 1..12.
# grade_levels[i] is the object built for "Grade {i + 1}"; the import loop
# below indexes this list with (grade number - 1).
grade_levels = []
for grade in range(1, 13):
    grade_level = SISSchoolGradeLevel({
        "title": f"Grade {grade}",
        "short_title": f"G{grade}",
        "sequence_number": grade
    })
    # Looked up by title before saving.
    grade_level.save_if_not_exists(filters={"title": f"Grade {grade}"})
    grade_levels.append(grade_level)

# Import the admission sheet one row at a time: student person and student
# record, school class enrolment, guardian persons, then the family that links
# them. The loop stops at the first row whose school class cannot be imported.
for index, row in df.iterrows():
    print("Executing row ", index)
    # Create SIS Person for student, if not exists
    first_name, last_name = split_name(row['std_full_name'])
    # Formatted once; used both on the record and in the lookup filter.
    std_dob = datetime.strftime(row["std_dob"], "%Y-%m-%d")
    std_person = SISPerson({
        "first_name": first_name,
        "last_name": last_name,
        "email": None,
        "phone_number": None,
        "gender": row["std_gender"],
        "date_of_birth": std_dob,
        "primary_role": "Student",
    })
    std_person_exists = std_person.save_if_not_exists(filters={
        "first_name": first_name,
        "last_name": last_name,
        "date_of_birth": std_dob,
    })

    # Create SIS Student with person id
    sis_student = SISStudent({
        "person": std_person.name,
        "wellspring_student_code": row["wssg_std_code"],
    })
    sis_student.save_if_not_exists(filters={
        "wellspring_student_code": row["wssg_std_code"]
    })

    # Create SIS School Class
    # Bound before the try so the error report can always be printed, even
    # when parsing the grade fails before a grade level id is resolved.
    grade_level_id = None
    try:
        # Grade number: second space-separated token of the text before the first '.'.
        grade_level_index = row['std_2324_grade'].split('.')[0].split(' ')[1]
        grade_level_id = grade_levels[int(grade_level_index) - 1].name
        sis_school_class = SISSchoolClass({
            "school_year": CUR_SCHOOL_YEAR_ID,
            "school_grade_level": grade_level_id,
            "title": row['std_2324_grade'],
            "short_title": row['std_2324_grade'].split(' ')[1],
        })
        sis_school_class_exists = sis_school_class.save_if_not_exists(filters={
            "title": row['std_2324_grade'],
            "school_year": CUR_SCHOOL_YEAR_ID,
        })
        if sis_school_class_exists:
            # Reload the stored class so its current participants are available.
            sis_school_class = SISSchoolClass.find_by_id(sis_school_class_exists)
    except Exception as e:
        # Report the real cause. The handler must not repeat the parsing that
        # may have just failed, or it would raise a second, unrelated error.
        print("ERROR importing School Class:", repr(e))
        title_parts = str(row['std_2324_grade']).split(' ')
        print({
            "school_year": CUR_SCHOOL_YEAR_ID,
            "school_grade_level": grade_level_id,
            "title": row['std_2324_grade'],
            "short_title": title_parts[1] if len(title_parts) > 1 else None,
        })
        break


    # Enroll student to school class
    if not hasattr(sis_school_class, "participants"):
        setattr(sis_school_class, "participants", [])
        print("------------Create participants")
    std_already_enrolled = False
    for participant in sis_school_class.participants:
        if participant["person"] == std_person.name:
            std_already_enrolled = True
            break
    if not std_already_enrolled:
        sis_school_class.participants.append({
            "person": std_person.name,
            "role": "Student",
        })
        sis_school_class.save()
        print(f"----------Enroll student to school class {sis_school_class.title} - {std_person.last_name} {std_person.first_name}")

    # Create SIS Person for mother, if not exists
    has_mother = pd.notna(row['mother_full_name'])
    if has_mother:
        mother_first_name, mother_last_name = split_name(row['mother_full_name'])
        mother_person = SISPerson({
            "first_name": mother_first_name,
            "last_name": mother_last_name,
            "email": row["mother_email"] if pd.notna(row["mother_email"]) else None,
            "phone_number": row["mother_mobile_nr_1"] if pd.notna(row["mother_mobile_nr_1"]) else None,
            "gender": "Female",
            "date_of_birth": None,
            "primary_role": "Guardian",
        })

        mother_exists = mother_person.save_if_not_exists(filters={
            "first_name": mother_first_name,
            "last_name": mother_last_name,
            "phone_number": row["mother_mobile_nr_1"] if pd.notna(row["mother_mobile_nr_1"]) else None,
        })

    # Create SIS Person for father, if not exists
    has_father = pd.notna(row['father_full_name'])
    if has_father:
        father_first_name, father_last_name = split_name(row['father_full_name'])
        father_person = SISPerson({
            "first_name": father_first_name,
            "last_name": father_last_name,
            "email": row["father_email"] if pd.notna(row["father_email"]) else None,
            "phone_number": row["father_mobile_nr_1"] if pd.notna(row["father_mobile_nr_1"]) else None,
            "gender": "Male",
            "date_of_birth": None,
            "primary_role": "Guardian",
        })
        father_exists = father_person.save_if_not_exists(filters={
            "first_name": father_first_name,
            "last_name": father_last_name,
            "phone_number": row["father_mobile_nr_1"] if pd.notna(row["father_mobile_nr_1"]) else None,
        })

    # Find the family: candidates share the home address, and the match is the
    # first candidate that already lists one of this row's guardians.
    families = SISFamily.find(filters={"home_address": row["address"]}, output="Object")
    if len(families) > 1:
        print("Address Duplicate")

    family_found = False
    for family in families:
        sis_family = SISFamily.find_by_id(family.name)
        # has_mother / has_father guard the *_exists names, which are only
        # assigned on rows that have that parent.
        # NOTE(review): guardian_exists receives the value returned by
        # save_if_not_exists (used as an id elsewhere in this loop) -- confirm
        # it is the existing person's id.
        if (
            (has_mother and sis_family.guardian_exists(mother_exists))
            or (has_father and sis_family.guardian_exists(father_exists))
        ):
            if not sis_family.child_exists(std_person.name):
                sis_family.children.append({"person": std_person.name})
                sis_family.save()
                print("add student to family", sis_family.home_address)
            family_found = True
            break

    if not family_found:
        # No matching family: create one with this student and whichever parents the row has.
        guardians = []
        if has_mother:
            guardians.append({"person": mother_person.name, "relationship_with_student": "Mother"})
        if has_father:
            guardians.append({"person": father_person.name, "relationship_with_student": "Father"})
        sis_family = SISFamily({
            "home_address": row["address"],
            "children": [{"person": std_person.name}],
            "guardians": guardians
        })
        sis_family.save()


    # if len(families) > 1 and not family_found:
    #     for family in families:
    #         sis_family = SISFamily.find_by_id(family.name)
    #         print({
    #             "name": sis_family.name,
    #             "home_address": sis_family.home_address,
    #             "children": [SISPerson.find_by_id(child['person']).full_name for child in sis_family.children],
    #             "guardians": [SISPerson.find_by_id(guardian['person']).full_name for guardian in sis_family.guardians]
    #         })
    #         print("-------------------------------------------------")
    #     print(mother_person.to_json(), std_person.to_json())
    #     break


    # if family_id:
    #     sis_family = SISFamily.find_by_id(family_id)
    #     if (has_mother and ( not sis_family.guardian_exists(mother_person.name)) or (has_father and (not sis_family.guardian_exists(father_person.name)))):
    #         # Case 1: different family with the same address
    #         guardians = []
    #         if has_mother:
    #             guardians.append({"person": mother_person.name, "relationship_with_student": "Mother"})
    #         if has_father:
    #             guardians.append({"person": father_person.name, "relationship_with_student": "Father"})
            
    #         # create a hash for family address
    #         family_hash = abs(hash(row["address"])) % (10 ** 8)

    #         sis_family = SISFamily({
    #             "home_address": row["address"] + f" ({family_hash})",
    #             "children": [{"person": std_person.name}],
    #             "guardians": guardians
    #         })
    #         sis_family.save()
    #         print("--------------Family with same address", row["address"])
    #         break
    #     else:
    #         # Case 2: same family -> add student to children list, add mother and father to guardians list if not exists
    #         if has_mother and (not sis_family.guardian_exists(mother_person.name)):
    #             sis_family.guardians.append({"person": mother_person.name, "relationship_with_student": "Mother"})
    #         if has_father and (not sis_family.guardian_exists(father_person.name)):
    #             sis_family.guardians.append({"person": father_person.name, "relationship_with_student": "Father"})
    #         if not sis_family.child_exists(std_person.name):
    #             sis_family.children.append({"person": std_person.name})
    #         sis_family.save()
        
    # else:
        # guardians = []
        # if has_mother:
        #     guardians.append({"person": mother_person.name, "relationship_with_student": "Mother"})
        # if has_father:
        #     guardians.append({"person": father_person.name, "relationship_with_student": "Father"})
        # sis_family = SISFamily({
        #     "home_address": row["address"],
        #     "children": [{"person": std_person.name}],
        #     "guardians": guardians
        # })
        # sis_family.save()

In [None]:
abs(hash('123')) % (10 ** 8)

### Course, Course Class, Course Class Person, Timetable

```
Subject Columns:  ['id', 'title', 'short_title']
School Class Columns:  ['id', 'title', 'short_title']
Course Class Columns:  ['grade_level', 'school_class', 'subject', 'course', 'course_class', 'teacher', 'email', 'wssg_id']
Timetable Columns:  ['Thứ', 'Thời gian', 'Tiết', 'Lớp 1.1\n (Ms.Dương)', 'Lớp 1.2\n (Ms. Trâm)', 'Lớp 1.3\n (Ms. Nguyệt)', 'Lớp 1.4\n (Ms. Duyên)', 'Lớp 1.5\n (Ms. Tuyền)', 'Lớp 1.6\n (Ms. Trang)', 'Lớp 1.7\n (Ms. Hà)']
```

TODO:
- [x] Get current school year id
- From timetable_id extract course_class and timetable_day_row_class:
    - [ ] Validation: subject must be in subject_df if the third column is a number
    - [ ] If the third column is not a number, it's a special course class (break, lunch, etc.)
    - [ ] Course Class Title = subject + grade_level + school_class
    - [ ] Get timetable_day_row_class (periods) from timetable_day_row_class
- For each row in course_class_df:
    - [ ] Create SIS Course, if not exists
    - [ ] Create SIS Course Class
    - [ ] Create SIS Course Class Person for teachers
    - [ ] Get student list from SIS School Class and add to SIS Course Class
    - [ ] Find timetable_day_row_class for each course_class
- [ ] Get student from all grade levels and add to special course classes

In [4]:
import unicodedata

# Function to normalize data
def normalize_unicode(data, form='NFC'):
    """Return ``data`` in the given Unicode normalization form.

    Args:
        data: The value to normalize. Strings are normalized; missing values
            (None, NaN, NaT) become None; any other value is returned unchanged
            instead of raising TypeError inside ``unicodedata.normalize``.
        form: Normalization form passed to ``unicodedata.normalize``
            ('NFC', 'NFD', 'NFKC' or 'NFKD').

    Returns:
        The normalized string, None for a missing value, or ``data`` itself
        when it is a non-null non-string.
    """
    if isinstance(data, str):
        return unicodedata.normalize(form, data)
    return data if pd.notna(data) else None

def normalize_df(df):
    """Strip and Unicode-normalize the strings of every object-dtype column.

    Args:
        df: The DataFrame to clean. Its object columns are replaced in place.

    Returns:
        The same ``df`` object, for call-and-reassign use.

    Strings are stripped and passed to ``normalize_unicode``; missing values
    become None. Non-string values in an object column (numbers, times, ...)
    are kept as they are: the ``.str`` accessor would turn them into NaN and
    silently drop that data.
    """
    def _clean(value):
        if isinstance(value, str):
            return normalize_unicode(value.strip())
        return None if pd.isna(value) else value

    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = df[col].apply(_clean)
    return df

def extract_school_class_from_text(text):
    """
    Return the first school class label found in ``text``, or None if there is none.

    A school class label has the form [Grade].[Class order], e.g. 1.1 or 6.10.
    ``text`` may span several lines; line breaks are treated as spaces before searching.
    """
    single_line = ' '.join(text.split('\n'))
    found = re.search(r'\b\d+\.\d+\b', single_line)
    return found.group() if found else None

In [64]:
# Load the three reference sheets and clean their text columns with normalize_df.
subject_df = pd.read_excel('../input_data/SIS Subject Updated.xlsx', engine="openpyxl")
subject_df = normalize_df(subject_df)

# NOTE: the next cell reassigns school_class_df from the database, replacing this frame.
school_class_df = pd.read_excel('../input_data/SIS School Class.xlsx', engine="openpyxl")
school_class_df = normalize_df(school_class_df)

course_class_df = pd.read_excel('../input_data/SIS Course Class Updated.xlsx', engine="openpyxl")
course_class_df = normalize_df(course_class_df)

In [66]:
# Extract data from timetable
# Step 1: Validation: subject must be in subject_df if the third column is a number

# NOTE: this replaces the Excel-based school_class_df with the classes stored in the database.
school_class_df = SISSchoolClass.find(limit_page_length=1000)
# short_title is recomputed as the second space-separated token of the title.
school_class_df['short_title'] = school_class_df['title'].apply(lambda x: x.split(' ')[1])

TIMETABLE_EXCEL_FILENAME = '../input_data/TKB_2324_Tieuhoc.xlsx'
# Name of the subject_df column that timetable cells are compared against.
sc_title = "title"


# TIMETABLE_EXCEL_FILENAME = '../input_data/TIMETABLE_2324_Trunghoc.xlsx'
# sc_title = "short_title"

# Go through all sheets in timetable excel file
sheet_names = pd.read_excel(TIMETABLE_EXCEL_FILENAME, sheet_name=None).keys()

# Distinct cell values seen so far; not_found_subject holds those with no match in subject_df.
not_found_subject = []
found_subject = []
for sheet_name in sheet_names:
    print("Processing sheet ", sheet_name)
    timetable_df = pd.read_excel(TIMETABLE_EXCEL_FILENAME, sheet_name=sheet_name, engine="openpyxl")
    timetable_df = normalize_df(timetable_df)
    
    # Every column from the fourth one onwards is treated as one school class.
    for column in timetable_df.columns[3:]:
        school_class_short_title = extract_school_class_from_text(column)
        school_class_found = school_class_df[school_class_df["short_title"] == school_class_short_title].shape[0]
        if not school_class_found:
            # Reported only; the column's cells are still checked below.
            print("ERROR: Invalid school class", column)
    
        for index, row in timetable_df.iterrows():
            # if pd.notna(row.iloc[2]) and (row.iloc[2] in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']):
            subject_title = row[column]
            subject_found = subject_df[subject_df[sc_title] == subject_title].shape[0]
            # Each unmatched value is reported once, at its first occurrence.
            if subject_found == 0 and subject_title not in not_found_subject:
                print(f"Subject |{subject_title}| not found in subject_df. Class {school_class_short_title}, row {index}")
                not_found_subject.append(subject_title)
            # NOTE: every distinct value is recorded here, including the unmatched ones.
            if subject_title not in found_subject:
                found_subject.append(subject_title)
            # TODO: Validate school class


Processing sheet  Sheet1
Subject |Registration| not found in subject_df. Class 1.1, row 0
Subject |Break| not found in subject_df. Class 1.1, row 3
Subject |Lunch| not found in subject_df. Class 1.1, row 7
Subject |CLB| not found in subject_df. Class 1.1, row 12
Subject |Snack| not found in subject_df. Class 1.1, row 24
Processing sheet  Sheet2
Processing sheet  Sheet3
Processing sheet  Sheet4
Processing sheet  Sheet5


#### Primary Timetable

In [50]:
# Create Timetable TieuHoc

# Create timetable if not exists
timetable = SISTimetable({
    "grade_level_list": "1,2,3,4,5",
    "school_year": CUR_SCHOOL_YEAR_ID,
    "short_title": "TKB_TH",
    "status": "Active",
    "title": "Primary Timetable",
})
timetable.save_if_not_exists(filters={"title": timetable.title})

# Extract timetable column
TIMETABLE_EXCEL_FILENAME = '../input_data/TKB_2324_Tieuhoc.xlsx'
timetable_df = pd.read_excel(TIMETABLE_EXCEL_FILENAME, sheet_name=0, engine="openpyxl")
# Keep only the rows of one day ('Ba') from the first sheet as the template for
# the daily period grid; all five weekdays below share the resulting column.
timetable_df = timetable_df[timetable_df.iloc[:,0]=='Ba']
# First three columns of each row: item[0] day, item[1] "start-end" time range, item[2] period number or label.
timetable_column_dict = timetable_df.iloc[:, :3].values

# Period label -> row type; labels not listed here fall back to "Lesson" (numbers 0-10) or "Other".
type_dict = {
    "Registration": "Other",
    "Break": "Break",
    "Lunch": "Lunch",
    "Snack": "Snack",
    "Lesson": "Lesson"
}

timetable_column = SISTimetableColumn({
    "title": "TT Primary Weekday",
    "short_title": "TT Primary",
})
timetable_column.save_if_not_exists(filters={
    "title": timetable_column.title
})
# The rows are rebuilt from the spreadsheet and saved on every run of this cell.
timetable_column.timetable_column_row = [
    {
        # A value in 0..10 is a numbered lesson ("Period N" / "PN"); anything else keeps its own label.
        "title": item[2] if item[2] not in list(range(11)) else f'Period {item[2]}',
        "short_title": item[2] if item[2] not in list(range(11)) else f'P{item[2]}',
        "type": type_dict[item[2]] if item[2] in type_dict.keys() else "Lesson" if item[2] in list(range(11)) else "Other",
        # The time range is split on '-' into start and end.
        "time_start": item[1].split('-')[0].strip(),
        "time_end": item[1].split('-')[1].strip(),
    }
    for item in timetable_column_dict
]
timetable_column.save()

# Monday to Friday all use the same timetable column.
timetable.timetable_days = [
  { "title": "TKB_TH Monday", "short_title": "TKB_TH Mon", "timetable_column": timetable_column.name, "weekday": "Mon" },
  { "title": "TKB_TH Tuesday", "short_title": "TKB_TH Tue", "timetable_column": timetable_column.name, "weekday": "Tue" },
  { "title": "TKB_TH Wednesday", "short_title": "TKB_TH Wed", "timetable_column": timetable_column.name, "weekday": "Wed" },
  { "title": "TKB_TH Thursday", "short_title": "TKB_TH Thu", "timetable_column": timetable_column.name, "weekday": "Thu" },
  { "title": "TKB_TH Friday", "short_title": "TKB_TH Fri", "timetable_column": timetable_column.name, "weekday": "Fri" },
]
timetable.save()

# Reload both records; the course class cell below reads the 'name' of each day and column row.
timetable = SISTimetable.find_by_id(timetable.name)
timetable_column = SISTimetableColumn.find_by_id(timetable_column.name)

In [70]:
def get_timetable_day(timetable_days, text):
    """Find the timetable day that corresponds to a weekday label.

    Args:
        timetable_days: Iterable of mappings that each have a "weekday" entry
            holding a three-letter code ("Mon" .. "Fri").
        text: Weekday label, either Vietnamese ("Hai" .. "Sáu") or English
            ("Monday" .. "Friday").

    Returns:
        The first entry of ``timetable_days`` whose "weekday" matches, or None
        when no entry matches.

    Raises:
        ValueError: If ``text`` is not one of the known weekday labels.
    """
    weekday_codes = {
        # Vietnamese labels
        "Hai": "Mon",
        "Ba": "Tue",
        "Tư": "Wed",
        "Năm": "Thu",
        "Sáu": "Fri",
        # English labels
        "Monday": "Mon",
        "Tuesday": "Tue",
        "Wednesday": "Wed",
        "Thursday": "Thu",
        "Friday": "Fri",
    }
    if text not in weekday_codes:
        raise ValueError(f"Invalid weekday: {text}")

    wanted = weekday_codes[text]
    return next((day for day in timetable_days if day["weekday"] == wanted), None)

In [68]:
delete_all(SISCourseClass)

Deleting all SIS Course Class
Deleted 500
Deleted 145
--------------------


In [69]:
# Create Course Class for Primary
# For every school-class column of every timetable sheet, create the course and
# course class behind each cell and attach the cell's (day, period) slot to it.

sheet_names = pd.read_excel(TIMETABLE_EXCEL_FILENAME, sheet_name=None).keys()
# Timetable labels that are not in subject_df but still get a shared course class.
special_subjects = ["CLB", "Registration", "Snack", "Break", "Lunch", "TC1", "TC2", "TC3", "TC4"]

not_found_subject = []
found_subject = []
for sheet_name in sheet_names:
    print("Processing sheet ", sheet_name)
    timetable_df = pd.read_excel(TIMETABLE_EXCEL_FILENAME, sheet_name=sheet_name, engine="openpyxl")
    timetable_df = normalize_df(timetable_df)

    # Every column from the fourth one onwards is one school class.
    for column in timetable_df.columns[3:]:
        school_class_short_title = extract_school_class_from_text(column)
        school_class = school_class_df[school_class_df["short_title"] == school_class_short_title]
        if school_class.empty:
            print("ERROR: Invalid school class", column)
            # Skip the column: indexing the empty match below would raise IndexError.
            continue
        school_class = school_class.to_dict(orient="records")[0]['name']
        school_class = SISSchoolClass.find_by_id(school_class)
        print("Processing school class ", school_class_short_title)

        for index, row in timetable_df.iterrows():
            # if pd.notna(row.iloc[2]) and (row.iloc[2] in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']):
            subject_title = row[column]
            subject_found = subject_df[subject_df["title"] == subject_title]
            if (subject_found.empty):
                if (subject_title not in not_found_subject):
                    not_found_subject.append(subject_title)
                    print(f"Subject |{subject_title}| not found in subject_df. Class {school_class_short_title}, row {index}")

                if subject_title not in special_subjects:
                    # Neither a known subject nor a special label (including
                    # blank cells): no course class belongs to this cell.
                    # Falling through would attach the period to the course
                    # class left over from the previous cell.
                    continue

                course = SISCourse({
                    "title": f"{subject_title}",
                    "short_title": f"{subject_title}",
                    "description": "",
                    "program_type": "Other",
                })
                course.save_if_not_exists(filters={"title": course.title})

                # One shared course class per special label, looked up by short title.
                course_class = SISCourseClass({
                    "school_year": CUR_SCHOOL_YEAR_ID,
                    "course": course.name,
                    "class_type": "Other",
                    "title": f'{subject_title}',
                    "short_title": f'{subject_title} Primary',
                })
                course_class.save_if_not_exists(filters={"short_title": course_class.short_title})
                course_class = SISCourseClass.find_by_id(course_class.name)

                # Add this school class's participants that are not enrolled yet.
                for person in school_class.participants:
                    person_exists = False
                    for participant in course_class.participants:
                        if participant['person'] == person['person']:
                            person_exists = True
                            break
                    if not person_exists:
                        course_class.participants.append({
                            "person": person['person'],
                            "role": "Student",
                        })
                course_class.save()

            else:
                if subject_title not in found_subject:
                    found_subject.append(subject_title)

                # Regular subject: one course per grade, one course class per school class.
                grade_level = school_class_short_title.split('.')[0]
                course = SISCourse({
                    "title": f"{subject_title} K{grade_level}",
                    "short_title": f"{subject_found['short_title'].values[0]} K{grade_level}",
                    "description": "",
                    "program_type": subject_found['program'].values[0],
                })
                course.save_if_not_exists(filters={"title": course.title})

                course_class = SISCourseClass({
                    "from_school_class": school_class.name,
                    "school_year": CUR_SCHOOL_YEAR_ID,
                    "course": course.name,
                    "class_type": "School Class",
                    "enrollment_max": 25,
                    "enrollment_min": 0,
                    "participants": school_class.participants,
                    "title": f'{subject_title} K{grade_level} {school_class_short_title}',
                    "short_title": f'{subject_found["short_title"].values[0]} K{grade_level} {school_class_short_title}',
                })
                course_class.save_if_not_exists(filters={"title": course_class.title})
                course_class = SISCourseClass.find_by_id(course_class.name)

            # Add timetable_day_row_class
            timetable_day = get_timetable_day(timetable.timetable_days, row.iloc[0])
            if not timetable_day:
                raise ValueError(f"Invalid weekday: {row.iloc[0]}")

            start_time = row.iloc[1].split('-')[0].strip()
            end_time = row.iloc[1].split('-')[1].strip()
            # Reset for every cell so an unmatched time range cannot silently
            # reuse the column row found for the previous cell.
            timetable_column_row = None
            for column_row in timetable_column.timetable_column_row:
                if column_row['time_start'].startswith(start_time) and column_row['time_end'].startswith(end_time):
                    timetable_column_row = column_row
                    break
            if timetable_column_row is None:
                raise ValueError(
                    f"No timetable column row for time range {row.iloc[1]}. "
                    f"Class {school_class_short_title}, row {index}"
                )

            # Attach the (day, period) slot once per course class.
            period_exists = False
            for item in course_class.timetable_day_row_class:
                if item['timetable_day'] == timetable_day['name'] and item['timetable_column_row'] == timetable_column_row['name']:
                    period_exists = True
                    break
            if not period_exists:
                course_class.timetable_day_row_class.append({
                    "timetable_day": timetable_day['name'],
                    "timetable_column_row": timetable_column_row['name'],
                })
                course_class.save()

Processing sheet  Sheet1
Processing school class  1.1
Subject |Registration| not found in subject_df. Class 1.1, row 0
Subject |Break| not found in subject_df. Class 1.1, row 3
Subject |Lunch| not found in subject_df. Class 1.1, row 7
Subject |CLB| not found in subject_df. Class 1.1, row 12
Subject |Snack| not found in subject_df. Class 1.1, row 24
Processing school class  1.2
Processing school class  1.3
Processing school class  1.4
Processing school class  1.5
Processing school class  1.6
Processing school class  1.7
Processing sheet  Sheet2
Processing school class  2.1
Processing school class  2.2
Processing school class  2.3
Processing school class  2.4
Processing school class  2.5
Processing school class  2.6
Processing school class  2.7
Processing sheet  Sheet3
Processing school class  3.1
Processing school class  3.2
Processing school class  3.3
Processing school class  3.4
Processing school class  3.5
Processing school class  3.6
Processing school class  3.7
Processing sheet  S

In [71]:
# Validate data
# Build {course class title: number of participants} for manual inspection.
course_class_list = SISCourseClass.find(limit_page_length=2000, output="Object")

participant_count = {}
for course_class in course_class_list:
    # Each course class is fetched by id to read its participants.
    cc = SISCourseClass.find_by_id(course_class.name)
    pc = len(cc.participants)
    # NOTE: keyed by title, so course classes sharing a title overwrite each other.
    participant_count[cc.title] = pc

In [80]:
school_class_df['short_title'].values

array(['1.1', '1.2', '1.3', '1.4', '1.5', '1.6', '1.7', '10.1', '10.2',
       '10.3', '10.4', '11.1', '11.2', '11.3', '12.1', '12.2', '12.3',
       '2.1', '2.2', '2.3', '2.4', '2.5', '2.6', '2.7', '3.1', '3.2',
       '3.3', '3.4', '3.5', '3.6', '3.7', '4.1', '4.2', '4.3', '4.4',
       '4.5', '4.6', '4.7', '5.1', '5.2', '5.3', '5.4', '5.5', '5.6',
       '5.7', '5.8', '5.9', '6.1', '6.10', '6.2', '6.3', '6.4', '6.5',
       '6.6', '6.7', '6.8', '6.9', '7.1', '7.2', '7.3', '7.4', '7.5',
       '7.6', '7.7', '8.1', '8.2', '8.3', '8.4', '8.5', '9.1', '9.2',
       '9.3', '9.4', '9.5'], dtype=object)

In [82]:
# Count the total number of participants across primary school classes (grades 1-5).
count = 0
for school_class_id in school_class_df['name'].values:
    school_class = SISSchoolClass.find_by_id(school_class_id)
    # The grade is the part of short_title before the first '.'.
    if school_class.short_title.split('.')[0] in ['1', '2', '3', '4', '5']:
        # NOTE: every participant is counted as a student here, whatever its role.
        print(f"School class {school_class.title}: {len(school_class.participants)} students")
        count += len(school_class.participants)
print(f"Total number of primary student: {count}")

School class Class 1.1: 19 students
School class Class 1.2: 20 students
School class Class 1.3: 20 students
School class Class 1.4: 20 students
School class Class 1.5: 20 students
School class Class 1.6: 20 students
School class Class 1.7: 20 students
School class Class 2.1: 23 students
School class Class 2.2: 24 students
School class Class 2.3: 22 students
School class Class 2.4: 23 students
School class Class 2.5: 22 students
School class Class 2.6: 21 students
School class Class 2.7: 22 students
School class Class 3.1: 23 students
School class Class 3.2: 24 students
School class Class 3.3: 23 students
School class Class 3.4: 20 students
School class Class 3.5: 23 students
School class Class 3.6: 23 students
School class Class 3.7: 24 students
School class Class 4.1: 20 students
School class Class 4.2: 19 students
School class Class 4.3: 22 students
School class Class 4.4: 20 students
School class Class 4.5: 22 students
School class Class 4.6: 23 students
School class Class 4.7: 23 s

#### Secondary Timetable

In [85]:
# Create Timetable TrungHoc (secondary, grades 6-12)
# NOTE: this cell rebinds timetable, timetable_column, type_dict and
# TIMETABLE_EXCEL_FILENAME, replacing the primary values set earlier.

# Create timetable if not exists
timetable = SISTimetable({
    "grade_level_list": "6,7,8,9,10,11,12",
    "school_year": CUR_SCHOOL_YEAR_ID,
    "short_title": "TKB_TrH",
    "status": "Active",
    "title": "Secondary Timetable",
})
timetable.save_if_not_exists(filters={"title": timetable.title})

# Extract timetable column
TIMETABLE_EXCEL_FILENAME = '../input_data/TIMETABLE_2324_Trunghoc.xlsx'
timetable_df = pd.read_excel(TIMETABLE_EXCEL_FILENAME, sheet_name=0, engine="openpyxl")
# Keep only the 'Monday' rows of the first sheet as the template for the daily
# period grid; all five weekdays below share the resulting column.
timetable_df = timetable_df[timetable_df.iloc[:,0]=='Monday']
# First three columns of each row: item[0] day, item[1] "start-end" time range, item[2] period number or label.
timetable_column_dict = timetable_df.iloc[:, :3].values

# Period label -> row type; labels not listed here fall back to "Lesson" (numbers 0-10) or "Other".
type_dict = {
    "Registration": "Other",
    "Break": "Break",
    "Lunch": "Lunch",
    "Snack": "Snack",
    "Lesson": "Lesson"
}

timetable_column = SISTimetableColumn({
    "title": "TT Secondary Weekday",
    "short_title": "TT Secondary",
})
timetable_column.save_if_not_exists(filters={
    "title": timetable_column.title
})
# The rows are rebuilt from the spreadsheet and saved on every run of this cell.
timetable_column.timetable_column_row = [
    {
        # A value in 0..10 is a numbered lesson ("Period N" / "PN"); anything else keeps its own label.
        "title": item[2] if item[2] not in list(range(11)) else f'Period {item[2]}',
        "short_title": item[2] if item[2] not in list(range(11)) else f'P{item[2]}',
        "type": type_dict[item[2]] if item[2] in type_dict.keys() else "Lesson" if item[2] in list(range(11)) else "Other",
        # The time range is split on '-' into start and end.
        "time_start": item[1].split('-')[0].strip(),
        "time_end": item[1].split('-')[1].strip(),
    }
    for item in timetable_column_dict
]
timetable_column.save()

# Monday to Friday all use the same timetable column.
timetable.timetable_days = [
  { "title": "TKB_TrH Monday", "short_title": "TKB_TrH Mon", "timetable_column": timetable_column.name, "weekday": "Mon" },
  { "title": "TKB_TrH Tuesday", "short_title": "TKB_TrH Tue", "timetable_column": timetable_column.name, "weekday": "Tue" },
  { "title": "TKB_TrH Wednesday", "short_title": "TKB_TrH Wed", "timetable_column": timetable_column.name, "weekday": "Wed" },
  { "title": "TKB_TrH Thursday", "short_title": "TKB_TrH Thu", "timetable_column": timetable_column.name, "weekday": "Thu" },
  { "title": "TKB_TrH Friday", "short_title": "TKB_TrH Fri", "timetable_column": timetable_column.name, "weekday": "Fri" },
]
timetable.save()

# Reload both records after saving.
timetable = SISTimetable.find_by_id(timetable.name)
timetable_column = SISTimetableColumn.find_by_id(timetable_column.name)

In [86]:
# Create Course Classes for Secondary and attach their timetable periods.
#
# For every sheet of the timetable workbook, every column from the fourth one
# onwards is treated as one school class and every row as one
# (weekday, time range) slot whose cell holds a subject short title:
#   * subject found in subject_df -> one course per grade level and one
#     course class per school class;
#   * subject listed in special_subjects -> one shared "<subject> Secondary"
#     course class that collects the students of every school class;
#   * anything else -> reported once and skipped.
# The slot is then registered on the resolved course class unless it is
# already present.
#
# NOTE(review): courses are created with a "program" field, while the SIS
# Course listing recorded later in this notebook shows a `program_type`
# column - confirm which field name the backend expects.

# Read the whole workbook once (sheet_name=None returns {sheet name: frame})
# instead of once for the sheet names and then again for every sheet.
timetable_sheets = pd.read_excel(TIMETABLE_EXCEL_FILENAME, sheet_name=None, engine="openpyxl")
sheet_names = timetable_sheets.keys()
special_subjects = ["CLB", "Registration", "Snack", "Break", "Lunch", "TC1", "TC2", "TC3", "TC4"]

not_found_subject = []
found_subject = []
for sheet_name, timetable_df in timetable_sheets.items():
    print("Processing sheet ", sheet_name)
    timetable_df = normalize_df(timetable_df)

    for column in timetable_df.columns[3:]:
        school_class_short_title = extract_school_class_from_text(column)
        school_class = school_class_df[school_class_df["short_title"] == school_class_short_title]
        if school_class.empty:
            # Skip the column: indexing the empty result below would raise
            # IndexError and abort the whole import.
            print("ERROR: Invalid school class", column)
            continue
        school_class = school_class.to_dict(orient="records")[0]['name']
        school_class = SISSchoolClass.find_by_id(school_class)
        print("Processing school class ", school_class_short_title)

        for index, row in timetable_df.iterrows():
            subject_title = row[column]
            subject_found = subject_df[subject_df["short_title"] == subject_title]
            if subject_found.empty:
                if subject_title not in not_found_subject:
                    not_found_subject.append(subject_title)
                    print(f"Subject |{subject_title}| not found in subject_df. Class {school_class_short_title}, row {index}")

                if subject_title not in special_subjects:
                    # No course class can be resolved for this cell. Without
                    # skipping, the period below would be attached to the
                    # course class left over from the previous row (or raise
                    # NameError on the very first row).
                    continue

                course = SISCourse({
                    "title": f"{subject_title}",
                    "short_title": f"{subject_title}",
                    "description": "",
                    "program": "Other",
                })
                course.save_if_not_exists(filters={"title": course.title})

                course_class = SISCourseClass({
                    "school_year": CUR_SCHOOL_YEAR_ID,
                    "course": course.name,
                    "class_type": "Other",
                    "title": f'{subject_title}',
                    "short_title": f'{subject_title} Secondary',
                })
                course_class.save_if_not_exists(filters={"short_title": course_class.short_title})
                course_class = SISCourseClass.find_by_id(course_class.name)

                # Add the school class' people that are not enrolled yet; a
                # set of person ids replaces the nested linear scan.
                enrolled_people = {participant['person'] for participant in course_class.participants}
                for person in school_class.participants:
                    if person['person'] not in enrolled_people:
                        course_class.participants.append({
                            "person": person['person'],
                            "role": "Student",
                        })
                        enrolled_people.add(person['person'])
                course_class.save()

            else:
                if subject_title not in found_subject:
                    found_subject.append(subject_title)

                # The grade level is the part of the class short title before
                # the first dot, e.g. "6" for "6.1".
                grade_level = school_class_short_title.split('.')[0]
                course = SISCourse({
                    "title": f"{subject_title} K{grade_level}",
                    "short_title": f"{subject_found['short_title'].values[0]} K{grade_level}",
                    "description": "",
                    "program": subject_found['program'].values[0],
                })
                course.save_if_not_exists(filters={"title": course.title})

                course_class = SISCourseClass({
                    "from_school_class": school_class.name,
                    "school_year": CUR_SCHOOL_YEAR_ID,
                    "course": course.name,
                    "class_type": "School Class",
                    "enrollment_max": 25,
                    "enrollment_min": 0,
                    "participants": school_class.participants,
                    "title": f'{subject_title} K{grade_level} {school_class_short_title}',
                    "short_title": f'{subject_found["short_title"].values[0]} K{grade_level} {school_class_short_title}',
                })
                course_class.save_if_not_exists(filters={"title": course_class.title})
                course_class = SISCourseClass.find_by_id(course_class.name)

            # Add timetable_day_row_class
            timetable_day = get_timetable_day(timetable.timetable_days, row.iloc[0])
            if not timetable_day:
                raise ValueError(f"Invalid weekday: {row.iloc[0]}")

            time_parts = row.iloc[1].split('-')
            start_time = time_parts[0].strip()
            end_time = time_parts[1].strip()

            # Reset on every row so a slot without a match cannot silently
            # reuse the column row found for a previous row.
            timetable_column_row = None
            for column_row in timetable_column.timetable_column_row:
                if column_row['time_start'].startswith(start_time) and column_row['time_end'].startswith(end_time):
                    timetable_column_row = column_row
                    break
            if timetable_column_row is None:
                print(f"ERROR: No timetable column row for |{row.iloc[1]}|. Class {school_class_short_title}, row {index}")
                continue

            period_exists = any(
                item['timetable_day'] == timetable_day['name']
                and item['timetable_column_row'] == timetable_column_row['name']
                for item in course_class.timetable_day_row_class
            )
            if not period_exists:
                course_class.timetable_day_row_class.append({
                    "timetable_day": timetable_day['name'],
                    "timetable_column_row": timetable_column_row['name'],
                })
                course_class.save()

Processing sheet  Sheet1
Processing school class  6.1
Subject |Registration| not found in subject_df. Class 6.1, row 0
Subject |Break| not found in subject_df. Class 6.1, row 4
Subject |Lunch| not found in subject_df. Class 6.1, row 7
Subject |CLB| not found in subject_df. Class 6.1, row 25
Processing school class  6.2
Processing school class  6.3
Processing school class  6.4
Processing school class  6.5
Processing school class  6.6
Processing school class  6.7
Processing school class  6.8
Processing school class  6.9
Processing school class  6.10
Processing school class  7.1
Processing school class  7.2
Processing school class  7.3
Processing school class  7.4
Processing school class  7.5
Processing school class  7.6
Processing school class  7.7
Processing school class  8.1
Processing school class  8.2
Processing school class  8.3
Processing school class  8.4
Processing school class  8.5
Processing school class  9.1
Processing school class  9.2
Processing school class  9.3
Processing 

### Create User Account for guardians

In [5]:
from user import User

# Create a single test user account.
# NOTE(review): the password is a hard-coded weak literal and this cell (with
# the e-mail address) is stored in the notebook - consider reading the
# credentials from the environment before sharing or committing.
test_acc_fields = dict(
    email="mia.do@wellspringsaigon.edu.vn",
    password="123456",
    first_name="Mia",
    last_name="Do",
)
test_acc = User(test_acc_fields)
test_acc.save()

In [6]:
# Load the SIS Course records into `course_df` for inspection.
# NOTE(review): the output recorded further down reports a pandas DataFrame
# (201 rows, 11 columns) - presumably that is what find_all() returns; confirm.
course_df = SISCourse.find_all()

In [8]:
# Summarise the course table: row count, column names, non-null counts, dtypes.
# The previous cell body had unbalanced brackets (a SyntaxError) and referenced
# a 'pro' column that is not among the columns in the recorded output; that
# output is the DataFrame.info() summary, so the cell is restored to the call
# that produces it.
course_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201 entries, 0 to 200
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          201 non-null    object
 1   owner         201 non-null    object
 2   creation      201 non-null    object
 3   modified      201 non-null    object
 4   modified_by   201 non-null    object
 5   docstatus     201 non-null    int64 
 6   idx           201 non-null    int64 
 7   title         201 non-null    object
 8   short_title   201 non-null    object
 9   description   201 non-null    object
 10  program_type  201 non-null    object
dtypes: int64(2), object(9)
memory usage: 17.4+ KB
