In [None]:
###############################################################################
# This notebook provides a utility that can produce clean Pacific SIS bulk    #
# loading templates with data from clean official Pacific EMIS workbooks      #
# Useful in getting bulk data loaded into the Pacific SIS from a clean source #
###############################################################################

# Core stuff
import os
from pathlib import Path
import json
import datetime as dt

# Data stuff
import pandas as pd # Data analysis
import xlrd # excel 
import pyodbc # SQL DB
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
import sqlalchemy as sa

# Pretty printing stuff
from IPython.display import display, HTML
import pprint
pp = pprint.PrettyPrinter(indent=4)

# Initial setup
cwd = os.getcwd()

# Configuration: all settings below come from config.json, which is opened
# relative to the current working directory.
with open('config.json', 'r') as file:
    config = json.load(file)

# EMIS config
emis_lookup = config['emis_lookup']
emis_workbook_name = config['emis_workbook_name']

# SIS config
sis_database = config['sis_database']
sis_tenant_id = config['sis_tenant_id']
sis_user_guid = config['sis_user_guid']

# Config
data_directory = config['data_directory']
country = config['country']
# Timestamp string taken when this cell runs. NOTE: this name holds a plain
# string, not the datetime module (the module is imported as `dt`).
datetime = dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# Locate the workbook
filename = data_directory + '/' + country + '/workbooks/' + emis_workbook_name

# Fail fast when the workbook is missing. This replaces a bare
# `open(filename, "r")` whose handle was never used or closed (and which opened
# a binary .xlsx file in text mode); pandas opens the file itself when reading.
if not os.path.isfile(filename):
    raise FileNotFoundError("EMIS workbook not found: {}".format(filename))

In [None]:
# Report which workbook is about to be read.
print("Opening {}".format(filename))

# Read the 'Students' sheet; header=2 makes pandas take the column names from
# the third row of the sheet (row index 2, zero-based).
df_workbook_students = pd.read_excel(
    filename,
    sheet_name='Students',
    header=2,
    engine='openpyxl',
)

# Show the loaded roster for a visual check.
display(df_workbook_students)

In [None]:
# Summarise what the workbook covers: the school year, the school names and
# the school IDs found in the Students sheet.

# Drop blank cells before taking the first value so that an empty leading row
# cannot make the school year NaN.
school_year = df_workbook_students['SchoolYear'].dropna().unique()[0]
print("School year {}".format(school_year))

# Distinct school names with missing values removed (dropna also covers
# None/NaT, which a string comparison against 'nan' would let through).
school_names = df_workbook_students['School Name'].dropna().unique().tolist()
print("School names {}".format(school_names))

# School IDs are only reported here, so they are left unfiltered.
school_ids = df_workbook_students['School ID'].unique()
print("School IDs {}".format(school_ids))

In [None]:
# List the column labels of the loaded Students sheet so they can be compared
# with the labels the later cells rename and select.
df_workbook_students.columns

In [None]:
# Rename the EMIS workbook columns to the headings used by the SIS bulk
# loading template.
df_workbook_students = df_workbook_students.rename(columns={
    'National Student ID': 'Student ID',
    'First Name': 'First/Given Name',
    'Last Name': 'Last/Family Name',
    'Date of Birth': 'Date of Birth (YYYY.MM.DD)',
    'Citizenship': 'Nationality',
})

# Columns the template needs that are derived or constant.
df_workbook_students['Alternate ID'] = df_workbook_students['Student ID']
df_workbook_students['First Language'] = 'English'

# Every row gets the date on which this cell is run as its enrollment date.
# (A hard-coded '2023.09.01' used to be assigned first and was immediately
# overwritten by this value; that dead assignment has been removed.)
enroll_date = dt.datetime.now().strftime('%Y.%m.%d')
df_workbook_students['Enrollment Date (YYYY.MM.DD)'] = enroll_date

# Format the date of birth as YYYY.MM.DD text. The `.dt` accessor needs a
# datetime column; once this line has run the column holds strings, so this
# cell cannot be run a second time without reloading the workbook first.
df_workbook_students['Date of Birth (YYYY.MM.DD)'] = (
    df_workbook_students['Date of Birth (YYYY.MM.DD)'].dt.strftime('%Y.%m.%d')
)

# NOTE: this replaces whatever values came from the renamed 'Citizenship'
# column with a single constant for every student.
df_workbook_students['Nationality'] = 'Federated States of Micronesia'

# Keep only the columns used downstream, in this order ('School Name' is used
# later to split the roster per school and is dropped before writing).
sis_template_columns = [
    'School Name',
    'Alternate ID',
    'Student ID',
    'First/Given Name',
    'Middle Name',
    'Last/Family Name',
    'Gender',
    'Date of Birth (YYYY.MM.DD)',
    'Nationality',
    'First Language',
    'Grade Level',
    'Enrollment Date (YYYY.MM.DD)',
]
df_workbook_students_final = df_workbook_students[sis_template_columns]

df_workbook_students_final

In [None]:
# Spot-check: display only the template rows whose school is Chuuk High School.
df_workbook_students_final.loc[df_workbook_students_final['School Name'] == 'Chuuk High School']

In [None]:
# Directory that receives one roster workbook per school; created if missing.
export_dir = data_directory + '/' + country + '/workbooks/sis-bulk-load-templates/'
Path(export_dir).mkdir(parents=True, exist_ok=True)


def _school_filename_part(school_name):
    """Return a school name made safe for use inside a file name.

    The characters '/', '.', '&' and ' ' are each replaced with '-', and any
    run of consecutive dashes is then collapsed to a single '-'.

    Parameters:
        school_name: the school name; converted with str() so a non-string
            value does not raise.

    Returns:
        The cleaned name as a string.
    """
    cleaned = str(school_name)
    for unsafe in ("/", ".", "&", " "):
        cleaned = cleaned.replace(unsafe, "-")
    # Loop until no doubled dash remains; two fixed replace passes would leave
    # '--' behind for runs of five or more dashes.
    while "--" in cleaned:
        cleaned = cleaned.replace("--", "-")
    return cleaned


for school in school_names:
    print("Saving students from school {} for the year {} into the SIS bulk template format".format(school, school_year))

    # Handle any school name with characters that won't work as filenames (and that shouldn't)
    school_str = _school_filename_part(school)

    # Rows for this school only; 'School Name' is dropped from the written
    # sheet since it was needed only to select the rows.
    df_workbook_students_final2 = df_workbook_students_final[df_workbook_students_final['School Name'] == school].copy()
    del df_workbook_students_final2['School Name']

    # Build the file name with format() so a numeric school year (e.g. 2023
    # read from Excel as a number) does not raise on string concatenation.
    roster_path = export_dir + 'students-{}-{}-roster.xlsx'.format(school_str, school_year)

    # Write to Excel the data for a final observation before a direct SQL insertion
    with pd.ExcelWriter(roster_path) as writer:
        df_workbook_students_final2.to_excel(writer, index=False)