# A Python kernel backed by Pyodide

![Pyodide logo](https://raw.githubusercontent.com/pyodide/pyodide/master/docs/_static/img/pyodide-logo.png)

In [1]:
# Load the Pyodide kernel package and evaluate its version attribute; as the
# final expression of the cell, the value is what a notebook would display.
import pyodide_kernel
pyodide_kernel.__version__

ModuleNotFoundError: No module named 'pyodide_kernel'

## Consolidating the 2021-2022 TAPR (Texas Academic Performance Reports) databases

### Databases:

- Staff
- Student
- District Reference
- Accountability Summary

In [6]:
import pandas as pd
import numpy as np
from string import ascii_uppercase as letters
from IPython.display import display

# School year being consolidated; it selects the folder the TAPR files are read from.
SCHOOL_YEAR = "2021-2022"

# District Reference report (DREF) and the workbook that describes its columns.
district_reference_path, district_reference_reference = (
    f"data/TAPR/{SCHOOL_YEAR}/DREF.csv",
    f"data/TAPR/{SCHOOL_YEAR}/DREF_reference.xlsx",
)

# The District Reference table is the starting frame held in `df`.
df = pd.read_csv(district_reference_path)

def gen_header_column_legend(path):
    """Build a ``{column code: readable label}`` mapping from a reference workbook.

    Only the first sheet of the workbook is read. It must provide a ``NAME``
    column (the codes used as CSV headers) and a ``LABEL`` column (the
    human-readable titles).

    Rows in which either cell is empty are skipped, and the remaining names
    and labels are converted to ``str`` before trailing non-breaking spaces
    are removed, so blank or numeric cells no longer raise ``AttributeError``.

    Args:
        path: Location of the Excel workbook, as accepted by ``pd.ExcelFile``.

    Returns:
        dict: Each ``NAME`` mapped to its ``LABEL``, both without trailing
        non-breaking spaces. If a name appears more than once, the last row
        wins.

    Raises:
        ValueError: If the first sheet lacks a ``NAME`` or ``LABEL`` column.
    """
    # The context manager releases the workbook handle once the sheet is parsed.
    with pd.ExcelFile(path) as workbook:
        legend = workbook.parse(workbook.sheet_names[0])

    # Make sure the required columns exist
    if 'NAME' not in legend.columns or 'LABEL' not in legend.columns:
        raise ValueError("The provided Excel file doesn't have the required 'NAME' or 'LABEL' columns.")

    # Empty cells are read back as NaN (a float), which has no string methods
    # and could not serve as a column name anyway, so those rows are dropped.
    legend = legend.dropna(subset=['NAME', 'LABEL'])

    # Only *trailing* non-breaking spaces are stripped from keys and values.
    return {
        str(name).rstrip('\xa0'): str(label).rstrip('\xa0')
        for name, label in zip(legend['NAME'], legend['LABEL'])
    }

def pad_district_number(dist_num):
    """Normalize a district number to a quoted, zero-padded six-character key.

    Any apostrophes and backticks are removed, surrounding whitespace is
    trimmed, the remaining text is left-padded with zeros to six characters,
    and a single leading apostrophe is put back in front. Text that is
    already six characters or longer is not truncated.

    Args:
        dist_num: The district number as a string, an int, or an
            integer-valued float (e.g. ``1902.0``).

    Returns:
        str: The normalized key, e.g. ``"'001902"``.
    """
    # An integer-valued float would otherwise stringify as "1902.0" and be
    # treated as an already-six-character value, producing a key that matches
    # nothing in the other reports.
    if isinstance(dist_num, float) and dist_num.is_integer():
        dist_num = int(dist_num)

    digits = str(dist_num).replace("'", "").replace("`", "").strip()

    # rjust leaves strings of six or more characters untouched.
    return "'" + digits.rjust(6, "0")

def _rename_with_legend(frame, legend):
    """Rename the columns of ``frame`` using ``legend``, ignoring letter case.

    An exact match on the header is tried first; failing that, the header is
    compared case-insensitively with the legend's keys. Headers without any
    match are left unchanged. This is needed because a CSV header can differ
    from its reference entry only by case (``asvab_status`` in the data versus
    ``ASVAB_STATUS`` in the legend), which a plain dictionary rename skips.
    """
    folded = {str(name).upper(): label for name, label in legend.items()}

    def _label_for(column):
        if column in legend:
            return legend[column]
        return folded.get(str(column).upper(), column)

    return frame.rename(columns=_label_for)


# Give the District Reference columns their readable labels and normalize the
# merge key before any other report is joined onto it.
district_reference_dict = gen_header_column_legend(district_reference_reference)

df = _rename_with_legend(df, district_reference_dict)

df["District Number"] = df["District Number"].apply(pad_district_number)

# Remaining TAPR reports: each CSV is paired with the workbook describing its columns.
staff_path = f"data/TAPR/{SCHOOL_YEAR}/DSTAF.csv"
staff_reference = f"data/TAPR/{SCHOOL_YEAR}/DSTAF_reference.xlsx"

student_path = f"data/TAPR/{SCHOOL_YEAR}/DSTUD.csv"
student_reference = f"data/TAPR/{SCHOOL_YEAR}/DSTUD_reference.xlsx"

accountability_summary_path = f"data/TAPR/{SCHOOL_YEAR}/DISTRATE.csv"
accountability_summary_reference = f"data/TAPR/{SCHOOL_YEAR}/DISTRATE_reference.xlsx"

tapr_reports = [
    (staff_path, staff_reference),
    (student_path, student_reference),
    (accountability_summary_path, accountability_summary_reference),
]

for report_path, reference_path in tapr_reports:
    df_local = pd.read_csv(report_path)

    reference_dict = gen_header_column_legend(reference_path)

    df_local = _rename_with_legend(df_local, reference_dict)

    df_local["District Number"] = df_local["District Number"].apply(pad_district_number)

    # Columns already present in df would be duplicated by the merge, so only
    # the columns that are new in this report are carried over.
    cols_to_keep = df_local.columns.difference(df.columns)

    # "District Number" is already in df, so it is added back explicitly as
    # the key to merge on.
    df_local = df_local[["District Number", *cols_to_keep]]

    # Left join: every district of the reference table is kept even when a
    # report has no row for it.
    df = pd.merge(df, df_local, how="left", on="District Number")

df


{'DISTNAME': 'District Name', 'DISTRICT': 'District Number', 'CNTYNAME': 'County Name', 'COUNTY': 'County Number', 'REGION': 'Region Number', 'D_RATING': '2022 District Overall Grade', 'ASVAB_STATUS': 'District 2022 Armed Services Vocational Aptitude Battery (ASVAB) Status', 'DFLCHART': 'District 2022 Flag - Charter Operator (Y/N)', 'DFLALTED': 'District 2022 Flag - Rated under AEA Procedures (Y/N)', 'OUTCOME': 'District 2022 Special Education Determination Status', 'DAD_POST': 'District 2022: Postsecondary Readiness Distinction'}
['District Number', 'County Name', 'County Number', 'District 2022: Postsecondary Readiness Distinction', 'District 2022 Flag - Rated under AEA Procedures (Y/N)', 'District 2022 Flag - Charter Operator (Y/N)', 'District Name', '2022 District Overall Grade', 'District 2022 Special Education Determination Status', 'Region Number', 'asvab_status']
['District Number', 'District 2021 Finance: Instructional Expenditures Ratio', 'District 2022 Staff: All Staff Minor

Unnamed: 0,District Number,County Name,County Number,District 2022: Postsecondary Readiness Distinction,District 2022 Flag - Rated under AEA Procedures (Y/N),District 2022 Flag - Charter Operator (Y/N),District Name,2022 District Overall Grade,District 2022 Special Education Determination Status,Region Number,...,District 2022 Student: % Econ Disadv,District 2022 Student: % Enrolled in Early College HS,District 2022 Student: % Enrolled in Grades 3-12,District 2022 Student: % Enrolled in a T-Stem academy,District 2022 Student: % LEP Students,District 2022 Student: % Pathways in Technology ECHS,District 2022 Student: % Special Education,District 2022 Student: All Students Count,District 2022 Total AEA Bonus Points Received,Region Name
0,'001902,ANDERSON,'001,1,N,N,CAYUGA ISD,A,Meets Requirements,'07,...,40.8,0.0,78.2,0,1.0,0,14.6,574,.,REGION 07: KILGORE
1,'001903,ANDERSON,'001,0,N,N,ELKHART ISD,A,Meets Requirements,'07,...,45.4,0.0,72.2,0,2.8,0,12.1,1150,.,REGION 07: KILGORE
2,'001904,ANDERSON,'001,0,N,N,FRANKSTON ISD,A,Meets Requirements,'07,...,54.2,0.0,74.4,0,4.1,0,13.1,808,.,REGION 07: KILGORE
3,'001906,ANDERSON,'001,1,N,N,NECHES ISD,A,Meets Requirements,'07,...,54.1,0.0,72.5,0,2.0,0,10.5,342,.,REGION 07: KILGORE
4,'001907,ANDERSON,'001,0,N,N,PALESTINE ISD,B,Needs Assistance,'07,...,81.6,0.0,72,0,17.7,0,13.5,3360,.,REGION 07: KILGORE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1202,'252902,YOUNG,'252,,N,N,NEWCASTLE ISD,A,Meets Requirements,'09,...,62.7,0.0,.,.,1.4,.,17.0,212,.,REGION 09: WICHITA FALLS
1203,'252903,YOUNG,'252,0,N,N,OLNEY ISD,A,Meets Requirements,'09,...,64.2,0.0,69.8,0,7.8,0,16.2,696,.,REGION 09: WICHITA FALLS
1204,'253901,ZAPATA,'253,0,N,N,ZAPATA COUNTY ISD,B,Needs Assistance,'01,...,88.1,0.0,75.8,0,31.6,0,13.2,3284,.,REGION 01: EDINBURG
1205,'254901,ZAVALA,'254,0,N,N,CRYSTAL CITY ISD,B,Needs Intervention,'20,...,84.7,0.0,72.8,0,2.1,0,13.0,1779,.,REGION 20: SAN ANTONIO


In [None]:
# Save the merged table. The file name is built from SCHOOL_YEAR so it always
# matches the year whose data was loaded instead of repeating it by hand.
df.to_excel(f"{SCHOOL_YEAR} TAPR_Merged Selected Reports.xlsx")