# Executive Summary

This notebook will attempt to answer the following research question:

    What's money got to do with it?

## PLANNING

- [X] Planning
    - [X] import libraries/packages
    - [X] configure notebook environment
    - [X] define helper functions
- [X] Acquire data
    - [X] get PEIMS financial data
    - [X] get STAAR performance data
    - [X] get ETHNICITY data
- [X] Prepare the data
    - [X] prepare ETHNICITY data
        - [X] remove the nans (-999)
        - [X] get rid of unwanted columns
    - [X] prepare STAAR data
        - [X] get rid of duplicates
        - [X] get rid of unwanted columns
        - [X] get rid of NaNs
        - [X] create new columns
        - [X] merge three datasets together
    - [X] prepare PEIMS data
        - [X] get rid of unwanted columns
        - [x] get rid of NaNs
         
*First, let's prepare the notebook environment*

In [1]:
# for manipulating dataframes
import pandas as pd
import numpy as np

# to print out all the outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

## ACQUIRE DATA

The datasets can be downloaded here:

- https://tea.texas.gov/finance-and-grants/state-funding/state-funding-reports-and-data/peims-financial-data-downloads  
- https://www.kaggle.com/datasets/9e3ce42f60ded3ba2a6dd890993493f2c4b284c5cfa035d711bd98fa3359924c?resource=download  
- https://rptsvr1.tea.texas.gov/adhocrpt/adste.html

In [2]:
# Get the PEIMS and STAAR datasets

peims_df = pd.read_csv('../data/in/financial/2007-2022-summarized-peims-financial-data.csv')
staar_df1 = pd.read_csv('../data/in/performance/tidy_campstaar1_2012to2019.csv')
staar_df2 = pd.read_csv('../data/in/performance/tidy_campstaar2_2013to2019.csv')
ethnic_df = pd.read_csv('../data/in/ethnicity/Enrollment Report_Statewide_Districts_Grade_Ethnicity_2018-2019.csv', skiprows=4)

  staar_df1 = pd.read_csv('../data/in/performance/tidy_campstaar1_2012to2019.csv')


## PREPARATION

### ETHNICITY Dataset

In [3]:
ethnic_df.head()

Unnamed: 0,YEAR,REGION,COUNTY NAME,DISTRICT,DISTRICT NAME,ETHNICITY,CHARTER STATUS,GRADE,ENROLLMENT
0,2018-2019,7.0,ANDERSON COUNTY,1902.0,CAYUGA ISD,Asian,TRADITIONAL ISD/CSD,Pre-kindergarten,-999.0
1,2018-2019,7.0,ANDERSON COUNTY,1902.0,CAYUGA ISD,Black or African American,TRADITIONAL ISD/CSD,Pre-kindergarten,-999.0
2,2018-2019,7.0,ANDERSON COUNTY,1902.0,CAYUGA ISD,Two or more races,TRADITIONAL ISD/CSD,Pre-kindergarten,-999.0
3,2018-2019,7.0,ANDERSON COUNTY,1902.0,CAYUGA ISD,White,TRADITIONAL ISD/CSD,Pre-kindergarten,-999.0
4,2018-2019,7.0,ANDERSON COUNTY,1902.0,CAYUGA ISD,Black or African American,TRADITIONAL ISD/CSD,Kindergarten,2.0


In [4]:
ethnic_df = ethnic_df[['DISTRICT',
                       'ETHNICITY',
                       'ENROLLMENT',
                      ]]

In [5]:
ethnic_df.head()

Unnamed: 0,DISTRICT,ETHNICITY,ENROLLMENT
0,1902.0,Asian,-999.0
1,1902.0,Black or African American,-999.0
2,1902.0,Two or more races,-999.0
3,1902.0,White,-999.0
4,1902.0,Black or African American,2.0


In [6]:
ethnic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66510 entries, 0 to 66509
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   DISTRICT    66508 non-null  float64
 1   ETHNICITY   66508 non-null  object 
 2   ENROLLMENT  66508 non-null  float64
dtypes: float64(2), object(1)
memory usage: 1.5+ MB


In [7]:
# get rid of invalid enrollment values where no data is provided (-999)
ethnic_df = ethnic_df[ethnic_df['ENROLLMENT'] != -999]

In [8]:
ethnic_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 60005 entries, 4 to 66509
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   DISTRICT    60003 non-null  float64
 1   ETHNICITY   60003 non-null  object 
 2   ENROLLMENT  60003 non-null  float64
dtypes: float64(2), object(1)
memory usage: 1.8+ MB


In [9]:
# create a column with number of white students enrolled
ethnic_df['ENROLLED_WHITE'] = np.where(ethnic_df['ETHNICITY'] == 'White', ethnic_df['ENROLLMENT'], 0)

# create a column with number of non-white students enrolled
ethnic_df['ENROLLED_X'] = np.where(ethnic_df['ETHNICITY'] != 'White', ethnic_df['ENROLLMENT'], 0)

In [10]:
ethnic_df.head(99)

Unnamed: 0,DISTRICT,ETHNICITY,ENROLLMENT,ENROLLED_WHITE,ENROLLED_X
4,1902.0,Black or African American,2.0,0.0,2.0
5,1902.0,Hispanic/Latino,6.0,0.0,6.0
6,1902.0,White,30.0,30.0,0.0
9,1902.0,Black or African American,2.0,0.0,2.0
10,1902.0,Hispanic/Latino,3.0,0.0,3.0
11,1902.0,White,41.0,41.0,0.0
12,1902.0,Black or African American,3.0,0.0,3.0
13,1902.0,Hispanic/Latino,3.0,0.0,3.0
14,1902.0,Two or more races,2.0,0.0,2.0
15,1902.0,White,37.0,37.0,0.0


In [11]:
ethnic_df = ethnic_df.drop(columns=['ENROLLMENT', 'ETHNICITY'])

In [12]:
ethnic_df.head()

Unnamed: 0,DISTRICT,ENROLLED_WHITE,ENROLLED_X
4,1902.0,0.0,2.0
5,1902.0,0.0,6.0
6,1902.0,30.0,0.0
9,1902.0,0.0,2.0
10,1902.0,0.0,3.0


In [13]:
# turn dataset into something-level data
ethnic_df = ethnic_df.groupby(['DISTRICT',
                              ])[["ENROLLED_WHITE",
                                  "ENROLLED_X"]].sum().reset_index()

In [14]:
ethnic_df.head()

Unnamed: 0,DISTRICT,ENROLLED_WHITE,ENROLLED_X
0,1902.0,447.0,76.0
1,1903.0,981.0,263.0
2,1904.0,598.0,162.0
3,1906.0,245.0,90.0
4,1907.0,907.0,2309.0


In [15]:
# create column with percentage of non-white students and drop the numerator and denominator
ethnic_df['XFACTOR'] = ethnic_df["ENROLLED_X"] / (ethnic_df["ENROLLED_WHITE"] + ethnic_df["ENROLLED_X"] )
ethnic_df = ethnic_df.drop(columns=['ENROLLED_WHITE', 'ENROLLED_X'])

In [16]:
ethnic_df[ethnic_df['DISTRICT'] == 1902.0]

Unnamed: 0,DISTRICT,XFACTOR
0,1902.0,0.145315


In [17]:
# for readability
ethnic_df = ethnic_df.rename(columns={'DISTRICT':'District',
                                      'XFACTOR':'Percentage of Non-White Students'
                        })

In [18]:
ethnic_df['District'] = ethnic_df['District'].astype('int64')

In [19]:
ethnic_df.dtypes

District                              int64
Percentage of Non-White Students    float64
dtype: object

In [20]:
# prepend the district numbers with 0's
ethnic_df['District'] = ethnic_df['District'].apply(lambda x: '{0:0>6}'.format(x))

In [21]:
ethnic_df.head()

Unnamed: 0,District,Percentage of Non-White Students
0,1902,0.145315
1,1903,0.211415
2,1904,0.213158
3,1906,0.268657
4,1907,0.717973


In [22]:
# consolidate the two files into one dataframe
staar_df = pd.concat([staar_df1, staar_df2])

In [23]:
# get rid of duplciates
staar_df = staar_df.drop_duplicates(keep='first')

In [24]:
columns_to_drop = ['data_release',
                   'data_category',
                   'data_level',
                   'release_year'
                  ]

# get rid of unnecessary columns
staar_df.drop(columns=columns_to_drop, inplace=True)

In [25]:
# prepend campus number with 0's
staar_df['campus_number'] = staar_df['campus_number'].apply(lambda x: '{0:0>9}'.format(x))

In [26]:
# drop 'new_rate'
staar_df.drop(columns=['new_rate'], inplace=True)

In [27]:
# get the district number
staar_df['district'] = staar_df['campus_number'].str[:6]

In [28]:
# turn dataset into district-level data
staar = staar_df.groupby(['test_year',
                          'district',
                          'grade_level',
                          'subject',
                          'proficiency',
                          'demog'
                         ])[["numerator", "denominator"]].sum().reset_index()

In [29]:
# remove subsets
staar = staar[staar['grade_level'] == 'all']
staar = staar[staar['subject'] == 'all_subjects']
staar = staar[staar['demog'] == 'all_students']
staar = staar.drop(columns=['grade_level',
                            'subject',
                            'demog'
                           ])

In [30]:
staar.head()

Unnamed: 0,test_year,district,proficiency,numerator,denominator
99,2012,1902,approaches,1030,1219
110,2012,1902,masters,144,1219
120,2012,1902,meets,471,1219
372,2012,1903,approaches,1796,2279
383,2012,1903,masters,203,2279


In [31]:
# create new column
staar['total_student'] = staar['denominator']

In [32]:
staar['proficiency'].value_counts()

proficiency
approaches    9641
masters       7126
meets         4780
Name: count, dtype: int64

In [33]:
# create column with number of not passing students
staar['not_passing'] = np.where(staar['proficiency'] == 'approaches', staar['numerator'], 0)

# create column with number of passing students
staar['passing'] = np.where(staar['proficiency'] != 'approaches', staar['numerator'], 0)

In [34]:
# isolate the 2019 test year and drop the year column
staar = staar[staar['test_year'] == 2019]

In [35]:
staar = staar.drop(columns=['test_year',
                            'proficiency',
                            'numerator',
                            'denominator',
                            'not_passing'
                           ])

In [36]:
staar[staar['district'] == '001902']

Unnamed: 0,district,total_student,passing
2682938,1902,932,0
2682951,1902,932,269
2682964,1902,932,573


In [37]:
# for readability
staar = staar.rename(columns={'district':'District',
                        'total_student':'Total Number of Students',
                        'passing':'Total Number of Passing Students'
                        })

In [38]:
# Turn dataset into something-level data
staar = staar.groupby(['District',
                       'Total Number of Students',
                      ])[["Total Number of Passing Students"]].sum().reset_index()

In [39]:
staar.head(10)

Unnamed: 0,District,Total Number of Students,Total Number of Passing Students
0,1902,932,842
1,1903,1991,1651
2,1904,1365,1064
3,1906,569,420
4,1907,5497,3274
5,1908,2467,1240
6,1909,635,489
7,2901,7268,4476
8,3801,1774,1493
9,3902,4693,1461


### PEIMS Dataset

In [40]:
peims_df.head()

Unnamed: 0,DISTRICT NUMBER,DISTRICT NAME,YEAR,GEN FUNDS-LOCAL TAX REVENUE FROM M&O,ALL FUNDS-LOCAL TAX REVENUE FROM M&O,GEN FUNDS-STATE REVENUE,ALL FUNDS-STATE REVENUE,GEN FUNDS-FEDERAL REVENUE,ALL FUNDS-FEDERAL REVENUE,GEN FUNDS-OTHER LOCAL & INTERMEDIATE REVENUE,ALL FUNDS-OTHER LOCAL & INTERMEDIATE REVENUE,GEN FUNDS-TOTAL OPERATING REVENUE,ALL FUNDS-TOTAL OPERATING REVENUE,GEN FUNDS-LOCAL PROPERTY TAXES FROM I&S,ALL FUNDS-LOCAL PROPERTY TAXES FROM I&S,GEN FUNDS-STATE DEBT FUNDS,ALL FUNDS-STATE DEBT FUNDS,GEN FUNDS-OTHER RECEIPTS,ALL FUNDS-OTHER RECEIPTS,GEN FUNDS-OTHER REVENUE,ALL FUNDS-OTHER REVENUE,GEN FUNDS-TOTAL OPERATING REVENUE AND OTHER REVENUE,ALL FUNDS-TOTAL OPERATING REVENUE AND OTHER REVENUE,GEN FUNDS-EQUITY TRANSFERS,ALL FUNDS-EQUITY TRANSFERS,GEN FUNDS-TOT DEBT SERV FIN AND TRS EST REV,ALL FUNDS-TOT DEBT SERV FIN AND TRS EST REV,GEN FUNDS-TOTAL OPERATING REVENUE AND OTHER REVENUE AND RECAPTUR,ALL FUNDS-TOTAL OPERATING REVENUE AND OTHER REVENUE AND RECAPTUR,GEN FUNDS-DEBT SERVICE FINANCING RELATED REVENUE,ALL FUNDS-DEBT SERVICE FINANCING RELATED REVENUE,GEN FUNDS-ESTIMATED STATE TRS CONTRIBUTIONS,ALL FUNDS-ESTIMATED STATE TRS CONTRIBUTIONS,GEN FUNDS-TOTAL DEBT SERVICE FINANCING AND TRS ESTIMATE REVENUE,ALL FUNDS-TOTAL DEBT SERVICE FINANCING AND TRS ESTIMATE REVENUE,"GEN FUNDS-TOTAL OPERATING, OTR, DEBT SERV FIN, AND TRS EST REVEN","ALL FUNDS-TOTAL OPERATING, OTR, DEBT SERV FIN, AND TRS EST REVEN",GEN FUNDS-TOTAL PAYROLL EXPENDITURES,ALL FUNDS-TOTAL PAYROLL EXPENDITURES,GEN FUNDS-TOTAL PROFESSIONAL & CONTRACTED SERVICES EXPENDITURES,ALL FUNDS-TOTAL PROFESSIONAL & CONTRACTED SERVICES EXPENDITURES,GEN FUNDS-TOTAL SUPPLIES & MATERIALS EXPENDITURES,ALL FUNDS-TOTAL SUPPLIES & MATERIALS EXPENDITURES,GEN FUNDS-TOTAL OTHER OPERATING EXPENDITURES,ALL FUNDS-TOTAL OTHER OPERATING EXPENDITURES,GEN FUNDS-TOTAL OPERATING EXPENDITURES BY OBJ,ALL FUNDS-TOTAL OPERATING EXPENDITURES BY OBJ,GEN FUNDS-TOTAL DEBT SERVICE EXPEND BY OBJ,ALL FUNDS-TOTAL DEBT SERVICE EXPEND BY OBJ,GEN FUNDS-TOTAL CAPITAL PROJECTS EXPEND BY OBJ,ALL FUNDS-TOTAL CAPITAL PROJECTS EXPEND BY OBJ,GEN FUNDS-TOTAL NON-OPER EXPENDITURES BY OBJ,ALL FUNDS-TOTAL NON-OPER EXPENDITURES BY OBJ,GEN FUNDS-TOTAL NON-OPER AND OPER EXPENDITURES BY OBJ,ALL FUNDS-TOTAL NON-OPER AND OPER EXPENDITURES BY OBJ,"GEN FUNDS-INSTRUCTION + TRANSFER EXPEND-FCT11,95","ALL FUNDS-INSTRUCTION + TRANSFER EXPEND-FCT11,95","GEN FUNDS-INSTRUC RESOURCE MEDIA SERVICE EXP, FCT12","ALL FUNDS-INSTRUC RESOURCE MEDIA SERVICE EXP, FCT12","GEN FUNDS-CURRICULUM/STAFF DEVELOPMENT EXP, FCT13","ALL FUNDS-CURRICULUM/STAFF DEVELOPMENT EXP, FCT13","GEN FUNDS-INSTRUC LEADERSHIP EXPEND, FCT21","ALL FUNDS-INSTRUC LEADERSHIP EXPEND, FCT21","GEN FUNDS-CAMPUS ADMINISTRATION EXPEND, FCT23","ALL FUNDS-CAMPUS ADMINISTRATION EXPEND, FCT23","GEN FUNDS-GUIDANCE & COUNSELING SERVICES EXP, FCT31","ALL FUNDS-GUIDANCE & COUNSELING SERVICES EXP, FCT31","GEN FUNDS-SOCIAL WORK SERVICES EXP, FCT32","ALL FUNDS-SOCIAL WORK SERVICES EXP, FCT32","GEN FUNDS-HEALTH SERVICES EXP, FCT33","ALL FUNDS-HEALTH SERVICES EXP, FCT33","GEN FUNDS-TRANSPORTATION EXPENDITURES, FCT34","ALL FUNDS-TRANSPORTATION EXPENDITURES, FCT34","GEN FUNDS-FOOD SERVICE EXPENDITURES, FCT35","ALL FUNDS-FOOD SERVICE EXPENDITURES, FCT35","GEN FUNDS-EXTRACURRICULAR EXPENDITURES, FCT36","ALL FUNDS-EXTRACURRICULAR EXPENDITURES,FCT36","GEN FUNDS-GENERAL ADMINISTRAT EXPEND-FCT41,92","ALL FUNDS-GENERAL ADMINISTRAT EXPEND-FCT41,92","GEN FUNDS-PLANT MAINTENANCE/OPERA EXPEND, FCT51","ALL FUNDS-PLANT MAINTENANCE/OPERA EXPEND, FCT51","GEN FUNDS-SECURITY/MONITORING SERVICE EXPEND, FCT52","ALL FUNDS-SECURITY/MONITORING SERVICE EXPEND, FCT52","GEN FUNDS-DATA PROCESSING SERVICES EXPEND, FCT53","ALL FUNDS-DATA PROCESSING SERVICES EXPEND, FCT53","GEN FUNDS-COMMUNITY SERVICES, FCT61","ALL FUNDS-COMMUNITY SERVICES, FCT61",GEN FUNDS-TOTAL OPERATE EXPEND BY FUNCTION,ALL FUNDS-TOTAL OPERATE EXPEND BY FUNCTION,GEN FUNDS-NON-OPER EXP BY FUNCTION(1X-9X)(65XX),ALL FUNDS-NON-OPER EXP BY FUNCTION(1X-9X)(65XX),GEN FUNDS-NON-OPER EXP BY FUNCTION(1X-9X)(66XX),ALL FUNDS-NON-OPER EXP BY FUNCTION(1X-9X)(66XX),GEN FUNDS-TOT NON-OPER EXPEND BY FUNCTION,ALL FUNDS-TOT NON-OPER EXPEND BY FUNCTION,GEN FUNDS-TOT OPER AND NON-OPER EXP BY FUNCTION,ALL FUNDS-TOT OPER AND NON-OPER EXP BY FUNCTION,GEN FUNDS-REGULAR PROGRAM EXPEND--11,ALL FUNDS-REGULAR PROGRAM EXPEND--11,GEN FUNDS-GIFTED/TALENTED PROGRAM EXPEND--21,ALL FUNDS-GIFTED/TALENTED PROGRAM EXPEND--21,GEN FUNDS-CAREER & TECHNOLOGY PGM EXPEND--22,ALL FUNDS-CAREER & TECHNOLOGY PGM EXPEND--22,GEN FUNDS-STUDENTS WITH DISABILITIES PGM EXPEND--23,ALL FUNDS-STUDENTS WITH DISABILITIES PGM EXPEND--23,"GEN FUNDS-STATE COMPENSATORY ED EXPEND--24, 26, 28, 29, 30, 34","ALL FUNDS-STATE COMPENSATORY ED EXPEND--24, 26, 28, 29, 30, 34","GEN FUNDS-BILINGUAL PROGRAM EXP--25, 35","ALL FUNDS-BILINGUAL PROGRAM EXP--25, 35",GEN FUNDS-HIGH SCHOOL ALLOTMENT PROGRAM EXPEND--31,ALL FUNDS-HIGH SCHOOL ALLOTMENT PROGRAM--31,"GEN FUNDS-PREKINDERGARTEN EXPEND--32,35","ALL FUNDS-PREKINDERGARTEN--32,35",GEN FUNDS-ATHLETICS PROGRAM EXPEND--91,ALL FUNDS-ATHLETICS PROGRAM EXPEND--91,GEN FUNDS-UNDISTRIBUTED PROGRAM EXP--99,ALL FUNDS-UNDISTRIBUTED PROGRAM EXP--99,GEN FUNDS-TOTAL PROGRAM OPERATING EXPENDITURES,ALL FUNDS-TOTAL PROGRAM OPERATING EXPENDITURES,GEN FUNDS-NON OPER EXP BY PIC(65XX),ALL FUNDS-NON OPER EXP BY PIC(65XX),GEN FUNDS-NON OPER EXP BY PIC(66XX),ALL FUNDS-NON OPER EXP BY PIC(66XX),GEN FUNDS-TOT NON-OPER EXPENDITURES BY PIC,ALL FUNDS-TOT NON-OPER EXPENDITURES BY PIC,GEN FUNDS-TOT OPER AND NON-OPER EXP BY PIC,ALL FUNDS-TOT OPER AND NON-OPER EXP BY PIC,GEN FUNDS-TOTAL OPER EXPENDITURES FOR TD,ALL FUNDS-TOTAL OPER EXPENDITURES FOR TD,GEN FUNDS-EQUITY TRANSFERS FOR TD,ALL FUNDS-EQUITY TRANSFERS FOR TD,GEN FUNDS-TOTAL OTHER USES,ALL FUNDS-TOTAL OTHER USES,GEN FUNDS-INTERGOVERN CHARGES EXPEND,ALL FUNDS-INTERGOVERN CHARGES EXPEND,GEN FUNDS-DEBT SERVICE (OBJECT 6500) FOR TD,ALL FUNDS-DEBT SERVICE (OBJECT 6500) FOR TD,GEN FUNDS-CAPITAL PROJECTS(OBJECT 6600) FOR TD,ALL FUNDS-CAPITAL PROJECTS(OBJECT 6600) FOR TD,GEN FUNDS-TOTAL DISBURSEMENTS,ALL FUNDS-TOTAL DISBURSEMENTS,FALL SURVEY ENROLLMENT
0,1902,CAYUGA ISD,2007,"$4,122,552","$4,122,552","$1,114,179","$1,176,283",$0,"$250,168","$215,239","$354,535","$5,451,970","$5,903,538",$0,$0,$0,$0,"$1,418","$1,418","$1,418","$1,418","$5,453,388","$5,904,956",$0,$0,$0,$0,"$5,453,388","$5,904,956",$0,$0,"$198,676","$198,676","$198,676","$198,676","$5,652,064","$6,103,632","$3,405,211","$3,611,026","$444,063","$456,042","$287,153","$492,377","$128,670","$133,649","$4,265,097","$4,693,094","$421,865","$421,865","$145,130","$156,886","$566,995","$578,751","$4,832,092","$5,271,845","$2,450,934","$2,642,749","$76,087","$77,503",$820,"$4,969",$0,"$4,000","$255,309","$255,309","$140,044","$140,044",$0,$0,"$46,080","$46,080","$188,873","$188,873",$0,"$190,100","$186,577","$223,094","$300,100","$300,100","$540,789","$540,789","$1,857","$1,857","$77,627","$77,627",$0,$0,"$4,265,097","$4,693,094","$421,865","$421,865","$145,130","$156,886","$566,995","$578,751","$4,832,092","$5,271,845","$2,368,474","$2,382,088","$9,815","$9,815","$114,517","$114,517","$358,842","$358,842","$125,940","$313,706","$1,197","$1,197",$0,$0,$0,$0,"$147,213","$183,730","$1,139,099","$1,329,199","$4,265,097","$4,693,094","$421,865","$421,865","$145,130","$156,886","$566,995","$578,751","$4,832,092","$5,271,845","$4,265,097","$4,693,094",$0,$0,"$4,411","$5,829","$70,703","$70,703","$421,865","$421,865","$145,130","$156,886","$4,907,206","$5,348,377",569
1,1902,CAYUGA ISD,2008,"$3,641,351","$3,641,351","$1,845,232","$2,350,131",$0,"$1,852,221","$183,293","$609,434","$5,669,876","$8,453,137",$0,$0,$0,$0,$0,"$953,589",$0,"$953,589","$5,669,876","$9,406,726",$0,$0,$0,$0,"$5,669,876","$9,406,726",$0,$0,"$226,753","$266,516","$226,753","$266,516","$5,896,629","$9,673,242","$3,729,195","$5,509,091","$463,617","$934,387","$323,340","$752,505","$137,667","$221,087","$4,653,819","$7,417,070",$0,$0,"$647,852","$632,398","$647,852","$632,398","$5,301,671","$8,049,468","$2,741,830","$4,108,584","$54,416","$79,604","$4,571","$16,968",$0,"$302,836","$258,301","$258,301","$147,403","$877,655",$0,$0,"$45,898","$45,898","$224,996","$224,996",$0,"$217,411","$219,760","$275,561","$310,133","$310,133","$553,803","$606,415","$1,533","$1,533","$91,175","$91,175",$0,$0,"$4,653,819","$7,417,070",$0,$0,"$647,852","$632,398","$647,852","$632,398","$5,301,671","$8,049,468","$2,567,178","$2,598,844","$10,256","$10,256","$113,084","$113,084","$389,832","$2,689,110","$180,245","$339,340","$1,246","$1,246",$0,$0,$0,$0,"$182,724","$182,724","$1,209,254","$1,482,466","$4,653,819","$7,417,070",$0,$0,"$647,852","$632,398","$647,852","$632,398","$5,301,671","$8,049,468","$4,653,819","$7,417,070",$0,$0,"$15,000","$15,000","$75,304","$210,304",$0,$0,"$647,852","$632,398","$5,391,975","$8,274,772",580
2,1902,CAYUGA ISD,2009,"$3,319,164","$3,319,164","$1,950,169","$2,473,373",$0,"$1,714,988","$123,568","$521,780","$5,392,901","$8,029,305",$0,$0,$0,$0,$0,$0,$0,$0,"$5,392,901","$8,029,305","$185,597","$185,597","$185,597","$185,597","$5,578,498","$8,214,902",$0,$0,"$245,989","$245,989","$245,989","$245,989","$5,638,890","$8,275,294","$3,952,356","$5,716,404","$472,396","$941,931","$341,296","$754,588","$165,611","$251,355","$4,931,659","$7,664,278",$0,$0,"$722,766","$803,313","$722,766","$803,313","$5,654,425","$8,467,591","$2,963,259","$4,391,883","$60,497","$84,600","$1,277","$11,375",$0,"$217,697","$264,999","$264,999","$150,041","$845,635",$0,$0,"$46,497","$46,497","$189,287","$189,287",$0,"$256,598","$268,885","$322,772","$258,457","$258,457","$633,954","$679,972",$870,$870,"$93,636","$93,636",$0,$0,"$4,931,659","$7,664,278",$0,$0,"$722,766","$803,313","$722,766","$803,313","$5,654,425","$8,467,591","$2,793,147","$2,841,002","$10,252","$10,252","$133,520","$133,520","$376,075","$2,562,026","$181,981","$364,949","$1,261","$1,261",$0,$0,$0,$0,"$223,733","$277,620","$1,211,690","$1,473,648","$4,931,659","$7,664,278",$0,$0,"$722,766","$803,313","$722,766","$803,313","$5,654,425","$8,467,591","$4,931,659","$7,664,278","$185,597","$185,597",$0,$0,"$166,251","$166,251",$0,$0,"$722,766","$803,313","$6,006,273","$8,819,439",594
3,1902,CAYUGA ISD,2010,"$3,222,688","$3,222,688","$2,302,369","$2,917,268",$0,"$2,851,372","$161,664","$665,811","$5,686,721","$9,657,139",$0,$0,$0,$0,"$34,819","$62,162","$34,819","$62,162","$5,721,540","$9,719,301",$0,$0,$0,$0,"$5,721,540","$9,719,301",$0,$0,"$253,640","$290,497","$253,640","$290,497","$5,975,180","$10,009,798","$4,177,424","$6,152,112","$277,583","$964,779","$391,572","$1,117,922","$170,952","$422,119","$5,017,531","$8,656,932",$0,$0,"$1,498,127","$1,582,837","$1,498,127","$1,582,837","$6,515,658","$10,239,769","$3,150,102","$5,139,086","$58,224","$84,603","$2,273","$14,662",$0,"$257,802","$281,539","$281,539","$151,042","$874,259",$0,$0,"$49,065","$49,065","$197,297","$363,286",$0,"$251,772","$280,267","$322,201","$258,943","$258,943","$480,017","$650,952","$2,596","$2,596","$106,166","$106,166",$0,$0,"$5,017,531","$8,656,932",$0,$0,"$1,498,127","$1,582,837","$1,498,127","$1,582,837","$6,515,658","$10,239,769","$2,894,386","$2,926,417","$10,303","$10,303","$165,656","$179,030","$401,915","$3,409,136","$181,041","$356,797","$1,264","$1,264","$49,181","$49,181",$0,$0,"$234,405","$234,405","$1,079,380","$1,490,399","$5,017,531","$8,656,932",$0,$0,"$1,498,127","$1,582,837","$1,498,127","$1,582,837","$6,515,658","$10,239,769","$5,017,531","$8,656,932",$0,$0,"$28,767","$63,586","$90,462","$168,462",$0,$0,"$1,498,127","$1,582,837","$6,634,887","$10,471,817",628
4,1902,CAYUGA ISD,2011,"$3,152,618","$3,152,618","$2,439,570","$3,091,438",$0,"$2,687,377","$134,887","$603,182","$5,727,075","$9,534,615",$0,$0,$0,$0,$0,"$10,089",$0,"$10,089","$5,727,075","$9,544,704",$0,$0,$0,$0,"$5,727,075","$9,544,704",$0,$0,"$252,603","$292,847","$252,603","$292,847","$5,979,678","$9,837,551","$4,102,439","$6,134,967","$283,364","$984,742","$311,149","$973,794","$154,614","$318,649","$4,851,566","$8,412,152",$0,$0,"$261,902","$272,638","$261,902","$272,638","$5,113,468","$8,684,790","$2,950,685","$4,982,639","$52,170","$78,948",$74,"$14,768",$0,"$308,315","$283,396","$283,396","$152,784","$752,040",$0,$0,"$49,250","$49,250","$238,060","$328,741",$0,"$266,258","$255,095","$309,954","$288,543","$288,543","$466,889","$634,680",$729,$729,"$113,891","$113,891",$0,$0,"$4,851,566","$8,412,152",$0,$0,"$261,902","$272,638","$261,902","$272,638","$5,113,468","$8,684,790","$2,665,401","$2,697,739","$9,891","$9,891","$175,300","$178,400","$404,628","$3,292,103","$196,290","$395,533","$1,216","$1,216","$48,529","$48,529",$0,$0,"$207,921","$207,921","$1,142,390","$1,580,820","$4,851,566","$8,412,152",$0,$0,"$261,902","$272,638","$261,902","$272,638","$5,113,468","$8,684,790","$4,851,566","$8,412,152",$0,$0,"$30,213","$30,213","$96,141","$168,750",$0,$0,"$261,902","$272,638","$5,239,822","$8,883,753",606


In [41]:
peims = peims_df.drop(columns=['DISTRICT NAME'])

In [42]:
# Remove all columns between column name 'A' to 'B'
peims = peims.drop(peims.loc[:, 'GEN FUNDS-LOCAL TAX REVENUE FROM M&O':'ALL FUNDS-TOTAL OPERATING, OTR, DEBT SERV FIN, AND TRS EST REVEN'].columns, axis=1)

In [43]:
# isolate 2019 test year
peims = peims[peims['YEAR'] == 2019]
peims = peims.drop(columns=['YEAR'])

In [44]:
# remove columns that starts with 'GEN'
columns_to_keep = [c for c in peims.columns if c.lower()[:3] != 'gen']
peims = peims[columns_to_keep]

In [45]:
# remove prefix 'all funds'
peims.columns = peims.columns.str.replace("ALL FUNDS-", "")

In [46]:
peims['DISTRICT NUMBER'] = peims['DISTRICT NUMBER'].astype('str')

In [47]:
# padd district numbers with 0's
peims['DISTRICT NUMBER'] = peims['DISTRICT NUMBER'].str.zfill(6)

In [53]:
peims.columns

Index(['DISTRICT NUMBER', 'TOTAL PAYROLL EXPENDITURES',
       'TOTAL PROFESSIONAL & CONTRACTED SERVICES EXPENDITURES',
       'TOTAL SUPPLIES & MATERIALS EXPENDITURES',
       'TOTAL OTHER OPERATING EXPENDITURES',
       'TOTAL OPERATING EXPENDITURES BY OBJ',
       'TOTAL DEBT SERVICE EXPEND BY OBJ',
       'TOTAL CAPITAL PROJECTS EXPEND BY OBJ',
       'TOTAL NON-OPER EXPENDITURES BY OBJ',
       'TOTAL NON-OPER AND OPER EXPENDITURES BY OBJ',
       'INSTRUCTION + TRANSFER EXPEND-FCT11,95',
       'INSTRUC RESOURCE MEDIA SERVICE EXP, FCT12',
       'CURRICULUM/STAFF DEVELOPMENT EXP, FCT13',
       'INSTRUC LEADERSHIP EXPEND, FCT21',
       'CAMPUS ADMINISTRATION EXPEND, FCT23',
       'GUIDANCE & COUNSELING SERVICES EXP, FCT31',
       'SOCIAL WORK SERVICES EXP, FCT32', 'HEALTH SERVICES EXP, FCT33',
       'TRANSPORTATION EXPENDITURES, FCT34',
       'FOOD SERVICE EXPENDITURES, FCT35',
       'EXTRACURRICULAR EXPENDITURES,FCT36',
       'GENERAL ADMINISTRAT EXPEND-FCT41,92',
       

In [54]:
peims = peims.drop(peims.loc[:, 'TOTAL OPERATING EXPENDITURES BY OBJ':'TOTAL NON-OPER AND OPER EXPENDITURES BY OBJ'].columns, axis=1)

In [55]:
peims = peims.drop(peims.loc[:, 'TOTAL OPERATE EXPEND BY FUNCTION':'TOT OPER AND NON-OPER EXP BY FUNCTION'].columns, axis=1)

In [57]:
peims = peims.drop(peims.loc[:, 'NON OPER EXP BY PIC(65XX)':'EQUITY TRANSFERS FOR TD'].columns, axis=1)

In [60]:
peims = peims.drop(peims.loc[:, 'INTERGOVERN CHARGES EXPEND':'FALL SURVEY ENROLLMENT'].columns, axis=1)

In [61]:
peims.columns

Index(['DISTRICT NUMBER', 'TOTAL PAYROLL EXPENDITURES',
       'TOTAL PROFESSIONAL & CONTRACTED SERVICES EXPENDITURES',
       'TOTAL SUPPLIES & MATERIALS EXPENDITURES',
       'TOTAL OTHER OPERATING EXPENDITURES',
       'INSTRUCTION + TRANSFER EXPEND-FCT11,95',
       'INSTRUC RESOURCE MEDIA SERVICE EXP, FCT12',
       'CURRICULUM/STAFF DEVELOPMENT EXP, FCT13',
       'INSTRUC LEADERSHIP EXPEND, FCT21',
       'CAMPUS ADMINISTRATION EXPEND, FCT23',
       'GUIDANCE & COUNSELING SERVICES EXP, FCT31',
       'SOCIAL WORK SERVICES EXP, FCT32', 'HEALTH SERVICES EXP, FCT33',
       'TRANSPORTATION EXPENDITURES, FCT34',
       'FOOD SERVICE EXPENDITURES, FCT35',
       'EXTRACURRICULAR EXPENDITURES,FCT36',
       'GENERAL ADMINISTRAT EXPEND-FCT41,92',
       'PLANT MAINTENANCE/OPERA EXPEND, FCT51',
       'SECURITY/MONITORING SERVICE EXPEND, FCT52',
       'DATA PROCESSING SERVICES EXPEND, FCT53', 'COMMUNITY SERVICES, FCT61',
       'REGULAR PROGRAM EXPEND--11', 'GIFTED/TALENTED PROGRAM EX

In [None]:
# peims = peims.drop(peims.loc[:, 'PREKINDERGARTEN EXPEND BILINGUAL--32':'PREKINDERGARTEN EXPEND SPECIAL ED--32'].columns, axis=1)

In [62]:
peims = peims.rename(columns={'DISTRICT NUMBER':'DISTRICT'})

In [63]:
peims.head()

Unnamed: 0,DISTRICT,TOTAL PAYROLL EXPENDITURES,TOTAL PROFESSIONAL & CONTRACTED SERVICES EXPENDITURES,TOTAL SUPPLIES & MATERIALS EXPENDITURES,TOTAL OTHER OPERATING EXPENDITURES,"INSTRUCTION + TRANSFER EXPEND-FCT11,95","INSTRUC RESOURCE MEDIA SERVICE EXP, FCT12","CURRICULUM/STAFF DEVELOPMENT EXP, FCT13","INSTRUC LEADERSHIP EXPEND, FCT21","CAMPUS ADMINISTRATION EXPEND, FCT23","GUIDANCE & COUNSELING SERVICES EXP, FCT31","SOCIAL WORK SERVICES EXP, FCT32","HEALTH SERVICES EXP, FCT33","TRANSPORTATION EXPENDITURES, FCT34","FOOD SERVICE EXPENDITURES, FCT35","EXTRACURRICULAR EXPENDITURES,FCT36","GENERAL ADMINISTRAT EXPEND-FCT41,92","PLANT MAINTENANCE/OPERA EXPEND, FCT51","SECURITY/MONITORING SERVICE EXPEND, FCT52","DATA PROCESSING SERVICES EXPEND, FCT53","COMMUNITY SERVICES, FCT61",REGULAR PROGRAM EXPEND--11,GIFTED/TALENTED PROGRAM EXPEND--21,CAREER & TECHNOLOGY PGM EXPEND--22,STUDENTS WITH DISABILITIES PGM EXPEND--23,"STATE COMPENSATORY ED EXPEND--24, 26, 28, 29, 30, 34","BILINGUAL PROGRAM EXP--25, 35",HIGH SCHOOL ALLOTMENT PROGRAM--31,"PREKINDERGARTEN--32,35",ATHLETICS PROGRAM EXPEND--91,UNDISTRIBUTED PROGRAM EXP--99,TOTAL PROGRAM OPERATING EXPENDITURES,TOTAL OTHER USES
12,1902,"$6,025,217","$1,075,904","$648,206","$809,559","$4,649,118","$66,490","$4,986","$270,353","$306,385","$998,314",$0,"$37,882","$293,070","$287,406","$413,755","$284,553","$773,085",$0,"$173,489",$0,"$2,778,638","$3,968","$251,350","$3,005,575","$273,747","$9,599","$40,285","$32,890","$304,174","$1,858,660","$8,558,886","$48,633"
28,1903,"$9,093,950","$1,514,689","$784,631","$303,052","$7,043,892","$117,860","$33,175","$66,374","$574,699","$202,086",$0,"$33,657","$422,887","$630,202","$598,484","$558,948","$1,248,908","$13,530","$151,120",$500,"$5,313,722",$93,"$852,319","$1,028,587","$799,037",$0,"$101,243",$0,"$339,045","$3,262,276","$11,696,322","$102,465"
44,1904,"$6,659,596","$927,209","$937,810","$278,109","$4,611,747","$51,126","$157,830",$0,"$466,345","$199,338",$0,"$102,385","$38,800","$411,195","$754,465","$539,512","$1,014,501","$45,482","$409,998",$0,"$3,945,494","$10,154","$552,217","$726,827","$377,013",$0,"$59,567","$114,404","$571,388","$2,445,660","$8,802,724",$481
60,1906,"$3,134,475","$373,513","$408,024","$105,878","$2,087,166","$19,990",$0,"$7,905","$379,101","$75,235",$0,"$40,628","$148,301","$257,465","$210,240","$201,520","$465,549","$10,415","$118,375",$0,"$1,499,301","$14,498","$164,641","$447,072","$402,415","$2,706","$48,748","$29,920",$0,"$1,412,589","$4,021,890","$53,786"
76,1907,"$25,587,063","$5,603,896","$4,134,969","$1,048,416","$18,807,861","$167,823","$535,649","$1,033,275","$2,201,907","$1,443,630","$170,074","$208,736","$1,442,619","$2,071,781","$1,422,648","$1,287,489","$3,937,087","$242,658","$1,006,175","$394,932","$15,527,277","$39,671","$1,625,090","$2,422,707","$3,147,717","$231,026","$302,531","$923,035","$1,214,433","$10,940,857","$36,374,344",$0


In [67]:
ethnic_df.shape
staar.shape
peims.shape

(1201, 2)

(1251, 3)

(1200, 33)

In [66]:
# save intermediary files
ethnic_df.to_csv('../data/inter/clean_ethnic_2019.csv', index=False)
staar.to_csv('../data/inter/clean_staar_2019.csv', index=False)
peims.to_csv('../data/inter/clean_peims_2019.csv', index=False)