In [24]:
# Load Requirements
import zipfile

import numpy as np
import pandas as pd

from helper import *

# Obtain Data

For this project, I used the [Common Core of Data](https://nces.ed.gov/ccd/), which is the US Department of Education's database on publicly funded primary and secondary schools. It is managed by the National Center for Education Statistics.

I downloaded the compressed files from CCD's website. Then I unzipped each file, renamed them, and recompressed the files. The table below shows the files I used.

| School Year        | Universe File Name        | Finance File Name          |
|:------------------ | :-------------------------: |:--------------------------:|
| 1989-1990   | universe_1990.txt  | finance_1990.txt  |
| 1994-1995   | universe_1995.txt  | finance_1995.txt  |
| 1999-2000   | universe_2000.txt  | finance_2000.txt  |
| 2004-2005   | universe_2005.txt  | finance_2005.txt  |
| 2009-2010   | universe_2010.txt  | finance_2010.txt  |
| 2010-2011   | universe_2011.txt  | finance_2011.txt  |
| 2014-2015   | universe_2015_directory.txt<br> universe_2015_membership.txt<br> universe_2015_staff.txt | - |
| 2015-2016   | universe_2016_directory.txt  | - |

Finance files are compressed in finance.zip, and universe files are compressed in universe.zip. Both zipped files are located in the data directory.

# Load Data

Pandas dataframes were used for data wrangling tasks. All files were loaded into Pandas dataframes.

## 1989-1990 Files

In [25]:
# Record layout for universe_1990.txt: field name -> [start, end] character
# offsets. Adjacent fields share a boundary (one field's end is the next
# field's start), so these are presumably used as line[start:end] slices --
# confirm against helper.read_fixed_width.
columns_dict = {
    'LEAID': [0, 7],
    'FIPS': [0, 2],        # same characters as the first two of LEAID
    'STID': [7, 21],
    'NAME': [21, 51],
    'STREET': [51, 76],
    'CITY': [76, 94],
    'ST': [94, 96],
    'ZIP': [96, 101],
    'ZIP4': [101, 105],
    'PHONE': [105, 115],
    'TYPE': [115, 116],
    'UNION': [116, 119],
    'CONUM': [119, 124],
    'CONAME': [124, 149],
    'CMSA': [149, 155],
    'MSC': [155, 156],
    'GRSPAN': [156, 160],  # also exposed as its two halves, GSLO and GSHI
    'GSLO': [156, 158],
    'GSHI': [158, 160],
    'SCH': [160, 165],
    'TEACH': [165, 171],
    'C01': [171, 177],
    'C02': [177, 183],
    'MEMBER': [183, 190],
    'C03': [190, 196],
    'C04': [196, 202],
    'C05': [202, 208],
    'C06': [208, 214],
    'C07': [214, 220],
    'NEWREC': [220, 221],
}

# Start every column with its own empty list, in the same order as the layout
universe_1990 = {field: [] for field in columns_dict}

# Read universe_1990.txt out of the universe archive using the layout above.
# read_fixed_width is not defined in this notebook (presumably supplied by
# `from helper import *`); its result replaces the empty accumulator.
universe_1990 = read_fixed_width(
    'data/universe.zip',
    'universe_1990.txt',
    columns_dict,
    universe_1990,
)

In [26]:
# Load the tab-delimited 1989-1990 finance file straight from the archive.
# Both the archive and the member handle are opened in `with` blocks so they
# are closed as soon as pandas has finished reading (the original left both
# open for the life of the kernel). Every column is read as a string.
with zipfile.ZipFile('data/finance.zip', 'r') as archive:
    with archive.open('finance_1990.txt') as finance_file:
        finance_1990 = pd.read_csv(
            finance_file,
            dtype='str',
            delimiter='\t',
            encoding='windows-1252',
        )

## 1994-1995 Files

In [30]:
# Record layout for universe_1995.txt: field name -> [start, end] character
# offsets. Adjacent fields share a boundary (one field's end is the next
# field's start), so these are presumably used as line[start:end] slices --
# confirm against helper.read_fixed_width.
#
# Insertion order is significant: it is the order in which the columns are
# handed to read_fixed_width.
columns_dict = {
    'LEAID': [0, 7],
    'FIPS': [0, 2],          # same characters as the first two of LEAID
    'STID': [7, 21],
    'NAME': [21, 51],
    'STREET': [51, 81],
    'CITY': [81, 99],
    'ST': [99, 101],
    'ZIP': [101, 106],
    'ZIP4': [106, 110],
    'PHONE': [110, 120],
    'TYPE': [120, 121],
    'UNION': [121, 124],
    'CONUM': [124, 129],
    'CONAME': [129, 154],
    'CMSA': [154, 160],
    'MSC': [160, 161],
    'BOUND': [161, 162],
    # NOTE(review): the 1989-1990 layout in this notebook calls these three
    # fields GRSPAN / GSLO / GSHI. 'GRSPAN97' and 'GSM' are kept as-is here;
    # confirm the intended names against the NCES record layout.
    'GRSPAN97': [162, 166],  # also exposed as its two halves, GSM and GSHI
    'GSM': [162, 164],
    'GSHI': [164, 166],
    'SCH': [166, 171],
    'TEACH': [171, 178],
    'UG': [178, 184],
    'PK12': [184, 190],
    'MEMBER': [190, 197],
    'SPECED': [197, 203],
    'REGDIP': [203, 209],
    'OTHDIP': [209, 215],
    'OTHCOM': [215, 221],
}

# Two sections of the record repeat one field per combination of
# grade number (7-12) x two-letter group code x one-letter suffix (M/F/U),
# with the suffix varying fastest and the grade slowest: 6 * 6 * 3 = 108
# fields each. Generating the names guarantees they all follow the pattern.
dropout_groups = [
    group + str(grade) + suffix
    for grade in range(7, 13)
    for group in ('AM', 'AS', 'HI', 'BL', 'WH', 'UK')
    for suffix in ('M', 'F', 'U')
]

# D-prefixed fields: 5 characters wide, occupying offsets 221-761.
# This fixes the original hand-typed key 'BL7M' (offsets 266-271), which was
# missing its 'D' prefix; it is now 'DBL7M', matching its 107 siblings and its
# flag field 'IBL7M'.
columns_dict.update(
    ('D' + name, [221 + 5 * index, 226 + 5 * index])
    for index, name in enumerate(dropout_groups)
)

# 7-character fields, offsets 761-894
columns_dict.update({
    'PKTCH': [761, 768],
    'KGTCH': [768, 775],
    'ELMTCH': [775, 782],
    'SECTCH': [782, 789],
    'UGTCH': [789, 796],
    'TOTTCH': [796, 803],
    'AIDES': [803, 810],
    'CORSUP': [810, 817],
    'ELMGUI': [817, 824],
    'SECGUI': [824, 831],
    'TOTGUI': [831, 838],
    'LIBSPE': [838, 845],
    'LIBSUP': [845, 852],
    'LEAADM': [852, 859],
    'LEASUP': [859, 866],
    'SCHADM': [866, 873],
    'SCHSUP': [873, 880],
    'STUSUP': [880, 887],
    'OTHSUP': [887, 894],
})

# Single-character I-prefixed fields, offsets 894-901
columns_dict.update({
    'IUG': [894, 895],
    'IPK12': [895, 896],
    'IMEMB': [896, 897],
    'ISPEC': [897, 898],
    'IREGD': [898, 899],
    'IOTHD': [899, 900],
    'IOTHC': [900, 901],
})

# I-prefixed counterparts of the D-prefixed fields: 1 character wide,
# offsets 901-1009, same ordering.
columns_dict.update(
    ('I' + name, [901 + index, 902 + index])
    for index, name in enumerate(dropout_groups)
)

# Remaining single-character I-prefixed fields, offsets 1009-1030
columns_dict.update({
    'IPKTCH': [1009, 1010],
    'IKGTCH': [1010, 1011],
    'IELTCH': [1011, 1012],
    'ISETCH': [1012, 1013],
    'IUGTCH': [1013, 1014],
    'ITOTCH': [1014, 1015],
    'IAIDES': [1015, 1016],
    'ICOSUP': [1016, 1017],
    'IELGUI': [1017, 1018],
    'ISEGUI': [1018, 1019],
    'ITOGUI': [1019, 1020],
    'ILISPE': [1020, 1021],
    'ILISUP': [1021, 1022],
    'ILEADM': [1022, 1023],
    'ILESUP': [1023, 1024],
    'ISCADM': [1024, 1025],
    'ISCSUP': [1025, 1026],
    'ISTSUP': [1026, 1027],
    'IOTSUP': [1027, 1028],
    'ISCH': [1028, 1029],
    'ITEACH': [1029, 1030],
})

# Start every column with its own empty list. Deriving the keys from the
# layout keeps the two dictionaries in sync by construction.
universe_1995 = {field: [] for field in columns_dict}

# Read universe_1995.txt out of the universe archive using the layout above.
# read_fixed_width is not defined in this notebook (presumably supplied by
# `from helper import *`); its result replaces the empty accumulator.
universe_1995 = read_fixed_width(
    'data/universe.zip',
    'universe_1995.txt',
    columns_dict,
    universe_1995,
)

In [32]:
# Load the tab-delimited 1994-1995 finance file straight from the archive.
# Both the archive and the member handle are opened in `with` blocks so they
# are closed as soon as pandas has finished reading (the original left both
# open for the life of the kernel). Every column is read as a string.
with zipfile.ZipFile('data/finance.zip', 'r') as archive:
    with archive.open('finance_1995.txt') as finance_file:
        finance_1995 = pd.read_csv(
            finance_file,
            dtype='str',
            delimiter='\t',
            encoding='windows-1252',
        )