In [1]:
import pandas as pd
import numpy as np
import pickle
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

## Create address column 

In [78]:
#load address data 
address_data = pd.read_csv('../data/sc122a.txt', delimiter = '\t', header = 0, encoding='latin-1')

# Filter to only open schools OPEN in 2012-13 school year (so they probably have a working URL)
filtered_STATUS12 = (address_data['STATUS'] == 1) | (address_data['STATUS'] == 3) | (address_data['STATUS'] == 4) | (address_data['STATUS'] == 5) | (address_data['STATUS'] == 8)

address_data = address_data.loc[filtered_STATUS12].reset_index()
print(address_data.shape)
address_data.head()

  interactivity=interactivity, compiler=compiler, result=result)


(100413, 310)


Unnamed: 0,index,SURVYEAR,NCESSCH,FIPST,LEAID,SCHNO,STID,SEASCH,LEANM,SCHNAM,...,WHITE,WHALM,WHALF,PACIFIC,HPALM,HPALF,TR,TRALM,TRALF,TOTETH
0,0,2012,10000200277,1,100002,277,210,20,ALABAMA YOUTH SERVICES,SEQUOYAH SCH - CHALKVILLE CAMPUS,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1,1,2012,10000201402,1,100002,1402,210,25,ALABAMA YOUTH SERVICES,EUFAULA SCH - EUFAULA CAMPUS,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,2,2012,10000201667,1,100002,1667,210,50,ALABAMA YOUTH SERVICES,CAMPS,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,3,2012,10000201670,1,100002,1670,210,60,ALABAMA YOUTH SERVICES,DET CTR,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,4,2012,10000201705,1,100002,1705,210,30,ALABAMA YOUTH SERVICES,WALLACE SCH - MT MEIGS CAMPUS,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


In [79]:
street = address_data['LSTREE']
city = address_data['LCITY']
zipcode = list(map(str, address_data['LZIP'])) #convert all int in LZIP to strings so they can be appended 
state = address_data['LSTATE']

In [80]:
def make_address(street, city, state, zipcode): 
    '''Returns a new list of complete addresses with combined street, city, state, and zipcode 
    '''
    addresses = [] #empty list to fill with full addresses
    for i in np.arange(len(street)): 
        full_address = street[i].lower() + ' ' + city[i].lower() + ' ' + state[i].lower() + ' ' + zipcode[i].lower() #for every street in list of streets, append city, state, and zip to create full address
        addresses.append(full_address) #append full address into empty list of addresses
    return addresses 

In [83]:
full_address = make_address(street, city, state, zipcode) #list of full addresses 

In [84]:
address_data['ADDRESS13'] = full_address #add column of full addresses

In [85]:
#select only NCESSCH and ADRESS13 columns because we're joining on NCESSCH and we only want to add the ADDRESS13 values to charters_2015.pkl
address_NCESSCH_data = address_data[['NCESSCH', 'ADDRESS13']] 

## Match formatting of addresses and names in CER dataframe to formatting of new address data 

In [8]:
CER = pd.read_csv('CER_2012-13.csv', encoding = 'latin-1') #open CER file 

In [9]:
#loop through every address, removing commas and periods, and converting all letters to lowercase
for i in np.arange(len(CER['CER_ADDRESS'])):
    CER.loc[i,'CER_ADDRESS'] = CER.loc[i,'CER_ADDRESS'].replace(',', '').replace('.','').lower() 


In [10]:
#uniform formatting of name by making all characters lowercase and removing all punctuation
CER['CER_NAME'] = [str(name).lower() for name in CER['CER_NAME']]

## Add address column to nonduplicate charter dataframe

In [11]:
#load nonduplicate charter data 
charters_nonduplicate = pd.read_csv('../../nowdata/parsing/overlaps_removed_df.csv', sep = '\t', header = 0)

  interactivity=interactivity, compiler=compiler, result=result)


In [99]:
#merge addresses to charters_nonduplicate based on NCESSCH 
charters_nonduplicate_with_address = pd.merge(charters_nonduplicate, address_NCESSCH_data, how = 'left', on = 'NCESSCH') 
charters_nonduplicate_with_address = charters_nonduplicate_with_address.dropna(subset=['ADDRESS13'])

In [100]:
#match formatting for name and address 
charters_nonduplicate_with_address['SCHNAM12'] = [str(name).lower() for name in charters_nonduplicate_with_address['SCHNAM12']]

charters_nonduplicate_with_address['ADDRESS13'] = [str(address) for address in charters_nonduplicate_with_address['ADDRESS13']]

## Create new column of combined name+address to use as comparison with fuzzy ratio

In [101]:
#combine name and address for charters 
chartername = charters_nonduplicate_with_address['SCHNAM12']
charteraddress = charters_nonduplicate_with_address['ADDRESS13']
charters_nonduplicate_with_address['address_name_combined'] = [name+' '+address for name,address in zip(chartername, charteraddress)]
#charters_nonduplicate_with_address
                                                               
#combine name and address for CER 
CERname = CER['CER_NAME']
CERaddress = CER['CER_ADDRESS']
CER['address_name_combined'] = [name+' '+address for name,address in zip(CERname, CERaddress)]

## Using fuzzy ratio 

1. List fuzzy ratio between name+address columns in both data frames 
2. Keep the highest fuzzy ratio 
3. If fuzzy ratio is over 70, change the column name to its match 

In [102]:

index = 0
for combined_charters in charters_nonduplicate_with_address['address_name_combined']:
    ratio = [fuzz.ratio(combined_charters, combinedCER) for combinedCER in CER['address_name_combined']]
    greatest_match_index = np.asarray(ratio).argmax()
    greatest_match_name = ratio[greatest_match_index]
    if greatest_match_name >= 70:
        print(str(index) + ' ' + 'name match found')
        charters_nonduplicate_with_address.loc[index, 'address_name_combined'] = CER.loc[greatest_match_index, 'address_name_combined']
    else:
        print(str(index))
    index = index + 1
        

0 name match found
1 name match found
2 name match found
3 name match found
4 name match found
5 name match found
6 name match found
7 name match found
8 name match found
9
10 name match found
11 name match found
12 name match found
13 name match found
14 name match found
15 name match found
16 name match found
17 name match found
18
19 name match found
20 name match found
21 name match found
22 name match found
23 name match found
24 name match found
25 name match found
26 name match found
27
28 name match found
29 name match found
30 name match found
31 name match found
32 name match found
33 name match found
34 name match found
35 name match found
36 name match found
37 name match found
38 name match found
39 name match found
40 name match found
41 name match found
42
43 name match found
44 name match found
45 name match found
46 name match found
47 name match found
48 name match found
49 name match found
50 name match found
51 name match found
52 name match found
53 name match foun

425 name match found
426 name match found
427 name match found
428 name match found
429 name match found
430 name match found
431 name match found
432 name match found
433
434
435 name match found
436 name match found
437 name match found
438 name match found
439 name match found
440 name match found
441 name match found
442 name match found
443 name match found
444 name match found
445 name match found
446 name match found
447 name match found
448 name match found
449 name match found
450 name match found
451
452 name match found
453 name match found
454 name match found
455 name match found
456 name match found
457 name match found
458 name match found
459 name match found
460 name match found
461 name match found
462 name match found
463 name match found
464 name match found
465 name match found
466 name match found
467 name match found
468 name match found
469 name match found
470 name match found
471
472 name match found
473 name match found
474 name match found
475 name match fou

858 name match found
859 name match found
860 name match found
861 name match found
862 name match found
863 name match found
864
865 name match found
866 name match found
867 name match found
868 name match found
869 name match found
870 name match found
871 name match found
872 name match found
873 name match found
874 name match found
875 name match found
876 name match found
877 name match found
878 name match found
879 name match found
880 name match found
881
882 name match found
883 name match found
884 name match found
885 name match found
886 name match found
887 name match found
888 name match found
889 name match found
890 name match found
891 name match found
892 name match found
893 name match found
894 name match found
895
896 name match found
897 name match found
898 name match found
899 name match found
900 name match found
901 name match found
902 name match found
903 name match found
904 name match found
905 name match found
906 name match found
907 name match found
9

1270 name match found
1271 name match found
1272 name match found
1273 name match found
1274 name match found
1275 name match found
1276 name match found
1277 name match found
1278 name match found
1279 name match found
1280 name match found
1281 name match found
1282 name match found
1283 name match found
1284 name match found
1285 name match found
1286 name match found
1287 name match found
1288 name match found
1289 name match found
1290
1291 name match found
1292 name match found
1293 name match found
1294 name match found
1295 name match found
1296 name match found
1297 name match found
1298 name match found
1299 name match found
1300 name match found
1301
1302 name match found
1303 name match found
1304 name match found
1305 name match found
1306 name match found
1307 name match found
1308 name match found
1309 name match found
1310 name match found
1311 name match found
1312 name match found
1313 name match found
1314 name match found
1315 name match found
1316 name match found


1678 name match found
1679 name match found
1680 name match found
1681 name match found
1682 name match found
1683 name match found
1684 name match found
1685 name match found
1686 name match found
1687 name match found
1688 name match found
1689 name match found
1690 name match found
1691 name match found
1692 name match found
1693 name match found
1694 name match found
1695 name match found
1696 name match found
1697 name match found
1698 name match found
1699 name match found
1700 name match found
1701 name match found
1702 name match found
1703 name match found
1704 name match found
1705 name match found
1706 name match found
1707 name match found
1708 name match found
1709 name match found
1710 name match found
1711 name match found
1712 name match found
1713 name match found
1714 name match found
1715 name match found
1716 name match found
1717 name match found
1718 name match found
1719 name match found
1720 name match found
1721 name match found
1722 name match found
1723 name 

2067 name match found
2068 name match found
2069 name match found
2070 name match found
2071 name match found
2072 name match found
2073 name match found
2074 name match found
2075 name match found
2076 name match found
2077 name match found
2078
2079 name match found
2080 name match found
2081 name match found
2082 name match found
2083 name match found
2084 name match found
2085 name match found
2086 name match found
2087 name match found
2088 name match found
2089
2090 name match found
2091 name match found
2092 name match found
2093 name match found
2094 name match found
2095 name match found
2096 name match found
2097 name match found
2098 name match found
2099 name match found
2100 name match found
2101 name match found
2102 name match found
2103
2104 name match found
2105 name match found
2106 name match found
2107 name match found
2108 name match found
2109 name match found
2110 name match found
2111 name match found
2112
2113
2114 name match found
2115 name match found
2116 na

2461 name match found
2462 name match found
2463 name match found
2464 name match found
2465 name match found
2466 name match found
2467 name match found
2468 name match found
2469 name match found
2470 name match found
2471 name match found
2472 name match found
2473 name match found
2474 name match found
2475 name match found
2476 name match found
2477 name match found
2478 name match found
2479 name match found
2480 name match found
2481 name match found
2482 name match found
2483 name match found
2484 name match found
2485 name match found
2486 name match found
2487 name match found
2488 name match found
2489 name match found
2490 name match found
2491 name match found
2492 name match found
2493 name match found
2494 name match found
2495
2496 name match found
2497
2498
2499 name match found
2500 name match found
2501 name match found
2502 name match found
2503 name match found
2504 name match found
2505 name match found
2506 name match found
2507 name match found
2508 name match f

2867 name match found
2868 name match found
2869 name match found
2870 name match found
2871 name match found
2872 name match found
2873 name match found
2874 name match found
2875 name match found
2876
2877 name match found
2878 name match found
2879 name match found
2880 name match found
2881 name match found
2882 name match found
2883 name match found
2884 name match found
2885 name match found
2886 name match found
2887 name match found
2888 name match found
2889 name match found
2890 name match found
2891 name match found
2892
2893 name match found
2894 name match found
2895 name match found
2896 name match found
2897
2898 name match found
2899 name match found
2900 name match found
2901 name match found
2902 name match found
2903 name match found
2904 name match found
2905 name match found
2906 name match found
2907 name match found
2908 name match found
2909 name match found
2910 name match found
2911 name match found
2912 name match found
2913 name match found
2914 name match f

3253 name match found
3254 name match found
3255 name match found
3256 name match found
3257 name match found
3258 name match found
3259 name match found
3260 name match found
3261 name match found
3262 name match found
3263 name match found
3264 name match found
3265 name match found
3266 name match found
3267 name match found
3268 name match found
3269 name match found
3270 name match found
3271 name match found
3272 name match found
3273 name match found
3274 name match found
3275 name match found
3276 name match found
3277 name match found
3278
3279 name match found
3280 name match found
3281 name match found
3282 name match found
3283 name match found
3284 name match found
3285 name match found
3286 name match found
3287 name match found
3288 name match found
3289 name match found
3290 name match found
3291 name match found
3292
3293 name match found
3294 name match found
3295 name match found
3296 name match found
3297 name match found
3298 name match found
3299 name match found


3652 name match found
3653 name match found
3654 name match found
3655 name match found
3656 name match found
3657 name match found
3658
3659 name match found
3660 name match found
3661 name match found
3662 name match found
3663 name match found
3664 name match found
3665 name match found
3666 name match found
3667 name match found
3668 name match found
3669 name match found
3670 name match found
3671 name match found
3672 name match found
3673 name match found
3674 name match found
3675 name match found
3676 name match found
3677 name match found
3678 name match found
3679 name match found
3680 name match found
3681
3682 name match found
3683 name match found
3684 name match found
3685 name match found
3686 name match found
3687 name match found
3688 name match found
3689 name match found
3690 name match found
3691 name match found
3692 name match found
3693 name match found
3694 name match found
3695 name match found
3696 name match found
3697 name match found
3698 name match found


4040 name match found
4041 name match found
4042 name match found
4043 name match found
4044 name match found
4045 name match found
4046 name match found
4047 name match found
4048 name match found
4049 name match found
4050 name match found
4051 name match found
4052 name match found
4053 name match found
4054 name match found
4055 name match found
4056 name match found
4057 name match found
4058 name match found
4059 name match found
4060 name match found
4061 name match found
4062 name match found
4063 name match found
4064 name match found
4065 name match found
4066 name match found
4067 name match found
4068 name match found
4069 name match found
4070 name match found
4071 name match found
4072 name match found
4073 name match found
4074 name match found
4075 name match found
4076 name match found
4077 name match found
4078 name match found
4079 name match found
4080 name match found
4081
4082 name match found
4083
4084 name match found
4085 name match found
4086 name match found


4425 name match found
4426 name match found
4427 name match found
4428 name match found
4429 name match found
4430 name match found
4431 name match found
4432 name match found
4433 name match found
4434
4435 name match found
4436 name match found
4437 name match found
4438 name match found
4439 name match found
4440 name match found
4441
4442 name match found
4443 name match found
4444 name match found
4445 name match found
4446 name match found
4447 name match found
4448 name match found
4449 name match found
4450 name match found
4451 name match found
4452 name match found
4453 name match found
4454 name match found
4455 name match found
4456 name match found
4457 name match found
4458 name match found
4459 name match found
4460 name match found
4461 name match found
4462 name match found
4463 name match found
4464 name match found
4465 name match found
4466 name match found
4467 name match found
4468 name match found
4469 name match found
4470 name match found
4471 name match found


4830 name match found
4831 name match found
4832 name match found
4833 name match found
4834 name match found
4835 name match found
4836 name match found
4837 name match found
4838 name match found
4839 name match found
4840 name match found
4841 name match found
4842 name match found
4843 name match found
4844 name match found
4845 name match found
4846 name match found
4847 name match found
4848 name match found
4849 name match found
4850 name match found
4851 name match found
4852 name match found
4853 name match found
4854 name match found
4855 name match found
4856 name match found
4857 name match found
4858 name match found
4859 name match found
4860
4861 name match found
4862 name match found
4863 name match found
4864 name match found
4865 name match found
4866 name match found
4867 name match found
4868 name match found
4869
4870 name match found
4871 name match found
4872 name match found
4873 name match found
4874 name match found
4875 name match found
4876 name match found


5259 name match found
5260 name match found
5261 name match found
5262 name match found
5263 name match found
5264 name match found
5265
5266 name match found
5267
5268 name match found
5269
5270 name match found
5271 name match found
5272 name match found
5273 name match found
5274 name match found
5275 name match found
5276 name match found
5277 name match found
5278 name match found
5279 name match found
5280
5281
5282
5283
5284 name match found
5285 name match found
5286 name match found
5287 name match found
5288 name match found
5289 name match found
5290 name match found
5291
5292
5293 name match found
5294
5295 name match found
5296
5297 name match found
5298 name match found
5299 name match found
5300 name match found
5301 name match found
5302
5303 name match found
5304 name match found
5305 name match found
5306 name match found
5307 name match found
5308
5309 name match found
5310 name match found
5311 name match found
5312 name match found
5313 name match found
5314
5315 n

5739 name match found
5740 name match found
5741 name match found
5742 name match found
5743 name match found
5744 name match found
5745 name match found
5746 name match found
5747 name match found
5748 name match found
5749 name match found
5750 name match found
5751 name match found
5752 name match found
5753 name match found
5754 name match found
5755 name match found
5756 name match found
5757 name match found
5758 name match found
5759 name match found
5760 name match found
5761 name match found
5762 name match found
5763 name match found
5764 name match found
5765 name match found
5766 name match found
5767 name match found
5768 name match found
5769 name match found
5770 name match found
5771 name match found
5772 name match found
5773 name match found
5774 name match found
5775 name match found
5776 name match found
5777 name match found
5778 name match found
5779 name match found
5780 name match found
5781 name match found
5782 name match found
5783 name match found
5784 name 

## Merge URL with charter data 

In [112]:
#merge based on combined name and address

charters_merge_CER = pd.merge(charters_nonduplicate_with_address, CER, how = 'left', left_on = ['address_name_combined'], right_on = ['address_name_combined']) 



In [113]:
CER_columns = list(CER.columns) #list of all columns labels in CER
CER_columns.remove('CER_URL') #remove CER_URL from the list 

In [114]:
charters_merge_CER = charters_merge_CER.drop(labels = CER_columns, axis = 1) #drop all columns in CER except CER_URL 

In [115]:
len(charters_merge_CER)

8461

## Drop NAN, empty, and 0 values 

In [118]:
charters_merge_CER_cleaned = charters_merge_CER[charters_merge_CER['CER_URL'] != ''][charters_merge_CER['CER_URL'] != '0'].dropna(subset = ['CER_URL', 'SCHNAM12'])

3425

In [126]:
non_null_charter = charters_nonduplicate_with_address.dropna(subset = ['SCHNAM12'])

6175

## Check URL matches

In [127]:
#number of rows in charter data 
len(charters_nonduplicate_with_address)

8456

In [128]:
#number of rows in merged charter + URL data 
len(charters_merge_CER['NCESSCH']) #resulting merge has 6 more rows than original charters_nonduplicate

8461

In [130]:
#number of non-NAN, empty, or 0 value URLs + school name in merged data  
len(charters_merge_CER_cleaned)

3425

In [129]:
#number of non-NAN, empty, or 0 value URLs + school name in charter data 
len(non_null_charter)

6175

In [131]:
#number of unmatched schools 
len(non_null_charter) - len(charters_merge_CER_cleaned)

2750

In [134]:
non_null_charter['SCHNAM12']

1                                      ayaprun elitnaurvik
2                                 ketchikan charter school
3        tongass school of arts and sciences charter sc...
4                                  aquarian charter school
5                        family partnership charter school
8                                       winterberry school
9                             eagle academy charter school
10                                 frontier charter school
11                       highland tech high charter school
12                             rilke schule charter school
13                   alaska native cultural charter school
15                         juneau community charter school
16                          aurora borealis charter school
17                                        fireweed academy
18                      soldotna montessori charter school
19                  kaleidoscope school of arts & sciences
20                                  academy charter scho

In [135]:
charters_merge_CER_cleaned['SCHNAM12']

0                                     ayaprun elitnaurvik
1                                ketchikan charter school
2       tongass school of arts and sciences charter sc...
3                                 aquarian charter school
4                       family partnership charter school
5                                      winterberry school
7                                 frontier charter school
8                       highland tech high charter school
9                             rilke schule charter school
10                  alaska native cultural charter school
11                        juneau community charter school
12                         aurora borealis charter school
13                                       fireweed academy
15                 kaleidoscope school of arts & sciences
16                                 academy charter school
17                    midnight sun family learning center
18                               american charter academy
19            

In [133]:
index=1
for schname in non_null_charter['SCHNAM12']: 
    if schname not in charters_merge_CER_cleaned['SCHNAM12']:
        print(schname + ' ' + str(index)) 
        index = index +1

ayaprun elitnaurvik 1
ketchikan charter school 2
tongass school of arts and sciences charter school 3
aquarian charter school 4
family partnership charter school 5
winterberry school 6
eagle academy charter school 7
frontier charter school 8
highland tech high charter school 9
rilke schule charter school 10
alaska native cultural charter school 11
juneau community charter school 12
aurora borealis charter school 13
fireweed academy 14
soldotna montessori charter school 15
kaleidoscope school of arts & sciences 16
academy charter school 17
midnight sun family learning center 18
american charter academy 19
twindly bridge charter school 20
fronteras charter school 21
birchtree charter school 22
anvil city science academy 23
chinook montessori charter school 24
star of the north secondary school 25
effie kokrine charter school 26
watershed charter school 27
horseshoe trails elementary school 28
center for academic success  the #1 29
center for academic success  the #2 30
center for academi

woodland polytechnic academy 1688
cox academy 1689
lazear charter academy 1690
fame public charter 1691
envision academy for arts & tech 1692
aspire california college preparatory academy 1693
community school for creative education 1694
yu ming charter 1695
urban montessori charter 1696
urban corps of san diego county charter 1697
california heritage youthbuild academy 1698
orange county conservation corps charter 1699
castle rock 1700
klamath river early college of the redwoods 1701
uncharted shores academy 1702
soledad enrichment action charter high 1703
odyssey charter 1704
jardin de la infancia 1705
opportunities unlimited charter high 1706
academia avance charter 1707
los angeles international charter high 1708
wisdom academy for young scientists 1709
environmental charter middle 1710
celerity sirius charter 1711
westchester secondary charter 1712
youth & family academy charter 1713
frontier charter academy 1714
community leadership academy 1715
colorado springs charter academy 1

walden green montessori 3188
west mi academy of arts and academics 3189
black river public school 3190
vanderbilt charter academy 3191
saginaw county transition academy 3192
st. clair county learning academy 3193
honey creek community school 3194
casa richard academy 3195
aisha shule/web dubois prep. academy school 3196
plymouth educational center 3197
plymouth educational center preparatory high schoo 3198
plymouth educational center - 9th grade 3199
nataki talibah schoolhouse of detroit 3200
michigan technical academy middle school 3201
michigan technical academy elementary 3202
detroit west preparatory academy 3203
martin luther king jr. education center academy 3204
gaudior academy 3205
woodward academy 3206
eaton academy 3207
summit academy 3208
cesar chavez academy elementary 3209
cesar chavez middle school 3210
cesar chavez high school 3211
cesar chavez academy intermediate 3212
commonwealth community development academy 3213
midland academy of advanced and creative studies 3214

dove science academy es (okc) 4687
tulsa schl of arts/sciences 4688
lighthouse academies of tulsa 4689
four rivers community school 4690
southwest charter school 4691
the ivy school 4692
bennett pearson academy charter school 4693
city view charter school 4694
ione community charter school 4695
mosier community school 4696
mosier middle school 4697
ballston community school 4698
annex charter school 4699
arlington community charter school 4700
baker web academy 4701
baker early college 4702
burnt river school 4703
arco iris spanish immersion school 4704
hope chinese charter school 4705
homesource family charter 4706
triangle lake charter school 4707
silvies river charter school 4708
butte falls charter school 4709
camas valley school 4710
north columbia academy 4711
corbett charter school 4712
muddy creek charter school 4713
cove charter school 4714
resource link charter school 4715
powell butte community charter school 4716
insight school of oregon-charter option 4717
luckiamute valle

In [None]:
def common_data(list1, list2): 
    result = False
    for x in list1: 
        for y in list2: 
            if x = y: 
                result = True
                return result

In [57]:
#number of non-NAN URL matches
non_null_vals = charters_merge_CER[charters_merge_CER['CER_URL'] != ''][charters_merge_CER['SCHNAM12'] != 'nan']
#non_null_vals = non_null_vals[non_null_vals['SCHNAM12'] ]
len(non_null_vals)

SyntaxError: invalid syntax (<ipython-input-57-d9ae1121239b>, line 2)

In [56]:
non_null_vals['SCHNAM12']

KeyError: 'SCHNAM12'

In [39]:
#number of unmatched schools 
len(non_null_charter) - len(non_null_vals)

926

# Why Unmatched URLS?
Unmatchated URLS(466) + Matched URLS(5707) = Total With Dropped NaN (6173) URLS. <br>
Question: Why could we not find match for 466 URLS? Possible reasons:
- Duplicate URLS? Perhaps a school may have multiple locations so those are all linked to the same website so it shows up multiple times in dataset?

In [26]:
charters_merge_CER[['SCHNAM12', 'CER_URL']].dropna()

Unnamed: 0,SCHNAM12,CER_URL
1,ayaprun elitnaurvik,http://www.yupik.org/#
2,ketchikan charter school,http://kcs.kgbsd.org/#
3,tongass school of arts and sciences charter sc...,http://www.tongassschool.org/#
4,aquarian charter school,http://www.aquariancharterschool.com/#
5,family partnership charter school,http://www.fpcs.net/#
8,winterberry school,http://www.winterberryschool.org/#
9,eagle academy charter school,http://www.eagleacademycharterschool.com/#
10,frontier charter school,http://www.frontierk12.org/#
11,highland tech high charter school,http://www.highlandtech.org/#
13,alaska native cultural charter school,http://www.asdk12.org/schools/anccs/pages/#


## Explore unmatched URLs

In [42]:
#list of unmatched URLs 
index=1
for url in list(non_null_CER_URL): 
    if url not in list(non_null_vals):
        print(url + ' ' + str(index)) 
        index = index +1

http://www.the100schools.com/# 1
http://www.aplusarts.com/# 2
http://www.columbus.k12.oh.us/career_education/prep.htm# 3
https://apex.diplomaplus.net/# 4
http://www.sccresa.org/ata# 5
http://ace.wheatlandhigh.org/# 6
http://www.a3school.org/# 7
http://www.academyofbioscience.org/# 8
http://www.actcharter.org/# 9
http://www.theacademyofhope.org/# 10
http://www.academyofmoorecounty.com/# 11
http://www.ci.amesbury.ma.us/# 12
http://www.ade.state.az.us/edd/NewDetails.asp?EntityID=79986&RefTypeID=1035&SchoolType=CS# 13
http://accelcyber.org/Home.html# 14
http://www.altaacademy.org/# 15
http://acclaimcharter.com/# 16
http://www.acecharterschool.org/# 17
http://www.acehs.org/# 18
http://www.acehs.org/# 19
http://www.achievementfirst.org/schools/new-york-schools/achievement-first-brooklyn-high-school/about/# 20
http://www.achievemenfirst.org/# 21
http://www.achievementcharter.com/# 22
http://actacademy.org/# 23
http://www.labcharter.org/about/index.html# 24
https://adamstraditionalacademy.org/

http://www.kipptulsa.org/# 325
http://kootenaibridgeacademy.org/education/school/school.php?sectionid=43# 326
http://www.ladylibertyacademycs.org/# 327
http://www.lhsd.k12.wi.us/HCA/student_services.shtml# 328
http://www.pucschools.org/lakeviewcharterhighschool/# 329
http://www.hallco.org/eschool/main/index.asp# 330
http://lanier.ebrschools.org/# 331
http://www.charteracademies.com/academy-of-lathrup-village/index.htm# 332
http://www.uncommonschools.org/lpb/home/# 333
http://www.learncharter.org/campus_learn3.aspx# 334
http://www.lena.k12.wi.us/# 335
http://www.bcoe.org/# 336
http://www.bcoe.org/hearthstone# 337
http://lcba.choiceed.org/# 338
http://www.leffertsgardens.org/# 339
http://www.legacyk12.org/# 340
http://www.legalprep.org/index.php# 341
http://www.lemondeimmersion.org/# 342
http://www.clayton.k12.ga.us/facilities/otherdistrictfacilities/lewisschool.asp# 343
http://www.libertyhigh.us/# 344
http://www.lifeskillscenters.com/about_school.htm# 345
http://lighthouse.charter.k12.m

In [101]:
charters_nonduplicate_with_address['SCHNAM12'][charters_nonduplicate_with_address['SCHNAM12'].str.contains('academy of arts and academics')]

5513    west mi academy of arts and academics
8516            academy of arts and academics
Name: SCHNAM12, dtype: object

In [104]:
charters_nonduplicate_with_address['ADDRESS13'][charters_nonduplicate_with_address['ADDRESS13'].str.contains('springfield or')]

8516    615 main st springfield or 97477
Name: ADDRESS13, dtype: object

In [105]:
charters_merge_CER[charters_merge_CER['SCHNAM12'] == 'academy of arts and academics']

Unnamed: 0,"% Civilian Population 16 to 19 Years: High School Graduate, or Enrolled (in School)","% Civilian Population 16 to 19 Years: Not High School Graduate, Not Enrolled (Dropped Out)",% Civilian Population in Labor Force 16 Years and Over: Employed,% Civilian Population in Labor Force 16 Years and Over: Unemployed,"% Employed Civilian Population 16 Years and Over: Agriculture, Forestry, Fishing and Hunting, and Mining","% Employed Civilian Population 16 Years and Over: Arts, Entertainment, and Recreation, and Accommodation and Food Services",% Employed Civilian Population 16 Years and Over: Construction,"% Employed Civilian Population 16 Years and Over: Educational Services, and Health Care and Social Assistance","% Employed Civilian Population 16 Years and Over: Finance and Insurance, and Real Estate and Rental and Leasing",% Employed Civilian Population 16 Years and Over: Information,...,close_rate15,close_rate16,close_rate99,ess_strength_x,ess_strength_y,index,prog_strength_x,prog_strength_y,ADDRESS13,CER_URL
8522,91.44,8.56,91.18,8.82,2.38,10.13,5.63,24.83,5.13,1.59,...,0.0,0.0,0.0,0.160776,0.160776,10272.0,0.189255,0.189255,615 main st springfield or 97477,http://www.a3school.org/Academy_of_Arts_and_Ac...


In [106]:
CER['CER_URL'][CER['CER_URL'].str.contains('http://www.a3school.org')]

69    http://www.a3school.org/Academy_of_Arts_and_Ac...
70                            http://www.a3school.org/#
Name: CER_URL, dtype: object

In [107]:
CER[CER['CER_URL'] == 'http://www.a3school.org/#']

Unnamed: 0,CER_NAME,CER_ADDRESS,CER_URL,CER_DESCRIPTION,CER_OPENED,CER_MEMBER,CER_GRADES,CER_GRADE_CATEGORIES,address_name_combined
70,academy of arts and academics (a3),615 main st springfield or 97477,http://www.a3school.org/#,Magnet school conversion with project based le...,2012,0,9-12,"High,",academy of arts and academics (a3) 615 main st...


In [113]:
CER.iloc[69:71]

Unnamed: 0,CER_NAME,CER_ADDRESS,CER_URL,CER_DESCRIPTION,CER_OPENED,CER_MEMBER,CER_GRADES,CER_GRADE_CATEGORIES,address_name_combined
69,academy of arts and academics,615 main street springfield or 97477,http://www.a3school.org/Academy_of_Arts_and_Ac...,,2010,112,9-12,"High,",academy of arts and academics 615 main street ...
70,academy of arts and academics (a3),615 main st springfield or 97477,http://www.a3school.org/#,Magnet school conversion with project based le...,2012,0,9-12,"High,",academy of arts and academics (a3) 615 main st...


In [None]:
#charters_merge_CER.to_csv('../../nowdata/backups/charters_full_250_CER_URLs.pkl')