In [2]:
import pandas as pd
import numpy as np
import pickle
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

## Create address column 

In [3]:
#load address data 
address_data = pd.read_csv('../data/sc122a.txt', delimiter = '\t', header = 0, encoding='latin-1')

# Filter to only open schools OPEN in 2012-13 school year (so they probably have a working URL)
filtered_STATUS12 = (address_data['STATUS'] == 1) | (address_data['STATUS'] == 3) | (address_data['STATUS'] == 4) | (address_data['STATUS'] == 5) | (address_data['STATUS'] == 8)

address_data = address_data.loc[filtered_STATUS12].reset_index()
print(address_data.shape)
address_data.head()

  interactivity=interactivity, compiler=compiler, result=result)


(100413, 310)


Unnamed: 0,index,SURVYEAR,NCESSCH,FIPST,LEAID,SCHNO,STID,SEASCH,LEANM,SCHNAM,...,WHITE,WHALM,WHALF,PACIFIC,HPALM,HPALF,TR,TRALM,TRALF,TOTETH
0,0,2012,10000200277,1,100002,277,210,20,ALABAMA YOUTH SERVICES,SEQUOYAH SCH - CHALKVILLE CAMPUS,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1,1,2012,10000201402,1,100002,1402,210,25,ALABAMA YOUTH SERVICES,EUFAULA SCH - EUFAULA CAMPUS,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,2,2012,10000201667,1,100002,1667,210,50,ALABAMA YOUTH SERVICES,CAMPS,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,3,2012,10000201670,1,100002,1670,210,60,ALABAMA YOUTH SERVICES,DET CTR,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,4,2012,10000201705,1,100002,1705,210,30,ALABAMA YOUTH SERVICES,WALLACE SCH - MT MEIGS CAMPUS,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


In [4]:
street = address_data['LSTREE']
city = address_data['LCITY']
zipcode = list(map(str, address_data['LZIP'])) #convert all int in LZIP to strings so they can be appended 
state = address_data['LSTATE']

In [5]:
def make_address(street, city, state, zipcode): 
    '''Returns a new list of complete addresses with combined street, city, state, and zipcode 
    '''
    addresses = [] #empty list to fill with full addresses
    for i in np.arange(len(street)): 
        full_address = street[i].lower() + ' ' + city[i].lower() + ' ' + state[i].lower() + ' ' + zipcode[i].lower() #for every street in list of streets, append city, state, and zip to create full address
        addresses.append(full_address) #append full address into empty list of addresses
    return addresses 

In [6]:
full_address = make_address(street, city, state, zipcode) #list of full addresses 

In [29]:
address_data['ADDRESS13'] = full_address #add column of full addresses

In [30]:
#select only NCESSCH and ADRESS13 columns because we're joining on NCESSCH and we only want to add the ADDRESS13 values to charters_2015.pkl
address_NCESSCH_data = address_data[['NCESSCH', 'ADDRESS13']] 
len(address_NCESSCH_data)

100413

## Match formatting of addresses and names in CER dataframe to formatting of new address data 

In [57]:
CER = pd.read_csv('../data/CER_2012-13.csv', encoding = 'latin-1') #open CER file 

In [58]:
#loop through every address, removing commas and periods, and converting all letters to lowercase
for i in np.arange(len(CER['CER_ADDRESS'])):
    CER.loc[i,'CER_ADDRESS'] = CER.loc[i,'CER_ADDRESS'].replace(',', '').replace('.','').lower() 


In [59]:
#uniform formatting of name by making all characters lowercase and removing all punctuation
CER['CER_NAME'] = [str(name).lower() for name in CER['CER_NAME']]

In [63]:
CER = CER[CER['CER_URL'] != ''][CER['CER_URL'] != '0'].dropna(subset = ['CER_URL'])
len(CER)

6171

## Add address column to charter dataframe

In [23]:
#load charter data 
charters = pd.read_pickle('../../nowdata/charters_2015.pkl')

In [37]:
#merge addresses to charters based on NCESSCH
charters_with_address = pd.merge(charters, address_NCESSCH_data, how = 'left', on = 'NCESSCH') 
len(charters_with_address)

10965

In [38]:
#match formatting for name and address 
charters_with_address['SCHNAM12'] = [str(name).lower() for name in charters_with_address['SCHNAM12']]

charters_with_address['ADDRESS13'] = [str(address) for address in charters_with_address['ADDRESS13']]

## Create new column of combined name+address to use as comparison with fuzzy ratio

In [39]:
#combine name and address for charters 
chartername = charters_with_address['SCHNAM12']
charteraddress = charters_with_address['ADDRESS13']
charters_with_address['address_name_combined'] = [name+' '+address for name,address in zip(chartername, charteraddress)]

                                                               


In [65]:
#combine name and address for CER 
CERname = CER['CER_NAME']
CERaddress = CER['CER_ADDRESS']
CER['address_name_combined'] = [name+' '+address for name,address in zip(CERname, CERaddress)]

## Using fuzzy ratio 

1. List fuzzy ratio between name+address columns in both data frames 
2. Keep the highest fuzzy ratio 
3. If fuzzy ratio is over 70, change the column name to its match 

In [40]:

index = 0
for combined_charters in charters_with_address['address_name_combined']:
    ratio = [fuzz.ratio(combined_charters, combinedCER) for combinedCER in CER['address_name_combined']]
    greatest_match_index = np.asarray(ratio).argmax()
    greatest_match_name = ratio[greatest_match_index]
    if greatest_match_name >= 70:
        print(str(index) + ' ' + 'name match found')
        charters_nonduplicate_with_address.loc[index, 'address_name_combined'] = CER.loc[greatest_match_index, 'address_name_combined']
    else:
        print(str(index))
    index = index + 1
        

0
1 name match found
2 name match found
3 name match found
4 name match found
5 name match found
6
7
8 name match found
9 name match found
10 name match found
11 name match found
12
13 name match found
14
15 name match found
16 name match found
17 name match found
18 name match found
19 name match found
20 name match found
21 name match found
22
23 name match found
24
25 name match found
26 name match found
27
28 name match found
29 name match found
30
31 name match found
32 name match found
33 name match found
34
35
36 name match found
37 name match found
38 name match found
39 name match found
40 name match found
41 name match found
42 name match found
43
44
45
46 name match found
47
48 name match found
49
50
51
52 name match found
53 name match found
54 name match found
55 name match found
56
57 name match found
58
59
60
61
62
63
64
65 name match found
66 name match found
67 name match found
68
69
70
71
72
73
74
75
76
77 name match found
78 name match found
79
80
81
82
83
84
85
86
8

668 name match found
669 name match found
670 name match found
671 name match found
672
673
674
675 name match found
676
677
678
679
680 name match found
681 name match found
682 name match found
683 name match found
684 name match found
685 name match found
686
687
688
689 name match found
690 name match found
691
692
693 name match found
694
695 name match found
696 name match found
697 name match found
698
699 name match found
700
701 name match found
702
703
704
705
706 name match found
707 name match found
708 name match found
709 name match found
710 name match found
711 name match found
712 name match found
713 name match found
714
715 name match found
716 name match found
717 name match found
718 name match found
719 name match found
720
721 name match found
722
723 name match found
724 name match found
725 name match found
726
727
728 name match found
729
730 name match found
731 name match found
732 name match found
733
734 name match found
735 name match found
736 name match

1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325 name match found
1326
1327
1328
1329 name match found
1330
1331 name match found
1332 name match found
1333 name match found
1334 name match found
1335
1336
1337 name match found
1338
1339
1340
1341 name match found
1342 name match found
1343
1344
1345 name match found
1346
1347
1348
1349 name match found
1350
1351
1352 name match found
1353
1354 name match found
1355 name match found
1356 name match found
1357
1358 name match found
1359
1360 name match found
1361 name match found
1362
1363 name match found
1364
1365 name match found
1366 name match found
1367 name match found
1368 name match found
1369
1370 name match found
1371 name match found
1372 name match found
1373 name match found
1374 name match found
1375
1376
1377
1378
1379
1380
1381
1382
1383 name match found
1384 name match found
1385
1386
1387
1388 name match found
1389 name match found
1390
1391 name match found

1848 name match found
1849 name match found
1850 name match found
1851 name match found
1852 name match found
1853 name match found
1854 name match found
1855
1856 name match found
1857 name match found
1858
1859 name match found
1860
1861 name match found
1862 name match found
1863 name match found
1864 name match found
1865 name match found
1866
1867 name match found
1868
1869
1870
1871
1872 name match found
1873 name match found
1874
1875 name match found
1876
1877
1878 name match found
1879 name match found
1880
1881
1882 name match found
1883
1884 name match found
1885
1886
1887
1888 name match found
1889 name match found
1890 name match found
1891 name match found
1892 name match found
1893 name match found
1894 name match found
1895
1896
1897 name match found
1898 name match found
1899 name match found
1900 name match found
1901 name match found
1902
1903 name match found
1904
1905 name match found
1906 name match found
1907 name match found
1908 name match found
1909 name match

2406 name match found
2407
2408
2409 name match found
2410 name match found
2411 name match found
2412 name match found
2413 name match found
2414 name match found
2415
2416
2417
2418 name match found
2419 name match found
2420 name match found
2421 name match found
2422 name match found
2423 name match found
2424 name match found
2425
2426
2427
2428 name match found
2429 name match found
2430
2431 name match found
2432
2433
2434 name match found
2435 name match found
2436 name match found
2437
2438 name match found
2439 name match found
2440 name match found
2441 name match found
2442 name match found
2443
2444 name match found
2445
2446 name match found
2447 name match found
2448 name match found
2449 name match found
2450 name match found
2451 name match found
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462 name match found
2463
2464 name match found
2465 name match found
2466 name match found
2467 name match found
2468
2469
2470 name match found
2471 name match found
2472
24

2974
2975
2976 name match found
2977
2978 name match found
2979 name match found
2980
2981 name match found
2982
2983 name match found
2984 name match found
2985 name match found
2986 name match found
2987 name match found
2988 name match found
2989 name match found
2990 name match found
2991
2992
2993
2994 name match found
2995
2996 name match found
2997 name match found
2998 name match found
2999
3000 name match found
3001 name match found
3002 name match found
3003 name match found
3004 name match found
3005 name match found
3006 name match found
3007 name match found
3008 name match found
3009 name match found
3010 name match found
3011 name match found
3012 name match found
3013 name match found
3014 name match found
3015 name match found
3016 name match found
3017 name match found
3018 name match found
3019 name match found
3020 name match found
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042 name match found
3043 name

3534 name match found
3535 name match found
3536
3537
3538
3539 name match found
3540 name match found
3541
3542 name match found
3543 name match found
3544
3545 name match found
3546
3547
3548
3549 name match found
3550 name match found
3551 name match found
3552
3553
3554 name match found
3555 name match found
3556 name match found
3557
3558
3559 name match found
3560 name match found
3561 name match found
3562 name match found
3563 name match found
3564
3565 name match found
3566 name match found
3567 name match found
3568 name match found
3569 name match found
3570 name match found
3571 name match found
3572 name match found
3573
3574 name match found
3575
3576 name match found
3577 name match found
3578 name match found
3579 name match found
3580 name match found
3581 name match found
3582 name match found
3583 name match found
3584
3585
3586
3587 name match found
3588 name match found
3589 name match found
3590
3591
3592
3593
3594
3595
3596 name match found
3597
3598
3599 name ma

4147 name match found
4148
4149
4150
4151 name match found
4152 name match found
4153
4154 name match found
4155 name match found
4156 name match found
4157 name match found
4158 name match found
4159 name match found
4160 name match found
4161 name match found
4162
4163 name match found
4164 name match found
4165
4166 name match found
4167 name match found
4168
4169 name match found
4170
4171
4172
4173
4174
4175 name match found
4176 name match found
4177 name match found
4178
4179
4180 name match found
4181 name match found
4182
4183
4184 name match found
4185 name match found
4186
4187 name match found
4188 name match found
4189 name match found
4190 name match found
4191 name match found
4192
4193
4194 name match found
4195 name match found
4196 name match found
4197 name match found
4198 name match found
4199
4200 name match found
4201 name match found
4202 name match found
4203 name match found
4204 name match found
4205 name match found
4206
4207
4208
4209
4210
4211
4212
4213
42

4738 name match found
4739
4740
4741
4742
4743
4744
4745
4746 name match found
4747
4748 name match found
4749
4750
4751
4752 name match found
4753 name match found
4754 name match found
4755 name match found
4756 name match found
4757
4758 name match found
4759 name match found
4760 name match found
4761 name match found
4762 name match found
4763 name match found
4764
4765 name match found
4766 name match found
4767 name match found
4768 name match found
4769
4770
4771
4772
4773 name match found
4774 name match found
4775
4776 name match found
4777 name match found
4778 name match found
4779 name match found
4780 name match found
4781 name match found
4782 name match found
4783 name match found
4784 name match found
4785 name match found
4786 name match found
4787 name match found
4788 name match found
4789 name match found
4790 name match found
4791
4792 name match found
4793
4794
4795 name match found
4796 name match found
4797
4798 name match found
4799 name match found
4800
4801 

5375 name match found
5376 name match found
5377 name match found
5378 name match found
5379 name match found
5380 name match found
5381 name match found
5382
5383 name match found
5384
5385 name match found
5386 name match found
5387 name match found
5388
5389 name match found
5390 name match found
5391 name match found
5392 name match found
5393
5394 name match found
5395
5396 name match found
5397 name match found
5398
5399 name match found
5400 name match found
5401 name match found
5402
5403 name match found
5404 name match found
5405
5406 name match found
5407
5408 name match found
5409 name match found
5410 name match found
5411
5412 name match found
5413 name match found
5414
5415 name match found
5416 name match found
5417 name match found
5418 name match found
5419 name match found
5420 name match found
5421 name match found
5422 name match found
5423 name match found
5424 name match found
5425 name match found
5426 name match found
5427 name match found
5428 name match found

5876 name match found
5877 name match found
5878 name match found
5879 name match found
5880
5881 name match found
5882 name match found
5883 name match found
5884 name match found
5885 name match found
5886 name match found
5887 name match found
5888 name match found
5889 name match found
5890 name match found
5891 name match found
5892 name match found
5893
5894 name match found
5895 name match found
5896 name match found
5897
5898
5899 name match found
5900
5901 name match found
5902 name match found
5903 name match found
5904 name match found
5905 name match found
5906 name match found
5907 name match found
5908 name match found
5909 name match found
5910
5911
5912
5913 name match found
5914 name match found
5915 name match found
5916
5917
5918 name match found
5919
5920 name match found
5921 name match found
5922 name match found
5923 name match found
5924 name match found
5925 name match found
5926 name match found
5927
5928
5929
5930 name match found
5931 name match found
5932
5

6548
6549 name match found
6550 name match found
6551
6552
6553 name match found
6554
6555 name match found
6556 name match found
6557
6558
6559 name match found
6560 name match found
6561 name match found
6562 name match found
6563 name match found
6564
6565 name match found
6566
6567
6568
6569 name match found
6570 name match found
6571 name match found
6572 name match found
6573
6574
6575 name match found
6576 name match found
6577 name match found
6578 name match found
6579 name match found
6580
6581
6582 name match found
6583 name match found
6584
6585 name match found
6586 name match found
6587 name match found
6588 name match found
6589 name match found
6590 name match found
6591 name match found
6592 name match found
6593
6594 name match found
6595
6596 name match found
6597
6598
6599
6600
6601
6602
6603
6604
6605
6606 name match found
6607
6608
6609 name match found
6610 name match found
6611
6612 name match found
6613
6614 name match found
6615 name match found
6616 name matc

7042 name match found
7043 name match found
7044 name match found
7045 name match found
7046 name match found
7047 name match found
7048
7049 name match found
7050 name match found
7051 name match found
7052 name match found
7053 name match found
7054 name match found
7055 name match found
7056 name match found
7057 name match found
7058 name match found
7059 name match found
7060 name match found
7061 name match found
7062 name match found
7063 name match found
7064 name match found
7065 name match found
7066 name match found
7067
7068 name match found
7069 name match found
7070 name match found
7071 name match found
7072 name match found
7073 name match found
7074 name match found
7075 name match found
7076 name match found
7077 name match found
7078 name match found
7079 name match found
7080 name match found
7081 name match found
7082 name match found
7083 name match found
7084 name match found
7085 name match found
7086 name match found
7087 name match found
7088 name match found


7687
7688
7689 name match found
7690
7691
7692
7693
7694
7695 name match found
7696
7697 name match found
7698
7699
7700 name match found
7701
7702 name match found
7703 name match found
7704 name match found
7705 name match found
7706 name match found
7707 name match found
7708
7709 name match found
7710
7711 name match found
7712 name match found
7713 name match found
7714
7715
7716
7717
7718
7719
7720
7721
7722
7723
7724
7725
7726
7727
7728
7729 name match found
7730
7731
7732
7733
7734
7735 name match found
7736 name match found
7737 name match found
7738
7739 name match found
7740 name match found
7741
7742
7743
7744
7745
7746 name match found
7747
7748 name match found
7749
7750
7751
7752
7753
7754 name match found
7755
7756
7757
7758 name match found
7759 name match found
7760
7761
7762
7763
7764 name match found
7765
7766
7767
7768
7769
7770
7771
7772
7773
7774
7775
7776
7777
7778
7779 name match found
7780 name match found
7781 name match found
7782
7783 name match found
7784


8396 name match found
8397 name match found
8398
8399 name match found
8400
8401
8402 name match found
8403
8404
8405 name match found
8406 name match found
8407
8408
8409 name match found
8410 name match found
8411 name match found
8412
8413 name match found
8414
8415
8416 name match found
8417 name match found
8418 name match found
8419
8420 name match found
8421
8422 name match found
8423 name match found
8424 name match found
8425
8426 name match found
8427 name match found
8428 name match found
8429 name match found
8430 name match found
8431 name match found
8432 name match found
8433 name match found
8434
8435
8436
8437
8438
8439 name match found
8440 name match found
8441
8442
8443
8444
8445
8446
8447
8448 name match found
8449 name match found
8450 name match found
8451 name match found
8452 name match found
8453 name match found
8454 name match found
8455 name match found
8456 name match found
8457
8458 name match found
8459 name match found
8460 name match found
8461 name ma

8963 name match found
8964 name match found
8965 name match found
8966
8967 name match found
8968 name match found
8969 name match found
8970 name match found
8971 name match found
8972 name match found
8973
8974 name match found
8975
8976 name match found
8977 name match found
8978 name match found
8979 name match found
8980 name match found
8981 name match found
8982 name match found
8983 name match found
8984 name match found
8985 name match found
8986 name match found
8987 name match found
8988 name match found
8989 name match found
8990
8991
8992 name match found
8993 name match found
8994 name match found
8995 name match found
8996 name match found
8997 name match found
8998 name match found
8999
9000 name match found
9001 name match found
9002 name match found
9003
9004
9005
9006
9007
9008
9009
9010
9011
9012
9013
9014
9015
9016
9017
9018
9019 name match found
9020 name match found
9021
9022
9023 name match found
9024
9025
9026
9027
9028
9029
9030
9031
9032
9033 name match found

9666
9667
9668
9669
9670
9671
9672
9673
9674
9675
9676 name match found
9677 name match found
9678 name match found
9679
9680 name match found
9681
9682 name match found
9683 name match found
9684 name match found
9685
9686
9687
9688 name match found
9689
9690
9691 name match found
9692 name match found
9693 name match found
9694 name match found
9695
9696
9697
9698 name match found
9699
9700 name match found
9701
9702
9703 name match found
9704 name match found
9705 name match found
9706
9707 name match found
9708 name match found
9709 name match found
9710 name match found
9711 name match found
9712
9713
9714 name match found
9715
9716 name match found
9717 name match found
9718 name match found
9719 name match found
9720
9721
9722 name match found
9723 name match found
9724
9725
9726
9727
9728
9729
9730
9731
9732
9733
9734
9735
9736
9737
9738
9739
9740
9741
9742
9743
9744
9745
9746
9747
9748
9749
9750
9751
9752 name match found
9753 name match found
9754 name match found
9755 name m

10302 name match found
10303 name match found
10304 name match found
10305 name match found
10306 name match found
10307 name match found
10308
10309 name match found
10310
10311 name match found
10312 name match found
10313 name match found
10314 name match found
10315 name match found
10316 name match found
10317
10318
10319
10320
10321
10322 name match found
10323
10324
10325
10326
10327
10328
10329
10330
10331
10332
10333
10334
10335
10336
10337
10338
10339
10340
10341
10342
10343
10344
10345
10346
10347
10348
10349
10350
10351
10352
10353
10354
10355
10356
10357
10358
10359
10360
10361
10362
10363 name match found
10364 name match found
10365
10366
10367
10368
10369
10370
10371
10372 name match found
10373
10374
10375
10376 name match found
10377
10378
10379
10380
10381
10382
10383
10384
10385
10386 name match found
10387
10388
10389
10390 name match found
10391 name match found
10392 name match found
10393
10394 name match found
10395 name match found
10396 name match found
10397

## Merge URL with charter data 

In [114]:
#merge based on combined name and address

charters_merge_CER = pd.merge(charters_with_address, CER, how = 'left', on = ['address_name_combined']) 



In [115]:
CER_columns = list(CER.columns) #list of all columns labels in CER
CER_columns.remove('CER_URL') #remove CER_URL from the list 

In [116]:
charters_merge_CER = charters_merge_CER.drop(labels = CER_columns, axis = 1) #drop all columns in CER except CER_URL 

In [117]:
len(charters_merge_CER)

10965

In [118]:
charters_merge_CER

Unnamed: 0,NCESSCH,URL,LAT1516,LON1516,AM,AS,BL,HI,HP,TR,...,PROG_VALID_STR,RIT_VALID_COUNT,RIT_VALID_STR,INQUIRY_RATIO,DISCIPLINE_RATIO,ESS_VALID_RATIO,PROG_VALID_RATIO,RIT_VALID_RATIO,ADDRESS13,CER_URL
0,1.001970e+10,http://www.maef.net/,,,,,,,,,...,-2.997944,51.0,-2.545622,0.007424,0.000446,0.000893,0.001005,0.002847,,
1,2.000010e+10,https://education.alaska.gov/DOE_Rolodex/Schoo...,60.796131,-161.765194,167.0,0.0,0.0,0.0,0.0,0.0,...,,0.0,,,,,,,1010 fourth ave bethel ak 99559,
2,2.001500e+10,https://www.kgbsd.org/ketchikancharter,55.347001,-131.641191,74.0,37.0,2.0,5.0,4.0,5.0,...,-3.123525,0.0,-6.000000,0.003762,0.000752,0.000000,0.000752,0.000000,410 schoenbar ketchikan ak 99901,
3,2.001500e+10,http://www.tongassschool.org/,55.347001,-131.641191,57.0,12.0,4.0,6.0,1.0,11.0,...,-3.752448,31.0,-2.960010,0.009768,0.000177,0.000212,0.000177,0.001096,410 schoenbar rd ketchikan ak 99901,
4,2.001800e+10,https://aquarian.asdk12.org/,61.192407,-149.916872,10.0,11.0,6.0,19.0,2.0,51.0,...,-6.000000,0.0,-6.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1705 west 32nd ave anchorage ak 99517,
5,2.001800e+10,https://education.alaska.gov/DOE_Rolodex/Schoo...,61.198100,-149.876000,30.0,16.0,18.0,43.0,18.0,69.0,...,,0.0,,,,,,,401 east fireweed ln anchorage ak 99503,
6,2.001800e+10,,,,,,,,,,...,,0.0,,,,,,,,
7,2.001800e+10,,,,,,,,,,...,,0.0,,,,,,,,
8,2.001800e+10,http://www.winterberrycharterschool.com/,61.194450,-149.791641,15.0,8.0,5.0,14.0,1.0,26.0,...,-3.190948,34.0,-2.659441,0.009728,0.000193,0.000322,0.000644,0.002191,4802 bryn mawr court anchorage ak 99508,
9,2.001800e+10,http://www.asdk12.org/aboutschools/eagleacademy/,61.319213,-149.579442,2.0,3.0,8.0,6.0,0.0,18.0,...,-6.000000,0.0,-6.000000,0.000000,0.000000,0.000000,0.000000,0.000000,10901 mausel st eagle river ak 99577,


## Investigating schools without a URL match

In [121]:
#Drop NaN values in CER_URL 
charters_merge_CER_cleaned = charters_merge_CER.dropna(subset = ['CER_URL'])

#number of non-NaN URL values in merged data
len(charters_merge_CER_cleaned)

502

In [122]:
#number of charter schools that were open in 2012-13
len(charters_with_address[charters_with_address['ADDRESS13'] != 'nan'])

6175

In [123]:
#number of rows without URL that represent schools that were open in 2012-13
charters_merge_CER_opened = charters_merge_CER[charters_merge_CER['CER_URL'].isnull() == True][charters_merge_CER['ADDRESS13'] != 'nan']
len(charters_merge_CER_opened)

  


5673

In [125]:
#list of schools from charter data that do not have a URL but have an address in address column 
charters_merge_CER_opened

Unnamed: 0,NCESSCH,URL,LAT1516,LON1516,AM,AS,BL,HI,HP,TR,...,PROG_VALID_STR,RIT_VALID_COUNT,RIT_VALID_STR,INQUIRY_RATIO,DISCIPLINE_RATIO,ESS_VALID_RATIO,PROG_VALID_RATIO,RIT_VALID_RATIO,ADDRESS13,CER_URL
1,2.000010e+10,https://education.alaska.gov/DOE_Rolodex/Schoo...,60.796131,-161.765194,167.0,0.0,0.0,0.0,0.0,0.0,...,,0.0,,,,,,,1010 fourth ave bethel ak 99559,
2,2.001500e+10,https://www.kgbsd.org/ketchikancharter,55.347001,-131.641191,74.0,37.0,2.0,5.0,4.0,5.0,...,-3.123525,0.0,-6.000000,0.003762,0.000752,0.000000,0.000752,0.000000,410 schoenbar ketchikan ak 99901,
3,2.001500e+10,http://www.tongassschool.org/,55.347001,-131.641191,57.0,12.0,4.0,6.0,1.0,11.0,...,-3.752448,31.0,-2.960010,0.009768,0.000177,0.000212,0.000177,0.001096,410 schoenbar rd ketchikan ak 99901,
4,2.001800e+10,https://aquarian.asdk12.org/,61.192407,-149.916872,10.0,11.0,6.0,19.0,2.0,51.0,...,-6.000000,0.0,-6.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1705 west 32nd ave anchorage ak 99517,
5,2.001800e+10,https://education.alaska.gov/DOE_Rolodex/Schoo...,61.198100,-149.876000,30.0,16.0,18.0,43.0,18.0,69.0,...,,0.0,,,,,,,401 east fireweed ln anchorage ak 99503,
8,2.001800e+10,http://www.winterberrycharterschool.com/,61.194450,-149.791641,15.0,8.0,5.0,14.0,1.0,26.0,...,-3.190948,34.0,-2.659441,0.009728,0.000193,0.000322,0.000644,0.002191,4802 bryn mawr court anchorage ak 99508,
9,2.001800e+10,http://www.asdk12.org/aboutschools/eagleacademy/,61.319213,-149.579442,2.0,3.0,8.0,6.0,0.0,18.0,...,-6.000000,0.0,-6.000000,0.000000,0.000000,0.000000,0.000000,0.000000,10901 mausel st eagle river ak 99577,
10,2.001800e+10,http://www.frontiercs.org/,61.194914,-149.891223,27.0,8.0,1.0,16.0,4.0,20.0,...,-3.590229,6.0,-3.113052,0.007451,0.000000,0.000128,0.000257,0.000771,400 west northern lights blvd anchorage ak 99503,
11,2.001800e+10,http://highlandacademy.asdk12.org/,61.193958,-149.775759,8.0,9.0,10.0,13.0,7.0,23.0,...,-6.000000,0.0,-6.000000,0.000000,0.000000,0.000000,0.000000,0.000000,5530 east northern lights anchorage ak 99504,
12,2.001800e+10,https://education.alaska.gov/DOE_Rolodex/Schoo...,61.173154,-149.895616,8.0,9.0,1.0,23.0,3.0,48.0,...,,0.0,,,,,,,650 west international airport anchorage ak 99507,


In [None]:
#charters_merge_CER.to_csv('../../nowdata/backups/charters_full_250_CER_URLs.pkl')