In [18]:
#Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress
import numpy as np
import hvplot.pandas
import requests
import json
from pprint import pprint



#Upload from csv
data_set_path = "school_attendance.csv"
data_set_2_path = "all_students_df.csv"
zip_codes_path = "zip_codes.csv"
#population_set_path = "school_attendance.csv"

#Read data and results
data_set_2_df = pd.read_csv(data_set_2_path)
zip_codes_df = pd.read_csv(zip_codes_path, converters={"Zip Code":str})
data_set_df = pd.read_csv(data_set_path)

#population_df = pd.read_csv(population_set_path)
data_set_df.head()

Unnamed: 0,District code,District name,Category,Student group,2021-2022 student count - year to date,2021-2022 attendance rate - year to date,2020-2021 student count,2020-2021 attendance rate,2019-2020 student count,2019-2020 attendance rate,Reporting period,Date update
0,00000CT,Connecticut,,All Students,500285,0.9169,496092.0,0.9294,508346.0,0.9479,June 2022,07/22/2022
1,00000CT,Connecticut,Homelessness,Students Experiencing Homelessness,1814,0.8348,1735.0,0.8155,3916.0,0.8884,June 2022,07/22/2022
2,00000CT,Connecticut,Students With Disabilities,Students With Disabilities,78417,0.8899,76487.0,0.8946,80365.0,0.9277,June 2022,07/22/2022
3,00000CT,Connecticut,Free/Reduced Lunch,Free Meal Eligible,168984,0.8851,176225.0,0.8861,193706.0,0.9314,June 2022,07/22/2022
4,00000CT,Connecticut,Free/Reduced Lunch,Reduced Price Meal Eligible,29905,0.9184,30886.0,0.9299,27507.0,0.9518,June 2022,07/22/2022


In [19]:
#Attempt to clean out populations
all_students_df = data_set_df.loc[(data_set_df["Student group"]=="All Students")]
all_students_df = all_students_df[["District name", "2021-2022 student count - year to date"]]
all_students_df["District name"] = all_students_df["District name"].str.upper()
all_students_df

Unnamed: 0,District name,2021-2022 student count - year to date
0,CONNECTICUT,500285
13,ANDOVER SCHOOL DISTRICT,161
19,ANSONIA SCHOOL DISTRICT,2139
31,ASHFORD SCHOOL DISTRICT,343
40,AVON SCHOOL DISTRICT,3057
...,...,...
1972,UNIFIED SCHOOL DISTRICT #2,78
1977,CONNECTICUT TECHNICAL EDUCATION AND CAREER SYSTEM,11138
1990,NORWICH FREE ACADEMY DISTRICT,2068
2002,THE GILBERT SCHOOL DISTRICT,421


In [20]:
#Rename columns
zip_codes_df = zip_codes_df.rename(columns={"District Name": "District name"})
zip_codes_df = zip_codes_df[["District name", "Zip Code"]]
zip_codes_df.head()

Unnamed: 0,District name,Zip Code
0,ACHIEVEMENT FIRST HARTFORD AC,6112
1,AMISTAD ACADEMY DISTRICT,6513
2,ANDOVER SCHOOL DISTRICT,6232
3,ANSONIA SCHOOL DISTRICT,6401
4,AREA COOPERATIVE EDUCATIONAL,6473


In [21]:
 #Merge Files
merged_df = pd.merge(all_students_df, zip_codes_df, on="District name", how = "left")
#merged_3_df = merged_df[["District name", "Zip Code"]]
merged_df = merged_df.dropna()
merged_df

Unnamed: 0,District name,2021-2022 student count - year to date,Zip Code
1,ANDOVER SCHOOL DISTRICT,161,06232
2,ANSONIA SCHOOL DISTRICT,2139,06401
3,ASHFORD SCHOOL DISTRICT,343,06278
4,AVON SCHOOL DISTRICT,3057,06001
5,BARKHAMSTED SCHOOL DISTRICT,199,06063
...,...,...,...
179,THE BRIDGE ACADEMY DISTRICT,268,06608
181,EXPLORATIONS DISTRICT,87,06098
182,AMISTAD ACADEMY DISTRICT,1093,06513
195,UNIFIED SCHOOL DISTRICT #1,140,06109


In [22]:
# Import API key
from config3 import geoapify_key

In [23]:
#Set up base API URL
zip_code = "06473"
country = "United States"
url = f"https://api.geoapify.com/v1/geocode/search?text={zip_code}&format=json&apiKey={geoapify_key}"
geo_data = requests.get(url).json()

pprint(geo_data)


{'query': {'parsed': {'expected_type': 'unknown', 'postcode': '06473'},
           'text': '06473'},
 'results': [{'address_line1': 'North Haven',
              'address_line2': 'North Haven, CT 06473, United States of '
                               'America',
              'bbox': {'lat1': 41.221443578114,
                       'lat2': 41.541443578114,
                       'lon1': -73.019669765131,
                       'lon2': -72.699669765131},
              'city': 'North Haven',
              'country': 'United States',
              'country_code': 'us',
              'county': 'South Central Connecticut Planning Region',
              'datasource': {'attribution': '© OpenStreetMap contributors',
                             'license': 'Open Database License',
                             'sourcename': 'openstreetmap',
                             'url': 'https://www.openstreetmap.org/copyright'},
              'formatted': 'North Haven, CT 06473, United States of America',

In [24]:
zip_code = []
districts = []
for item in merged_df["Zip Code"]:
    zip_code.append(item)
for district in merged_df["2021-2022 student count - year to date"]:
    districts.append(district)
districts

[161,
 2139,
 343,
 3057,
 199,
 2613,
 391,
 3102,
 1734,
 745,
 159,
 2527,
 18482,
 7439,
 2545,
 804,
 77,
 429,
 1462,
 143,
 4119,
 214,
 1493,
 2141,
 75,
 413,
 105,
 1549,
 1873,
 11783,
 4675,
 206,
 1222,
 126,
 839,
 951,
 1713,
 6126,
 2660,
 2597,
 875,
 971,
 2548,
 4660,
 271,
 9229,
 4073,
 157,
 5631,
 1726,
 8471,
 1639,
 4560,
 3108,
 5392,
 63,
 16371,
 129,
 635,
 195,
 2245,
 886,
 2424,
 381,
 808,
 2414,
 6039,
 948,
 445,
 8476,
 4346,
 5204,
 3319,
 1970,
 4208,
 9299,
 4113,
 2096,
 425,
 18028,
 3850,
 3055,
 3601,
 4001,
 68,
 218,
 3130,
 12356,
 3176,
 1014,
 1242,
 1693,
 1860,
 2171,
 1245,
 352,
 1209,
 383,
 1063,
 789,
 4533,
 2483,
 385,
 282,
 86,
 2077,
 99,
 4404,
 247,
 4068,
 1319,
 6171,
 4714,
 248,
 1312,
 15986,
 314,
 1805,
 6724,
 2016,
 795,
 892,
 2200,
 3813,
 6703,
 44,
 3019,
 213,
 5133,
 17806,
 2287,
 2521,
 590,
 9018,
 5863,
 2204,
 5269,
 3578,
 376,
 3734,
 532,
 2960,
 3219,
 1428,
 2121,
 834,
 781,
 335,
 814,
 2132,
 830,

In [25]:
district_data = []
country_code = "us"

i=0
for code in zip_code:
    try:
        #zip_code_url = f"https://api.geoapify.com/v1/geocode/search?text={code}&format=json&apiKey={geoapify_key}"
        zip_code_url = f"https://api.geoapify.com/v1/geocode/search?text={code}%20{country_code}&format=json&apiKey={geoapify_key}"
        zip_code_requests = requests.get(zip_code_url).json()
        print(f"Zip Code URL: {zip_code_url}")
    #Find Lat and Long
        zip_lat = zip_code_requests['results'][0]['lat']
        zip_lng = zip_code_requests['results'][0]['lon']
        zip_code_1 = zip_code_requests["results"][0]["postcode"]
        
       
     # Append the City information into city_data list
        district_data.append({"Zip Code": zip_code_1, 
                          "Lat": zip_lat, 
                          "Lng": zip_lng, 
                            "Population": districts[i]/75 
                          })
    except:
        print(f"not working")
    i=i+1        

Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06232%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06401%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06278%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06001%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06063%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06037%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06524%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06801%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e


Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06040%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06268%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06447%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06450%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06457%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06460%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06468%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06370%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e


Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06498%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06107%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06516%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06883%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06880%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06109%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06279%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e
Zip Code URL: https://api.geoapify.com/v1/geocode/search?text=06897%20us&format=json&apiKey=c967a2b03b354238ac3aa60aa86eb97e


In [26]:
district_data_df = pd.DataFrame(district_data)
district_data_df

Unnamed: 0,Zip Code,Lat,Lng,Population
0,06232,41.732991,-72.375790,2.146667
1,06401,41.342445,-73.072524,28.520000
2,06278,41.890680,-72.171084,4.573333
3,06001,41.791284,-72.866740,40.760000
4,06063,41.918415,-72.977078,2.653333
...,...,...,...,...
165,06608,41.189517,-73.180982,3.573333
166,06098,41.927121,-73.078201,1.160000
167,06513,41.316445,-72.875431,14.573333
168,06109,41.700966,-72.676233,1.866667


In [27]:
map_plot_1 = district_data_df.hvplot.points(
    "Lng",
    "Lat",
    s="Population",
    geo = True,
    tiles = "OSM",
)

map_plot_1

*** Analysis: This map shows that there is a higher concentration of students around major cities, specifically Hartford, New Haven, and Bridgeport. The more rural districts have less students.***