# Introduction to Mongo

In [1]:
from pymongo import MongoClient
from pprint import pprint
import pandas as pd
# The CSV was loaded into MongoDB with the mongoimport command below before this notebook was run:
# mongoimport --type csv -d PublicSchool2021 -c characteristics --headerline --drop Compiled_Public_School_Characteristics.csv

In [2]:
# Create an instance of MongoClient.
# Only the port is passed here; the host is left at pymongo's default
# (presumably a local server -- confirm for your setup).
mongo = MongoClient(port=27017)

In [3]:
# Confirm that our new database was created:
# 'PublicSchool2021' should appear in the printed list of database names.
print(mongo.list_database_names())

['PublicSchool2021', 'admin', 'autosaurus', 'classDB', 'config', 'epa', 'fruits', 'local', 'met', 'petsitly_marketing', 'uk_food']


In [4]:
# Assign the Public School database to a variable name.
# 'PublicSchool2021' is the database name used by the mongoimport command
# recorded in the first cell.
db = mongo['PublicSchool2021']

In [5]:
# Review the collections in our new database;
# the mongoimport command in the first cell targets a collection named 'characteristics'.
print(db.list_collection_names())

['characteristics']


In [6]:
# Review a single document from the characteristics collection to see the
# available field names and value types.
# NOTE(review): the recorded output shows a field whose key is the empty
# string ('') -- presumably an unnamed index column in the source CSV; confirm.
print(db.characteristics.find_one())

{'_id': ObjectId('644097c157d2a1f3d6c254c4'), '': 2, 'Location_ID': 'Alabama', 'School_Name': 'Albertville Intermediate School', 'Location_Abbreviation': 'AL', 'Location_Zip': 35950, 'Virtual': 'Not Virtual', 'Elementary(PK-G5)': 439.0, 'Middle(G6-G8)': 452.0, 'High(G9-G13)': 0.0, 'Total_Free/Reduced_Lunch': 330.0, 'Total_Students': 891.0, 'Total_Teachers': 41.0, 'Charter': 'No', 'Magnet': 'No', 'Latitude': 34.2733, 'Longitude': -86.2201}


In [7]:
# Assign the collection to a variable; every aggregation below runs against it.
characteristics = db['characteristics']

In [8]:
# Not Virtual schools: number of school documents per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Not Virtual'.
match_query = {
    '$match': {'Virtual': 'Not Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'count' adds 1 for
# every matched document in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'count': {'$sum': 1},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
NotVirtual_results = list(characteristics.aggregate(pipeline))

In [9]:
# Preview the first ten per-location count documents for Not Virtual schools.
pprint(NotVirtual_results[:10])

[{'_id': 'Rhode Island', 'count': 313},
 {'_id': 'Colorado', 'count': 1880},
 {'_id': 'Maine', 'count': 593},
 {'_id': 'Michigan', 'count': 1409},
 {'_id': 'Georgia', 'count': 2298},
 {'_id': 'Ohio', 'count': 3516},
 {'_id': 'U.S. Virgin Islands', 'count': 1},
 {'_id': 'Montana', 'count': 620},
 {'_id': 'Oklahoma', 'count': 1768},
 {'_id': 'Bureau of Indian Education', 'count': 174}]


In [10]:
# Turn the list of aggregation result documents into a pandas DataFrame
# (one row per result document).
NotVirtual_df = pd.DataFrame(NotVirtual_results)

# Report the frame's row count, then preview its first five rows.
print("Rows in DataFrame: ", NotVirtual_df.shape[0])
NotVirtual_df.head(5)

Rows in DataFrame:  43


Unnamed: 0,_id,count
0,Rhode Island,313
1,Colorado,1880
2,Maine,593
3,Michigan,1409
4,Georgia,2298


In [11]:
# Not Virtual schools: Elementary(PK-G5) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Not Virtual'.
match_query = {
    '$match': {'Virtual': 'Not Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Elementary(PK-G5)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Elementary(PK-G5)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
PK_NotVirtual_results = list(characteristics.aggregate(pipeline))

In [12]:
# Preview the first ten per-location Elementary sums for Not Virtual schools.
pprint(PK_NotVirtual_results[:10])

[{'_id': 'Rhode Island', 'sum': 61151.0},
 {'_id': 'Colorado', 'sum': 391766.0},
 {'_id': 'Maine', 'sum': 76587.0},
 {'_id': 'Michigan', 'sum': 241963.0},
 {'_id': 'Georgia', 'sum': 779028.0},
 {'_id': 'Ohio', 'sum': 736426.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 0.0},
 {'_id': 'Montana', 'sum': 66098.0},
 {'_id': 'Oklahoma', 'sum': 308527.0},
 {'_id': 'Bureau of Indian Education', 'sum': 17480.0}]


In [13]:
# Not Virtual schools: Middle(G6-G8) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Not Virtual'.
match_query = {
    '$match': {'Virtual': 'Not Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Middle(G6-G8)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Middle(G6-G8)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
MS_NotVirtual_results = list(characteristics.aggregate(pipeline))

In [14]:
# Preview the first ten per-location Middle School sums for Not Virtual schools.
pprint(MS_NotVirtual_results[:10])

[{'_id': 'Colorado', 'sum': 199237.0},
 {'_id': 'Maine', 'sum': 39561.0},
 {'_id': 'Puerto Rico', 'sum': 64711.0},
 {'_id': 'Georgia', 'sum': 407506.0},
 {'_id': 'Michigan', 'sum': 122703.0},
 {'_id': 'Ohio', 'sum': 380750.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 0.0},
 {'_id': 'Montana', 'sum': 30720.0},
 {'_id': 'Oklahoma', 'sum': 143024.0},
 {'_id': 'Wyoming', 'sum': 22495.0}]


In [15]:
# Not Virtual schools: High(G9-G13) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Not Virtual'.
match_query = {
    '$match': {'Virtual': 'Not Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'High(G9-G13)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$High(G9-G13)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
HS_NotVirtual_results = list(characteristics.aggregate(pipeline))

In [16]:
# Preview the first ten per-location High School sums for Not Virtual schools.
pprint(HS_NotVirtual_results[:10])

[{'_id': 'Rhode Island', 'sum': 55103.0},
 {'_id': 'Colorado', 'sum': 326150.0},
 {'_id': 'Maine', 'sum': 63034.0},
 {'_id': 'Michigan', 'sum': 197934.0},
 {'_id': 'Georgia', 'sum': 659627.0},
 {'_id': 'Ohio', 'sum': 614757.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 0.0},
 {'_id': 'Montana', 'sum': 19694.0},
 {'_id': 'Oklahoma', 'sum': 219785.0},
 {'_id': 'Bureau of Indian Education', 'sum': 11068.0}]


In [17]:
# Not Virtual schools: Total_Students summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Not Virtual'.
match_query = {
    '$match': {'Virtual': 'Not Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Total_Students' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Total_Students'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
TS_NotVirtual_results = list(characteristics.aggregate(pipeline))

In [18]:
# Preview the first ten per-location Total Students sums for Not Virtual schools.
pprint(TS_NotVirtual_results[:10])

[{'_id': 'Colorado', 'sum': 851991.0},
 {'_id': 'Maine', 'sum': 166507.0},
 {'_id': 'Georgia', 'sum': 1709547.0},
 {'_id': 'Ohio', 'sum': 1604831.0},
 {'_id': 'Michigan', 'sum': 525194.0},
 {'_id': 'Montana', 'sum': 112464.0},
 {'_id': 'Oklahoma', 'sum': 627075.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 0.0},
 {'_id': 'Wyoming', 'sum': 92772.0},
 {'_id': 'Bureau of Indian Education', 'sum': 34724.0}]


In [19]:
# Full Virtual schools: Elementary(PK-G5) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Full Virtual'.
match_query = {
    '$match': {'Virtual': 'Full Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Elementary(PK-G5)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Elementary(PK-G5)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
PK_FullVirtual_results = list(characteristics.aggregate(pipeline))

In [20]:
# Preview the first ten per-location Elementary sums for Full Virtual schools.
pprint(PK_FullVirtual_results[:10])

[{'_id': 'South Carolina', 'sum': 5854.0},
 {'_id': 'Massachusetts', 'sum': 741.0},
 {'_id': 'Colorado', 'sum': 5986.0},
 {'_id': 'Maine', 'sum': 0.0},
 {'_id': 'Michigan', 'sum': 7041.0},
 {'_id': 'Georgia', 'sum': 6940.0},
 {'_id': 'Ohio', 'sum': 14590.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 4802.0},
 {'_id': 'Oklahoma', 'sum': 19304.0},
 {'_id': 'New Hampshire', 'sum': 1003.0}]


In [21]:
# Full Virtual schools: Middle(G6-G8) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Full Virtual'.
match_query = {
    '$match': {'Virtual': 'Full Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Middle(G6-G8)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Middle(G6-G8)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
MS_FullVirtual_results = list(characteristics.aggregate(pipeline))

In [22]:
# Preview the first ten per-location Middle School sums for Full Virtual schools.
pprint(MS_FullVirtual_results[:10])

[{'_id': 'Colorado', 'sum': 5328.0},
 {'_id': 'Maine', 'sum': 252.0},
 {'_id': 'Massachusetts', 'sum': 964.0},
 {'_id': 'Georgia', 'sum': 5924.0},
 {'_id': 'Ohio', 'sum': 9245.0},
 {'_id': 'Michigan', 'sum': 5058.0},
 {'_id': 'Oklahoma', 'sum': 9557.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 2735.0},
 {'_id': 'North Carolina', 'sum': 6682.0},
 {'_id': 'Alaska', 'sum': 275.0}]


In [23]:
# Full Virtual schools: High(G9-G13) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Full Virtual'.
match_query = {
    '$match': {'Virtual': 'Full Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'High(G9-G13)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$High(G9-G13)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
HS_FullVirtual_results = list(characteristics.aggregate(pipeline))

In [24]:
# Preview the first ten per-location High School sums for Full Virtual schools.
pprint(HS_FullVirtual_results[:10])

[{'_id': 'Colorado', 'sum': 10392.0},
 {'_id': 'Maine', 'sum': 825.0},
 {'_id': 'Massachusetts', 'sum': 2221.0},
 {'_id': 'Georgia', 'sum': 9666.0},
 {'_id': 'Michigan', 'sum': 17312.0},
 {'_id': 'Ohio', 'sum': 18892.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 4360.0},
 {'_id': 'Oklahoma', 'sum': 18307.0},
 {'_id': 'North Carolina', 'sum': 4853.0},
 {'_id': 'Alaska', 'sum': 409.0}]


In [25]:
# Full Virtual schools: Total_Students summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is 'Full Virtual'.
match_query = {
    '$match': {'Virtual': 'Full Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Total_Students' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Total_Students'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
TS_FullVirtual_results = list(characteristics.aggregate(pipeline))

In [26]:
# Preview the first ten per-location Total Students sums for Full Virtual schools.
pprint(TS_FullVirtual_results[:10])

[{'_id': 'South Carolina', 'sum': 16950.0},
 {'_id': 'Massachusetts', 'sum': 3457.0},
 {'_id': 'Colorado', 'sum': 19734.0},
 {'_id': 'Maine', 'sum': 888.0},
 {'_id': 'Michigan', 'sum': 26013.0},
 {'_id': 'Georgia', 'sum': 20417.0},
 {'_id': 'Ohio', 'sum': 39074.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 10993.0},
 {'_id': 'Oklahoma', 'sum': 43324.0},
 {'_id': 'Alaska', 'sum': 1142.0}]


In [27]:
# Supplemental Virtual schools: Elementary(PK-G5) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is
# 'Supplemental Virtual'.
match_query = {
    '$match': {'Virtual': 'Supplemental Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Elementary(PK-G5)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Elementary(PK-G5)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
PK_SupplementalVirtual_results = list(characteristics.aggregate(pipeline))

In [28]:
# Preview the first ten per-location Elementary sums for Supplemental Virtual schools.
pprint(PK_SupplementalVirtual_results[:10])

[{'_id': 'Mississippi', 'sum': 0.0},
 {'_id': 'Maryland', 'sum': 407318.0},
 {'_id': 'Connecticut', 'sum': 221937.0},
 {'_id': 'Colorado', 'sum': 640.0},
 {'_id': 'Massachusetts', 'sum': 407430.0},
 {'_id': 'Michigan', 'sum': 357726.0},
 {'_id': 'Oregon', 'sum': 32384.0},
 {'_id': 'South Carolina', 'sum': 346727.0},
 {'_id': 'Vermont', 'sum': 38613.0},
 {'_id': 'New York', 'sum': 1153061.0}]


In [29]:
# Supplemental Virtual schools: Middle(G6-G8) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is
# 'Supplemental Virtual'.
match_query = {
    '$match': {'Virtual': 'Supplemental Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Middle(G6-G8)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Middle(G6-G8)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
MS_SupplementalVirtual_results = list(characteristics.aggregate(pipeline))

In [30]:
# Preview the first ten per-location Middle School sums for Supplemental Virtual schools.
pprint(MS_SupplementalVirtual_results[:10])

[{'_id': 'Maryland', 'sum': 207151.0},
 {'_id': 'Massachusetts', 'sum': 212396.0},
 {'_id': 'Colorado', 'sum': 1429.0},
 {'_id': 'Connecticut', 'sum': 116410.0},
 {'_id': 'Michigan', 'sum': 192204.0},
 {'_id': 'Mississippi', 'sum': 373.0},
 {'_id': 'Oregon', 'sum': 43977.0},
 {'_id': 'Vermont', 'sum': 17203.0},
 {'_id': 'New York', 'sum': 590956.0},
 {'_id': 'Guam', 'sum': 6262.0}]


In [31]:
# Supplemental Virtual schools: High(G9-G13) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is
# 'Supplemental Virtual'.
match_query = {
    '$match': {'Virtual': 'Supplemental Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'High(G9-G13)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$High(G9-G13)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
HS_SupplementalVirtual_results = list(characteristics.aggregate(pipeline))

In [32]:
# Preview the first ten per-location High School sums for Supplemental Virtual schools.
pprint(HS_SupplementalVirtual_results[:10])

[{'_id': 'Maryland', 'sum': 337397.0},
 {'_id': 'Massachusetts', 'sum': 359062.0},
 {'_id': 'Colorado', 'sum': 10297.0},
 {'_id': 'Connecticut', 'sum': 199117.0},
 {'_id': 'Michigan', 'sum': 349386.0},
 {'_id': 'Mississippi', 'sum': 4737.0},
 {'_id': 'Oregon', 'sum': 156774.0},
 {'_id': 'Vermont', 'sum': 27450.0},
 {'_id': 'New York', 'sum': 1000861.0},
 {'_id': 'Guam', 'sum': 11512.0}]


In [33]:
# Supplemental Virtual schools: Total_Students summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is
# 'Supplemental Virtual'.
match_query = {
    '$match': {'Virtual': 'Supplemental Virtual'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Total_Students' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Total_Students'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
TS_SupplementalVirtual_results = list(characteristics.aggregate(pipeline))

In [34]:
# Preview the first ten per-location Total Students sums for Supplemental Virtual schools.
pprint(TS_SupplementalVirtual_results[:10])

[{'_id': 'Connecticut', 'sum': 497411.0},
 {'_id': 'Colorado', 'sum': 10779.0},
 {'_id': 'Mississippi', 'sum': 4230.0},
 {'_id': 'Maryland', 'sum': 882457.0},
 {'_id': 'Massachusetts', 'sum': 908008.0},
 {'_id': 'Oregon', 'sum': 201684.0},
 {'_id': 'Tennessee', 'sum': 128565.0},
 {'_id': 'Michigan', 'sum': 830616.0},
 {'_id': 'Montana', 'sum': 32541.0},
 {'_id': 'Vermont', 'sum': 77661.0}]


In [35]:
# Virtual with face to face options: Elementary(PK-G5) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is
# 'Virtual with face to face options'.
match_query = {
    '$match': {'Virtual': 'Virtual with face to face options'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Elementary(PK-G5)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Elementary(PK-G5)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
PK_VirtualFFOptions_results = list(characteristics.aggregate(pipeline))

In [36]:
# Preview the first ten per-location Elementary sums for schools that are
# virtual with face to face options.
pprint(PK_VirtualFFOptions_results[:10])

[{'_id': 'Colorado', 'sum': 0.0},
 {'_id': 'Maryland', 'sum': 70.0},
 {'_id': 'Oregon', 'sum': 5724.0},
 {'_id': 'Michigan', 'sum': 6100.0},
 {'_id': 'Oklahoma', 'sum': 11967.0},
 {'_id': 'Hawaii', 'sum': 286.0},
 {'_id': 'North Carolina', 'sum': 5951.0},
 {'_id': 'Alaska', 'sum': 6265.0},
 {'_id': 'New Hampshire', 'sum': 1407.0},
 {'_id': 'Virginia', 'sum': 66004.0}]


In [37]:
# Virtual with face to face options: Middle(G6-G8) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is
# 'Virtual with face to face options'.
match_query = {
    '$match': {'Virtual': 'Virtual with face to face options'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Middle(G6-G8)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Middle(G6-G8)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
MS_VirtualFFOptions_results = list(characteristics.aggregate(pipeline))

In [38]:
# Preview the first ten per-location Middle School sums for schools that are
# virtual with face to face options.
pprint(MS_VirtualFFOptions_results[:10])

[{'_id': 'North Carolina', 'sum': 2882.0},
 {'_id': 'Oklahoma', 'sum': 5167.0},
 {'_id': 'Michigan', 'sum': 3834.0},
 {'_id': 'Maryland', 'sum': 0.0},
 {'_id': 'Oregon', 'sum': 3162.0},
 {'_id': 'Colorado', 'sum': 0.0},
 {'_id': 'Tennessee', 'sum': 0.0},
 {'_id': 'Rhode Island', 'sum': 0.0},
 {'_id': 'Wisconsin', 'sum': 522.0},
 {'_id': 'Minnesota', 'sum': 84.0}]


In [39]:
# Virtual with face to face options: High(G9-G13) enrollment summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is
# 'Virtual with face to face options'.
match_query = {
    '$match': {'Virtual': 'Virtual with face to face options'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'High(G9-G13)' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$High(G9-G13)'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
HS_VirtualFFOptions_results = list(characteristics.aggregate(pipeline))

In [40]:
# Preview the first ten per-location High School sums for schools that are
# virtual with face to face options.
pprint(HS_VirtualFFOptions_results[:10])

[{'_id': 'Maryland', 'sum': 0.0},
 {'_id': 'Oregon', 'sum': 6240.0},
 {'_id': 'Colorado', 'sum': 376.0},
 {'_id': 'Rhode Island', 'sum': 279.0},
 {'_id': 'Michigan', 'sum': 13127.0},
 {'_id': 'Oklahoma', 'sum': 8294.0},
 {'_id': 'Alaska', 'sum': 4663.0},
 {'_id': 'New Hampshire', 'sum': 1028.0},
 {'_id': 'Hawaii', 'sum': 161.0},
 {'_id': 'North Carolina', 'sum': 4527.0}]


In [41]:
# Virtual with face to face options: Total_Students summed per location.
# Stage 1 ($match): keep only documents whose 'Virtual' field is
# 'Virtual with face to face options'.
match_query = {
    '$match': {'Virtual': 'Virtual with face to face options'},
}

# Stage 2 ($group): one output document per Location_ID; 'sum' is the total
# of the 'Total_Students' field over the matched documents in the group.
group_query = {
    '$group': {
        '_id': '$Location_ID',
        'sum': {'$sum': '$Total_Students'},
    },
}

# Run the stages in order and materialize the cursor into a list of dicts.
pipeline = [match_query, group_query]
TS_VirtualFFOptions_results = list(characteristics.aggregate(pipeline))

In [42]:
# Preview the first ten per-location Total Students sums for schools that are
# virtual with face to face options.
pprint(TS_VirtualFFOptions_results[:10])

[{'_id': 'Tennessee', 'sum': 249.0},
 {'_id': 'Colorado', 'sum': 331.0},
 {'_id': 'Oregon', 'sum': 13931.0},
 {'_id': 'Maryland', 'sum': 70.0},
 {'_id': 'Michigan', 'sum': 20459.0},
 {'_id': 'Oklahoma', 'sum': 23714.0},
 {'_id': 'North Carolina', 'sum': 12395.0},
 {'_id': 'Hawaii', 'sum': 527.0},
 {'_id': 'New Hampshire', 'sum': 2976.0},
 {'_id': 'Alaska', 'sum': 13099.0}]


In [43]:
### High School - Not Virtual
# Build a DataFrame from the aggregation results, one row per result document.
HS_NotVirtual_results_df = pd.DataFrame(HS_NotVirtual_results)
# Report how many rows the frame holds, then preview the first ten.
print("Rows in DataFrame: ", HS_NotVirtual_results_df.shape[0])
HS_NotVirtual_results_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,sum
0,Rhode Island,55103.0
1,Colorado,326150.0
2,Maine,63034.0
3,Michigan,197934.0
4,Georgia,659627.0
5,Ohio,614757.0
6,U.S. Virgin Islands,0.0
7,Montana,19694.0
8,Oklahoma,219785.0
9,Bureau of Indian Education,11068.0


In [44]:
### Middle School - Not Virtual
# Build a DataFrame from the aggregation results, one row per result document.
MS_NotVirtual_results_df = pd.DataFrame(MS_NotVirtual_results)
# Report how many rows the frame holds, then preview the first ten.
print("Rows in DataFrame: ", MS_NotVirtual_results_df.shape[0])
MS_NotVirtual_results_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,sum
0,Colorado,199237.0
1,Maine,39561.0
2,Puerto Rico,64711.0
3,Georgia,407506.0
4,Michigan,122703.0
5,Ohio,380750.0
6,U.S. Virgin Islands,0.0
7,Montana,30720.0
8,Oklahoma,143024.0
9,Wyoming,22495.0


In [45]:
### Elementary - Not Virtual
# Build a DataFrame from the aggregation results, one row per result document.
PK_NotVirtual_results_df = pd.DataFrame(PK_NotVirtual_results)
# Report how many rows the frame holds, then preview the first ten.
print("Rows in DataFrame: ", PK_NotVirtual_results_df.shape[0])
PK_NotVirtual_results_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,sum
0,Rhode Island,61151.0
1,Colorado,391766.0
2,Maine,76587.0
3,Michigan,241963.0
4,Georgia,779028.0
5,Ohio,736426.0
6,U.S. Virgin Islands,0.0
7,Montana,66098.0
8,Oklahoma,308527.0
9,Bureau of Indian Education,17480.0


In [46]:
### Total Students - Not Virtual
# Build a DataFrame from the aggregation results, one row per result document.
TS_NotVirtual_results_df = pd.DataFrame(TS_NotVirtual_results)
# Report how many rows the frame holds, then preview the first ten.
print("Rows in DataFrame: ", TS_NotVirtual_results_df.shape[0])
TS_NotVirtual_results_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,sum
0,Colorado,851991.0
1,Maine,166507.0
2,Georgia,1709547.0
3,Ohio,1604831.0
4,Michigan,525194.0
5,Montana,112464.0
6,Oklahoma,627075.0
7,U.S. Virgin Islands,0.0
8,Wyoming,92772.0
9,Bureau of Indian Education,34724.0


In [47]:
### High School - Supplemental Virtual
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
HS_SupplementalVirtual_results_df = pd.DataFrame(HS_SupplementalVirtual_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(HS_SupplementalVirtual_results_df))
HS_SupplementalVirtual_results_df.head(10)

Rows in DataFrame:  27


Unnamed: 0,_id,sum
0,Maryland,337397.0
1,Massachusetts,359062.0
2,Colorado,10297.0
3,Connecticut,199117.0
4,Michigan,349386.0
5,Mississippi,4737.0
6,Oregon,156774.0
7,Vermont,27450.0
8,New York,1000861.0
9,Guam,11512.0


In [48]:
### Middle School - Supplemental Virtual
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
MS_SupplementalVirtual_results_df = pd.DataFrame(MS_SupplementalVirtual_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(MS_SupplementalVirtual_results_df))
MS_SupplementalVirtual_results_df.head(10)

Rows in DataFrame:  27


Unnamed: 0,_id,sum
0,Maryland,207151.0
1,Massachusetts,212396.0
2,Colorado,1429.0
3,Connecticut,116410.0
4,Michigan,192204.0
5,Mississippi,373.0
6,Oregon,43977.0
7,Vermont,17203.0
8,New York,590956.0
9,Guam,6262.0


In [49]:
### Elementary - Supplemental Virtual
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
PK_SupplementalVirtual_results_df = pd.DataFrame(PK_SupplementalVirtual_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(PK_SupplementalVirtual_results_df))
PK_SupplementalVirtual_results_df.head(10)

Rows in DataFrame:  27


Unnamed: 0,_id,sum
0,Mississippi,0.0
1,Maryland,407318.0
2,Connecticut,221937.0
3,Colorado,640.0
4,Massachusetts,407430.0
5,Michigan,357726.0
6,Oregon,32384.0
7,South Carolina,346727.0
8,Vermont,38613.0
9,New York,1153061.0


In [50]:
### Total Students - Supplemental Virtual
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
TS_SupplementalVirtual_results_df = pd.DataFrame(TS_SupplementalVirtual_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(TS_SupplementalVirtual_results_df))
TS_SupplementalVirtual_results_df.head(10)

Rows in DataFrame:  27


Unnamed: 0,_id,sum
0,Connecticut,497411.0
1,Colorado,10779.0
2,Mississippi,4230.0
3,Maryland,882457.0
4,Massachusetts,908008.0
5,Oregon,201684.0
6,Tennessee,128565.0
7,Michigan,830616.0
8,Montana,32541.0
9,Vermont,77661.0


In [51]:
### High School - Full Virtual
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
HS_FullVirtual_results_df = pd.DataFrame(HS_FullVirtual_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(HS_FullVirtual_results_df))
HS_FullVirtual_results_df.head(10)

Rows in DataFrame:  34


Unnamed: 0,_id,sum
0,Colorado,10392.0
1,Maine,825.0
2,Massachusetts,2221.0
3,Georgia,9666.0
4,Michigan,17312.0
5,Ohio,18892.0
6,U.S. Virgin Islands,4360.0
7,Oklahoma,18307.0
8,North Carolina,4853.0
9,Alaska,409.0


In [52]:
### Middle School - Full Virtual
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
MS_FullVirtual_results_df = pd.DataFrame(MS_FullVirtual_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(MS_FullVirtual_results_df))
MS_FullVirtual_results_df.head(10)

Rows in DataFrame:  34


Unnamed: 0,_id,sum
0,Colorado,5328.0
1,Maine,252.0
2,Massachusetts,964.0
3,Georgia,5924.0
4,Ohio,9245.0
5,Michigan,5058.0
6,Oklahoma,9557.0
7,U.S. Virgin Islands,2735.0
8,North Carolina,6682.0
9,Alaska,275.0


In [53]:
### Elementary - Full Virtual
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
PK_FullVirtual_results_df = pd.DataFrame(PK_FullVirtual_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(PK_FullVirtual_results_df))
PK_FullVirtual_results_df.head(10)

Rows in DataFrame:  34


Unnamed: 0,_id,sum
0,South Carolina,5854.0
1,Massachusetts,741.0
2,Colorado,5986.0
3,Maine,0.0
4,Michigan,7041.0
5,Georgia,6940.0
6,Ohio,14590.0
7,U.S. Virgin Islands,4802.0
8,Oklahoma,19304.0
9,New Hampshire,1003.0


In [54]:
### Total Students - Full Virtual
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
TS_FullVirtual_results_df = pd.DataFrame(TS_FullVirtual_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(TS_FullVirtual_results_df))
TS_FullVirtual_results_df.head(10)

Rows in DataFrame:  34


Unnamed: 0,_id,sum
0,South Carolina,16950.0
1,Massachusetts,3457.0
2,Colorado,19734.0
3,Maine,888.0
4,Michigan,26013.0
5,Georgia,20417.0
6,Ohio,39074.0
7,U.S. Virgin Islands,10993.0
8,Oklahoma,43324.0
9,Alaska,1142.0


In [55]:
### High School - Virtual with face to face options
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
HS_VirtualFFOptions_results_df = pd.DataFrame(HS_VirtualFFOptions_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(HS_VirtualFFOptions_results_df))
HS_VirtualFFOptions_results_df.head(10)

Rows in DataFrame:  16


Unnamed: 0,_id,sum
0,Maryland,0.0
1,Oregon,6240.0
2,Colorado,376.0
3,Rhode Island,279.0
4,Michigan,13127.0
5,Oklahoma,8294.0
6,Alaska,4663.0
7,New Hampshire,1028.0
8,Hawaii,161.0
9,North Carolina,4527.0


In [56]:
### Middle School - Virtual with face to face options
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
MS_VirtualFFOptions_results_df = pd.DataFrame(MS_VirtualFFOptions_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(MS_VirtualFFOptions_results_df))
MS_VirtualFFOptions_results_df.head(10)

Rows in DataFrame:  16


Unnamed: 0,_id,sum
0,North Carolina,2882.0
1,Oklahoma,5167.0
2,Michigan,3834.0
3,Maryland,0.0
4,Oregon,3162.0
5,Colorado,0.0
6,Tennessee,0.0
7,Rhode Island,0.0
8,Wisconsin,522.0
9,Minnesota,84.0


In [57]:
### Elementary - Virtual with face to face options
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
PK_VirtualFFOptions_results_df = pd.DataFrame(PK_VirtualFFOptions_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(PK_VirtualFFOptions_results_df))
PK_VirtualFFOptions_results_df.head(10)

Rows in DataFrame:  16


Unnamed: 0,_id,sum
0,Colorado,0.0
1,Maryland,70.0
2,Oregon,5724.0
3,Michigan,6100.0
4,Oklahoma,11967.0
5,Hawaii,286.0
6,North Carolina,5951.0
7,Alaska,6265.0
8,New Hampshire,1407.0
9,Virginia,66004.0


In [58]:
### Total Students - Virtual with face to face options
# Load the aggregation results (a list of {'_id', 'sum'} documents) into a DataFrame.
TS_VirtualFFOptions_results_df = pd.DataFrame(TS_VirtualFFOptions_results)
# Count the rows of the DataFrame itself rather than the source list, so the
# printed label describes the object it names.
print("Rows in DataFrame: ", len(TS_VirtualFFOptions_results_df))
TS_VirtualFFOptions_results_df.head(10)

Rows in DataFrame:  16


Unnamed: 0,_id,sum
0,Tennessee,249.0
1,Colorado,331.0
2,Oregon,13931.0
3,Maryland,70.0
4,Michigan,20459.0
5,Oklahoma,23714.0
6,North Carolina,12395.0
7,Hawaii,527.0
8,New Hampshire,2976.0
9,Alaska,13099.0


In [59]:
## Rename the High School / Not Virtual value column
# Replace the generic 'sum' label with one naming the grade band and virtual status.
HS_NotVirtual_results_df = HS_NotVirtual_results_df.rename(
    {"sum": "HS_NotVirtual_Sum"}, axis="columns"
)
HS_NotVirtual_results_df.head()

Unnamed: 0,_id,HS_NotVirtual_Sum
0,Rhode Island,55103.0
1,Colorado,326150.0
2,Maine,63034.0
3,Michigan,197934.0
4,Georgia,659627.0


In [60]:
## Rename the Middle School / Not Virtual value column
# Replace the generic 'sum' label with one naming the grade band and virtual status.
MS_NotVirtual_results_df = MS_NotVirtual_results_df.rename(
    {"sum": "MS_NotVirtual_Sum"}, axis="columns"
)
MS_NotVirtual_results_df.head()

Unnamed: 0,_id,MS_NotVirtual_Sum
0,Colorado,199237.0
1,Maine,39561.0
2,Puerto Rico,64711.0
3,Georgia,407506.0
4,Michigan,122703.0


In [61]:
## Rename the Elementary / Not Virtual value column
# Replace the generic 'sum' label with one naming the grade band and virtual status.
PK_NotVirtual_results_df = PK_NotVirtual_results_df.rename(
    {"sum": "PK_NotVirtual_Sum"}, axis="columns"
)
PK_NotVirtual_results_df.head()

Unnamed: 0,_id,PK_NotVirtual_Sum
0,Rhode Island,61151.0
1,Colorado,391766.0
2,Maine,76587.0
3,Michigan,241963.0
4,Georgia,779028.0


In [62]:
## Rename the Total Students / Not Virtual value column
# Replace the generic 'sum' label with one naming the measure and virtual status.
TS_NotVirtual_results_df = TS_NotVirtual_results_df.rename(
    {"sum": "TS_NotVirtual_Sum"}, axis="columns"
)
TS_NotVirtual_results_df.head()

Unnamed: 0,_id,TS_NotVirtual_Sum
0,Colorado,851991.0
1,Maine,166507.0
2,Georgia,1709547.0
3,Ohio,1604831.0
4,Michigan,525194.0


In [63]:
## Rename the High School / Supplemental Virtual value column
# Replace the generic 'sum' label with one naming the grade band and virtual status.
HS_SupplementalVirtual_results_df = HS_SupplementalVirtual_results_df.rename(
    {"sum": "HS_SupplementalVirtual_Sum"}, axis="columns"
)
HS_SupplementalVirtual_results_df.head()

Unnamed: 0,_id,HS_SupplementalVirtual_Sum
0,Maryland,337397.0
1,Massachusetts,359062.0
2,Colorado,10297.0
3,Connecticut,199117.0
4,Michigan,349386.0


In [64]:
## Rename the Middle School / Supplemental Virtual value column
# Replace the generic 'sum' label with one naming the grade band and virtual status.
MS_SupplementalVirtual_results_df = MS_SupplementalVirtual_results_df.rename(
    {"sum": "MS_SupplementalVirtual_Sum"}, axis="columns"
)
MS_SupplementalVirtual_results_df.head()

Unnamed: 0,_id,MS_SupplementalVirtual_Sum
0,Maryland,207151.0
1,Massachusetts,212396.0
2,Colorado,1429.0
3,Connecticut,116410.0
4,Michigan,192204.0


In [65]:
## Rename the Elementary / Supplemental Virtual value column
# Replace the generic 'sum' label with one naming the grade band and virtual status.
PK_SupplementalVirtual_results_df = PK_SupplementalVirtual_results_df.rename(
    {"sum": "PK_SupplementalVirtual_Sum"}, axis="columns"
)
PK_SupplementalVirtual_results_df.head()

Unnamed: 0,_id,PK_SupplementalVirtual_Sum
0,Mississippi,0.0
1,Maryland,407318.0
2,Connecticut,221937.0
3,Colorado,640.0
4,Massachusetts,407430.0


In [66]:
## Rename the Total Students / Supplemental Virtual value column
# Replace the generic 'sum' label with one naming the measure and virtual status.
TS_SupplementalVirtual_results_df = TS_SupplementalVirtual_results_df.rename(
    {"sum": "TS_SupplementalVirtual_Sum"}, axis="columns"
)
TS_SupplementalVirtual_results_df.head()

Unnamed: 0,_id,TS_SupplementalVirtual_Sum
0,Connecticut,497411.0
1,Colorado,10779.0
2,Mississippi,4230.0
3,Maryland,882457.0
4,Massachusetts,908008.0


In [67]:
## Rename High School FacetoFaceOptions column
# Replace the generic 'sum' label with one naming the grade band and virtual
# status, so the column is identifiable after the frames are merged on '_id'.
HS_VirtualFFOptions_results_df = HS_VirtualFFOptions_results_df.rename(columns={'sum': 'HS_VirtualFFOptions_Sum'})
# Preview the first ten rows instead of rendering the entire frame.
HS_VirtualFFOptions_results_df.head(10)

Unnamed: 0,_id,HS_VirtualFFOptions_Sum
0,Maryland,0.0
1,Oregon,6240.0
2,Colorado,376.0
3,Rhode Island,279.0
4,Michigan,13127.0
5,Oklahoma,8294.0
6,Alaska,4663.0
7,New Hampshire,1028.0
8,Hawaii,161.0
9,North Carolina,4527.0


In [68]:
## Rename Middle School FacetoFaceOptions column
# Replace the generic 'sum' label with one naming the grade band and virtual
# status, so the column is identifiable after the frames are merged on '_id'.
MS_VirtualFFOptions_results_df = MS_VirtualFFOptions_results_df.rename(columns={'sum': 'MS_VirtualFFOptions_Sum'})
# Preview the first ten rows instead of rendering the entire frame.
MS_VirtualFFOptions_results_df.head(10)

Unnamed: 0,_id,MS_VirtualFFOptions_Sum
0,North Carolina,2882.0
1,Oklahoma,5167.0
2,Michigan,3834.0
3,Maryland,0.0
4,Oregon,3162.0
5,Colorado,0.0
6,Tennessee,0.0
7,Rhode Island,0.0
8,Wisconsin,522.0
9,Minnesota,84.0


In [69]:
## Rename Elementary FacetoFaceOptions column
# Replace the generic 'sum' label with one naming the grade band and virtual
# status, so the column is identifiable after the frames are merged on '_id'.
PK_VirtualFFOptions_results_df = PK_VirtualFFOptions_results_df.rename(columns={'sum': 'PK_VirtualFFOptions_Sum'})
# Preview the first ten rows instead of rendering the entire frame.
PK_VirtualFFOptions_results_df.head(10)

Unnamed: 0,_id,PK_VirtualFFOptions_Sum
0,Colorado,0.0
1,Maryland,70.0
2,Oregon,5724.0
3,Michigan,6100.0
4,Oklahoma,11967.0
5,Hawaii,286.0
6,North Carolina,5951.0
7,Alaska,6265.0
8,New Hampshire,1407.0
9,Virginia,66004.0


In [70]:
## Rename Total Students FacetoFaceOptions column
# Replace the generic 'sum' label with one naming the measure and virtual
# status, so the column is identifiable after the frames are merged on '_id'.
TS_VirtualFFOptions_results_df = TS_VirtualFFOptions_results_df.rename(columns={'sum': 'TS_VirtualFFOptions_Sum'})
# Preview the first ten rows instead of rendering the entire frame.
TS_VirtualFFOptions_results_df.head(10)

Unnamed: 0,_id,TS_VirtualFFOptions_Sum
0,Tennessee,249.0
1,Colorado,331.0
2,Oregon,13931.0
3,Maryland,70.0
4,Michigan,20459.0
5,Oklahoma,23714.0
6,North Carolina,12395.0
7,Hawaii,527.0
8,New Hampshire,2976.0
9,Alaska,13099.0


In [71]:
## Rename High School FullVirtual column
# Replace the generic 'sum' label with one naming the grade band and virtual
# status, so the column is identifiable after the frames are merged on '_id'.
HS_FullVirtual_results_df = HS_FullVirtual_results_df.rename(columns={"sum": "HS_FullVirtual_Sum"})
# Preview the first ten rows instead of rendering the entire frame.
HS_FullVirtual_results_df.head(10)

Unnamed: 0,_id,HS_FullVirtual_Sum
0,Colorado,10392.0
1,Maine,825.0
2,Massachusetts,2221.0
3,Georgia,9666.0
4,Michigan,17312.0
5,Ohio,18892.0
6,U.S. Virgin Islands,4360.0
7,Oklahoma,18307.0
8,North Carolina,4853.0
9,Alaska,409.0


In [72]:
## Rename Middle School FullVirtual column
# Replace the generic 'sum' label with one naming the grade band and virtual
# status, so the column is identifiable after the frames are merged on '_id'.
MS_FullVirtual_results_df = MS_FullVirtual_results_df.rename(columns={"sum": "MS_FullVirtual_Sum"})
# Preview the first ten rows instead of rendering the entire frame.
MS_FullVirtual_results_df.head(10)

Unnamed: 0,_id,MS_FullVirtual_Sum
0,Colorado,5328.0
1,Maine,252.0
2,Massachusetts,964.0
3,Georgia,5924.0
4,Ohio,9245.0
5,Michigan,5058.0
6,Oklahoma,9557.0
7,U.S. Virgin Islands,2735.0
8,North Carolina,6682.0
9,Alaska,275.0


In [73]:
## Rename Elementary FullVirtual column
# Replace the generic 'sum' label with one naming the grade band and virtual
# status, so the column is identifiable after the frames are merged on '_id'.
PK_FullVirtual_results_df = PK_FullVirtual_results_df.rename(columns={"sum": "PK_FullVirtual_Sum"})
# Preview the first ten rows instead of rendering the entire frame.
PK_FullVirtual_results_df.head(10)

Unnamed: 0,_id,PK_FullVirtual_Sum
0,South Carolina,5854.0
1,Massachusetts,741.0
2,Colorado,5986.0
3,Maine,0.0
4,Michigan,7041.0
5,Georgia,6940.0
6,Ohio,14590.0
7,U.S. Virgin Islands,4802.0
8,Oklahoma,19304.0
9,New Hampshire,1003.0


In [74]:
## Rename Total Students FullVirtual column
# Replace the generic 'sum' label with one naming the measure and virtual
# status, so the column is identifiable after the frames are merged on '_id'.
TS_FullVirtual_results_df = TS_FullVirtual_results_df.rename(columns={"sum": "TS_FullVirtual_Sum"})
# Preview the first ten rows instead of rendering the entire frame.
TS_FullVirtual_results_df.head(10)

Unnamed: 0,_id,TS_FullVirtual_Sum
0,South Carolina,16950.0
1,Massachusetts,3457.0
2,Colorado,19734.0
3,Maine,888.0
4,Michigan,26013.0
5,Georgia,20417.0
6,Ohio,39074.0
7,U.S. Virgin Islands,10993.0
8,Oklahoma,43324.0
9,Alaska,1142.0


In [75]:
# Combine all of the renamed per-category frames into one wide frame keyed on "_id".
# The list order fixes the column order of the result. Each join is an outer
# join, so an "_id" that is missing from some frames is kept and simply has
# NaN in those frames' columns.
frames_to_join = [
    TS_VirtualFFOptions_results_df,
    HS_VirtualFFOptions_results_df,
    MS_VirtualFFOptions_results_df,
    PK_VirtualFFOptions_results_df,
    TS_FullVirtual_results_df,
    HS_FullVirtual_results_df,
    MS_FullVirtual_results_df,
    PK_FullVirtual_results_df,
    TS_SupplementalVirtual_results_df,
    HS_SupplementalVirtual_results_df,
    MS_SupplementalVirtual_results_df,
    PK_SupplementalVirtual_results_df,
    TS_NotVirtual_results_df,
    HS_NotVirtual_results_df,
    MS_NotVirtual_results_df,
    PK_NotVirtual_results_df,
]

# Start from the first frame and fold the remaining ones in, left to right.
merge_df = frames_to_join[0]
for next_frame in frames_to_join[1:]:
    merge_df = merge_df.merge(next_frame, on="_id", how="outer")

merge_df.head()

Unnamed: 0,_id,TS_VirtualFFOptions_Sum,HS_VirtualFFOptions_Sum,MS_VirtualFFOptions_Sum,PK_VirtualFFOptions_Sum,TS_FullVirtual_Sum,HS_FullVirtual_Sum,MS_FullVirtual_Sum,PK_FullVirtual_Sum,TS_SupplementalVirtual_Sum,HS_SupplementalVirtual_Sum,MS_SupplementalVirtual_Sum,PK_SupplementalVirtual_Sum,TS_NotVirtual_Sum,HS_NotVirtual_Sum,MS_NotVirtual_Sum,PK_NotVirtual_Sum
0,Tennessee,249.0,253.0,0.0,0.0,10580.0,4450.0,2893.0,4146.0,128565.0,132773.0,16458.0,6480.0,844672.0,231857.0,209166.0,450990.0
1,Colorado,331.0,376.0,0.0,0.0,19734.0,10392.0,5328.0,5986.0,10779.0,10297.0,1429.0,640.0,851991.0,326150.0,199237.0,391766.0
2,Oregon,13931.0,6240.0,3162.0,5724.0,13297.0,5707.0,3509.0,5199.0,201684.0,156774.0,43977.0,32384.0,318161.0,50747.0,83219.0,194533.0
3,Maryland,70.0,0.0,0.0,70.0,,,,,882457.0,337397.0,207151.0,407318.0,,,,
4,Michigan,20459.0,13127.0,3834.0,6100.0,26013.0,17312.0,5058.0,7041.0,830616.0,349386.0,192204.0,357726.0,525194.0,197934.0,122703.0,241963.0


In [76]:
# "_id" carries the state / territory names (see the preview output), so give
# the column a header that says so.
merge_df = merge_df.rename({"_id": "States_Territories"}, axis="columns")
merge_df.head()

Unnamed: 0,States_Territories,TS_VirtualFFOptions_Sum,HS_VirtualFFOptions_Sum,MS_VirtualFFOptions_Sum,PK_VirtualFFOptions_Sum,TS_FullVirtual_Sum,HS_FullVirtual_Sum,MS_FullVirtual_Sum,PK_FullVirtual_Sum,TS_SupplementalVirtual_Sum,HS_SupplementalVirtual_Sum,MS_SupplementalVirtual_Sum,PK_SupplementalVirtual_Sum,TS_NotVirtual_Sum,HS_NotVirtual_Sum,MS_NotVirtual_Sum,PK_NotVirtual_Sum
0,Tennessee,249.0,253.0,0.0,0.0,10580.0,4450.0,2893.0,4146.0,128565.0,132773.0,16458.0,6480.0,844672.0,231857.0,209166.0,450990.0
1,Colorado,331.0,376.0,0.0,0.0,19734.0,10392.0,5328.0,5986.0,10779.0,10297.0,1429.0,640.0,851991.0,326150.0,199237.0,391766.0
2,Oregon,13931.0,6240.0,3162.0,5724.0,13297.0,5707.0,3509.0,5199.0,201684.0,156774.0,43977.0,32384.0,318161.0,50747.0,83219.0,194533.0
3,Maryland,70.0,0.0,0.0,70.0,,,,,882457.0,337397.0,207151.0,407318.0,,,,
4,Michigan,20459.0,13127.0,3834.0,6100.0,26013.0,17312.0,5058.0,7041.0,830616.0,349386.0,192204.0,357726.0,525194.0,197934.0,122703.0,241963.0


In [77]:
# Order the rows alphabetically by state / territory name.
# The original row labels travel with the rows, so the index is no longer sequential afterwards.
merge_df = merge_df.sort_values("States_Territories")
merge_df.head()

Unnamed: 0,States_Territories,TS_VirtualFFOptions_Sum,HS_VirtualFFOptions_Sum,MS_VirtualFFOptions_Sum,PK_VirtualFFOptions_Sum,TS_FullVirtual_Sum,HS_FullVirtual_Sum,MS_FullVirtual_Sum,PK_FullVirtual_Sum,TS_SupplementalVirtual_Sum,HS_SupplementalVirtual_Sum,MS_SupplementalVirtual_Sum,PK_SupplementalVirtual_Sum,TS_NotVirtual_Sum,HS_NotVirtual_Sum,MS_NotVirtual_Sum,PK_NotVirtual_Sum
24,Alabama,,,,,15288.0,6943.0,4240.0,5559.0,,,,,717105.0,265355.0,169842.0,336204.0
9,Alaska,13099.0,4663.0,3087.0,6265.0,1142.0,409.0,275.0,544.0,95762.0,36181.0,22173.0,44610.0,19869.0,5930.0,4392.0,10755.0
34,Arizona,,,,,57032.0,29426.0,13515.0,19473.0,,,,,1052166.0,412284.0,253530.0,469326.0
22,Arkansas,,,,,7885.0,3731.0,2097.0,2895.0,248078.0,170674.0,97878.0,14288.0,230342.0,6544.0,15547.0,209415.0
47,Bureau of Indian Education,,,,,,,,,,,,,34724.0,11068.0,8534.0,17480.0


In [78]:
# Renumber the rows 0..n-1 so the index follows the alphabetical order.
# drop=True discards the old labels directly instead of first materialising
# them as an "index" column and then dropping it; that also avoids removing a
# real data column should one ever be named "index".
merge_df = merge_df.reset_index(drop=True)
merge_df.head()

Unnamed: 0,States_Territories,TS_VirtualFFOptions_Sum,HS_VirtualFFOptions_Sum,MS_VirtualFFOptions_Sum,PK_VirtualFFOptions_Sum,TS_FullVirtual_Sum,HS_FullVirtual_Sum,MS_FullVirtual_Sum,PK_FullVirtual_Sum,TS_SupplementalVirtual_Sum,HS_SupplementalVirtual_Sum,MS_SupplementalVirtual_Sum,PK_SupplementalVirtual_Sum,TS_NotVirtual_Sum,HS_NotVirtual_Sum,MS_NotVirtual_Sum,PK_NotVirtual_Sum
0,Alabama,,,,,15288.0,6943.0,4240.0,5559.0,,,,,717105.0,265355.0,169842.0,336204.0
1,Alaska,13099.0,4663.0,3087.0,6265.0,1142.0,409.0,275.0,544.0,95762.0,36181.0,22173.0,44610.0,19869.0,5930.0,4392.0,10755.0
2,Arizona,,,,,57032.0,29426.0,13515.0,19473.0,,,,,1052166.0,412284.0,253530.0,469326.0
3,Arkansas,,,,,7885.0,3731.0,2097.0,2895.0,248078.0,170674.0,97878.0,14288.0,230342.0,6544.0,15547.0,209415.0
4,Bureau of Indian Education,,,,,,,,,,,,,34724.0,11068.0,8534.0,17480.0


In [79]:
# Persist the merged frame as a CSV file; index=False keeps the row numbers out of the file.
# NOTE(review): the mongoimport comment in the first cell names this same file
# as the import source - confirm that overwriting it here is intended.
merge_df.to_csv(
    path_or_buf="Compiled_Public_School_Characteristics.csv",
    index=False,
)

In [80]:
# Bring in the GeoJSON helpers and load the cleaned Public School
# Characteristics CSV into a frame that will be converted to GeoJSON.
import pandas as pd
from pandas_geojson import to_geojson, write_geojson

# index_col=False: do not use any CSV column as the frame's index.
# NOTE(review): the preview shows an "Unnamed: 0" column, presumably an index
# saved by an earlier to_csv call - confirm whether it is still needed.
df = pd.read_csv(
    "Public_School_Characteristics_2020-21_cleaned.csv",
    index_col=False,
)
df.head()

Unnamed: 0.1,Unnamed: 0,Location_ID,School_Name,Location_Abbreviation,Location_Zip,Virtual,Elementary(PK-G5),Middle(G6-G8),High(G9-G13),Total_Free/Reduced_Lunch,Total_Students,Total_Teachers,Charter,Magnet,Latitude,Longitude
0,0,Alabama,Albertville Middle School,AL,35950,Not Virtual,0.0,908.0,0.0,332.0,908.0,42.0,No,No,34.2602,-86.2062
1,1,Alabama,Albertville High School,AL,35950,Not Virtual,0.0,0.0,2033.0,456.0,1606.0,82.0,No,No,34.2622,-86.2049
2,2,Alabama,Albertville Intermediate School,AL,35950,Not Virtual,439.0,452.0,0.0,330.0,891.0,41.0,No,No,34.2733,-86.2201
3,3,Alabama,Albertville Elementary School,AL,35950,Not Virtual,908.0,0.0,0.0,397.0,908.0,49.0,No,No,34.2527,-86.221806
4,4,Alabama,Albertville Kindergarten and PreK,AL,35951,Not Virtual,556.0,0.0,0.0,181.0,556.0,30.0,No,No,34.2898,-86.1933


In [81]:
# Columns of df that are passed along as the properties of the GeoJSON output.
feature_properties = [
    'School_Name',
    'Virtual',
    'Total_Students',
    'Total_Teachers',
    'Elementary(PK-G5)',
    'Middle(G6-G8)',
    'High(G9-G13)',
    'Total_Free/Reduced_Lunch',
]

# Convert df to GeoJSON, taking coordinates from the Latitude / Longitude columns.
geo_json = to_geojson(
    df=df,
    lat='Latitude',
    lon='Longitude',
    properties=feature_properties,
)

In [82]:
# Write the GeoJSON to the static/ folder.
import os

# Make sure the target folder exists first, so the write does not depend on
# "static" having been created by hand (e.g. on a fresh checkout).
os.makedirs('static', exist_ok=True)
write_geojson(geo_json,'static/PSC.geojson')