# Introduction to Mongo

In [1]:
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [3]:
# confirm that our new database was created
print(mongo.list_database_names())

['PublicSchool2021', 'admin', 'autosaurus', 'classDB', 'config', 'epa', 'fruits_db', 'local', 'met', 'petsitly_marketing', 'uk_food']


In [4]:
# assign the database to a variable name
db = mongo['PublicSchool2021']

In [5]:
# review the collections in our new database
print(db.list_collection_names())

['characteristics']


In [6]:
# review a document in the customer_list collection
print(db.characteristics.find_one())

{'_id': ObjectId('643c5f6a8b27b2e44f2f834d'), '': 0, 'Location_ID': 'Alabama', 'School_Name': 'Albertville Middle School', 'Location_Abbreviation': 'AL', 'Location_Zip': 35950, 'Virtual': 'Not Virtual', 'Elementary(PK-G5)': 0.0, 'Middle(G6-G8)': 908.0, 'High(G9-G13)': 0.0, 'Total_Free/Reduced_Lunch': 332.0, 'Total_Students': 908.0, 'Total_Teachers': 42.0, 'Charter': 'No', 'Magnet': 'No', 'Latitude': 34.2602, 'Longitude': -86.2062}


In [7]:
# assign the collection to a variable
characteristics = db['characteristics']

In [8]:
# Filter results by name
query = {'STATE_ID': 'Montana'}
results = characteristics.find(query)
for result in results:
    print(result)

In [9]:
#Not Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Not Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'count': { '$sum': 1 }}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
NotVirtual_results = list(characteristics.aggregate(pipeline))

In [10]:
#Not Virtual
pprint(NotVirtual_results[0:10])

[{'_id': 'Wyoming', 'count': 364},
 {'_id': 'District of Columbia', 'count': 234},
 {'_id': 'Bureau of Indian Education', 'count': 174},
 {'_id': 'Idaho', 'count': 746},
 {'_id': 'Ohio', 'count': 3516},
 {'_id': 'Puerto Rico', 'count': 849},
 {'_id': 'U.S. Virgin Islands', 'count': 1},
 {'_id': 'Arizona', 'count': 2263},
 {'_id': 'Michigan', 'count': 1409},
 {'_id': 'Oklahoma', 'count': 1768}]


In [11]:
# Convert mongo result to Pandas DataFrame
NotVirtual_df = pd.DataFrame(NotVirtual_results)

print("Rows in DataFrame: ", len(NotVirtual_df))
NotVirtual_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,count
0,Wyoming,364
1,District of Columbia,234
2,Bureau of Indian Education,174
3,Idaho,746
4,Ohio,3516
5,Puerto Rico,849
6,U.S. Virgin Islands,1
7,Arizona,2263
8,Michigan,1409
9,Oklahoma,1768


In [12]:
#Not Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Not Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Elementary(PK-G5)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
PK_NotVirtual_results = list(characteristics.aggregate(pipeline))

In [13]:
pprint(PK_NotVirtual_results[0:10])

[{'_id': 'Minnesota', 'sum': 390306.0},
 {'_id': 'North Carolina', 'sum': 618593.0},
 {'_id': 'Alabama', 'sum': 336204.0},
 {'_id': 'Rhode Island', 'sum': 61151.0},
 {'_id': 'Michigan', 'sum': 241963.0},
 {'_id': 'Arizona', 'sum': 469326.0},
 {'_id': 'Oklahoma', 'sum': 308527.0},
 {'_id': 'South Dakota', 'sum': 35921.0},
 {'_id': 'Tennessee', 'sum': 450990.0},
 {'_id': 'Wyoming', 'sum': 41981.0}]


In [14]:
#Not Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Not Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Middle(G6-G8)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
MS_NotVirtual_results = list(characteristics.aggregate(pipeline))

In [15]:
pprint(MS_NotVirtual_results[0:10])

[{'_id': 'District of Columbia', 'sum': 16849.0},
 {'_id': 'Wyoming', 'sum': 22495.0},
 {'_id': 'Idaho', 'sum': 67680.0},
 {'_id': 'Ohio', 'sum': 380750.0},
 {'_id': 'Bureau of Indian Education', 'sum': 8534.0},
 {'_id': 'Puerto Rico', 'sum': 64711.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 0.0},
 {'_id': 'Michigan', 'sum': 122703.0},
 {'_id': 'Arizona', 'sum': 253530.0},
 {'_id': 'Oklahoma', 'sum': 143024.0}]


In [16]:
#Not Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Not Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$High(G9-G13)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
HS_NotVirtual_results = list(characteristics.aggregate(pipeline))

In [17]:
pprint(HS_NotVirtual_results[0:10])

[{'_id': 'District of Columbia', 'sum': 23735.0},
 {'_id': 'Wyoming', 'sum': 35614.0},
 {'_id': 'Idaho', 'sum': 111545.0},
 {'_id': 'Ohio', 'sum': 614757.0},
 {'_id': 'Bureau of Indian Education', 'sum': 11068.0},
 {'_id': 'Puerto Rico', 'sum': 110806.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 0.0},
 {'_id': 'Michigan', 'sum': 197934.0},
 {'_id': 'Arizona', 'sum': 412284.0},
 {'_id': 'Oklahoma', 'sum': 219785.0}]


In [18]:
#Not Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Not Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Total_Students'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
TS_NotVirtual_results = list(characteristics.aggregate(pipeline))

In [19]:
pprint(TS_NotVirtual_results[0:10])

[{'_id': 'Wyoming', 'sum': 92772.0},
 {'_id': 'District of Columbia', 'sum': 89147.0},
 {'_id': 'Bureau of Indian Education', 'sum': 34724.0},
 {'_id': 'Idaho', 'sum': 280486.0},
 {'_id': 'Ohio', 'sum': 1604831.0},
 {'_id': 'Puerto Rico', 'sum': 276413.0},
 {'_id': 'U.S. Virgin Islands', 'sum': 0.0},
 {'_id': 'Arizona', 'sum': 1052166.0},
 {'_id': 'Michigan', 'sum': 525194.0},
 {'_id': 'Oklahoma', 'sum': 627075.0}]


In [20]:
#Full Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Full Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Elementary(PK-G5)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
PK_FullVirtual_results = list(characteristics.aggregate(pipeline))

In [21]:
pprint(PK_FullVirtual_results[0:10])

[{'_id': 'Ohio', 'sum': 14590.0},
 {'_id': 'Idaho', 'sum': 11466.0},
 {'_id': 'District of Columbia', 'sum': 168.0},
 {'_id': 'Kentucky', 'sum': 0.0},
 {'_id': 'Arizona', 'sum': 19473.0},
 {'_id': 'Michigan', 'sum': 7041.0},
 {'_id': 'South Carolina', 'sum': 5854.0},
 {'_id': 'Alabama', 'sum': 5559.0},
 {'_id': 'Oklahoma', 'sum': 19304.0},
 {'_id': 'Minnesota', 'sum': 3641.0}]


In [22]:
#Full Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Full Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Middle(G6-G8)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
MS_FullVirtual_results = list(characteristics.aggregate(pipeline))

In [23]:
pprint(MS_FullVirtual_results[0:10])

[{'_id': 'Idaho', 'sum': 5443.0},
 {'_id': 'Ohio', 'sum': 9245.0},
 {'_id': 'District of Columbia', 'sum': 129.0},
 {'_id': 'Kentucky', 'sum': 282.0},
 {'_id': 'Arizona', 'sum': 13515.0},
 {'_id': 'Michigan', 'sum': 5058.0},
 {'_id': 'Oklahoma', 'sum': 9557.0},
 {'_id': 'Alabama', 'sum': 4240.0},
 {'_id': 'South Carolina', 'sum': 4571.0},
 {'_id': 'Minnesota', 'sum': 3013.0}]


In [24]:
#Full Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Full Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$High(G9-G13)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
HS_FullVirtual_results = list(characteristics.aggregate(pipeline))

In [25]:
pprint(HS_FullVirtual_results[0:10])

[{'_id': 'District of Columbia', 'sum': 0.0},
 {'_id': 'Idaho', 'sum': 7338.0},
 {'_id': 'Ohio', 'sum': 18892.0},
 {'_id': 'Kentucky', 'sum': 926.0},
 {'_id': 'Arizona', 'sum': 29426.0},
 {'_id': 'Michigan', 'sum': 17312.0},
 {'_id': 'Oklahoma', 'sum': 18307.0},
 {'_id': 'Minnesota', 'sum': 6634.0},
 {'_id': 'South Carolina', 'sum': 8273.0},
 {'_id': 'Alabama', 'sum': 6943.0}]


In [26]:
#Full Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Full Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Total_Students'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
TS_FullVirtual_results = list(characteristics.aggregate(pipeline))

In [27]:
pprint(TS_FullVirtual_results[0:10])

[{'_id': 'Minnesota', 'sum': 12109.0},
 {'_id': 'South Dakota', 'sum': 4023.0},
 {'_id': 'Alabama', 'sum': 15288.0},
 {'_id': 'South Carolina', 'sum': 16950.0},
 {'_id': 'Michigan', 'sum': 26013.0},
 {'_id': 'Arizona', 'sum': 57032.0},
 {'_id': 'Oklahoma', 'sum': 43324.0},
 {'_id': 'Tennessee', 'sum': 10580.0},
 {'_id': 'Texas', 'sum': 29521.0},
 {'_id': 'Kentucky', 'sum': 1025.0}]


In [28]:
#Supplemental Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Supplemental Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Elementary(PK-G5)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
PK_SupplementalVirtual_results = list(characteristics.aggregate(pipeline))

In [29]:
pprint(PK_SupplementalVirtual_results[0:10])

[{'_id': 'Minnesota', 'sum': 0.0},
 {'_id': 'New York', 'sum': 1153061.0},
 {'_id': 'Colorado', 'sum': 640.0},
 {'_id': 'Tennessee', 'sum': 6480.0},
 {'_id': 'Iowa', 'sum': 164010.0},
 {'_id': 'Montana', 'sum': 394.0},
 {'_id': 'Mississippi', 'sum': 0.0},
 {'_id': 'New Hampshire', 'sum': 0.0},
 {'_id': 'Massachusetts', 'sum': 407430.0},
 {'_id': 'Arkansas', 'sum': 14288.0}]


In [30]:
#Supplemental Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Supplemental Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Middle(G6-G8)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
MS_SupplementalVirtual_results = list(characteristics.aggregate(pipeline))

In [31]:
pprint(MS_SupplementalVirtual_results[0:10])

[{'_id': 'Minnesota', 'sum': 0.0},
 {'_id': 'New York', 'sum': 590956.0},
 {'_id': 'Colorado', 'sum': 1429.0},
 {'_id': 'Tennessee', 'sum': 16458.0},
 {'_id': 'Iowa', 'sum': 89726.0},
 {'_id': 'Montana', 'sum': 4246.0},
 {'_id': 'Mississippi', 'sum': 373.0},
 {'_id': 'New Hampshire', 'sum': 0.0},
 {'_id': 'Massachusetts', 'sum': 212396.0},
 {'_id': 'Arkansas', 'sum': 97878.0}]


In [32]:
#Supplemental Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Supplemental Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$High(G9-G13)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
HS_SupplementalVirtual_results = list(characteristics.aggregate(pipeline))

In [33]:
pprint(HS_SupplementalVirtual_results[0:10])

[{'_id': 'Maryland', 'sum': 337397.0},
 {'_id': 'Virginia', 'sum': 435485.0},
 {'_id': 'Alaska', 'sum': 36181.0},
 {'_id': 'Vermont', 'sum': 27450.0},
 {'_id': 'Massachusetts', 'sum': 359062.0},
 {'_id': 'Arkansas', 'sum': 170674.0},
 {'_id': 'Mississippi', 'sum': 4737.0},
 {'_id': 'Montana', 'sum': 35031.0},
 {'_id': 'New Hampshire', 'sum': 26.0},
 {'_id': 'Iowa', 'sum': 156074.0}]


In [34]:
#Supplemental Virtual
# Write a match query 
match_query = {'$match': {'Virtual': 'Supplemental Virtual'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Total_Students'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
TS_SupplementalVirtual_results = list(characteristics.aggregate(pipeline))

In [35]:
pprint(TS_SupplementalVirtual_results[0:10])

[{'_id': 'Minnesota', 'sum': 0.0},
 {'_id': 'New York', 'sum': 2578055.0},
 {'_id': 'Colorado', 'sum': 10779.0},
 {'_id': 'Tennessee', 'sum': 128565.0},
 {'_id': 'Iowa', 'sum': 378134.0},
 {'_id': 'Montana', 'sum': 32541.0},
 {'_id': 'Mississippi', 'sum': 4230.0},
 {'_id': 'New Hampshire', 'sum': 18.0},
 {'_id': 'Massachusetts', 'sum': 908008.0},
 {'_id': 'Arkansas', 'sum': 248078.0}]


In [36]:
#Virtual with face to face options
# Write a match query 
match_query = {'$match': {'Virtual': 'Virtual with face to face options'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Elementary(PK-G5)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
PK_VirtualFFOptions_results = list(characteristics.aggregate(pipeline))

In [37]:
pprint(PK_VirtualFFOptions_results[0:10])

[{'_id': 'Minnesota', 'sum': 66.0},
 {'_id': 'Tennessee', 'sum': 0.0},
 {'_id': 'Colorado', 'sum': 0.0},
 {'_id': 'Rhode Island', 'sum': 0.0},
 {'_id': 'North Carolina', 'sum': 5951.0},
 {'_id': 'New Hampshire', 'sum': 1407.0},
 {'_id': 'Oregon', 'sum': 5724.0},
 {'_id': 'Washington', 'sum': 2888.0},
 {'_id': 'South Dakota', 'sum': 312.0},
 {'_id': 'Wisconsin', 'sum': 1046.0}]


In [38]:
#Virtual with face to face options
# Write a match query 
match_query = {'$match': {'Virtual': 'Virtual with face to face options'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Middle(G6-G8)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
MS_VirtualFFOptions_results = list(characteristics.aggregate(pipeline))

In [39]:
pprint(MS_VirtualFFOptions_results[0:10])

[{'_id': 'Virginia', 'sum': 34470.0},
 {'_id': 'Maryland', 'sum': 0.0},
 {'_id': 'Alaska', 'sum': 3087.0},
 {'_id': 'South Dakota', 'sum': 186.0},
 {'_id': 'Oregon', 'sum': 3162.0},
 {'_id': 'New Hampshire', 'sum': 757.0},
 {'_id': 'North Carolina', 'sum': 2882.0},
 {'_id': 'Wisconsin', 'sum': 522.0},
 {'_id': 'Tennessee', 'sum': 0.0},
 {'_id': 'Colorado', 'sum': 0.0}]


In [40]:
#Virtual with face to face options
# Write a match query 
match_query = {'$match': {'Virtual': 'Virtual with face to face options'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$High(G9-G13)'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
HS_VirtualFFOptions_results = list(characteristics.aggregate(pipeline))

In [41]:
pprint(HS_VirtualFFOptions_results[0:10])

[{'_id': 'Minnesota', 'sum': 556.0},
 {'_id': 'Tennessee', 'sum': 253.0},
 {'_id': 'Colorado', 'sum': 376.0},
 {'_id': 'Rhode Island', 'sum': 279.0},
 {'_id': 'North Carolina', 'sum': 4527.0},
 {'_id': 'New Hampshire', 'sum': 1028.0},
 {'_id': 'Oregon', 'sum': 6240.0},
 {'_id': 'Washington', 'sum': 2874.0},
 {'_id': 'South Dakota', 'sum': 548.0},
 {'_id': 'Wisconsin', 'sum': 763.0}]


In [42]:
#Virtual with face to face options
# Write a match query 
match_query = {'$match': {'Virtual': 'Virtual with face to face options'}}

# Write an aggregation query that counts the number location IDs within the match query 
group_query = {'$group': {'_id': "$Location_ID", 'sum': { '$sum' : '$Total_Students'}}}

# Put the pipeline together
pipeline = [match_query, group_query]

#results 
TS_VirtualFFOptions_results = list(characteristics.aggregate(pipeline))

In [43]:
pprint(TS_VirtualFFOptions_results[0:10])

[{'_id': 'Minnesota', 'sum': 629.0},
 {'_id': 'Tennessee', 'sum': 249.0},
 {'_id': 'Colorado', 'sum': 331.0},
 {'_id': 'Rhode Island', 'sum': 221.0},
 {'_id': 'North Carolina', 'sum': 12395.0},
 {'_id': 'New Hampshire', 'sum': 2976.0},
 {'_id': 'Oregon', 'sum': 13931.0},
 {'_id': 'Washington', 'sum': 7283.0},
 {'_id': 'South Dakota', 'sum': 925.0},
 {'_id': 'Wisconsin', 'sum': 2182.0}]


In [44]:
###Highschool - Not Virtual
#Create data frame
HS_NotVirtual_results_df = pd.DataFrame(HS_NotVirtual_results)
#print results
print("Rows in DataFrame: ", len(HS_NotVirtual_results_df))
HS_NotVirtual_results_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,sum
0,District of Columbia,23735.0
1,Wyoming,35614.0
2,Idaho,111545.0
3,Ohio,614757.0
4,Bureau of Indian Education,11068.0
5,Puerto Rico,110806.0
6,U.S. Virgin Islands,0.0
7,Michigan,197934.0
8,Arizona,412284.0
9,Oklahoma,219785.0


In [45]:
###Middle - Not Virtual
#Create data frame
MS_NotVirtual_results_df = pd.DataFrame(MS_NotVirtual_results)
#print results
print("Rows in DataFrame: ", len(MS_NotVirtual_results_df))
MS_NotVirtual_results_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,sum
0,District of Columbia,16849.0
1,Wyoming,22495.0
2,Idaho,67680.0
3,Ohio,380750.0
4,Bureau of Indian Education,8534.0
5,Puerto Rico,64711.0
6,U.S. Virgin Islands,0.0
7,Michigan,122703.0
8,Arizona,253530.0
9,Oklahoma,143024.0


In [46]:
###PreK - Not Virtual
#Create data frame
PK_NotVirtual_results_df = pd.DataFrame(PK_NotVirtual_results)
#print results
print("Rows in DataFrame: ", len(PK_NotVirtual_results_df))
PK_NotVirtual_results_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,sum
0,Minnesota,390306.0
1,North Carolina,618593.0
2,Alabama,336204.0
3,Rhode Island,61151.0
4,Michigan,241963.0
5,Arizona,469326.0
6,Oklahoma,308527.0
7,South Dakota,35921.0
8,Tennessee,450990.0
9,Wyoming,41981.0


In [47]:
###TS- Not Virtual
#Create data frame
TS_NotVirtual_results_df = pd.DataFrame(TS_NotVirtual_results)
#print results
print("Rows in DataFrame: ", len(TS_NotVirtual_results_df))
TS_NotVirtual_results_df.head(10)

Rows in DataFrame:  43


Unnamed: 0,_id,sum
0,Wyoming,92772.0
1,District of Columbia,89147.0
2,Bureau of Indian Education,34724.0
3,Idaho,280486.0
4,Ohio,1604831.0
5,Puerto Rico,276413.0
6,U.S. Virgin Islands,0.0
7,Arizona,1052166.0
8,Michigan,525194.0
9,Oklahoma,627075.0


In [48]:
###Highschool - Supplemental Virtual
#Create data frame
HS_SupplementalVirtual_results_df = pd.DataFrame(HS_SupplementalVirtual_results)
#print results
print("Rows in DataFrame: ", len(HS_SupplementalVirtual_results))
HS_SupplementalVirtual_results_df.head(10)

Rows in DataFrame:  27


Unnamed: 0,_id,sum
0,Maryland,337397.0
1,Virginia,435485.0
2,Alaska,36181.0
3,Vermont,27450.0
4,Massachusetts,359062.0
5,Arkansas,170674.0
6,Mississippi,4737.0
7,Montana,35031.0
8,New Hampshire,26.0
9,Iowa,156074.0


In [49]:
###Middle - Supplemental Virtual
#Create data frame
MS_SupplementalVirtual_results_df = pd.DataFrame(MS_SupplementalVirtual_results)
#print results
print("Rows in DataFrame: ", len(MS_SupplementalVirtual_results))
MS_SupplementalVirtual_results_df.head(10)

Rows in DataFrame:  27


Unnamed: 0,_id,sum
0,Minnesota,0.0
1,New York,590956.0
2,Colorado,1429.0
3,Tennessee,16458.0
4,Iowa,89726.0
5,Montana,4246.0
6,Mississippi,373.0
7,New Hampshire,0.0
8,Massachusetts,212396.0
9,Arkansas,97878.0


In [50]:
###PK - Supplemental Virtual
#Create data frame
PK_SupplementalVirtual_results_df = pd.DataFrame(PK_SupplementalVirtual_results)
#print results
print("Rows in DataFrame: ", len(PK_SupplementalVirtual_results))
PK_SupplementalVirtual_results_df.head(10)

Rows in DataFrame:  27


Unnamed: 0,_id,sum
0,Minnesota,0.0
1,New York,1153061.0
2,Colorado,640.0
3,Tennessee,6480.0
4,Iowa,164010.0
5,Montana,394.0
6,Mississippi,0.0
7,New Hampshire,0.0
8,Massachusetts,407430.0
9,Arkansas,14288.0


In [51]:
###TS - Supplemental Virtual
#Create data frame
TS_SupplementalVirtual_results_df = pd.DataFrame(TS_SupplementalVirtual_results)
#print results
print("Rows in DataFrame: ", len(TS_SupplementalVirtual_results))
TS_SupplementalVirtual_results_df.head(10)

Rows in DataFrame:  27


Unnamed: 0,_id,sum
0,Minnesota,0.0
1,New York,2578055.0
2,Colorado,10779.0
3,Tennessee,128565.0
4,Iowa,378134.0
5,Montana,32541.0
6,Mississippi,4230.0
7,New Hampshire,18.0
8,Massachusetts,908008.0
9,Arkansas,248078.0


In [52]:
###High School - Full Virtual
#Create data frame
HS_FullVirtual_results_df = pd.DataFrame(HS_FullVirtual_results)
#print results
print("Rows in DataFrame: ", len(HS_FullVirtual_results))
HS_FullVirtual_results_df.head(10)

Rows in DataFrame:  34


Unnamed: 0,_id,sum
0,District of Columbia,0.0
1,Idaho,7338.0
2,Ohio,18892.0
3,Kentucky,926.0
4,Arizona,29426.0
5,Michigan,17312.0
6,Oklahoma,18307.0
7,Minnesota,6634.0
8,South Carolina,8273.0
9,Alabama,6943.0


In [53]:
###Middle - Full Virtual
#Create data frame
MS_FullVirtual_results_df = pd.DataFrame(MS_FullVirtual_results)
#print results
print("Rows in DataFrame: ", len(MS_FullVirtual_results))
MS_FullVirtual_results_df.head(10)

Rows in DataFrame:  34


Unnamed: 0,_id,sum
0,Idaho,5443.0
1,Ohio,9245.0
2,District of Columbia,129.0
3,Kentucky,282.0
4,Arizona,13515.0
5,Michigan,5058.0
6,Oklahoma,9557.0
7,Alabama,4240.0
8,South Carolina,4571.0
9,Minnesota,3013.0


In [54]:
###PK - Full Virtual
#Create data frame
PK_FullVirtual_results_df = pd.DataFrame(PK_FullVirtual_results)
#print results
print("Rows in DataFrame: ", len(PK_FullVirtual_results))
PK_FullVirtual_results_df.head(10)

Rows in DataFrame:  34


Unnamed: 0,_id,sum
0,Ohio,14590.0
1,Idaho,11466.0
2,District of Columbia,168.0
3,Kentucky,0.0
4,Arizona,19473.0
5,Michigan,7041.0
6,South Carolina,5854.0
7,Alabama,5559.0
8,Oklahoma,19304.0
9,Minnesota,3641.0


In [55]:
###TS- Full Virtual
#Create data frame
TS_FullVirtual_results_df = pd.DataFrame(TS_FullVirtual_results)
#print results
print("Rows in DataFrame: ", len(TS_FullVirtual_results))
TS_FullVirtual_results_df.head(10)

Rows in DataFrame:  34


Unnamed: 0,_id,sum
0,Minnesota,12109.0
1,South Dakota,4023.0
2,Alabama,15288.0
3,South Carolina,16950.0
4,Michigan,26013.0
5,Arizona,57032.0
6,Oklahoma,43324.0
7,Tennessee,10580.0
8,Texas,29521.0
9,Kentucky,1025.0


In [56]:
###High School - Virtual FFOp
#Create data frame
HS_VirtualFFOptions_results_df = pd.DataFrame(HS_VirtualFFOptions_results)
#print results
print("Rows in DataFrame: ", len(HS_VirtualFFOptions_results))
HS_VirtualFFOptions_results_df.head(10)

Rows in DataFrame:  16


Unnamed: 0,_id,sum
0,Minnesota,556.0
1,Tennessee,253.0
2,Colorado,376.0
3,Rhode Island,279.0
4,North Carolina,4527.0
5,New Hampshire,1028.0
6,Oregon,6240.0
7,Washington,2874.0
8,South Dakota,548.0
9,Wisconsin,763.0


In [57]:
###Middle - Virtual FFOp
#Create data frame
MS_VirtualFFOptions_results_df = pd.DataFrame(MS_VirtualFFOptions_results)
#print results
print("Rows in DataFrame: ", len(MS_VirtualFFOptions_results))
MS_VirtualFFOptions_results_df.head(10)

Rows in DataFrame:  16


Unnamed: 0,_id,sum
0,Virginia,34470.0
1,Maryland,0.0
2,Alaska,3087.0
3,South Dakota,186.0
4,Oregon,3162.0
5,New Hampshire,757.0
6,North Carolina,2882.0
7,Wisconsin,522.0
8,Tennessee,0.0
9,Colorado,0.0


In [58]:
###PK - Virtual FFOp
#Create data frame
PK_VirtualFFOptions_results_df = pd.DataFrame(PK_VirtualFFOptions_results)
#print results
print("Rows in DataFrame: ", len(PK_VirtualFFOptions_results))
PK_VirtualFFOptions_results_df.head(10)

Rows in DataFrame:  16


Unnamed: 0,_id,sum
0,Minnesota,66.0
1,Tennessee,0.0
2,Colorado,0.0
3,Rhode Island,0.0
4,North Carolina,5951.0
5,New Hampshire,1407.0
6,Oregon,5724.0
7,Washington,2888.0
8,South Dakota,312.0
9,Wisconsin,1046.0


In [59]:
###TS - Virtual FFOp
#Create data frame
TS_VirtualFFOptions_results_df = pd.DataFrame(TS_VirtualFFOptions_results)
#print results
print("Rows in DataFrame: ", len(TS_VirtualFFOptions_results))
TS_VirtualFFOptions_results_df.head(10)

Rows in DataFrame:  16


Unnamed: 0,_id,sum
0,Minnesota,629.0
1,Tennessee,249.0
2,Colorado,331.0
3,Rhode Island,221.0
4,North Carolina,12395.0
5,New Hampshire,2976.0
6,Oregon,13931.0
7,Washington,7283.0
8,South Dakota,925.0
9,Wisconsin,2182.0


In [60]:
##Rename Column
HS_NotVirtual_results_df = HS_NotVirtual_results_df.rename(columns={"sum": "HS_NotVirtual_Sum"})
HS_NotVirtual_results_df.head()

Unnamed: 0,_id,HS_NotVirtual_Sum
0,District of Columbia,23735.0
1,Wyoming,35614.0
2,Idaho,111545.0
3,Ohio,614757.0
4,Bureau of Indian Education,11068.0


In [61]:
##Rename Column
MS_NotVirtual_results_df = MS_NotVirtual_results_df.rename(columns={"sum": "MS_NotVirtual_Sum"})
MS_NotVirtual_results_df.head()

Unnamed: 0,_id,MS_NotVirtual_Sum
0,District of Columbia,16849.0
1,Wyoming,22495.0
2,Idaho,67680.0
3,Ohio,380750.0
4,Bureau of Indian Education,8534.0


In [62]:
##Rename Column
PK_NotVirtual_results_df = PK_NotVirtual_results_df.rename(columns={"sum": "PK_NotVirtual_Sum"})
PK_NotVirtual_results_df.head()

Unnamed: 0,_id,PK_NotVirtual_Sum
0,Minnesota,390306.0
1,North Carolina,618593.0
2,Alabama,336204.0
3,Rhode Island,61151.0
4,Michigan,241963.0


In [63]:
##Rename Column
TS_NotVirtual_results_df = TS_NotVirtual_results_df.rename(columns={"sum": "TS_NotVirtual_Sum"})
TS_NotVirtual_results_df.head()

Unnamed: 0,_id,TS_NotVirtual_Sum
0,Wyoming,92772.0
1,District of Columbia,89147.0
2,Bureau of Indian Education,34724.0
3,Idaho,280486.0
4,Ohio,1604831.0


In [64]:
##Rename Column
HS_SupplementalVirtual_results_df = HS_SupplementalVirtual_results_df.rename(columns={"sum": "HS_SupplementalVirtual_Sum"})
HS_SupplementalVirtual_results_df.head()

Unnamed: 0,_id,HS_SupplementalVirtual_Sum
0,Maryland,337397.0
1,Virginia,435485.0
2,Alaska,36181.0
3,Vermont,27450.0
4,Massachusetts,359062.0


In [65]:
##Rename Column
MS_SupplementalVirtual_results_df = MS_SupplementalVirtual_results_df.rename(columns={"sum": "MS_SupplementalVirtual_Sum"})
MS_SupplementalVirtual_results_df.head()

Unnamed: 0,_id,MS_SupplementalVirtual_Sum
0,Minnesota,0.0
1,New York,590956.0
2,Colorado,1429.0
3,Tennessee,16458.0
4,Iowa,89726.0


In [66]:
##Rename Column
PK_SupplementalVirtual_results_df = PK_SupplementalVirtual_results_df.rename(columns={"sum": "PK_SupplementalVirtual_Sum"})
PK_SupplementalVirtual_results_df.head()

Unnamed: 0,_id,PK_SupplementalVirtual_Sum
0,Minnesota,0.0
1,New York,1153061.0
2,Colorado,640.0
3,Tennessee,6480.0
4,Iowa,164010.0


In [67]:
##Rename Column
TS_SupplementalVirtual_results_df = TS_SupplementalVirtual_results_df.rename(columns={"sum": "TS_SupplementalVirtual_Sum"})
TS_SupplementalVirtual_results_df.head()

Unnamed: 0,_id,TS_SupplementalVirtual_Sum
0,Minnesota,0.0
1,New York,2578055.0
2,Colorado,10779.0
3,Tennessee,128565.0
4,Iowa,378134.0


In [68]:
##Rename Column
HS_VirtualFFOptions_results_df = HS_VirtualFFOptions_results_df.rename(columns={'sum': 'HS_VirtualFFOptions_Sum'})
HS_VirtualFFOptions_results_df

Unnamed: 0,_id,HS_VirtualFFOptions_Sum
0,Minnesota,556.0
1,Tennessee,253.0
2,Colorado,376.0
3,Rhode Island,279.0
4,North Carolina,4527.0
5,New Hampshire,1028.0
6,Oregon,6240.0
7,Washington,2874.0
8,South Dakota,548.0
9,Wisconsin,763.0


In [69]:
##Rename Column
MS_VirtualFFOptions_results_df = MS_VirtualFFOptions_results_df.rename(columns={'sum': 'MS_VirtualFFOptions_Sum'})
MS_VirtualFFOptions_results_df

Unnamed: 0,_id,MS_VirtualFFOptions_Sum
0,Virginia,34470.0
1,Maryland,0.0
2,Alaska,3087.0
3,South Dakota,186.0
4,Oregon,3162.0
5,New Hampshire,757.0
6,North Carolina,2882.0
7,Wisconsin,522.0
8,Tennessee,0.0
9,Colorado,0.0


In [70]:
##Rename Column
PK_VirtualFFOptions_results_df = PK_VirtualFFOptions_results_df.rename(columns={'sum': 'PK_VirtualFFOptions_Sum'})
PK_VirtualFFOptions_results_df

Unnamed: 0,_id,PK_VirtualFFOptions_Sum
0,Minnesota,66.0
1,Tennessee,0.0
2,Colorado,0.0
3,Rhode Island,0.0
4,North Carolina,5951.0
5,New Hampshire,1407.0
6,Oregon,5724.0
7,Washington,2888.0
8,South Dakota,312.0
9,Wisconsin,1046.0


In [71]:
##Rename Column
TS_VirtualFFOptions_results_df = TS_VirtualFFOptions_results_df.rename(columns={'sum': 'TS_VirtualFFOptions_Sum'})
TS_VirtualFFOptions_results_df

Unnamed: 0,_id,TS_VirtualFFOptions_Sum
0,Minnesota,629.0
1,Tennessee,249.0
2,Colorado,331.0
3,Rhode Island,221.0
4,North Carolina,12395.0
5,New Hampshire,2976.0
6,Oregon,13931.0
7,Washington,7283.0
8,South Dakota,925.0
9,Wisconsin,2182.0


In [72]:
##Rename Column
HS_FullVirtual_results_df = HS_FullVirtual_results_df.rename(columns={"sum": "HS_FullVirtual_Sum"})
HS_FullVirtual_results_df

Unnamed: 0,_id,HS_FullVirtual_Sum
0,District of Columbia,0.0
1,Idaho,7338.0
2,Ohio,18892.0
3,Kentucky,926.0
4,Arizona,29426.0
5,Michigan,17312.0
6,Oklahoma,18307.0
7,Minnesota,6634.0
8,South Carolina,8273.0
9,Alabama,6943.0


In [73]:
##Rename Column
MS_FullVirtual_results_df = MS_FullVirtual_results_df.rename(columns={"sum": "MS_FullVirtual_Sum"})
MS_FullVirtual_results_df

Unnamed: 0,_id,MS_FullVirtual_Sum
0,Idaho,5443.0
1,Ohio,9245.0
2,District of Columbia,129.0
3,Kentucky,282.0
4,Arizona,13515.0
5,Michigan,5058.0
6,Oklahoma,9557.0
7,Alabama,4240.0
8,South Carolina,4571.0
9,Minnesota,3013.0


In [74]:
##Rename Column
PK_FullVirtual_results_df = PK_FullVirtual_results_df.rename(columns={"sum": "PK_FullVirtual_Sum"})
PK_FullVirtual_results_df

Unnamed: 0,_id,PK_FullVirtual_Sum
0,Ohio,14590.0
1,Idaho,11466.0
2,District of Columbia,168.0
3,Kentucky,0.0
4,Arizona,19473.0
5,Michigan,7041.0
6,South Carolina,5854.0
7,Alabama,5559.0
8,Oklahoma,19304.0
9,Minnesota,3641.0


In [75]:
##Rename Column
TS_FullVirtual_results_df = TS_FullVirtual_results_df.rename(columns={"sum": "TS_FullVirtual_Sum"})
TS_FullVirtual_results_df

Unnamed: 0,_id,TS_FullVirtual_Sum
0,Minnesota,12109.0
1,South Dakota,4023.0
2,Alabama,15288.0
3,South Carolina,16950.0
4,Michigan,26013.0
5,Arizona,57032.0
6,Oklahoma,43324.0
7,Tennessee,10580.0
8,Texas,29521.0
9,Kentucky,1025.0


In [80]:
merge_df = pd.merge(TS_VirtualFFOptions_results_df, HS_VirtualFFOptions_results_df, on="_id").merge(MS_VirtualFFOptions_results_df, on="_id").merge(PK_VirtualFFOptions_results_df, on="_id").merge(TS_FullVirtual_results_df, on="_id").merge(HS_FullVirtual_results_df, on="_id").merge(MS_FullVirtual_results_df, on="_id").merge(PK_FullVirtual_results_df, on="_id").merge(TS_SupplementalVirtual_results_df, on = "_id").merge(HS_SupplementalVirtual_results_df, on = "_id").merge(MS_SupplementalVirtual_results_df, on = "_id").merge(PK_SupplementalVirtual_results_df, on = "_id").merge(TS_NotVirtual_results_df, on = "_id").merge(HS_NotVirtual_results_df, on = "_id").merge(MS_NotVirtual_results_df, on = "_id").merge(PK_NotVirtual_results_df, on = "_id")
merge_df

Unnamed: 0,_id,TS_VirtualFFOptions_Sum,HS_VirtualFFOptions_Sum,MS_VirtualFFOptions_Sum,PK_VirtualFFOptions_Sum,TS_FullVirtual_Sum,HS_FullVirtual_Sum,MS_FullVirtual_Sum,PK_FullVirtual_Sum,TS_SupplementalVirtual_Sum,HS_SupplementalVirtual_Sum,MS_SupplementalVirtual_Sum,PK_SupplementalVirtual_Sum,TS_NotVirtual_Sum,HS_NotVirtual_Sum,MS_NotVirtual_Sum,PK_NotVirtual_Sum
0,Minnesota,629.0,556.0,84.0,66.0,12109.0,6634.0,3013.0,3641.0,0.0,0.0,0.0,0.0,859345.0,338372.0,197362.0,390306.0
1,Tennessee,249.0,253.0,0.0,0.0,10580.0,4450.0,2893.0,4146.0,128565.0,132773.0,16458.0,6480.0,844672.0,231857.0,209166.0,450990.0
2,Colorado,331.0,376.0,0.0,0.0,19734.0,10392.0,5328.0,5986.0,10779.0,10297.0,1429.0,640.0,851991.0,326150.0,199237.0,391766.0
3,North Carolina,12395.0,4527.0,2882.0,5951.0,21088.0,4853.0,6682.0,10642.0,271634.0,183242.0,77027.0,48980.0,1208560.0,392003.0,278903.0,618593.0
4,New Hampshire,2976.0,1028.0,757.0,1407.0,3797.0,2233.0,1019.0,1003.0,18.0,26.0,0.0,0.0,477813.0,191506.0,114254.0,211508.0
5,Oregon,13931.0,6240.0,3162.0,5724.0,13297.0,5707.0,3509.0,5199.0,201684.0,156774.0,43977.0,32384.0,318161.0,50747.0,83219.0,194533.0
6,Washington,7283.0,2874.0,2008.0,2888.0,14296.0,8121.0,3567.0,4307.0,231984.0,225275.0,38285.0,13639.0,833791.0,190741.0,211967.0,467534.0
7,South Dakota,925.0,548.0,186.0,312.0,4023.0,1317.0,1008.0,1968.0,76289.0,42096.0,14879.0,27943.0,57817.0,6797.0,16493.0,35921.0
8,Wisconsin,2182.0,763.0,522.0,1046.0,14120.0,7438.0,3244.0,4840.0,756816.0,304372.0,170209.0,342753.0,56948.0,13250.0,11883.0,34462.0
9,Alaska,13099.0,4663.0,3087.0,6265.0,1142.0,409.0,275.0,544.0,95762.0,36181.0,22173.0,44610.0,19869.0,5930.0,4392.0,10755.0
