# Exploring and Transforming JSON Schemas

## Objectives
* Explore unknown JSON schemas
* Access and manipulate data inside a JSON file
* Convert JSON to alternative data formats

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [21]:
from IPython.display import display
pd.options.display.max_columns = None

## Loading the JSON file

Load the data from the file disease_data.json.

In [3]:
import json
f = open('disease_data.json')
data = json.load(f)
data

{'meta': {'view': {'id': 'g4ie-h725',
   'name': 'U.S. Chronic Disease Indicators (CDI)',
   'attribution': 'Centers for Disease Control and Prevention, National Center for Chronic Disease Prevention and Health Promotion, Division of Population Health',
   'attributionLink': 'http://www.cdc.gov/nccdphp/dph/',
   'averageRating': 0,
   'category': 'Chronic Disease Indicators',
   'createdAt': 1463517008,
   'description': "CDC's Division of Population Health provides cross-cutting set of 124 indicators that were developed by consensus and that allows states and territories and large metropolitan areas to uniformly define, collect, and report chronic disease data that are important to public health practice and available for states, territories and large metropolitan areas. In addition to providing access to state-specific indicator data, the CDI web site serves as a gateway to additional information and data resources.",
   'displayType': 'table',
   'downloadCount': 80068,
   'hideFrom

## Explore the first and second levels of the schema hierarchy.

In [4]:
data.keys()
type(data)

dict_keys(['meta', 'data'])

dict

In [5]:
data['data'][0]

[1,
 'FF49C41F-CE8D-46C4-9164-653B1227CF6F',
 1,
 1527194521,
 '959778',
 1527194521,
 '959778',
 None,
 '2016',
 '2016',
 'US',
 'United States',
 'BRFSS',
 'Alcohol',
 'Binge drinking prevalence among adults aged >= 18 years',
 None,
 '%',
 'Crude Prevalence',
 '16.9',
 '16.9',
 '*',
 '50 States + DC: US Median',
 '16',
 '18',
 'Overall',
 'Overall',
 None,
 None,
 None,
 None,
 [None, None, None, None, None],
 None,
 '59',
 'ALC',
 'ALC2_2',
 'CRDPREV',
 'OVERALL',
 'OVR',
 None,
 None,
 None,
 None]

In [6]:
for key in data['meta']['view'].keys():
    print(key)

id
name
attribution
attributionLink
averageRating
category
createdAt
description
displayType
downloadCount
hideFromCatalog
hideFromDataJson
indexUpdatedAt
licenseId
newBackend
numberOfComments
oid
provenance
publicationAppendEnabled
publicationDate
publicationGroup
publicationStage
rowClass
rowsUpdatedAt
rowsUpdatedBy
tableId
totalTimesRated
viewCount
viewLastModified
viewType
columns
grants
license
metadata
owner
query
rights
tableAuthor
tags
flags


In [7]:
type(data['meta'])
len(data['meta']['view'].keys())
data['meta']['view'].keys()

dict

40

dict_keys(['id', 'name', 'attribution', 'attributionLink', 'averageRating', 'category', 'createdAt', 'description', 'displayType', 'downloadCount', 'hideFromCatalog', 'hideFromDataJson', 'indexUpdatedAt', 'licenseId', 'newBackend', 'numberOfComments', 'oid', 'provenance', 'publicationAppendEnabled', 'publicationDate', 'publicationGroup', 'publicationStage', 'rowClass', 'rowsUpdatedAt', 'rowsUpdatedBy', 'tableId', 'totalTimesRated', 'viewCount', 'viewLastModified', 'viewType', 'columns', 'grants', 'license', 'metadata', 'owner', 'query', 'rights', 'tableAuthor', 'tags', 'flags'])

In [8]:
type(data['data'])
len(data['data'])
print(data['data'][0:2])
for item in data['data']:
    print(item)

list

60266

[[1, 'FF49C41F-CE8D-46C4-9164-653B1227CF6F', 1, 1527194521, '959778', 1527194521, '959778', None, '2016', '2016', 'US', 'United States', 'BRFSS', 'Alcohol', 'Binge drinking prevalence among adults aged >= 18 years', None, '%', 'Crude Prevalence', '16.9', '16.9', '*', '50 States + DC: US Median', '16', '18', 'Overall', 'Overall', None, None, None, None, [None, None, None, None, None], None, '59', 'ALC', 'ALC2_2', 'CRDPREV', 'OVERALL', 'OVR', None, None, None, None], [2, 'F4468C3D-340A-4CD2-84A3-DF554DFF065E', 2, 1527194521, '959778', 1527194521, '959778', None, '2016', '2016', 'AL', 'Alabama', 'BRFSS', 'Alcohol', 'Binge drinking prevalence among adults aged >= 18 years', None, '%', 'Crude Prevalence', '13', '13', None, None, '11.9', '14.1', 'Overall', 'Overall', None, None, None, None, [None, '32.84057112200048', '-86.63186076199969', None, False], None, '01', 'ALC', 'ALC2_2', 'CRDPREV', 'OVERALL', 'OVR', None, None, None, None]]
[1, 'FF49C41F-CE8D-46C4-9164-653B1227CF6F', 1, 1527194521

[2238, '1C5D56A5-BB86-45DF-AFEE-279B75B7059E', 2238, 1527194521, '959778', 1527194521, '959778', None, '2016', '2016', 'AK', 'Alaska', 'BRFSS', 'Alcohol', 'Binge drinking intensity among adults aged >= 18 years who binge drink', None, 'Number', 'Age-adjusted Mean', '7.9', '7.9', None, None, '7.2', '8.5', 'Gender', 'Male', None, None, None, None, [None, '64.84507995700051', '-147.72205903599973', None, False], None, '02', 'ALC', 'ALC4_0', 'AGEADJMEAN', 'GENDER', 'GENM', None, None, None, None]
[2239, 'FBE0224C-92C7-4E76-A711-E9F171B182FE', 2239, 1527194521, '959778', 1527194521, '959778', None, '2016', '2016', 'AZ', 'Arizona', 'BRFSS', 'Alcohol', 'Binge drinking intensity among adults aged >= 18 years who binge drink', None, 'Number', 'Age-adjusted Mean', '7.9', '7.9', None, None, '7.3', '8.5', 'Gender', 'Male', None, None, None, None, [None, '34.865970280000454', '-111.76381127699972', None, False], None, '04', 'ALC', 'ALC4_0', 'AGEADJMEAN', 'GENDER', 'GENM', None, None, None, None]
[2

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




[28864, '3D38E1DB-EAEF-48D6-BD69-0C66AB39FAD1', 28864, 1527194528, '959778', 1527194528, '959778', None, '2016', '2016', 'NH', 'New Hampshire', 'BRFSS', 'Diabetes', 'Influenza vaccination among noninstitutionalized adults aged 18-64 years with diagnosed diabetes', None, '%', 'Age-adjusted Prevalence', '55.3', '55.3', None, None, '42.4', '67.6', 'Overall', 'Overall', None, None, None, None, [None, '43.65595011300047', '-71.50036091999965', None, False], None, '33', 'DIA', 'DIA12_1', 'AGEADJPREV', 'OVERALL', 'OVR', None, None, None, None]
[28865, '012761F9-6A83-4807-8F64-26D36DB82723', 28865, 1527194528, '959778', 1527194528, '959778', None, '2016', '2016', 'NJ', 'New Jersey', 'BRFSS', 'Diabetes', 'Influenza vaccination among noninstitutionalized adults aged 18-64 years with diagnosed diabetes', None, '%', 'Age-adjusted Prevalence', '44.2', '44.2', None, None, '34.3', '54.5', 'Overall', 'Overall', None, None, None, None, [None, '40.13057004800049', '-74.27369128799967', None, False], No

[31360, '7BA061DE-44BB-43AC-8370-D21DD27EFB77', 31360, 1527194528, '959778', 1527194528, '959778', None, '2016', '2016', 'WY', 'Wyoming', 'BRFSS', 'Diabetes', 'Pneumococcal vaccination among noninstitutionalized adults aged 18-64 years with diagnosed diabetes', None, '%', 'Crude Prevalence', None, None, '****', 'Sample size of denominator and/or age group for age-standardization is less than 50 or relative standard error is more than 30%', None, None, 'Race/Ethnicity', 'Multiracial, non-Hispanic', None, None, None, None, [None, '43.23554134300048', '-108.10983035299967', None, False], None, '56', 'DIA', 'DIA13_1', 'CRDPREV', 'RACE', 'MRC', None, None, None, None]
[31361, 'F39AD868-C37C-4EBC-B040-57BF49C5740C', 31361, 1527194528, '959778', 1527194528, '959778', None, '2016', '2016', 'GU', 'Guam', 'BRFSS', 'Diabetes', 'Pneumococcal vaccination among noninstitutionalized adults aged 18-64 years with diagnosed diabetes', None, '%', 'Crude Prevalence', None, None, '****', 'Sample size of de

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [9]:
data.keys()
data['meta'].keys()
data['meta']['view'].keys()
len(data['meta']['view'].keys())
data['meta']['view']['columns']

dict_keys(['meta', 'data'])

dict_keys(['view'])

dict_keys(['id', 'name', 'attribution', 'attributionLink', 'averageRating', 'category', 'createdAt', 'description', 'displayType', 'downloadCount', 'hideFromCatalog', 'hideFromDataJson', 'indexUpdatedAt', 'licenseId', 'newBackend', 'numberOfComments', 'oid', 'provenance', 'publicationAppendEnabled', 'publicationDate', 'publicationGroup', 'publicationStage', 'rowClass', 'rowsUpdatedAt', 'rowsUpdatedBy', 'tableId', 'totalTimesRated', 'viewCount', 'viewLastModified', 'viewType', 'columns', 'grants', 'license', 'metadata', 'owner', 'query', 'rights', 'tableAuthor', 'tags', 'flags'])

40

[{'id': -1,
  'name': 'sid',
  'dataTypeName': 'meta_data',
  'fieldName': ':sid',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'id',
  'dataTypeName': 'meta_data',
  'fieldName': ':id',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'position',
  'dataTypeName': 'meta_data',
  'fieldName': ':position',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'created_at',
  'dataTypeName': 'meta_data',
  'fieldName': ':created_at',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'created_meta',
  'dataTypeName': 'meta_data',
  'fieldName': ':created_meta',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'updated_at',
  'dataTypeName': 'meta_data',
  'fieldName': ':updated_at'

## Convert to a DataFrame

Create a DataFrame from the JSON file. Be sure to retrive the column names for the dataframe. (Search within the 'meta' key of the master dictionary.) The DataFrame should include all 42 columns.

In [10]:
import pandas as pd

# explore
len(data['meta']['view'].keys())
data['meta'].keys()
data['meta']['view']['columns']
data['data'][:5]

40

dict_keys(['view'])

[{'id': -1,
  'name': 'sid',
  'dataTypeName': 'meta_data',
  'fieldName': ':sid',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'id',
  'dataTypeName': 'meta_data',
  'fieldName': ':id',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'position',
  'dataTypeName': 'meta_data',
  'fieldName': ':position',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'created_at',
  'dataTypeName': 'meta_data',
  'fieldName': ':created_at',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'created_meta',
  'dataTypeName': 'meta_data',
  'fieldName': ':created_meta',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'updated_at',
  'dataTypeName': 'meta_data',
  'fieldName': ':updated_at'

[[1,
  'FF49C41F-CE8D-46C4-9164-653B1227CF6F',
  1,
  1527194521,
  '959778',
  1527194521,
  '959778',
  None,
  '2016',
  '2016',
  'US',
  'United States',
  'BRFSS',
  'Alcohol',
  'Binge drinking prevalence among adults aged >= 18 years',
  None,
  '%',
  'Crude Prevalence',
  '16.9',
  '16.9',
  '*',
  '50 States + DC: US Median',
  '16',
  '18',
  'Overall',
  'Overall',
  None,
  None,
  None,
  None,
  [None, None, None, None, None],
  None,
  '59',
  'ALC',
  'ALC2_2',
  'CRDPREV',
  'OVERALL',
  'OVR',
  None,
  None,
  None,
  None],
 [2,
  'F4468C3D-340A-4CD2-84A3-DF554DFF065E',
  2,
  1527194521,
  '959778',
  1527194521,
  '959778',
  None,
  '2016',
  '2016',
  'AL',
  'Alabama',
  'BRFSS',
  'Alcohol',
  'Binge drinking prevalence among adults aged >= 18 years',
  None,
  '%',
  'Crude Prevalence',
  '13',
  '13',
  None,
  None,
  '11.9',
  '14.1',
  'Overall',
  'Overall',
  None,
  None,
  None,
  None,
  [None, '32.84057112200048', '-86.63186076199969', None, False

In [11]:
data['meta']['view']['columns']

[{'id': -1,
  'name': 'sid',
  'dataTypeName': 'meta_data',
  'fieldName': ':sid',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'id',
  'dataTypeName': 'meta_data',
  'fieldName': ':id',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'position',
  'dataTypeName': 'meta_data',
  'fieldName': ':position',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'created_at',
  'dataTypeName': 'meta_data',
  'fieldName': ':created_at',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'created_meta',
  'dataTypeName': 'meta_data',
  'fieldName': ':created_meta',
  'position': 0,
  'renderTypeName': 'meta_data',
  'format': {},
  'flags': ['hidden']},
 {'id': -1,
  'name': 'updated_at',
  'dataTypeName': 'meta_data',
  'fieldName': ':updated_at'

In [12]:
# this is the code that will successfully outpt the headers of the 42 columns
cols = [item['name'] for item in data['meta']['view']['columns']]

In [13]:
df = pd.DataFrame(data['data'], columns=cols)
df.head()
len(df.columns)

Unnamed: 0,sid,id,position,created_at,created_meta,updated_at,updated_meta,meta,YearStart,YearEnd,...,LocationID,TopicID,QuestionID,DataValueTypeID,StratificationCategoryID1,StratificationID1,StratificationCategoryID2,StratificationID2,StratificationCategoryID3,StratificationID3
0,1,FF49C41F-CE8D-46C4-9164-653B1227CF6F,1,1527194521,959778,1527194521,959778,,2016,2016,...,59,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
1,2,F4468C3D-340A-4CD2-84A3-DF554DFF065E,2,1527194521,959778,1527194521,959778,,2016,2016,...,1,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
2,3,65609156-A343-4869-B03F-2BA62E96AC19,3,1527194521,959778,1527194521,959778,,2016,2016,...,2,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
3,4,0DB09B00-EFEB-4AC0-9467-A7CBD2B57BF3,4,1527194521,959778,1527194521,959778,,2016,2016,...,4,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
4,5,D98DA5BA-6FD6-40F5-A9B1-ABD45E44967B,5,1527194521,959778,1527194521,959778,,2016,2016,...,5,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,


42

In [14]:
print(df.columns)

Index(['sid', 'id', 'position', 'created_at', 'created_meta', 'updated_at',
       'updated_meta', 'meta', 'YearStart', 'YearEnd', 'LocationAbbr',
       'LocationDesc', 'DataSource', 'Topic', 'Question', 'Response',
       'DataValueUnit', 'DataValueType', 'DataValue', 'DataValueAlt',
       'DataValueFootnoteSymbol', 'DatavalueFootnote', 'LowConfidenceLimit',
       'HighConfidenceLimit', 'StratificationCategory1', 'Stratification1',
       'StratificationCategory2', 'Stratification2', 'StratificationCategory3',
       'Stratification3', 'GeoLocation', 'ResponseID', 'LocationID', 'TopicID',
       'QuestionID', 'DataValueTypeID', 'StratificationCategoryID1',
       'StratificationID1', 'StratificationCategoryID2', 'StratificationID2',
       'StratificationCategoryID3', 'StratificationID3'],
      dtype='object')


In [22]:
df.head(10)

Unnamed: 0,sid,id,position,created_at,created_meta,updated_at,updated_meta,meta,YearStart,YearEnd,LocationAbbr,LocationDesc,DataSource,Topic,Question,Response,DataValueUnit,DataValueType,DataValue,DataValueAlt,DataValueFootnoteSymbol,DatavalueFootnote,LowConfidenceLimit,HighConfidenceLimit,StratificationCategory1,Stratification1,StratificationCategory2,Stratification2,StratificationCategory3,Stratification3,GeoLocation,ResponseID,LocationID,TopicID,QuestionID,DataValueTypeID,StratificationCategoryID1,StratificationID1,StratificationCategoryID2,StratificationID2,StratificationCategoryID3,StratificationID3
0,1,FF49C41F-CE8D-46C4-9164-653B1227CF6F,1,1527194521,959778,1527194521,959778,,2016,2016,US,United States,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,16.9,16.9,*,50 States + DC: US Median,16.0,18.0,Overall,Overall,,,,,"[None, None, None, None, None]",,59,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
1,2,F4468C3D-340A-4CD2-84A3-DF554DFF065E,2,1527194521,959778,1527194521,959778,,2016,2016,AL,Alabama,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,13.0,13.0,,,11.9,14.1,Overall,Overall,,,,,"[None, 32.84057112200048, -86.63186076199969, ...",,1,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
2,3,65609156-A343-4869-B03F-2BA62E96AC19,3,1527194521,959778,1527194521,959778,,2016,2016,AK,Alaska,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,18.2,18.2,,,16.0,20.6,Overall,Overall,,,,,"[None, 64.84507995700051, -147.72205903599973,...",,2,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
3,4,0DB09B00-EFEB-4AC0-9467-A7CBD2B57BF3,4,1527194521,959778,1527194521,959778,,2016,2016,AZ,Arizona,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,15.6,15.6,,,14.3,16.9,Overall,Overall,,,,,"[None, 34.865970280000454, -111.76381127699972...",,4,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
4,5,D98DA5BA-6FD6-40F5-A9B1-ABD45E44967B,5,1527194521,959778,1527194521,959778,,2016,2016,AR,Arkansas,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,15.0,15.0,,,13.0,17.2,Overall,Overall,,,,,"[None, 34.74865012400045, -92.27449074299966, ...",,5,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
5,6,49758545-682D-46D8-A9F8-0F98EFDDE64A,6,1527194521,959778,1527194521,959778,,2016,2016,CA,California,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,16.3,16.3,,,15.4,17.2,Overall,Overall,,,,,"[None, 37.63864012300047, -120.99999953799971,...",,6,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
6,7,AEB36999-5746-463F-B921-97E404EEF234,7,1527194521,959778,1527194521,959778,,2016,2016,CO,Colorado,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,19.0,19.0,,,18.1,20.0,Overall,Overall,,,,,"[None, 38.843840757000464, -106.13361092099967...",,8,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
7,8,FEBA783D-B277-4DAD-A93B-BF333F9B582D,8,1527194521,959778,1527194521,959778,,2016,2016,CT,Connecticut,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,16.7,16.7,,,15.6,17.9,Overall,Overall,,,,,"[None, 41.56266102000046, -72.64984095199964, ...",,9,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
8,9,85670BEF-2891-4372-A5AE-A4B7867CEEE9,9,1527194521,959778,1527194521,959778,,2016,2016,DE,Delaware,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,17.0,17.0,,,15.2,19.0,Overall,Overall,,,,,"[None, 39.008830667000495, -75.57774116799965,...",,10,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,
9,10,E3B61235-D0C5-40F3-8B5F-E7D4A9509654,10,1527194521,959778,1527194521,959778,,2016,2016,DC,District of Columbia,BRFSS,Alcohol,Binge drinking prevalence among adults aged >=...,,%,Crude Prevalence,25.6,25.6,,,23.6,27.7,Overall,Overall,,,,,"[None, 38.907192, -77.036871, None, False]",,11,ALC,ALC2_2,CRDPREV,OVERALL,OVR,,,,


In [18]:
df['LocationDesc']

0               United States
1                     Alabama
2                      Alaska
3                     Arizona
4                    Arkansas
5                  California
6                    Colorado
7                 Connecticut
8                    Delaware
9        District of Columbia
10                    Florida
11                    Georgia
12                     Hawaii
13                      Idaho
14                   Illinois
15                    Indiana
16                       Iowa
17                     Kansas
18                   Kentucky
19                  Louisiana
20                      Maine
21                   Maryland
22              Massachusetts
23                   Michigan
24                  Minnesota
25                Mississippi
26                   Missouri
27                    Montana
28                   Nebraska
29                     Nevada
                 ...         
60236          Virgin Islands
60237                    Guam
60238     

## Level-Up
## Create a bar graph of states with the highest asthma rates for adults age 18+

In [None]:
fig, ax = plt.subplots(figsize=(15,20))
data = df
ax[0].hist()

## Level-Up!
## Create a function (or class) that returns an outline of the schema structure like this: 
<img src="outline.jpg" width=350>

Rules:
* Your outline should follow the numbering outline above (I, A, 1, a, i).
* Your outline should be properly indented! (Four spaces or one tab per indentation level.)
* Your function goes to at least a depth of 5 (Level-up: create a parameter so that the user can specify this)
* If an entry is a dictionary, list its keys as the subheadings
* After listing a key name (where applicable) include a space, a dash and the data type of the entry
* If an entry is a dict or list put in parentheses how many items are in the entry
* lists will not have key names for their entries (they're just indexed)
* For subheadings of a list, state their datatypes. 
* If a dictionary or list is more then 5 items long, only show the first 5 (we want to limit our previews); make an arbitrary order choice for dictionaries. (Level-up: Parallel to above; allow user to specify number of items to preview for large subheading collections.)

In [None]:
# Your code here; you will probably want to define subfunctions.
def print_obj_outline(json_obj):
    return outline

In [22]:
outline = print_obj_outline(data)

In [23]:
print(outline) #Your function should produce the following output for this json object (and work for all json files!)

I. root - <class 'dict'> (2 items)
    A. meta <class 'dict'> (1 items)
        1. view <class 'dict'> (40 items)
            a. id <class 'str'> 
            b. name <class 'str'> 
            c. attribution <class 'str'> 
            d. attributionLink <class 'str'> 
            e. averageRating <class 'int'> 
    B. data <class 'list'> (60266 items)
        1. <class 'list'> (42 items)
            a. <class 'int'> 
            b. <class 'str'> 
            c. <class 'int'> 
            d. <class 'int'> 
            e. <class 'str'> 
        2. <class 'list'> (42 items)
            a. <class 'int'> 
            b. <class 'str'> 
            c. <class 'int'> 
            d. <class 'int'> 
            e. <class 'str'> 
        3. <class 'list'> (42 items)
            a. <class 'int'> 
            b. <class 'str'> 
            c. <class 'int'> 
            d. <class 'int'> 
            e. <class 'str'> 
        4. <class 'list'> (42 items)
            a. <class 'int'> 
            b. <c

## Summary

Well done! In this lab you got some extended practice exploring the structure of JSON files and writing a recursive generalized function for outlining a JSON file's schema! 