# Practice working with JSON files w/o Pandas or Numpy

## Importing the JSON reading library (aka the easy way)

In [1]:
import json

In [2]:
# JSON text is UTF-8 by specification, so name the encoding explicitly instead of
# relying on open()'s platform-dependent default.
with open('us_county_population_data.json', 'r', encoding='utf-8') as f:
    # json.load parses the whole document; the parsed result is bound to `data`.
    data=json.load(f)

In [3]:
# Peek at the first parsed entry to see what a single record looks like.
data[0]

{'State': 'California',
 'pop2022': 9934710,
 'pop2010': 9823246,
 'GrowthRate': '0.0113'}

In [4]:
# Check the type of the top-level object that json.load returned.
type(data)

list

In [5]:
# Check the type of an individual entry inside that object.
type(data[0])

dict

In [6]:
# Show the first two entries together.
data[:2]

[{'State': 'California',
  'pop2022': 9934710,
  'pop2010': 9823246,
  'GrowthRate': '0.0113'},
 {'State': 'Illinois',
  'pop2022': 5085052,
  'pop2010': 5199016,
  'GrowthRate': '-0.0219'}]

In [9]:
# json.load produced a list of dictionaries, so a lookup is a linear scan for the
# dictionaries whose fields match.
# Example: print the 2022 population of every entry whose state is Illinois.
for record in data:
    if record['State'] != 'Illinois':
        continue
    print(record['pop2022'])

5085052
909943
689525
687379
531452


In [None]:
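# Illinois appears in several entries (presumably one per county, given the file name),
# so the loop prints one population per matching entry - the lookup itself still works.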

## Opening a JSON file without any libraries (aka the hard way)

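The first thing to remember is that a JSON file is just text: one long string that spells out nested structures such as dictionaries (JSON objects) and lists (JSON arrays). This particular file contains a list of dictionaries.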

In [10]:
# Read the file as raw text without parsing it.  The encoding is named explicitly
# because JSON is UTF-8 by specification and open()'s default depends on the platform.
with open('us_county_population_data.json', 'r', encoding='utf-8') as file:
    d=file.read()

In [11]:
# Look at the first 115 characters of the raw text; the repr form makes escapes such as \n visible.
d[:115]

'[\n  {\n    "State": "California",\n    "pop2022": 9934710,\n    "pop2010": 9823246,\n    "GrowthRate": "0.0113"\n  },\n  '

In [12]:
## The raw text is full of '\n' newline (line feed) characters;
## print() renders them as real line breaks.
print(d[:115])

[
  {
    "State": "California",
    "pop2022": 9934710,
    "pop2010": 9823246,
    "GrowthRate": "0.0113"
  },
  


In [13]:
# Step 1: drop every '\n' so the whole document sits on a single line and the
# actual data is easier to get at.
d = ''.join(d.split('\n'))
d[:101]

'[  {    "State": "California",    "pop2022": 9934710,    "pop2010": 9823246,    "GrowthRate": "0.0113'

In [14]:
# Step 2: strip the indentation and other padding spaces.
# Spaces INSIDE double-quoted strings are kept: a blanket d.replace(' ', '') would
# also rewrite a value such as "New York" to "NewYork".
# The square brackets around the list are NOT removed here; they are still
# visible in the printed text.
# Limitation: a backslash-escaped quote (\") inside a string is not recognised.
kept_chars = []
inside_string = False
for ch in d:
    if ch == '"':
        # Every double quote either opens or closes a JSON string.
        inside_string = not inside_string
    if inside_string or ch != ' ':
        kept_chars.append(ch)
d = ''.join(kept_chars)
print(d[:100])

[{"State":"California","pop2022":9934710,"pop2010":9823246,"GrowthRate":"0.0113"},{"State":"Illinois


In [15]:
# Step 3: cut the text into one chunk per record.  Every record ends with a
# curly bracket '}', so split there; the opening '{' is simply dropped.
new_data = [chunk.replace('{', '') for chunk in d.split('}')]
new_data[:3]

['["State":"California","pop2022":9934710,"pop2010":9823246,"GrowthRate":"0.0113"',
 ',"State":"Illinois","pop2022":5085052,"pop2010":5199016,"GrowthRate":"-0.0219"',
 ',"State":"Texas","pop2022":4813165,"pop2010":4107666,"GrowthRate":"0.1718"']

In [16]:
# Step 4: the data is now a list of strings, one per record, and still has to be
# turned into a dictionary.  Each string holds comma-separated "key":value pairs,
# so split every string on the comma ',' to separate the pairs from one another.
new_data1 = [
    record_text.split(',')
    for record_text in new_data
]
new_data1[:3]

[['["State":"California"',
  '"pop2022":9934710',
  '"pop2010":9823246',
  '"GrowthRate":"0.0113"'],
 ['',
  '"State":"Illinois"',
  '"pop2022":5085052',
  '"pop2010":5199016',
  '"GrowthRate":"-0.0219"'],
 ['',
  '"State":"Texas"',
  '"pop2022":4813165',
  '"pop2010":4107666',
  '"GrowthRate":"0.1718"']]

In [17]:
# Every chunk after the first begins with the comma that separated it from the
# previous record, so its split result starts with an empty string ''.
# Those lists have 5 items: remove the leading ''.  Any list with a different
# length is printed so it can be inspected.
for fields in new_data1:
    if len(fields) != 5:
        print(fields)
        continue
    del fields[0]

['["State":"California"', '"pop2022":9934710', '"pop2010":9823246', '"GrowthRate":"0.0113"']
[']']


In [18]:
# Flatten the per-record lists into one long list of [key, value] pairs by
# splitting every "key":value string on the colon ':'.
new_data2 = [pair_text.split(':') for fields in new_data1 for pair_text in fields]
new_data2[:8]
    

[['["State"', '"California"'],
 ['"pop2022"', '9934710'],
 ['"pop2010"', '9823246'],
 ['"GrowthRate"', '"0.0113"'],
 ['"State"', '"Illinois"'],
 ['"pop2022"', '5085052'],
 ['"pop2010"', '5199016'],
 ['"GrowthRate"', '"-0.0219"']]

In [20]:
# Look at the last eight pairs as well, to see how the list ends.
new_data2[-8:]

[['"pop2022"', '368680'],
 ['"pop2010"', '375372'],
 ['"GrowthRate"', '"-0.0178"'],
 ['"State"', '"Utah"'],
 ['"pop2022"', '368621'],
 ['"pop2010"', '307910'],
 ['"GrowthRate"', '"0.1972"'],
 [']']]

In [21]:
# The closing ']' of the JSON list survives as a dangling final entry.
# Remove it only when it is actually there: an unconditional pop(-1) would throw
# away a real [key, value] pair if this cell were run a second time.
new_data2.pop() if new_data2 and new_data2[-1] == [']'] else None

[']']

In [22]:
# First attempt: build a dictionary straight from the [key, value] pairs and
# check which keys come out.
k = {pair[0]: pair[1] for pair in new_data2}
k.keys()


dict_keys(['["State"', '"pop2022"', '"pop2010"', '"GrowthRate"', '"State"'])

In [23]:
# That almost worked, but the very first key still carries the '[' that opened
# the JSON list (the data was a list of dictionaries).  Strip the bracket instead
# of hard-coding the key name, so the fix does not depend on which field comes
# first and is harmless if the cell is run again.
new_data2[0][0] = new_data2[0][0].lstrip('[')


In [24]:
# Second attempt: rebuild the dictionary from the [key, value] pairs and check
# the keys again.
k = {pair[0]: pair[1] for pair in new_data2}
k.keys()

dict_keys(['"State"', '"pop2022"', '"pop2010"', '"GrowthRate"'])

In [25]:
# A dictionary holds exactly one value per key, and every record uses the same
# four keys, so each record overwrote the one before it: only the last record is left.
k

{'"State"': '"Utah"',
 '"pop2022"': '368621',
 '"pop2010"': '307910',
 '"GrowthRate"': '"0.1972"'}

In [26]:
# A lookup by key therefore returns the last record's value only; note that the
# key and the value both still carry their double quotes.
k['"State"']

'"Utah"'

In [27]:
# What should the end result actually look like?
# The goal is a data-frame-like structure without NumPy or pandas: a dictionary
# that maps each column name to the list of that column's values.
# The values below are a small hand-written sample to illustrate the shape.
state_dict = {
    "State": ["Alabama", "Alaska", "Arazona"],
    "pop2020": [23451, 21365, 64213],
    "pop2010": [12345, 13245, 14267],
}
state_dict

{'State': ['Alabama', 'Alaska', 'Arazona'],
 'pop2020': [23451, 21365, 64213],
 'pop2010': [12345, 13245, 14267]}

In [28]:
# The keys play the role of column names.
state_dict.keys()

dict_keys(['State', 'pop2020', 'pop2010'])

In [29]:
# The values are the columns themselves: one list per key.
state_dict.values()

dict_values([['Alabama', 'Alaska', 'Arazona'], [23451, 21365, 64213], [12345, 13245, 14267]])

In [None]:
# Selecting a single "column" is a plain key lookup.
state_dict["State"]