In [None]:
# Build a DoD (dictionary of dictionaries) from the indicators table:
#   dod[code][year] -> {"pop": ..., "gdp": ..., "life": ...}

# indices of entries in the table
_code = 0
_year = 1
_pop = 3
_gdp = 4
_life = 5


def typeFilter(entry):
    ''' Convert a raw CSV field to a float; an empty field (missing value)
        is kept as the empty string. '''
    return float(entry) if entry != "" else ""


dod = {}
# the with-block guarantees the file is closed, even if a row fails to parse
with open("indicators.csv", "r") as indicators:
    first = indicators.readline()  # read off first line (not treated as data)
    for line in indicators:
        rowList = line.split(',')
        if len(rowList) != 7:       # discard countries with extra commas in names
            continue
        code = rowList[_code]       # get code and year
        year = rowList[_year]

        # build "inner" dictionary with each indicator mapping
        incDict = {
            "pop": typeFilter(rowList[_pop]),
            "gdp": typeFilter(rowList[_gdp]),
            "life": typeFilter(rowList[_life]),
        }

        # add code and year links into the dod (add code link first if it does not exist)
        if code not in dod:
            dod[code] = {}
        dod[code][year] = incDict

print(dod)

In [None]:
def dodToLod(dod):
    ''' Flatten a two-level dod (code -> year -> indicators) into a lod.

        Every (code, year) path through the tree yields one row dictionary.
        The row starts with the "code" and "year" labels taken from the path,
        followed by each indicator stored in that leaf dictionary.

        Parameters: dod giving hierarchical representation of the indicators table.
        Return value: lod (list of row dictionaries) representing the indicators table,
                      in the iteration order of the dod.
        '''
    # walk every root-to-leaf path; the leaf's entries are merged in after the
    # two path labels, so the key order is code, year, then the indicators
    return [
        {"code": code, "year": year, **stats}
        for code, byYear in dod.items()
        for year, stats in byYear.items()
    ]

In [None]:
import pandas as pd 
import json

# read in json file of indicators data and load it as a Python dictionary;
# the with-block closes the file even if parsing fails
with open("indicators.json", "r") as jsonInd:
    indDict = json.load(jsonInd)

indLod = dodToLod(indDict) # convert to lod

indDf = pd.DataFrame(indLod)   # show we get the correct data frame if we used lod
indDf[indDf["code"] == "USA"].set_index(["code", "year"]).head(10)

In [None]:
# loading file as a json file,
# and directly trying to (naively) read info into a data frame

# the with-block closes the file even if pandas fails to parse it
with open("indicators.json", "r") as jsonInd:
    indDf = pd.read_json(jsonInd)

indDf.head(10)

In [None]:
# adding an inflation-adjusted gdp ("gdp-inflated", value in 2018 dollars)
# to every (code, year) entry that has gdp data,
# and writing updated tree to a new json file

INFLATION_FACTOR = 1.03   # yearly growth factor applied per year of difference
BASE_YEAR = 2018          # year whose dollars the inflated value is expressed in

# read in file as dod
with open("indicators.json", "r") as jsonIncJson:
    dod = json.load(jsonIncJson)

# nested loop (similar to above) that visits every code/year leaf
for code in dod:
    for year in dod[code]:
        gdp = dod[code][year].get("gdp")
        # only update when a numeric gdp exists for this entry; a missing key,
        # an empty-string placeholder, or a null cannot be multiplied
        if isinstance(gdp, (int, float)):
            # compound the yearly factor over the years between `year` and BASE_YEAR
            inflatedGdp = round(gdp*pow(INFLATION_FACTOR, BASE_YEAR-int(year)), 2)
            dod[code][year]["gdp-inflated"] = inflatedGdp

# write contents of json file; the with-block flushes and closes the file
with open("indicators-updated.json", "w") as updatedJsonInc:
    json.dump(dod, updatedJsonInc)

In [None]:
# original data for USA
# (the with-block closes the file once the json has been loaded)
with open("indicators.json", "r") as jsonIncJson:
    lodOriginal = dodToLod(json.load(jsonIncJson))
dfOriginal = pd.DataFrame(lodOriginal)
dfOriginal[dfOriginal["code"] == "USA"][["year", "gdp"]].set_index("year").tail(20)

In [None]:
# ... just to verify the above operation worked correctly

# updated info for USA with inflated gdp
# (the with-block closes the file once the json has been loaded)
with open("indicators-updated.json", "r") as updatedJsonInc:
    lodUpdated = dodToLod(json.load(updatedJsonInc))
dfUpdated = pd.DataFrame(lodUpdated)
dfUpdated[dfUpdated["code"] == "USA"][["year", "gdp-inflated"]].set_index("year").tail(20)