### This notebook gives an example of how we work with JSON-like data (nested Python dictionaries and lists) and convert it to a pandas DataFrame.

Single record 

In [1]:
# One student's record: a name plus three lists of scores.
alice = dict(
    name="Alice",
    homework=[100.0, 92.0, 98.0, 100.0],
    quizzes=[82.0, 83.0, 91.0],
    tests=[89.0, 97.0],
)

In [2]:
# Look up a single field of the record by its key.
alice['homework']

[100.0, 92.0, 98.0, 100.0]

Multiple records

In [3]:
# Several student records grouped under a single top-level "records" key.
di = {
    "records": [
        {
            "name": "Lloyd",
            "homework": [90.0, 97.0, 75.0, 92.0],
            "quizzes": [88.0, 40.0, 94.0],
            "tests": [75.0, 90.0]
        },
        {
            "name": "Alice",
            "homework": [100.0, 92.0, 98.0, 100.0],
            "quizzes": [82.0, 83.0, 91.0],
            "tests": [89.0, 97.0]
        },
        {
            "name": "Tyler",
            "homework": [0.0, 87.0, 75.0, 22.0],
            "quizzes": [0.0, 75.0, 78.0],
            "tests": [100.0, 100.0]
        }
    ]
}

Retrieving names

In [4]:
# Walk the list of records and print each student's name.
for student in di['records']:
    print(student['name'])

Lloyd
Alice
Tyler


Retrieving homework scores

In [5]:
# Walk the list of records and print each student's homework list.
for student in di['records']:
    print(student['homework'])

[90.0, 97.0, 75.0, 92.0]
[100.0, 92.0, 98.0, 100.0]
[0.0, 87.0, 75.0, 22.0]


Retrieving a single homework score from one student's record

In [6]:
# ['records'][1] selects the second record; ['homework'][0] is its first homework score.
di['records'][1]['homework'][0]

100.0

Getting the sum of all homework scores for a student

In [7]:
# Built-in sum() adds up the homework list of the record at index 1.
sum(di['records'][1]['homework'])

390.0

In [8]:
# Print the homework total for every record, in record order.
for student in di['records']:
    print(sum(student['homework']))

354.0
390.0
184.0


Saving sum of homework scores for each student into a list

In [9]:
# One homework total per record, in record order. A comprehension builds the
# list in a single step and does not leave a loop variable behind in the
# notebook's namespace.
homework_total = [sum(student['homework']) for student in di['records']]

In [10]:
# Display the list of per-student homework totals.
homework_total

[354.0, 390.0, 184.0]

Converting JSON to a pandas DataFrame

In [11]:
# json_normalize is exposed at the top level of pandas since 1.0; the old
# pandas.io.json location is deprecated. Fall back to it only for older
# pandas installations that lack the top-level name.
try:
    from pandas import json_normalize
except ImportError:  # pandas < 1.0
    from pandas.io.json import json_normalize

In [12]:
# Turn the list of record dicts into a DataFrame: one row per record, one column per key.
df = json_normalize(di['records'])

In [13]:
df  # note: the homework, quizzes and tests cells each hold a whole Python list

Unnamed: 0,homework,name,quizzes,tests
0,"[90.0, 97.0, 75.0, 92.0]",Lloyd,"[88.0, 40.0, 94.0]","[75.0, 90.0]"
1,"[100.0, 92.0, 98.0, 100.0]",Alice,"[82.0, 83.0, 91.0]","[89.0, 97.0]"
2,"[0.0, 87.0, 75.0, 22.0]",Tyler,"[0.0, 75.0, 78.0]","[100.0, 100.0]"


Retrieving homework scores from the first row

In [14]:
# Single label-based lookup: row label 0, column 'homework'.
df.loc[0, 'homework']

[90.0, 97.0, 75.0, 92.0]

Getting the sum of homework scores for each student from the DataFrame

In [15]:
# Iterate over the column's values directly instead of looking each one up by
# integer label, so the loop does not depend on the index being exactly 0..n-1.
for scores in df['homework']:
    print(sum(scores))

354.0
390.0
184.0


Expanding the list structures within the DataFrame into one row per score

In [16]:
import pandas as pd

In [17]:
# Spread each homework list across columns (one row per student, indexed by
# name), stack those columns into a single long Series, then rebuild a
# two-column frame with one row per (name, score) pair.
(
    pd.DataFrame(df['homework'].tolist(), index=df['name'])
    .stack()
    .reset_index(level=1, drop=True)  # drop the position-within-list level
    .reset_index(name='homework')
    [['name', 'homework']]
)

Unnamed: 0,name,homework
0,Lloyd,90.0
1,Lloyd,97.0
2,Lloyd,75.0
3,Lloyd,92.0
4,Alice,100.0
5,Alice,92.0
6,Alice,98.0
7,Alice,100.0
8,Tyler,0.0
9,Tyler,87.0


References:  
    https://www.codecademy.com/forum_questions/51fb851babf821be89002852  
    https://stackoverflow.com/questions/39011511/pandas-expand-rows-from-list-data-available-in-column  