# JSON in python
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_json.html

In [3]:
import pandas as pd

In [4]:
from io import StringIO
df = pd.DataFrame([['a', 'b'], ['c', 'd']],
                  index=['row 1', 'row 2'],
                  columns=['col 1', 'col 2'])
df

Unnamed: 0,col 1,col 2
row 1,a,b
row 2,c,d


In [7]:
df.to_json(orient='split')

'{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"],["c","d"]]}'

In [8]:
pd.read_json(StringIO(_), orient='split')

Unnamed: 0,col 1,col 2
row 1,a,b
row 2,c,d


In [9]:
df.to_json(orient='index')

'{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'

In [11]:
from pydataset import data

In [12]:
mtcars = data('mtcars')
mtcars.head()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [16]:
mtcars[1:5].to_json(orient='split')

'{"columns":["mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb"],"index":["Mazda RX4 Wag","Datsun 710","Hornet 4 Drive","Hornet Sportabout"],"data":[[21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4],[22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1],[21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1],[18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2]]}'

In [15]:
mtcars[1:5].to_json(orient='index')

'{"Mazda RX4 Wag":{"mpg":21.0,"cyl":6,"disp":160.0,"hp":110,"drat":3.9,"wt":2.875,"qsec":17.02,"vs":0,"am":1,"gear":4,"carb":4},"Datsun 710":{"mpg":22.8,"cyl":4,"disp":108.0,"hp":93,"drat":3.85,"wt":2.32,"qsec":18.61,"vs":1,"am":1,"gear":4,"carb":1},"Hornet 4 Drive":{"mpg":21.4,"cyl":6,"disp":258.0,"hp":110,"drat":3.08,"wt":3.215,"qsec":19.44,"vs":1,"am":0,"gear":3,"carb":1},"Hornet Sportabout":{"mpg":18.7,"cyl":8,"disp":360.0,"hp":175,"drat":3.15,"wt":3.44,"qsec":17.02,"vs":0,"am":0,"gear":3,"carb":2}}'

In [20]:
mtcars[1:5].to_json(orient='records')

'[{"mpg":21.0,"cyl":6,"disp":160.0,"hp":110,"drat":3.9,"wt":2.875,"qsec":17.02,"vs":0,"am":1,"gear":4,"carb":4},{"mpg":22.8,"cyl":4,"disp":108.0,"hp":93,"drat":3.85,"wt":2.32,"qsec":18.61,"vs":1,"am":1,"gear":4,"carb":1},{"mpg":21.4,"cyl":6,"disp":258.0,"hp":110,"drat":3.08,"wt":3.215,"qsec":19.44,"vs":1,"am":0,"gear":3,"carb":1},{"mpg":18.7,"cyl":8,"disp":360.0,"hp":175,"drat":3.15,"wt":3.44,"qsec":17.02,"vs":0,"am":0,"gear":3,"carb":2}]'

In [18]:
pd.read_json(StringIO(_), orient='records')

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160,110,3.9,2.875,17.02,0,1,4,4
1,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
2,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
3,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2


In [23]:
mtcars[1:5].to_json(orient='table')

'{"schema":{"fields":[{"name":"index","type":"string"},{"name":"mpg","type":"number"},{"name":"cyl","type":"integer"},{"name":"disp","type":"number"},{"name":"hp","type":"integer"},{"name":"drat","type":"number"},{"name":"wt","type":"number"},{"name":"qsec","type":"number"},{"name":"vs","type":"integer"},{"name":"am","type":"integer"},{"name":"gear","type":"integer"},{"name":"carb","type":"integer"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":"Mazda RX4 Wag","mpg":21.0,"cyl":6,"disp":160.0,"hp":110,"drat":3.9,"wt":2.875,"qsec":17.02,"vs":0,"am":1,"gear":4,"carb":4},{"index":"Datsun 710","mpg":22.8,"cyl":4,"disp":108.0,"hp":93,"drat":3.85,"wt":2.32,"qsec":18.61,"vs":1,"am":1,"gear":4,"carb":1},{"index":"Hornet 4 Drive","mpg":21.4,"cyl":6,"disp":258.0,"hp":110,"drat":3.08,"wt":3.215,"qsec":19.44,"vs":1,"am":0,"gear":3,"carb":1},{"index":"Hornet Sportabout","mpg":18.7,"cyl":8,"disp":360.0,"hp":175,"drat":3.15,"wt":3.44,"qsec":17.02,"vs":0,"am":0,"gear":3,"carb":2}]}

## json normalise
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.json_normalize.html

In [25]:
data = [
    {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
    {"name": {"given": "Mark", "family": "Regner"}},
    {"id": 2, "name": "Faye Raker"},
]
data

[{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
 {'name': {'given': 'Mark', 'family': 'Regner'}},
 {'id': 2, 'name': 'Faye Raker'}]

In [26]:
pd.json_normalize(data)

Unnamed: 0,id,name.first,name.last,name.given,name.family,name
0,1.0,Coleen,Volk,,,
1,,,,Mark,Regner,
2,2.0,,,,,Faye Raker


In [27]:
data = [
    {
        "id": 1,
        "name": "Cole Volk",
        "fitness": {"height": 130, "weight": 60},
    },
    {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
    {
        "id": 2,
        "name": "Faye Raker",
        "fitness": {"height": 130, "weight": 60},
    },
]
data

[{'id': 1, 'name': 'Cole Volk', 'fitness': {'height': 130, 'weight': 60}},
 {'name': 'Mark Reg', 'fitness': {'height': 130, 'weight': 60}},
 {'id': 2, 'name': 'Faye Raker', 'fitness': {'height': 130, 'weight': 60}}]

In [28]:
pd.json_normalize(data, max_level=0)

Unnamed: 0,id,name,fitness
0,1.0,Cole Volk,"{'height': 130, 'weight': 60}"
1,,Mark Reg,"{'height': 130, 'weight': 60}"
2,2.0,Faye Raker,"{'height': 130, 'weight': 60}"


In [32]:
pd.json_normalize(data, max_level=1)  #gets done in level1

Unnamed: 0,id,name,fitness.height,fitness.weight
0,1.0,Cole Volk,130,60
1,,Mark Reg,130,60
2,2.0,Faye Raker,130,60


In [31]:
pd.json_normalize(data, max_level=3)

Unnamed: 0,id,name,fitness.height,fitness.weight
0,1.0,Cole Volk,130,60
1,,Mark Reg,130,60
2,2.0,Faye Raker,130,60


In [33]:
data = [
    {
        "state": "Florida",
        "shortname": "FL",
        "info": {"governor": "Rick Scott"},
        "counties": [
            {"name": "Dade", "population": 12345},
            {"name": "Broward", "population": 40000},
            {"name": "Palm Beach", "population": 60000},
        ],
    },
    {
        "state": "Ohio",
        "shortname": "OH",
        "info": {"governor": "John Kasich"},
        "counties": [
            {"name": "Summit", "population": 1234},
            {"name": "Cuyahoga", "population": 1337},
        ],
    },
]
data

[{'state': 'Florida',
  'shortname': 'FL',
  'info': {'governor': 'Rick Scott'},
  'counties': [{'name': 'Dade', 'population': 12345},
   {'name': 'Broward', 'population': 40000},
   {'name': 'Palm Beach', 'population': 60000}]},
 {'state': 'Ohio',
  'shortname': 'OH',
  'info': {'governor': 'John Kasich'},
  'counties': [{'name': 'Summit', 'population': 1234},
   {'name': 'Cuyahoga', 'population': 1337}]}]

In [38]:
pd.json_normalize(data, max_level=1)

Unnamed: 0,state,shortname,counties,info.governor
0,Florida,FL,"[{'name': 'Dade', 'population': 12345}, {'name': 'Broward', 'population': 40000}, {'na...",Rick Scott
1,Ohio,OH,"[{'name': 'Summit', 'population': 1234}, {'name': 'Cuyahoga', 'population': 1337}]",John Kasich


In [41]:
pd.json_normalize(data, 'counties', ['state','shortname', ['info', 'governor']])

Unnamed: 0,name,population,state,shortname,info.governor
0,Dade,12345,Florida,FL,Rick Scott
1,Broward,40000,Florida,FL,Rick Scott
2,Palm Beach,60000,Florida,FL,Rick Scott
3,Summit,1234,Ohio,OH,John Kasich
4,Cuyahoga,1337,Ohio,OH,John Kasich


In [43]:
data = {'A':[1,2]}
data

{'A': [1, 2]}

In [45]:
pd.json_normalize(data, 'A', record_prefix='Prefix.')

Unnamed: 0,Prefix.0
0,1
1,2


## DF to JSON
-  https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html

In [46]:
from json import loads, dumps

In [47]:
df = pd.DataFrame(
    [["a", "b"], ["c", "d"]],
    index=["row 1", "row 2"],
    columns=["col 1", "col 2"],
)
df

Unnamed: 0,col 1,col 2
row 1,a,b
row 2,c,d


In [48]:
result = df.to_json(orient='split')
result

'{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"],["c","d"]]}'

In [49]:
parsed = loads(result)
parsed

{'columns': ['col 1', 'col 2'],
 'index': ['row 1', 'row 2'],
 'data': [['a', 'b'], ['c', 'd']]}

In [52]:
dumps(parsed, indent=0)

'{\n"columns": [\n"col 1",\n"col 2"\n],\n"index": [\n"row 1",\n"row 2"\n],\n"data": [\n[\n"a",\n"b"\n],\n[\n"c",\n"d"\n]\n]\n}'

In [53]:
result = df.to_json(orient='records')
result

'[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]'

In [54]:
dumps(parsed, indent=4)

'{\n    "columns": [\n        "col 1",\n        "col 2"\n    ],\n    "index": [\n        "row 1",\n        "row 2"\n    ],\n    "data": [\n        [\n            "a",\n            "b"\n        ],\n        [\n            "c",\n            "d"\n        ]\n    ]\n}'

In [55]:
result = df.to_json(orient='index')
result

'{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'

In [56]:
parsed = loads(result)
parsed

{'row 1': {'col 1': 'a', 'col 2': 'b'}, 'row 2': {'col 1': 'c', 'col 2': 'd'}}

In [57]:
dumps(parsed, indent=4)

'{\n    "row 1": {\n        "col 1": "a",\n        "col 2": "b"\n    },\n    "row 2": {\n        "col 1": "c",\n        "col 2": "d"\n    }\n}'

In [58]:
result = df.to_json(orient='columns')
result

'{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}'

In [59]:
parsed = loads(result)
parsed

{'col 1': {'row 1': 'a', 'row 2': 'c'}, 'col 2': {'row 1': 'b', 'row 2': 'd'}}

In [60]:
dumps(parsed, indent=4)

'{\n    "col 1": {\n        "row 1": "a",\n        "row 2": "c"\n    },\n    "col 2": {\n        "row 1": "b",\n        "row 2": "d"\n    }\n}'

In [61]:
result = df.to_json(orient='values')
result

'[["a","b"],["c","d"]]'

In [62]:
parsed = loads(result)
parsed

[['a', 'b'], ['c', 'd']]

In [68]:
print(dumps(parsed, indent=4))

{
    "schema": {
        "fields": [
            {
                "name": "index",
                "type": "string"
            },
            {
                "name": "col 1",
                "type": "string"
            },
            {
                "name": "col 2",
                "type": "string"
            }
        ],
        "primaryKey": [
            "index"
        ],
        "pandas_version": "1.4.0"
    },
    "data": [
        {
            "index": "row 1",
            "col 1": "a",
            "col 2": "b"
        },
        {
            "index": "row 2",
            "col 1": "c",
            "col 2": "d"
        }
    ]
}


In [64]:
# Table Schema
result = df.to_json (orient='table')
result

'{"schema":{"fields":[{"name":"index","type":"string"},{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":"row 1","col 1":"a","col 2":"b"},{"index":"row 2","col 1":"c","col 2":"d"}]}'

In [65]:
parsed = loads(result)
parsed

{'schema': {'fields': [{'name': 'index', 'type': 'string'},
   {'name': 'col 1', 'type': 'string'},
   {'name': 'col 2', 'type': 'string'}],
  'primaryKey': ['index'],
  'pandas_version': '1.4.0'},
 'data': [{'index': 'row 1', 'col 1': 'a', 'col 2': 'b'},
  {'index': 'row 2', 'col 1': 'c', 'col 2': 'd'}]}

In [69]:
print(dumps(parsed, indent=2))

{
  "schema": {
    "fields": [
      {
        "name": "index",
        "type": "string"
      },
      {
        "name": "col 1",
        "type": "string"
      },
      {
        "name": "col 2",
        "type": "string"
      }
    ],
    "primaryKey": [
      "index"
    ],
    "pandas_version": "1.4.0"
  },
  "data": [
    {
      "index": "row 1",
      "col 1": "a",
      "col 2": "b"
    },
    {
      "index": "row 2",
      "col 1": "c",
      "col 2": "d"
    }
  ]
}


## Build Table Schema
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.io.json.build_table_schema.html

In [70]:
from pandas.io.json._table_schema import build_table_schema
df = pd.DataFrame(
    {'A': [1, 2, 3],
     'B': ['a', 'b', 'c'],
     'C': pd.date_range('2016-01-01', freq='d', periods=3),
    }, index=pd.Index(range(3), name='idx'))

df

Unnamed: 0_level_0,A,B,C
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,a,2016-01-01
1,2,b,2016-01-02
2,3,c,2016-01-03


In [71]:
build_table_schema(df)

{'fields': [{'name': 'idx', 'type': 'integer'},
  {'name': 'A', 'type': 'integer'},
  {'name': 'B', 'type': 'string'},
  {'name': 'C', 'type': 'datetime'}],
 'primaryKey': ['idx'],
 'pandas_version': '1.4.0'}