# Reading JSON Files

In [1]:
# import the pandas library
import pandas as pd

# NOTE(review): this suppresses every warning raised after this point,
# including pandas deprecation notices — consider a narrower filter
import warnings
warnings.filterwarnings('ignore')

# show the installed pandas version alongside the outputs below
print(pd.__version__)

2.1.1


## Reading JSON data

In [2]:
# load the JSON file into a DataFrame
data = pd.read_json('datasets/simple.json')

# preview the first five rows
data.head(n=5)

Unnamed: 0,name,age,grade
0,Andew,12,A
1,Bhuvan,18,B
2,Clinton,11,A
3,Drake,12,C
4,Eisha,13,B


## Different challenges with JSON files

### Reading JSON files written as records

- Some JSON files are written as records, i.e. each JSON object is written on its own line (the JSON Lines format).
- If we try to read this type of file directly, we get an error (`ValueError: Trailing data`). To resolve it, pass the parameter **`lines=True`** to `pd.read_json`.

In [3]:
# read the data without `lines=True` (this fails on a line-delimited file);
# the error is caught and printed so the demonstration does not stop
# "Restart & Run All" at this cell
try:
    data = pd.read_json('datasets/simple_records.json')
except ValueError as err:
    # only the parsing error is expected here; anything else (for example a
    # missing file) still propagates
    print(f'{type(err).__name__}: {err}')

ValueError: Trailing data

In [4]:
# every line of the file is its own JSON object, so parse it line by line
data = pd.read_json(
    'datasets/simple_records.json',
    lines=True,
)

# preview the first five rows
data.head(n=5)

Unnamed: 0,name,age,grade
0,Andew,12,A
1,Bhuvan,18,B
2,Clinton,11,A
3,Drake,12,C
4,Eisha,13,B


## JSON Standard Library

- Most of the json files are nested and we cannot directly import them into a dataframe.
- We first need to clean and filter the json file in order to convert it into a dataframe.

In [5]:
# importing the json module of standard library
import json

# load the JSON data; the encoding is given explicitly because JSON files are
# normally UTF-8, while open() otherwise falls back to a platform-dependent
# default
with open('datasets/nested.json', encoding='utf-8') as f :
    json_data = json.load(f)

# show the raw parsed structure (a list of nested dictionaries)
print(json_data)

[{'student_roll_no': 101, 'details': {'name': 'Andew', 'age': 12, 'grade': 'A'}}, {'student_roll_no': 102, 'details': {'name': 'Bhuvan', 'age': 18, 'grade': 'B'}}, {'student_roll_no': 103, 'details': {'name': 'Clinton', 'age': 11, 'grade': 'A'}}, {'student_roll_no': 104, 'details': {'name': 'Drake', 'age': 12, 'grade': 'C'}}, {'student_roll_no': 105, 'details': {'name': 'Eisha', 'age': 13, 'grade': 'B'}}, {'student_roll_no': 106, 'details': {'name': 'Farhan', 'age': 22, 'grade': 'C'}}, {'student_roll_no': 107, 'details': {'name': 'Garima', 'age': 11, 'grade': 'A'}}, {'student_roll_no': 108, 'details': {'name': 'Himanshu', 'age': 19, 'grade': 'A'}}, {'student_roll_no': 109, 'details': {'name': 'Ishaan', 'age': 10, 'grade': 'D'}}, {'student_roll_no': 110, 'details': {'name': 'Jason', 'age': 9, 'grade': 'B'}}]


In [6]:
# use pprint (pretty print) to print the parsed data in a structured,
# one-record-per-entry layout that makes the nesting easier to read
from pprint import pprint
pprint(json_data)

[{'details': {'age': 12, 'grade': 'A', 'name': 'Andew'},
  'student_roll_no': 101},
 {'details': {'age': 18, 'grade': 'B', 'name': 'Bhuvan'},
  'student_roll_no': 102},
 {'details': {'age': 11, 'grade': 'A', 'name': 'Clinton'},
  'student_roll_no': 103},
 {'details': {'age': 12, 'grade': 'C', 'name': 'Drake'},
  'student_roll_no': 104},
 {'details': {'age': 13, 'grade': 'B', 'name': 'Eisha'},
  'student_roll_no': 105},
 {'details': {'age': 22, 'grade': 'C', 'name': 'Farhan'},
  'student_roll_no': 106},
 {'details': {'age': 11, 'grade': 'A', 'name': 'Garima'},
  'student_roll_no': 107},
 {'details': {'age': 19, 'grade': 'A', 'name': 'Himanshu'},
  'student_roll_no': 108},
 {'details': {'age': 10, 'grade': 'D', 'name': 'Ishaan'},
  'student_roll_no': 109},
 {'details': {'age': 9, 'grade': 'B', 'name': 'Jason'}, 'student_roll_no': 110}]


#### Create a new JSON file that contains the age and name of the people whose age is greater than 15.

In [9]:
# look at a single record to see how it is structured
print(json_data[0])

# iterate through the json data and print every age on one line;
# the loop variable is called `record` so that it does not overwrite the
# `data` DataFrame with a dictionary
for record in json_data:
    print(record['details']['age'], end = ' ')

{'student_roll_no': 101, 'details': {'name': 'Andew', 'age': 12, 'grade': 'A'}}
12 18 11 12 13 22 11 19 10 9 

In [71]:
# create a new empty list to store the filtered data
# (this is the name the following cells read and write to disk)
filtered_data = []

# iterate through the json data
for student in json_data:
    details = student['details']

    # check for the condition
    if details['age'] > 15:
        # if condition satisfies, store the age and name in a fresh
        # dictionary, so every entry in the list is an independent object
        filtered_variable = {
            'age': details['age'],
            'name': details['name'],
        }
        filtered_data.append(filtered_variable)

In [72]:
# check the filtered data: display the list of {'age', 'name'} dictionaries
filtered_data

[{'age': 18, 'name': 'Bhuvan'},
 {'age': 22, 'name': 'Farhan'},
 {'age': 19, 'name': 'Himanshu'}]

In [79]:
# show only the values (without the keys) of the first filtered record
filtered_data[0].values()

dict_values([18, 'Bhuvan'])

#### WRITING A JSON FILE

---

In [73]:
# serialise the filtered records (indented by 4 spaces for readability)
# and save them to a new json file
with open('datasets/filtered.json','w') as f:
    f.write(json.dumps(filtered_data, indent=4))

---