In [20]:
import json

# JSON parsing python

In [21]:
json_string = """
{
    "name": "John Doe",
    "age": 32,
    "address": {
        "street": "123 Main St",
        "city": "Anytown",
        "state": "CA"
    },
    "tags": ["Finance", "Admin"]
}
"""

In [22]:
parsed_json = json.loads(json_string)
print(parsed_json["address"]["street"])

123 Main St


# Python Class Serialize/Deserialize in Python

In [73]:
class CustomProfile:
    def __init__(self, name, age, tags, address):
        self.name = name
        self.age = age
        self.tags = tags
        self.address = address
        
class Address:
    def __init__(self, street, city, state):
        self.street = street
        self.city = city
        self.state = state
        
class CustomProfileEncoder(JSONEncoder):
        def default(self, o):
            return o.__dict__

class ProfileEncoder(json.JSONEncoder):
    def default(self, custom_obj):
        if isinstance(custom_obj, CustomProfile):
            return custom_obj.__dict__
        else:
            return super().default(custom_obj)
address = Address("123 Main St", "Anytown", "CA")
profile = CustomProfile("Yasoob Khalid", 33, ["Finance"],address)
with open("new_profile.json", "w") as f:
    # Serialization
    json.dump(profile, f, cls=CustomProfileEncoder)
print(CustomProfileEncoder().encode(profile))

# Deserialization
with open("new_profile.json") as json_file:
    data = json.load(json_file)
 
    # Json Load above make it a Dict
    print("Type:", type(data))
    print(CustomProfileEncoder().encode(data))


{"name": "Yasoob Khalid", "age": 33, "tags": ["Finance"], "address": {"street": "123 Main St", "city": "Anytown", "state": "CA"}}
Type: <class 'dict'>
{"name": "Yasoob Khalid", "age": 33, "tags": ["Finance"], "address": {"street": "123 Main St", "city": "Anytown", "state": "CA"}}


In [69]:
import pandas as pd
text_file = open("new_profile.json", "r")
 
#read whole file to a string
profileJSONData = text_file.read()
profile = json.loads(profileJSONData)
print(profile)
print(CustomProfileEncoder().encode(profile))

{'name': 'Yasoob Khalid', 'age': 33, 'tags': ['Finance'], 'address': {'street': '123 Main St', 'city': 'Anytown', 'state': 'CA'}}
{"name": "Yasoob Khalid", "age": 33, "tags": ["Finance"], "address": {"street": "123 Main St", "city": "Anytown", "state": "CA"}}


In [24]:
import json
from json import JSONEncoder

class Employee:
    def __init__(self, name, salary, address):
        self.name = name
        self.salary = salary
        self.address = address

class Address:
    def __init__(self, city, street, pin):
        self.city = city
        self.street = street
        self.pin = pin

# subclass JSONEncoder
class EmployeeEncoder(json.JSONEncoder):
        def default(self, o):
            return o.__dict__

address = Address("Alpharetta", "7258 Spring Street", "30004")
employee = Employee("John", 9000, address)

print("Printing to check how it will look like")
print(EmployeeEncoder().encode(employee))

print("Encode Employee Object into JSON formatted Data using custom JSONEncoder")
# Serialization
employeeJSONData = json.dumps(employee, indent=4, cls=EmployeeEncoder)
print(employeeJSONData)

# Let's load it using the load method to check if we can decode it or not.
print("Decode JSON formatted Data")
# DeSerialization
employeeJSON = json.loads(employeeJSONData)
print(employeeJSON)

Printing to check how it will look like
{"name": "John", "salary": 9000, "address": {"city": "Alpharetta", "street": "7258 Spring Street", "pin": "30004"}}
Encode Employee Object into JSON formatted Data using custom JSONEncoder
{
    "name": "John",
    "salary": 9000,
    "address": {
        "city": "Alpharetta",
        "street": "7258 Spring Street",
        "pin": "30004"
    }
}
Decode JSON formatted Data
{'name': 'John', 'salary': 9000, 'address': {'city': 'Alpharetta', 'street': '7258 Spring Street', 'pin': '30004'}}


# Dataframe read JSON

In [25]:
import pandas as pd
import json
from pandas import json_normalize
data = '''
{
"technologies":
         [
         { "Courses": "Spark", "Fee": 22000,"Duration":"40Days"},
         { "Courses": "PySpark","Fee": 25000,"Duration":"60Days"},
         { "Courses": "Hadoop", "Fee": 23000,"Duration":"50Days"}
         ],
"status": ["ok"]
}
'''
print(data)


{
"technologies":
         [
         { "Courses": "Spark", "Fee": 22000,"Duration":"40Days"},
         { "Courses": "PySpark","Fee": 25000,"Duration":"60Days"},
         { "Courses": "Hadoop", "Fee": 23000,"Duration":"50Days"}
         ],
"status": ["ok"]
}



In [26]:
# Use json_normalize() to convert JSON to DataFrame
dict = json.loads(data)
df2 = json_normalize(dict['technologies']) 
print(df2)


   Courses    Fee Duration
0    Spark  22000   40Days
1  PySpark  25000   60Days
2   Hadoop  23000   50Days


# Orient Indexed data

In [27]:
import pandas as pd
jsonStr = '''{"Index0":{"Courses": "Pandas","Discount": "1200"},
           "Index1":{"Courses": "Hadoop","Discount": "1500"},
           "Index2":{"Courses": "Spark","Discount": "1800"}
          }'''
# Convert JSON to DataFrame Using read_json()
df2 = pd.read_json(jsonStr, orient ='index')
print(df2)

       Courses  Discount
Index0  Pandas      1200
Index1  Hadoop      1500
Index2   Spark      1800


# Orient Name Value Pair
# Create a DataFrame from the dict object where keys from the dict are used as an index

In [46]:
# Use DataFrame.from_dict() to Convert JSON to DataFrame
import pandas as pd
import json
from pandas import json_normalize
json_string = '{ "Courses": "Spark", "Fee": 22000,"Duration":"40Days"}'
data = json.loads(json_string)

# Use pandas.DataFrame.from_dict() to Convert JSON to DataFrame
df2 = pd.DataFrame.from_dict(data, orient="index")
print(df2)
# Get the list of all column names from headers
column_headers = list(df2.columns.values)
print("The Column Header :", column_headers)
# Column metadata
print(df2[0])
# 2nd Value of first column
print(df2[0][1])
print(df.index)
print(df.index.tolist())

               0
Courses    Spark
Fee        22000
Duration  40Days
The Column Header : [0]
Courses      Spark
Fee          22000
Duration    40Days
Name: 0, dtype: object
22000
RangeIndex(start=0, stop=5, step=1)
[0, 1, 2, 3, 4]


In [36]:

import pandas as pd
import numpy as np

technologies= {
    'Courses':["Spark","PySpark","Hadoop","Python","Pandas"],
    'Fee' :[22000,25000,23000,24000,26000],
    'Duration':['30days','50days','30days', None,np.nan],
    'Discount':[1000,2300,1000,1200,2500]
          }
df = pd.DataFrame(technologies)
print(df)

# Get the list of all column names from headers
column_headers = list(df.columns.values)
print("The Column Header :", column_headers)

# Get the list of all column names from headers
column_headers = df.columns.values.tolist()
print("The Column Header 1:", column_headers)

# Using list(df) to get the column headers as a list
column_headers = list(df.columns)
print("The Column Header 2:", column_headers)

# Using list(df) to get the list of all Column Names
column_headers = list(df)
print("The Column Header 3:", column_headers)

# Dataframe show all columns sorted list
col_headers=sorted(df)
print("The Column Header 4:",col_headers)

# Get all Column Header Labels as List
for column_headers in df.columns: 
    print(column_headers)
    
column_headers = df.keys().values.tolist()
print("The Column Header :", column_headers)

# Get all numeric columns
numeric_columns = df._get_numeric_data().columns.values.tolist()
print(numeric_columns)

# Simple Pandas Numeric Columns Code
numeric_columns=df.dtypes[df.dtypes == "int64"].index.values.tolist()
print(numeric_columns)



   Courses    Fee Duration  Discount
0    Spark  22000   30days      1000
1  PySpark  25000   50days      2300
2   Hadoop  23000   30days      1000
3   Python  24000     None      1200
4   Pandas  26000      NaN      2500
The Column Header : ['Courses', 'Fee', 'Duration', 'Discount']
The Column Header 1: ['Courses', 'Fee', 'Duration', 'Discount']
The Column Header 2: ['Courses', 'Fee', 'Duration', 'Discount']
The Column Header 3: ['Courses', 'Fee', 'Duration', 'Discount']
The Column Header 4: ['Courses', 'Discount', 'Duration', 'Fee']
Courses
Fee
Duration
Discount
The Column Header : ['Courses', 'Fee', 'Duration', 'Discount']
['Fee', 'Discount']
['Fee', 'Discount']


Loading data from python arrays

In [49]:
# Example Python program that makes a JSON
# string from a pandas DataFrame object

# Field names
tripFields = ("from", "to", "duration", "distance", "through", "start");

# Data
trips   = (("San Jose", "Mountain View", 17, 13.6, "US-101 N", 1621602000),
           ("Mountain View", "Milpitas", 13, 11.2, "CA-237 E", 1621603200),
           ("Milpitas", "San Jose", 13, 8.0, ("I-880 S", "N 10th St"), 1621604100),
           ("San Jose", "Los Gatos", 16, 10.9, ("I-280 N", "CA-17 S"), 1621605300),
           ("Los Gatos", "Cupertino", 12, 8.7, "CA-85 N", 1621606500));

# Create a pandas DataFrame
tripData = pd.DataFrame(data=trips, columns=tripFields);
print("DataFrame:");
print(tripData);

# Convert the pandas DataFrame object to a JSON string
jsonString = tripData.to_json(orient="records");
print("DataFrame as JSON string:");
print(jsonString);

DataFrame:
            from             to  duration  distance               through  \
0       San Jose  Mountain View        17      13.6              US-101 N   
1  Mountain View       Milpitas        13      11.2              CA-237 E   
2       Milpitas       San Jose        13       8.0  (I-880 S, N 10th St)   
3       San Jose      Los Gatos        16      10.9    (I-280 N, CA-17 S)   
4      Los Gatos      Cupertino        12       8.7               CA-85 N   

        start  
0  1621602000  
1  1621603200  
2  1621604100  
3  1621605300  
4  1621606500  
DataFrame as JSON string:
[{"from":"San Jose","to":"Mountain View","duration":17,"distance":13.6,"through":"US-101 N","start":1621602000},{"from":"Mountain View","to":"Milpitas","duration":13,"distance":11.2,"through":"CA-237 E","start":1621603200},{"from":"Milpitas","to":"San Jose","duration":13,"distance":8.0,"through":["I-880 S","N 10th St"],"start":1621604100},{"from":"San Jose","to":"Los Gatos","duration":16,"distance":10

In [64]:
from typing import List
import json
 
 
class Student(object):
    def __init__(self, first_name: str, last_name: str):
        self.first_name = first_name
        self.last_name = last_name
 
 
class Team(object):
    def __init__(self, students: List[Student]):
        self.students = students
 
 
student1 = Student(first_name="Geeky", last_name="Guy")
student2 = Student(first_name="GFG", last_name="Rocks")
team = Team(students=[student1, student2])
 
# Serialization
json_data = json.dumps(team, default=lambda o: o.__dict__, indent=4)
print(json_data)
 
# Deserialization
decoded_team = Team(**json.loads(json_data))
print(decoded_team.students[0]['first_name'])

{
    "students": [
        {
            "first_name": "Geeky",
            "last_name": "Guy"
        },
        {
            "first_name": "GFG",
            "last_name": "Rocks"
        }
    ]
}
Geeky
