In [1]:
import json

In [2]:
print("Welcome to world of power programming")

Welcome to world of power programming


In [3]:
def load_data(filename):
    with open(filename,"r") as file:
        data=json.load(file)
    return data

In [4]:
def display_users(data):
    print("Users and Their Connections:\n")
    for user in data["users"]:
        print(f"{user['name']} (ID: {user['id']}) - Friends: {user['friends']} - Liked Pages: {user['liked_pages']}")
    print("\nPages:\n")
    for page in data["pages"]:
        print(f"{page['id']}: {page['name']}")

In [7]:
data = load_data("codebook_data.json")
display_users(data)

Users and Their Connections:

Amit (ID: 1) - Friends: [2, 3] - Liked Pages: [101]
Priya (ID: 2) - Friends: [1, 4] - Liked Pages: [102]
Rahul (ID: 3) - Friends: [1] - Liked Pages: [101, 103]
Sara (ID: 4) - Friends: [2] - Liked Pages: [104]

Pages:

101: Python Developers
102: Data Science Enthusiasts
103: AI & ML Community
104: Web Dev Hub


# Working with Incomplete Data

In [8]:
"""
User ID 3 has an empty name.
User ID 4 has a duplicate friend entry.
User ID 5 has no connections or liked pages (inactive user).
The pages list contains duplicate page IDs.
"""

'\nUser ID 3 has an empty name.\nUser ID 4 has a duplicate friend entry.\nUser ID 5 has no connections or liked pages (inactive user).\nThe pages list contains duplicate page IDs.\n'

In [10]:
"""
We will:

Remove users with missing names.
Remove duplicate friend entries.
Remove inactive users (users with no friends and no liked pages).
Deduplicate pages based on IDs.
"""

'\nWe will:\n\nRemove users with missing names.\nRemove duplicate friend entries.\nRemove inactive users (users with no friends and no liked pages).\nDeduplicate pages based on IDs.\n'

In [38]:
def clean_data(data):
    # Remove the missing name 
    data["users"]= [user for user in data["users"] if user["name"].strip()]

    # Remove the duplicate friends
    for user in data["users"]:
        user["friends"]= list(set(user["friends"]))

    # Remove inactive users (users with no friends and no liked pages)
    data["users"]= [user for user in data["users"] if len(user["friends"])>0 and len(user["liked_pages"])>0]

    #Remove duplicate page
    unique_page={}
    for page in data["pages"]:
        unique_page[page["id"]]=page
    data["pages"]=list(unique_page.value()]
        
    return data

In [39]:
data = json.load(open("codebook_incomplete_data.json"))
data = clean_data(data)
json.dump(data, open("cleaned_codebook_data.json", "w"), indent=4)
print("Data cleaned successfully!")

Data cleaned successfully!


In [40]:
print(json.dump(data, open("cleaned_codebook_data.json", "w"), indent=4))

None


In [41]:
print(data)

{'users': [{'id': 1, 'name': 'Amit', 'friends': [2, 3], 'liked_pages': [101]}, {'id': 2, 'name': 'Priya', 'friends': [1, 4], 'liked_pages': [102]}, {'id': 4, 'name': 'Sara', 'friends': [2], 'liked_pages': [104]}], 'pages': [{'id': 101, 'name': 'Python Developers'}, {'id': 102, 'name': 'Data Science Enthusiasts'}, {'id': 103, 'name': 'AI & ML Community'}, {'id': 104, 'name': 'Web Dev Hub'}, {'id': 104, 'name': 'Web Development'}]}
