In [2]:
import json

# In this project, we will do industry-level data analysis using Python.

# function to load the data
def load(file: str) -> dict:
    """Read a JSON document from disk and return the parsed object.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The value produced by ``json.load``. The datasets used in this
        notebook are JSON objects, so callers receive a dict.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8 by specification; pin the encoding so that parsing
    # does not depend on the platform's locale default (e.g. cp1252).
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)


# function to print the data: name, friends, pages liked
def print_data(d: dict) -> None:
    """Print every record of every section, hiding the ``id`` field.

    For each top-level key of ``d`` (e.g. users, pages) the key is printed as
    a heading, then each record under it is printed one ``field: value`` per
    line, followed by a dashed separator line.
    """
    for section, records in d.items():
        print(f"{section}:")
        for record in records:
            for field, value in record.items():
                if field == 'id':
                    continue  # ids are internal; show only descriptive fields
                print(f"{field}: {value}")
            print('---------------')

    
# Load the sample dataset and print every user and page record.
data = load(file="data.json")
print_data(d=data)


users:
name: Amit
friends: [2, 3]
liked_pages: [101]
---------------
name: Priya
friends: [1, 4]
liked_pages: [102]
---------------
name: Rahul
friends: [1]
liked_pages: [101, 103]
---------------
name: Sara
friends: [2]
liked_pages: [104]
---------------
pages:
name: Python Developers
---------------
name: Data Science Enthusiasts
---------------
name: AI & ML Community
---------------
name: Web Dev Hub
---------------


### Cleaning and structuring data

In [3]:
def clean(d: dict) -> dict:
    """Clean the social-network dataset in place and return it.

    Three passes are applied:

    1. Keep only users that have a non-blank ``name``, at least one friend
       and at least one liked page. A user failing any one of these checks
       is removed; a missing or null field counts as empty.
    2. Remove duplicate ids from each remaining user's ``friends`` list,
       keeping the first occurrence so the original order is preserved.
    3. Remove pages whose ``id`` has already been seen, keeping the first.

    Args:
        d: Dataset with a ``"users"`` list and a ``"pages"`` list.

    Returns:
        The same dict object ``d``, mutated in place.
    """
    def is_complete(user: dict) -> bool:
        # Tolerate null/missing fields instead of raising on messy input.
        name = user.get("name")
        return (
            isinstance(name, str)
            and bool(name.strip())
            and bool(user.get("friends"))
            and bool(user.get("liked_pages"))
        )

    d["users"] = [user for user in d["users"] if is_complete(user)]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # cleaned output is stable (a plain set would scramble the order).
    for user in d["users"]:
        user["friends"] = list(dict.fromkeys(user["friends"]))

    # Track seen ids in a set so each membership test is O(1).
    seen_ids = set()
    unique_pages = []
    for page in d["pages"]:
        if page["id"] not in seen_ids:
            seen_ids.add(page["id"])
            unique_pages.append(page)
    d["pages"] = unique_pages
    return d
            
# Clean the second dataset and persist the result as pretty-printed JSON.
data2 = clean(load("data2.json"))
with open("cleaned_json.json", 'w') as f:
    json.dump(data2, f, indent=4)

### People you may know

In [48]:
def suggest_people(user_id: int, users: list) -> "list | int":
    """Rank "people you may know" for one user by number of mutual friends.

    A candidate is any other user who is not already in ``user_id``'s friend
    list and who shares at least one friend id with ``user_id``.

    Args:
        user_id: Id of the user to build suggestions for.
        users: List of user records, each a dict with an ``"id"`` and a
            ``"friends"`` list of ids. A missing or null ``"friends"`` entry
            is treated as an empty list.

    Returns:
        Candidate ids ordered by mutual-friend count, highest first; ties
        keep the order in which the candidates appear in ``users``. Returns
        the integer ``-1`` when ``user_id`` is not present in ``users``.
    """
    friends_by_id = {user["id"]: user.get("friends") or [] for user in users}

    if user_id not in friends_by_id:
        return -1  # sentinel kept so existing callers keep working

    my_friends = set(friends_by_id[user_id])
    mutual_counts = {}
    for candidate, candidate_friends in friends_by_id.items():
        # Skip the user themselves and anyone they are already friends with.
        if candidate == user_id or candidate in my_friends:
            continue
        shared = my_friends.intersection(candidate_friends)
        if shared:
            mutual_counts[candidate] = len(shared)

    # sorted() is stable even with reverse=True, so ties stay in input order.
    return sorted(mutual_counts, key=mutual_counts.get, reverse=True)


# Suggest new connections for user 1 from the large dataset.
data = suggest_people(user_id=1, users=load("massive_data.json")["users"])
data

[7, 8, 9, 10, 11, 12, 26, 27]

### Pages you may like

In [49]:
def suggest_pages(user_id: int, users: list) -> "list | int":
    """Suggest pages liked by users whose tastes overlap with ``user_id``'s.

    Every other user who shares at least one liked page with ``user_id``
    contributes one entry: the pages that user likes which ``user_id`` does
    not like yet. Entries are ranked by how many liked pages the two users
    have in common.

    Args:
        user_id: Id of the user to build suggestions for.
        users: List of user records, each a dict with an ``"id"`` and a
            ``"liked_pages"`` list of page ids. A missing or null
            ``"liked_pages"`` entry is treated as an empty list.

    Returns:
        A list of page-id lists, one per similar user, ordered by the number
        of shared pages (highest first; ties keep the order of ``users``).
        Page ids inside an entry follow the other user's own list order.
        Similar users with nothing new to offer contribute no entry. The same
        page may appear in several entries when several users like it.
        Returns the integer ``-1`` when ``user_id`` is not in ``users``.
    """
    liked_by_id = {user["id"]: user.get("liked_pages") or [] for user in users}
    if user_id not in liked_by_id:
        return -1  # sentinel kept so existing callers keep working

    my_pages = set(liked_by_id[user_id])
    scored = []  # (shared page count, new pages) per similar user
    for other_id, liked in liked_by_id.items():
        if other_id == user_id:
            continue
        shared_count = len(my_pages.intersection(liked))
        if not shared_count:
            continue
        # dict.fromkeys de-duplicates while keeping the other user's order,
        # which makes the result independent of set iteration order.
        new_pages = list(dict.fromkeys(p for p in liked if p not in my_pages))
        if new_pages:  # an empty suggestion carries no information
            scored.append((shared_count, new_pages))

    # list.sort() is stable even with reverse=True, so ties stay in input order.
    scored.sort(key=lambda entry: entry[0], reverse=True)
    return [new_pages for _, new_pages in scored]

# Suggest pages for user 1 from the large dataset.
data = suggest_pages(user_id=1, users=load("massive_data.json")["users"])
data

[[103], [103], [105], [107]]