In [1]:
import json

In [2]:
# to load the data as json in the notebook using json

def load_data(filename):
    with open (filename, "r") as f :
        data = json.load(f)
    return data

In [6]:
data = load_data("1.txt")

In [7]:
data

{'users': [{'id': 1, 'name': 'Amit', 'friends': [2, 3], 'liked_pages': [101]},
  {'id': 2, 'name': 'Priya', 'friends': [1, 4], 'liked_pages': [102]},
  {'id': 3, 'name': 'Rahul', 'friends': [1], 'liked_pages': [101, 103]},
  {'id': 4, 'name': 'Sara', 'friends': [2], 'liked_pages': [104]}],
 'pages': [{'id': 101, 'name': 'Python Developers'},
  {'id': 102, 'name': 'Data Science Enthusiasts'},
  {'id': 103, 'name': 'AI & ML Community'},
  {'id': 104, 'name': 'Web Dev Hub'}]}

In [8]:
type(data)

dict

In [10]:
# writing a function to display the users and their connections
def dp_data(data):
    print("Users and their connections\n ")
    for user in data['users']:
        print(f"ID:-{user['id']} and {user['name']} is friends with: {user['friends']} and liked pages are {user['liked_pages']}")    
    print("\npages information")
    for page in data['pages']:
        print(f"{page['id']}: {page['name']}")
dp_data(data)

Users and their connections
 
ID:-1 and Amit is friends with: [2, 3] and liked pages are [101]
ID:-2 and Priya is friends with: [1, 4] and liked pages are [102]
ID:-3 and Rahul is friends with: [1] and liked pages are [101, 103]
ID:-4 and Sara is friends with: [2] and liked pages are [104]

pages information
101: Python Developers
102: Data Science Enthusiasts
103: AI & ML Community
104: Web Dev Hub


In [11]:
data2 = load_data("2.txt")
data2

{'users': [{'id': 1, 'name': 'Amit', 'friends': [2, 3], 'liked_pages': [101]},
  {'id': 2, 'name': 'Priya', 'friends': [1, 4], 'liked_pages': [102]},
  {'id': 3, 'name': '', 'friends': [1], 'liked_pages': [101, 103]},
  {'id': 4, 'name': 'Sara', 'friends': [2, 2], 'liked_pages': [104]},
  {'id': 5, 'name': 'Amit', 'friends': [], 'liked_pages': []}],
 'pages': [{'id': 101, 'name': 'Python Developers'},
  {'id': 102, 'name': 'Data Science Enthusiasts'},
  {'id': 103, 'name': 'AI & ML Community'},
  {'id': 104, 'name': 'Web Dev Hub'},
  {'id': 104, 'name': 'Web Development'}]}

In [12]:
#defining a funcs

def clean_data(data):
    
    # remove  suers with missing name
    data["users"] = [user for user in data["users"] if user["name"].strip()]

    # remove duplicate friends
    for user in data["users"]:
        user['friends'] = list(set(user['friends']))

    # remove in active users 
    data['users'] = [user for user in data['users'] if user['friends'] or user['liked_pages']]

    # remove duplicate pages
    unq_pages = {}
    for page in data['pages']:
        unq_pages[page['id']] = page
    data['pages'] = list(unq_pages.values())
    
    return(data)

In [13]:
#loading the data

data = json.load(open("2.txt"))
data = clean_data(data)
json.dump(data, open("cleaned_data2.json", "w"), indent=4)
print("data has been cleaned successfully")

data has been cleaned successfully


In [16]:
import json

def load_data(filename):
    with open(filename, "r") as f:
        return json.load(f)

def find_people_you_may_know(user_id, data):
    user_friends = {}
    for user in data['users']:
        user_friends[user['id']] = set(user['friends'])
        
    if user_id not in user_friends:
        return[]

    direct_friends = user_friends[user_id]
    suggestions = {}
    for friend in direct_friends:
        for mutual in user_friends[friend]:
            if mutual!=user_id and mutual not in direct_friends:
                # counting the mutual friends 
                suggestions[mutual] = suggestions.get(mutual, 0) + 1

    sorted_suggestions = sorted(suggestions.items(), key=lambda x: x[1], reverse=True)
    return [user_id for user_id, mutual_count in sorted_suggestions]

In [17]:
# loading the data

data = load_data("massive_data.json")
user_id = 1
recc = find_people_you_may_know(user_id, data)
print(recc)

[7, 8, 9, 10, 11, 12]


In [28]:
import json

def pages_you_might_like_names(user_id, data):
    # 1. Create a "Translation Map" (ID -> Name)
    # This turns the list of pages into a quick-lookup dictionary
    page_names = {page['id']: page['name'] for page in data['pages']}
    
    # 2. Map out what pages every user likes
    user_pages = {user['id']: set(user['liked_pages']) for user in data['users']}
    
    if user_id not in user_pages:
        return []
        
    user_liked_pages = user_pages[user_id]
    page_suggestion = {}

    # 3. Find people with similar interests
    for other_user_id, pages in user_pages.items():
        if other_user_id != user_id:
            # Check how many interests you share
            shared_count = len(user_liked_pages.intersection(pages))
            
            if shared_count > 0:
                for page_id in pages:
                    if page_id not in user_liked_pages:
                        # Add points based on shared interests
                        page_suggestion[page_id] = page_suggestion.get(page_id, 0) + shared_count
    
    # 4. Sort the suggestions by score
    sorted_ids = sorted(page_suggestion.items(), key=lambda x: x[1], reverse=True)
    
    # 5. Convert IDs to Names for the final output
    final_recommendations = []
    for page_id, score in sorted_ids:
        name = page_names.get(page_id, "Unknown Page")
        final_recommendations.append((name, score))
        
    return final_recommendations

In [29]:
# Load your massive data file
data = load_data("massive_data.json")

# Call the function (make sure the name matches!)
recommendations = pages_you_might_like(1, data)

print("Recommended Page IDs and their scores:")
print(recommendations)

Recommended Page IDs and their scores:
[(103, 2), (105, 1), (107, 1), (104, 0), (106, 0), (108, 0), (109, 0), (110, 0), (111, 0), (112, 0), (113, 0), (114, 0), (115, 0), (116, 0), (117, 0), (118, 0), (119, 0), (120, 0), (121, 0), (122, 0), (123, 0), (124, 0), (125, 0), (126, 0), (127, 0)]


In [30]:
# --- Run the test ---
data = json.load(open("massive_data.json"))
print(f"Top 3 suggestions for Amit (User 1):")
print(pages_you_might_like_names(1, data)[:3])

Top 3 suggestions for Amit (User 1):
[('AI & ML Community', 2), ('Blockchain Innovators', 1), ('Cloud Computing Pros', 1)]
