In [1]:
import json
from collections import Counter
from matplotlib import pyplot as plt
import gc
import sys
import pickle
import os

In [None]:
# Ensure the output directory for saved graphs and pickles exists.
# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs('graphs', exist_ok=True)

## Initial Loading of User Data

In [2]:
# Parse the user dump, which holds one JSON object per line.
# The encoding is pinned so the result does not depend on the platform's
# default locale, and blank lines are skipped instead of raising in json.loads.
with open("dataset/user.json", encoding="utf-8") as f:
    jsons = [json.loads(line) for line in f if line.strip()]

In [8]:
# Map each user's ID to the value of that record's 'friends' field.
friends = {record['user_id']: record['friends'] for record in jsons}

In [16]:
# Report how many users were loaded, formatted with thousands separators.
user_total = len(friends)
print("Number of users:", f"{user_total:,}")

Number of users: 1,183,362


### Pickling User friendships

In [18]:
# Persist the user -> friends mapping so later sessions can skip the JSON parse.
# The context manager guarantees the file is flushed and closed once the dump finishes.
with open('graphs/users_friends.p', 'wb') as friends_file:
    pickle.dump(friends, friends_file)

### Loading User friendships

In [2]:
# Reload the user -> friends mapping written by the pickling cell.
# The context manager closes the handle once loading is done.
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# that this notebook itself produced.
with open('graphs/users_friends.p', 'rb') as pkl_file:
    data1 = pickle.load(pkl_file)

In [3]:
# Sanity check: show how many entries the unpickled mapping contains.
loaded_user_count = len(data1)
print(loaded_user_count)

1183362


## Let's get graphic!
#### Loading graph_tool (a C++-based Python graph library). It looks like it is what everyone uses in Python

In [4]:
import sys

# Put this site-packages directory at the front of the module search path so
# that the graph_tool import below can be resolved from it.
GRAPH_TOOL_SITE_PACKAGES = '/usr/local/lib/python3.6/site-packages'
sys.path.insert(0, GRAPH_TOOL_SITE_PACKAGES)

In [5]:
from graph_tool.all import *

In [6]:
# Empty graph that will hold the friendship network. directed=True spells out
# graph_tool's documented default for Graph().
g = Graph(directed=True)

### Create mappings between user IDs and node indices

In [7]:
# The position of a user ID in this list is used as that user's vertex index.
index_to_user_lookup = list(data1)

# Inverse lookup: user ID -> position in index_to_user_lookup.
user_to_index_map = {
    user_id: position for position, user_id in enumerate(index_to_user_lookup)
}


In [8]:
# Create one vertex per known user in a single call.
known_user_count = len(index_to_user_lookup)
vlist = g.add_vertex(known_user_count)

In [9]:
# String-valued vertex property holding each vertex's user ID.
# NOTE(review): this property map is not registered in g.vertex_properties here,
# so it is presumably not written out by g.save() -- confirm whether that is intended.
node_ids = g.new_vertex_property("string")
for vertex_index, user_id in enumerate(index_to_user_lookup):
    node_ids[g.vertex(vertex_index)] = user_id

### Adding friendships to the graph 
##### And storing any users that haven't been created yet (these are users that we have no reviews or other information about)

In [None]:

# Add one edge per (user, friend) pair listed in data1.
# A friend ID that has no entry in user_to_index_map yet gets a new vertex on
# the fly, and both lookup structures are extended so they stay in step with
# the graph's vertex indices.
# The loop variable is called friend_ids rather than friends so that it does
# not overwrite the friends dict built when the user data was first loaded.
for user_id, friend_ids in data1.items():
    user_vert = g.vertex(user_to_index_map[user_id])
    for friend_id in friend_ids:
        friend_index = user_to_index_map.get(friend_id)
        if friend_index is None:
            # First time this ID is seen: it takes the next free index.
            friend_index = len(index_to_user_lookup)
            user_to_index_map[friend_id] = friend_index
            index_to_user_lookup.append(friend_id)
            new_vert = g.add_vertex(1)
            node_ids[new_vert] = friend_id
            # The new vertex must land at the index just recorded; otherwise
            # the ID <-> index mappings no longer describe the graph.
            assert g.vertex_index[new_vert] == friend_index
        friend_vert = g.vertex(friend_index)
        g.add_edge(user_vert, friend_vert)

### Save the graph and the user_id to and from user_index mappings

##### Of real interest: the gzip-compressed `.xml.gz` file (which graph_tool reads and writes directly) takes up 345MB of space, while the uncompressed `.graphml` file takes up 3.27GB! In my humble opinion, let's use the `.gz` file

In [20]:
# Write the graph to disk. No fmt argument is passed, so the output format is
# left to graph_tool's default handling of the file name.
compressed_graph_path = "graphs/friend_network.xml.gz"
g.save(compressed_graph_path)

In [21]:
# Store both lookup structures together as a two-element list:
# [user ID -> index dict, index -> user ID list].
graph_mappings = [user_to_index_map, index_to_user_lookup]
with open('graphs/user_graph_mappings.p', 'wb') as mappings_file:
    pickle.dump(graph_mappings, mappings_file)

In [22]:
# Also export the same graph with the format explicitly set to GraphML.
graphml_path = "graphs/friend_network.graphml"
g.save(graphml_path, fmt='graphml')