<a href="https://colab.research.google.com/github/arangoml/networkx-adapter/blob/dgl_updates/IMDB_Networkx_Adapter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%%capture
!git clone -b dgl_updates https://github.com/arangoml/networkx-adapter.git
!rsync -av networkx-adapter/examples/ ./ --exclude=.git
!pip3 install networkx
!pip3 install matplotlib
!pip3 install --index-url https://test.pypi.org/simple/ adbnx-adapter==0.0.0.2.5
!pip3 install pyarango
!pip3 install python-arango
!pip install node2vec

In [0]:
import oasis

# Obtain temporary credentials through the oasis helper module.
con = oasis.getTempCredentials()

# Echo the connection details so they can be used to log in manually.
print()
print("https://%s:%s" % (con["hostname"], con["port"]))
for label, key in (("Username", "username"), ("Password", "password"), ("Database", "dbName")):
    print(label + ": " + con[key])


from adbnx_adapter.imdb_arangoDB_networkx_adapter import IMDBArangoDB_Networkx_Adapter

# The adapter is constructed from the same credential mapping.
ma = IMDBArangoDB_Networkx_Adapter(conn=con)

In [0]:
import csv
import json
import requests
import sys
import oasis


from pyArango.connection import *
from pyArango.collection import Collection, Edges, Field
from pyArango.graph import Graph, EdgeDefinition
from pyArango.collection import BulkOperation as BulkOperation

In [0]:
# Open a pyArango connection from the temporary credentials, then select
# the database those credentials name.
conn = oasis.connect_pyarango(con)
db = conn[con["dbName"]]

In [0]:
from pyArango.collection import Collection, Field
from pyArango.graph import Graph, EdgeDefinition


class Users(Collection):
    """Document collection for users; only ``user_id`` is declared as a field."""

    # The age and gender columns of the users CSV are intentionally not modelled.
    _fields = dict(user_id=Field())
    
class Movies(Collection):
    """Document collection for movies; only ``movie_id`` is declared as a field."""

    # Title and release date are intentionally left out of the schema.
    _fields = dict(movie_id=Field())

class Ratings(Edges):
    """Edge collection connecting a user to a movie that user rated.

    The two endpoints are carried by the edge's ``_from`` / ``_to``
    attributes, so the only declared field is the rating value itself.
    """

    _fields = {
        # Must be "ratings" (plural): that is the attribute the CSV import
        # writes on every edge and the one the adapter attribute map asks
        # for. Declaring "rating" here would describe a field nobody fills.
        "ratings": Field(),
    }

class IMDBGraph(Graph):
    """Graph definition: ``Ratings`` edges run from ``Users`` to ``Movies``."""

    _edgeDefinitions = [
        EdgeDefinition(
            "Ratings",
            fromCollections=["Users"],
            toCollections=["Movies"],
        )
    ]
    # No collections are declared outside the edge definition.
    _orphanedCollections = []

# Create the three collections, then the graph, by the names of the classes
# defined in this cell.
for collection_name in ("Users", "Movies", "Ratings"):
    db.createCollection(collection_name)

iMDBGraph = db.createGraph("IMDBGraph")

print("Collection/Graph Setup done.")

In [0]:
def _bulk_import(collection_name, csv_path, batch_size, fill_document):
    """Load every data row of a CSV file into a collection of ``db``.

    Args:
        collection_name: Name of the target collection in ``db``.
        csv_path: Path of the CSV file; its first row is treated as a header
            and skipped.
        batch_size: Value passed as ``batchSize`` to ``BulkOperation``.
        fill_document: Callable ``(doc, row)`` that copies the wanted columns
            of ``row`` (a list of strings) onto the freshly created document.
    """
    with BulkOperation(db[collection_name], batchSize=batch_size) as col:
        with open(csv_path, newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter=',', quotechar='|')
            # Skip header; the default keeps an empty file from raising StopIteration.
            next(reader, None)
            for row in reader:
                doc = col.createDocument()
                fill_document(doc, row)
                doc.save()


def _fill_user(doc, row):
    """Key the user document by the first column (user_id); other columns are not stored."""
    doc["_key"] = row[0]


def _fill_movie(doc, row):
    """Key the movie document by the first column (movie_id); other columns are not stored."""
    doc["_key"] = row[0]


def _fill_rating(doc, row):
    """Turn a (user_id, movie_id, rating, ...) row into a Users -> Movies edge."""
    user_id, movie_id, rating = row[:3]
    doc["_from"] = "Users/" + user_id
    doc["_to"] = "Movies/" + movie_id
    doc["ratings"] = rating


# Rows are indexed inside the helpers instead of being unpacked into notebook
# globals, which previously rebound the builtin name `zip` to a string.
_bulk_import("Users", 'data/users.csv', 100, _fill_user)
_bulk_import("Movies", 'data/movies.csv', 100, _fill_movie)
_bulk_import("Ratings", 'data/ratings.csv', 1000, _fill_rating)

print("Import Done")

In [0]:
# Collections, and the attributes of each, handed to the adapter.
# Vertex collections request no attributes; Ratings edges request their
# endpoints and the rating value.
imdb_attributes = {
    'vertexCollections': {'Users': {}, 'Movies': {}},
    'edgeCollections': {'Ratings': {'_from', '_to', 'ratings'}},
}

In [0]:
# Ask the adapter for the NetworkX view of IMDBGraph, using the
# collection/attribute map in imdb_attributes.
g = ma.create_networkx_graph(
    graph_name='IMDBGraph',
    graph_attributes=imdb_attributes,
)

In [0]:
# Display the data attached to node 'Users/2' of the converted graph.
g.nodes['Users/2']

In [0]:
# Display the data attached to node 'Movies/4' of the converted graph.
g.nodes['Movies/4']

In [0]:
 # Source endpoint of every edge that points into 'Movies/4'.
 m4v = [src for src, *_ in g.in_edges('Movies/4')]

In [0]:
from itertools import combinations

# All 2-element combinations of the entries of m4v.
m4vucmb = [*combinations(m4v, 2)]

In [0]:
import networkx as nx

# The Jaccard coefficient is computed on an undirected copy of the graph.
gp = g.to_undirected()
# Materialise the (u, v, score) triples. nx.jaccard_coefficient returns a
# one-shot iterator, so without list() any cell that loops over jcp a second
# time (e.g. after being re-run) would silently see no pairs at all.
jcp = list(nx.jaccard_coefficient(gp, m4vucmb))

In [0]:
# Jaccard similarity is symmetric, so the similarity graph is undirected.
# A DiGraph filled from the ordered pairs would only contain edges pointing
# "forward" in m4v order, leaving later nodes with few or no outgoing edges
# for random walks to follow.
gs = nx.Graph()
# Each item of jcp is a (u, v, score) triple; the score becomes the edge's
# 'weight' attribute.
gs.add_weighted_edges_from(jcp)

In [0]:
# Display how many edges the similarity graph gs contains.
gs.number_of_edges()

In [0]:
from node2vec import Node2Vec

# Set up node2vec over the similarity graph gs
# (32 dimensions, walk length 100, 30 walks, 4 workers).
node2vec = Node2Vec(
    gs,
    dimensions=32,
    walk_length=100,
    num_walks=30,
    workers=4,
)

In [0]:
# Fit the embedding model from the Node2Vec object built in the previous cell.
model = node2vec.fit(window=10, min_count=1, batch_words=4)

In [0]:
# Query the fitted model for the nodes most similar to the sixth entry of m4v.
model.wv.most_similar(m4v[5])