In [1]:
import os
import time
from collections import deque
from getpass import getpass

import pandas as pd
from arango import ArangoClient

In [2]:
# Connection settings come from the environment so that no secret is stored in
# the notebook. ARANGO_HOST, ARANGO_DB and ARANGO_USERNAME fall back to the
# values this notebook has always used; the password has no fallback and is
# prompted for interactively when ARANGO_PASSWORD is not set.
# NOTE(review): a password was previously committed in this cell; it should be
# rotated on the server.
client = ArangoClient(
    hosts=os.environ.get("ARANGO_HOST", "https://2ae4f052d710.arangodb.cloud:8529")
)

db = client.db(
    os.environ.get("ARANGO_DB", "machine_learning"),
    username=os.environ.get("ARANGO_USERNAME", "lab_test"),
    password=os.environ.get("ARANGO_PASSWORD") or getpass("ArangoDB password: "),
)

## Definition

In [5]:
# Collapse individual transactions into one record per
# (sender organization, receiver organization) pair, carrying the number of
# transactions in that pair as `count`. The records use `_from` / `_to` keys so
# they can be inserted as edge documents later on.
cursor = db.aql.execute(
        '''
            FOR transaction in transactions
                LET sender_organization = DOCUMENT(transaction._from).organization_id
                LET receiver_organization = DOCUMENT(transaction._to).organization_id
                COLLECT sender = sender_organization, receiver = receiver_organization into group
                RETURN {
                    _from: sender,
                    _to: receiver,
                    count: LENGTH(group),
                }
        '''
    )
# Drain the cursor by plain iteration (a comprehension never asks the cursor
# for its length).
results = [edge_doc for edge_doc in cursor]

In [19]:
# Rebuild the organization-level edge collection from the aggregated `results`.
# Any existing collection of the same name is dropped first.
if db.has_collection('graph_organization_edges'):
    db.delete_collection('graph_organization_edges')

edges = db.create_collection('graph_organization_edges', edge=True)

# insert_many does not raise for individual documents that fail; it returns
# the failures as exception objects inside the result list. Surface them
# instead of silently ending up with an incomplete graph.
insert_outcomes = edges.insert_many(results, silent=False)
failed_inserts = [outcome for outcome in insert_outcomes if isinstance(outcome, Exception)]
if failed_inserts:
    print(
        f"WARNING: {len(failed_inserts)} of {len(results)} edges were rejected; "
        f"first error: {failed_inserts[0]}"
    )

# Displayed as the cell output: number of edges actually stored.
edges.count()

2106

In [22]:
# Handle to the existing 'organizations' collection (the graph's vertices).
# NOTE(review): `nodes` is not referenced by any other cell visible here.
nodes = db.collection('organizations')

In [23]:
# Drop any existing graph of the same name before creating it again.
if db.has_graph('graph_organization'):
    db.delete_graph('graph_organization')

# Organizations are the only vertex collection; every edge in
# 'graph_organization_edges' both starts and ends on an organization.
graph = db.create_graph('graph_organization')
graph.create_vertex_collection('organizations')
graph.create_edge_definition(
    edge_collection='graph_organization_edges',
    from_vertex_collections=['organizations'],
    to_vertex_collections=['organizations'],
)

<EdgeCollection graph_organization_edges>

# Centrality

## Degree

In [24]:
# Degree centrality per organization: count the edges leaving it (outbound),
# the edges arriving at it (inbound), and their sum (total).
degree_cursor = db.aql.execute("""
    FOR v IN organizations
        LET outbound_degree_centrality = LENGTH(
            FOR e IN graph_organization_edges
                FILTER e._from == v._id
                RETURN e
        )
        LET inbound_degree_centrality = LENGTH(
            FOR e IN graph_organization_edges
                FILTER e._to == v._id
                RETURN e
        )
        
        LET total_degree_centrality = outbound_degree_centrality + inbound_degree_centrality

        RETURN {
            organization: v._key,
            name: v.name,
            inbound_degree: inbound_degree_centrality,
            outbound_degree: outbound_degree_centrality,
            total_degree: total_degree_centrality
        }
""")
degree = [row for row in degree_cursor]

# Top three organizations for each degree measure, highest value first.
(
    inbound_degree_organizations,
    outbound_degree_organizations,
    total_degree_organizations,
) = (
    sorted(degree, key=lambda row: row[measure], reverse=True)[:3]
    for measure in ('inbound_degree', 'outbound_degree', 'total_degree')
)

In [25]:
# Display the three organizations with the highest inbound degree.
inbound_degree_organizations

[{'organization': '66ebf2a3aa71c90b4df3fc93590e22db',
  'name': 'Tengizchevroil Llp',
  'inbound_degree': 845,
  'outbound_degree': 1,
  'total_degree': 846},
 {'organization': '4174ce1ae09c87cce8a4accaf940bc58',
  'name': 'Avery Dennison Converted Products De Mexico Sa De Cv',
  'inbound_degree': 135,
  'outbound_degree': 2,
  'total_degree': 137},
 {'organization': 'ed46d890f9c84ab5a1c90e01a760cfdc',
  'name': 'Contitech Fluid Mexicana Servicios, S.A. de C.V.',
  'inbound_degree': 117,
  'outbound_degree': 2,
  'total_degree': 119}]

In [26]:
# Display the three organizations with the highest outbound degree.
outbound_degree_organizations

[{'organization': '1949468c1d7790b0f6db42511069ee7f',
  'name': 'ALTECO TECHNIK GMBH',
  'inbound_degree': 0,
  'outbound_degree': 5,
  'total_degree': 5},
 {'organization': '831a31a1466f0ace3eb20b52d4575f92',
  'name': 'Carboline (India) Private Limited',
  'inbound_degree': 4,
  'outbound_degree': 5,
  'total_degree': 9},
 {'organization': 'f4864ac3d5d716cc586d60afe8a403ef',
  'name': 'TREMCO CPG INDIA PRIVATE LIMITED',
  'inbound_degree': 5,
  'outbound_degree': 5,
  'total_degree': 10}]

In [27]:
# Display the three organizations with the highest total (in + out) degree.
total_degree_organizations

[{'organization': '66ebf2a3aa71c90b4df3fc93590e22db',
  'name': 'Tengizchevroil Llp',
  'inbound_degree': 845,
  'outbound_degree': 1,
  'total_degree': 846},
 {'organization': '4174ce1ae09c87cce8a4accaf940bc58',
  'name': 'Avery Dennison Converted Products De Mexico Sa De Cv',
  'inbound_degree': 135,
  'outbound_degree': 2,
  'total_degree': 137},
 {'organization': 'ed46d890f9c84ab5a1c90e01a760cfdc',
  'name': 'Contitech Fluid Mexicana Servicios, S.A. de C.V.',
  'inbound_degree': 117,
  'outbound_degree': 2,
  'total_degree': 119}]

## Closeness

In [31]:
# Effective closeness via Pregel. With store=True the server writes each
# vertex's score into the `effective_closeness` field of its document.
closeness_job_id = db.pregel.create_job(
    graph='graph_organization',
    algorithm='effectivecloseness',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='effective_closeness'
)

# create_job only submits the job and returns its id; async_mode controls how
# the algorithm iterates, not whether this call blocks. Poll until the job has
# left its in-progress states so the query below does not read missing or
# partially stored scores (a missing score would silently be rounded to 0).
while True:
    closeness_job_state = db.pregel.job(closeness_job_id)['state']
    if closeness_job_state not in ('loading', 'running', 'storing', 'recovering'):
        break
    time.sleep(0.5)
if closeness_job_state != 'done':
    raise RuntimeError(
        f"Pregel job {closeness_job_id} ended in state {closeness_job_state!r}"
    )

closeness_aql = """
FOR organization IN organizations
    RETURN {"_key": organization._key, "name": organization.name, "effective_closeness": ROUND(organization.effective_closeness*100)/100}
"""
closeness = [doc for doc in db.aql.execute(closeness_aql)]

# Displayed as the cell output: the three highest effective-closeness scores.
sorted(closeness, key=lambda x: x['effective_closeness'], reverse=True)[:3]

[{'_key': '0f6bb7bf3256bffac6eb50bf549b2752',
  'name': 'Ad Fire Protection Systems Corp',
  'effective_closeness': 6.06},
 {'_key': '4bdea5a3d7e97a8efd8d586e7f682924',
  'name': 'GST970206G13 GRUPO STONCOR SA DE CV',
  'effective_closeness': 5.09},
 {'_key': '36d6acce681cb79780c2ccefdb9b2466',
  'name': 'Carboline Company',
  'effective_closeness': 4.11}]

## Betweenness

In [32]:
# LineRank via Pregel, used in this notebook as the betweenness measure. With
# store=True the server writes each vertex's score into the `line_rank` field
# of its document.
betweeness_job_id = db.pregel.create_job(
    graph='graph_organization',
    algorithm='linerank',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='line_rank'
)

# create_job only submits the job and returns its id; async_mode controls how
# the algorithm iterates, not whether this call blocks. Poll until the job has
# left its in-progress states so the query below does not read missing or
# partially stored scores (a missing score would silently be rounded to 0).
while True:
    betweeness_job_state = db.pregel.job(betweeness_job_id)['state']
    if betweeness_job_state not in ('loading', 'running', 'storing', 'recovering'):
        break
    time.sleep(0.5)
if betweeness_job_state != 'done':
    raise RuntimeError(
        f"Pregel job {betweeness_job_id} ended in state {betweeness_job_state!r}"
    )

betweeness_aql = """
FOR organization IN organizations
    RETURN {"_key": organization._key, "name": organization.name, "line_rank": ROUND(organization.line_rank*100)/100}
"""
betweeness = [doc for doc in db.aql.execute(betweeness_aql)]

# Displayed as the cell output: the three highest LineRank scores.
sorted(betweeness, key=lambda x: x['line_rank'], reverse=True)[:3]

[{'_key': '66ebf2a3aa71c90b4df3fc93590e22db',
  'name': 'Tengizchevroil Llp',
  'line_rank': 0.11},
 {'_key': '48541ba40bcab069aa5d91c12a5299d0',
  'name': 'Stoncor Benelux B.V.',
  'line_rank': 0.05},
 {'_key': '4174ce1ae09c87cce8a4accaf940bc58',
  'name': 'Avery Dennison Converted Products De Mexico Sa De Cv',
  'line_rank': 0.02}]

## PageRank

In [33]:
# PageRank via Pregel. With store=True the server writes each vertex's score
# into the `page_rank` field of its document.
pagerank_job_id = db.pregel.create_job(
    graph='graph_organization',
    algorithm='pagerank',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='page_rank'
)

# create_job only submits the job and returns its id; async_mode controls how
# the algorithm iterates, not whether this call blocks. Poll until the job has
# left its in-progress states so the query below does not read missing or
# partially stored scores (a missing score would silently be rounded to 0).
while True:
    pagerank_job_state = db.pregel.job(pagerank_job_id)['state']
    if pagerank_job_state not in ('loading', 'running', 'storing', 'recovering'):
        break
    time.sleep(0.5)
if pagerank_job_state != 'done':
    raise RuntimeError(
        f"Pregel job {pagerank_job_id} ended in state {pagerank_job_state!r}"
    )

pagerank_aql = """
FOR organization IN organizations
    RETURN {"_key": organization._key, "name": organization.name, "page_rank": ROUND(organization.page_rank*1000)/1000}
"""
pagerank = [doc for doc in db.aql.execute(pagerank_aql)]

# Displayed as the cell output: the three highest PageRank scores.
sorted(pagerank, key=lambda x: x['page_rank'], reverse=True)[:3]

[{'_key': '66ebf2a3aa71c90b4df3fc93590e22db',
  'name': 'Tengizchevroil Llp',
  'page_rank': 0.052},
 {'_key': '48541ba40bcab069aa5d91c12a5299d0',
  'name': 'Stoncor Benelux B.V.',
  'page_rank': 0.044},
 {'_key': '4174ce1ae09c87cce8a4accaf940bc58',
  'name': 'Avery Dennison Converted Products De Mexico Sa De Cv',
  'page_rank': 0.008}]