In [2]:
import getpass
import os
import time

import pandas as pd
from arango import ArangoClient

In [3]:
# Connection settings are read from the environment so that no secret is stored
# in the notebook. Host, database and user fall back to the lab defaults; the
# password has no default and is prompted for when ARANGO_PASSWORD is not set.
# NOTE(review): the password that used to be hardcoded here should be rotated.
ARANGO_HOST = os.environ.get("ARANGO_HOST", "https://2ae4f052d710.arangodb.cloud:8529")
ARANGO_DB = os.environ.get("ARANGO_DB", "machine_learning")
ARANGO_USER = os.environ.get("ARANGO_USER", "lab_test")
ARANGO_PASSWORD = os.environ.get("ARANGO_PASSWORD") or getpass.getpass("ArangoDB password: ")

client = ArangoClient(hosts=ARANGO_HOST)

db = client.db(ARANGO_DB, username=ARANGO_USER, password=ARANGO_PASSWORD)

## Graph definition

In [4]:
# Collapse individual transactions into one edge per
# (sender country, receiver country) pair, carrying the number of transactions
# in `count`.
# NOTE(review): `country_id` is used directly as `_from` / `_to`, so it is
# presumably a full document id such as "countries/<key>" -- confirm.
cursor = db.aql.execute(
    '''
        FOR transaction IN transactions
            LET sender_country = DOCUMENT(transaction._from).country_id
            LET receiver_country = DOCUMENT(transaction._to).country_id
            COLLECT sender = sender_country, receiver = receiver_country
                WITH COUNT INTO transaction_count
            RETURN {
                _from: sender,
                _to: receiver,
                count: transaction_count
            }
    '''
)
# Iterate the cursor instead of calling cursor.batch(): batch() only exposes the
# batch currently held by the client, so larger result sets would be truncated
# without any error. Iteration keeps fetching until the cursor is exhausted.
results = list(cursor)

In [4]:
# Rebuild the edge collection from scratch so the cell can be re-run safely.
if db.has_collection('graph_country_edges'):
    db.delete_collection('graph_country_edges')

edges = db.create_collection('graph_country_edges', edge=True)
edges.insert_many(results, silent=True)

# silent=True discards the per-document insert results, so a rejected edge
# (e.g. an invalid _from/_to) would go unnoticed. Comparing the stored count
# with the number of aggregated rows makes such a loss visible immediately.
edge_count = edges.count()
assert edge_count == len(results), (
    f"expected {len(results)} edges in 'graph_country_edges', found {edge_count}"
)
edge_count

249

In [9]:
# Handle to the 'countries' collection, which serves as the vertex collection of
# the graph built below (the collection itself is not created in the visible cells).
nodes = db.collection('countries')

In [10]:
# Start from a clean slate: drop a previous definition of the named graph, if any.
if db.has_graph('graph_country'):
    db.delete_graph('graph_country')

# Register 'countries' as the vertex collection and 'graph_country_edges' as the
# country -> country edge definition of the new graph.
graph = db.create_graph('graph_country')
graph.create_vertex_collection('countries')
graph.create_edge_definition('graph_country_edges', ['countries'], ['countries'])

<EdgeCollection graph_country_edges>

# Centrality

## Degree

In [11]:
def top_countries(rows, field, n=3):
    """Return the `n` rows with the largest `field` value, highest first.

    Ties keep the order in which the rows were returned by the query.
    """
    return sorted(rows, key=lambda row: row[field], reverse=True)[:n]


# Degree centrality per country: the number of distinct country-to-country
# edges leaving (outbound) and entering (inbound) each vertex. Edges are
# unweighted here; the per-edge transaction `count` is not taken into account.
# The subqueries return a constant instead of the edge document because only
# their length is needed, and the cursor is drained with list() so that no
# rows are lost when the result spans more than one server batch.
degree = list(db.aql.execute("""
    FOR v IN countries
        LET outbound_degree_centrality = LENGTH(
            FOR e IN graph_country_edges
                FILTER e._from == v._id
                RETURN 1
        )
        LET inbound_degree_centrality = LENGTH(
            FOR e IN graph_country_edges
                FILTER e._to == v._id
                RETURN 1
        )

        LET total_degree_centrality = outbound_degree_centrality + inbound_degree_centrality

        RETURN {
            country: v._key,
            inbound_degree: inbound_degree_centrality,
            outbound_degree: outbound_degree_centrality,
            total_degree: total_degree_centrality
        }
"""))
inbound_degree_countries = top_countries(degree, 'inbound_degree')
outbound_degree_countries = top_countries(degree, 'outbound_degree')
total_degree_countries = top_countries(degree, 'total_degree')

In [12]:
inbound_degree_countries

[{'country': 'KZ',
  'inbound_degree': 60,
  'outbound_degree': 3,
  'total_degree': 63},
 {'country': 'IN',
  'inbound_degree': 40,
  'outbound_degree': 14,
  'total_degree': 54},
 {'country': 'MX',
  'inbound_degree': 36,
  'outbound_degree': 7,
  'total_degree': 43}]

In [13]:
outbound_degree_countries

[{'country': 'IN',
  'inbound_degree': 40,
  'outbound_degree': 14,
  'total_degree': 54},
 {'country': 'Other',
  'inbound_degree': 26,
  'outbound_degree': 13,
  'total_degree': 39},
 {'country': 'US',
  'inbound_degree': 34,
  'outbound_degree': 11,
  'total_degree': 45}]

In [14]:
total_degree_countries

[{'country': 'KZ',
  'inbound_degree': 60,
  'outbound_degree': 3,
  'total_degree': 63},
 {'country': 'IN',
  'inbound_degree': 40,
  'outbound_degree': 14,
  'total_degree': 54},
 {'country': 'US',
  'inbound_degree': 34,
  'outbound_degree': 11,
  'total_degree': 45}]

## Closeness

In [18]:
# Run the Pregel 'effectivecloseness' algorithm and store its result on every
# country document in the 'effective_closeness' attribute.
closeness_job_id = db.pregel.create_job(
    graph='graph_country',
    algorithm='effectivecloseness',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='effective_closeness'
)

# create_job() only submits the job; async_mode=False does not make it block.
# Wait until the job reports 'done' before reading the result field, otherwise
# the attribute may still be missing and ROUND(null * 100) / 100 yields 0.
closeness_deadline = time.monotonic() + 300  # give up after 5 minutes
while True:
    closeness_state = db.pregel.job(closeness_job_id)['state']
    if closeness_state == 'done':
        break
    if closeness_state in ('canceled', 'fatal error', 'in error') or time.monotonic() > closeness_deadline:
        raise RuntimeError(
            f"Pregel job {closeness_job_id} did not complete (last state: {closeness_state!r})"
        )
    time.sleep(0.5)

closeness_aql = """
FOR country IN countries
    RETURN {"_key": country._key, "effective_closeness": ROUND(country.effective_closeness*100)/100}
"""
# list() drains the cursor; cursor.batch() would only return the first batch.
closeness = list(db.aql.execute(closeness_aql))
sorted(closeness, key=lambda x: x['effective_closeness'], reverse=True)[:3]

[{'_key': 'CN', 'effective_closeness': 3.02},
 {'_key': 'AE', 'effective_closeness': 2.82},
 {'_key': 'CH', 'effective_closeness': 2.82}]

## Betweenness

In [29]:
# Run the Pregel 'linerank' algorithm (used in this notebook as the betweenness
# measure) and store its result on every country document in 'line_rank'.
betweeness_job_id = db.pregel.create_job(
    graph='graph_country',
    algorithm='linerank',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='line_rank'
)

# create_job() only submits the job; async_mode=False does not make it block.
# Wait until the job reports 'done' before reading the result field, otherwise
# the attribute may still be missing and ROUND(null * 100) / 100 yields 0.
betweeness_deadline = time.monotonic() + 300  # give up after 5 minutes
while True:
    betweeness_state = db.pregel.job(betweeness_job_id)['state']
    if betweeness_state == 'done':
        break
    if betweeness_state in ('canceled', 'fatal error', 'in error') or time.monotonic() > betweeness_deadline:
        raise RuntimeError(
            f"Pregel job {betweeness_job_id} did not complete (last state: {betweeness_state!r})"
        )
    time.sleep(0.5)

betweeness_aql = """
FOR country IN countries
    RETURN {"_key": country._key, "line_rank": ROUND(country.line_rank*100)/100}
"""
# list() drains the cursor; cursor.batch() would only return the first batch.
betweeness = list(db.aql.execute(betweeness_aql))
sorted(betweeness, key=lambda x: x['line_rank'], reverse=True)[:3]

[{'_key': 'KZ', 'linerank': 0.35},
 {'_key': 'IN', 'linerank': 0.28},
 {'_key': 'US', 'linerank': 0.22}]

## PageRank

In [32]:
# Run the Pregel 'pagerank' algorithm and store its result on every country
# document in the 'page_rank' attribute.
pagerank_job_id = db.pregel.create_job(
    graph='graph_country',
    algorithm='pagerank',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='page_rank'
)

# create_job() only submits the job; async_mode=False does not make it block.
# Wait until the job reports 'done' before reading the result field, otherwise
# the attribute may still be missing and ROUND(null * 100) / 100 yields 0.
pagerank_deadline = time.monotonic() + 300  # give up after 5 minutes
while True:
    pagerank_state = db.pregel.job(pagerank_job_id)['state']
    if pagerank_state == 'done':
        break
    if pagerank_state in ('canceled', 'fatal error', 'in error') or time.monotonic() > pagerank_deadline:
        raise RuntimeError(
            f"Pregel job {pagerank_job_id} did not complete (last state: {pagerank_state!r})"
        )
    time.sleep(0.5)

pagerank_aql = """
FOR country IN countries
    RETURN {"_key": country._key, "page_rank": ROUND(country.page_rank*100)/100}
"""
# list() drains the cursor; cursor.batch() would only return the first batch.
pagerank = list(db.aql.execute(pagerank_aql))
sorted(pagerank, key=lambda x: x['page_rank'], reverse=True)[:3]

[{'_key': 'KZ', 'page_rank': 0.17},
 {'_key': 'IN', 'page_rank': 0.12},
 {'_key': 'US', 'page_rank': 0.09}]