In [1]:
import os

import pandas as pd
from neo4j import GraphDatabase

## Connecting to a Sandbox Instance

The following class will handle the connection and queries for us.  We will need the IP address (for the connection `uri`) and the password (`pwd`) of our Sandbox instance; provide these in the cell that follows the class definition.

Prior to running this notebook you should have populated the database using the queries in `./cypher_queries/populate_db.cql`.

In [2]:
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created once in ``__init__`` and every call to ``query``
    opens a short-lived session on it.  The object can also be used as a
    context manager so the driver is closed when the ``with`` block exits.
    """

    def __init__(self, uri, user, pwd):
        """Create the underlying driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: User name for authentication.
            pwd: Password for authentication.

        If the driver cannot be created the error is printed and the
        connection is left uninitialized; ``query`` will then raise.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppresses errors."""
        self.close()

    def close(self):
        """Close the driver if one was successfully created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run a Cypher query and return all of its records as a list.

        Args:
            query: Cypher text to execute.
            parameters: Optional query parameters passed through to ``run``.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.

        Returns:
            A list of the records produced by the query, or ``None`` if the
            query failed (the error is printed rather than raised).

        Raises:
            AssertionError: If the driver was never initialized.
        """
        # Explicit raise instead of `assert` so the guard survives `python -O`.
        if self.__driver is None:
            raise AssertionError("Driver not initialized!")
        response = None
        try:
            session_kwargs = {} if db is None else {"database": db}
            # The context manager closes the session on success and on error.
            with self.__driver.session(**session_kwargs) as session:
                # Materialize the result while the session is still open.
                response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        return response

In [None]:
# Connection details for the Sandbox instance.  Prefer exporting NEO4J_URI and
# NEO4J_PASSWORD before starting Jupyter so the password is never saved in the
# notebook; the empty-string fallbacks can still be edited by hand.
uri = os.environ.get('NEO4J_URI', '')
pwd = os.environ.get('NEO4J_PASSWORD', '')

conn = Neo4jConnection(uri=uri, user="neo4j", pwd=pwd)

# Sanity check: count every node in the database.
result = conn.query('MATCH (n) RETURN COUNT(n) AS count')

# `result` is a list of records; a record's fields can be read by column name.
print('One way to get results back: ', result)
print('Another way: ', result[0]['count'])

### Now let's count the number of relationships...

In [4]:
# Count all relationships; the pattern places no constraint on node labels
# or relationship type.
relationship_count_query = 'MATCH ()-[r]->() RETURN COUNT(r) AS count'
result = conn.query(relationship_count_query)

print('Number of relationships: ', result[0]['count'])

Number of relationships:  73954


### Measure the inDegree of flight routes

In [15]:
# Top 10 airports by number of incoming HAS_ROUTE relationships.
# SIZE() over a pattern comprehension is used instead of SIZE(<pattern>),
# because Neo4j 5 no longer accepts a bare pattern expression inside SIZE();
# the comprehension form yields the same count and also runs on older versions.
in_degree_query = """MATCH (a:Airport)
                     WITH a, SIZE([(a)<-[:HAS_ROUTE]-() | 1]) AS in_degree
                     RETURN a.city, in_degree
                     ORDER BY in_degree DESC
                     LIMIT 10
                  """

# One dict per record -> one DataFrame row per airport.
in_degree_df = pd.DataFrame([dict(record) for record in conn.query(in_degree_query)])
in_degree_df.head(10)

Unnamed: 0,a.city,in_degree
0,Frankfurt,303
1,Paris,291
2,Amsterdam,280
3,Istanbul,268
4,Munich,265
5,Chicago,257
6,Dallas,248
7,Atlanta,242
8,Dubai,237
9,London,226


### Measure the total degree of flight routes

In [16]:
# Top 10 airports by total number of HAS_ROUTE relationships (either direction).
# SIZE() over a pattern comprehension is used instead of SIZE(<pattern>),
# because Neo4j 5 no longer accepts a bare pattern expression inside SIZE();
# the comprehension form yields the same count and also runs on older versions.
total_degree_query = """MATCH (a:Airport)
                        WITH a, SIZE([(a)-[:HAS_ROUTE]-() | 1]) AS total_degree
                        RETURN a.city, total_degree
                        ORDER BY total_degree DESC
                        LIMIT 10
                     """

# One dict per record -> one DataFrame row per airport.
total_degree_df = pd.DataFrame([dict(record) for record in conn.query(total_degree_query)])
total_degree_df.head(10)

Unnamed: 0,a.city,total_degree
0,Frankfurt,610
1,Paris,584
2,Istanbul,575
3,Amsterdam,562
4,Munich,535
5,Chicago,521
6,Dallas,499
7,Dubai,484
8,Atlanta,484
9,Beijing,469


### "Six Degrees of Kevin Bacon"

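Let's look at how many airports are exactly 1, exactly 2, and then 1 or 2 hops from a target airport (here, `DEN`).  Note that "exactly 2 hops" counts every airport reachable by some 2-hop path, so it can include airports that are also reachable directly; this is why the first two counts do not add up to the third.  Be sure to compare this to the equivalent SQL queries!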

In [18]:
# Distinct airports reachable from DEN by following HAS_ROUTE relationships:
# a single hop, a path of length two, and a path of length one or two.
one_hop_query = "MATCH (a:Airport {iata: 'DEN'})-[:HAS_ROUTE]->(a2) RETURN COUNT(DISTINCT a2)"
two_hop_query = "MATCH (a:Airport {iata: 'DEN'})-[:HAS_ROUTE*2]->(a2) RETURN COUNT(DISTINCT a2)"
one_or_two_query = "MATCH (a:Airport {iata: 'DEN'})-[:HAS_ROUTE*1..2]->(a2) RETURN COUNT(DISTINCT a2)"

# Run the queries in order and print each raw result list next to its label.
hop_reports = [
    ('Number of airports within exactly 1 hop: ', one_hop_query),
    ('Number of airports within exactly 2 hops: ', two_hop_query),
    ('Number of airports within 1 or 2 hops: ', one_or_two_query),
]
for label, hop_query in hop_reports:
    print(label, conn.query(hop_query))

Number of airports within exactly 1 hop:  [<Record COUNT(DISTINCT a2)=216>]
Number of airports within exactly 2 hops:  [<Record COUNT(DISTINCT a2)=1218>]
Number of airports within 1 or 2 hops:  [<Record COUNT(DISTINCT a2)=1231>]
