# Example queries on COVID-19 Knowledge Graph

This notebook shows how to run simple [Cypher](https://neo4j.com/developer/cypher-query-language/) queries on the knowledge graph.

In [1]:
import os
import time
import pandas as pd
from py2neo import Graph

### Set up pandas display parameters

In [2]:
# Lift pandas' display truncation so query results are rendered in full.
pd.set_option("display.max_rows", None)  # display all rows
pd.set_option("display.max_columns", None)  # display all columns

In [3]:
def make_clickable(val):
    """Render a value (typically a URL) as an HTML link opening in a new tab.

    The value is HTML-escaped before being placed in both the ``href``
    attribute and the link text, so characters such as ``"``, ``&`` or ``<``
    cannot break out of the attribute or inject markup. Values without such
    characters produce exactly the same markup as before.

    Parameters
    ----------
    val : object
        Value to link to; it is converted with ``str()`` before escaping.

    Returns
    -------
    str
        An ``<a target="_blank" ...>`` element as a string.
    """
    import html  # function-scope import keeps this cell self-contained

    escaped = html.escape(str(val), quote=True)
    return f'<a target="_blank" href="{escaped}">{escaped}</a>'

### Start Neo4j database

In [4]:
# Neo4j installation directory as given by the environment (None when unset).
NEO4J_HOME = os.environ.get('NEO4J_HOME')
print(NEO4J_HOME)

/Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14


In [5]:
# Start the Neo4j server via the launcher script under NEO4J_HOME.
# The path is quoted because the installation directory may contain spaces.
!"$NEO4J_HOME"/bin/neo4j start

Active database: graph.db
Directories in use:
  home:         /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14
  config:       /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14/conf
  logs:         /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14/logs
  plugins:      /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14/plugins
  import:       NOT SET
  data:         /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-993db298-6374-4f0a-9a9a-d0783480877a/installation-3.5.14/data
  certificates: /Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4

Wait until the database has started up.

In [11]:
def wait_for_neo4j(host='127.0.0.1', port=7687, timeout=120, poll_interval=1):
    """Block until a TCP connection to the Neo4j Bolt port succeeds.

    Replaces a fixed 15-second sleep, which is too short on slow start-ups
    (the connection is then refused) and needlessly long on fast ones.

    Parameters
    ----------
    host : str
        Host the database listens on.
    port : int
        Bolt port to probe.
    timeout : float
        Maximum number of seconds to keep polling.
    poll_interval : float
        Seconds to wait between attempts; also the per-attempt connect timeout.

    Raises
    ------
    TimeoutError
        If the port does not accept a connection within ``timeout`` seconds.
    """
    import socket  # function-scope import keeps this cell self-contained

    deadline = time.monotonic() + timeout
    while True:
        try:
            # A successful connect means the server is listening; the probe
            # socket is closed immediately by the context manager.
            with socket.create_connection((host, port), timeout=poll_interval):
                return
        except OSError:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f'Neo4j did not accept connections on {host}:{port} '
                    f'within {timeout} seconds'
                )
            time.sleep(poll_interval)


# NOTE(review): an open port is taken as "database is up"; if queries still fail
# right after this returns, re-run the cell that creates the connection.
wait_for_neo4j()

In [12]:
# Connect to Neo4j, passing only a password (all other connection settings are
# left at py2neo's defaults). The password can be overridden through the
# NEO4J_PASSWORD environment variable so that real credentials need not be
# written into the notebook; the fallback is the original demo password.
graph = Graph(password=os.getenv('NEO4J_PASSWORD', 'neo4jbinder'))

## Query graph about available Dashboards

### List COVID-19 Dashboards

In [8]:
# Dashboards linked from the COVID-19 outbreak node through EXPLORE_IN.
query = """
MATCH (:Outbreak{name:'COVID-19'})-[:EXPLORE_IN]->(d:Dashboard)
RETURN d.name as name, d.description as description, d.url as url
"""
cursor = graph.run(query)
df = cursor.to_data_frame()
# Show the url column as clickable links in the rendered table.
df.style.format(formatter={'url': make_clickable})

ServiceUnavailable: Failed to establish connection to ('127.0.0.1', 7687) (reason [Errno 61] Connection refused)

### Find COVID-19 Dashboards for specific cities

In [None]:
# Dashboards connected to City nodes by an EXPLORE_IN relationship
# (matched in either direction).
query = """
MATCH (c:City)-[:EXPLORE_IN]-(d:Dashboard)
RETURN c.name as city, d.name as name, d.url as url
"""
cursor = graph.run(query)
df = cursor.to_data_frame()
# Show the url column as clickable links in the rendered table.
df.style.format(formatter={'url': make_clickable})

## Explore Strain Data

### List coronavirus outbreaks

In [None]:
# Each pathogen together with the outbreak it causes.
query = """
MATCH (p:Pathogen)-[:CAUSES]->(o:Outbreak)
RETURN p.acronym as acronym, p.name as pathogen, p.taxonomy_id as taxonomy_id, o.name as outbreak, o.start_date as start_date
"""
outbreaks = graph.run(query).to_data_frame()
outbreaks

### List person demographics and strain information for California
Note: demographic data have recently become unavailable (see: https://github.com/nextstrain/ncov/issues/251)

In [None]:
# Persons located in the Admin1 region 'California' and the strains they carry.
query = """
MATCH (a:Admin1)<-[:LOCATED_IN]-(p:Person)-[:CARRIES]->(s:Strain)
WHERE a.name = 'California'
RETURN p.age as age, p.sex as sex, p.exposure_location as exposure_location, s.name as strain, s.clade as clade
"""
california_cases = graph.run(query).to_data_frame()
california_cases

#### Same query using parameterized Cypher
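Parameters to Cypher queries can be passed as key-value arguments. Parameters in Cypher are named: Neo4j 3.x accepts the legacy curly-brace form (`{name}`) as well as the `$name` form, while Neo4j 4.0 and later accept only `$name`.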

In [None]:
admin1 = 'California'

# The query parameter uses the `$admin1` syntax: the legacy `{admin1}` form is
# deprecated in Neo4j 3.x and rejected by Neo4j 4.0+.
query = """
MATCH (a:Admin1{name: $admin1})<-[:LOCATED_IN]-(p:Person)-[:CARRIES]->(s:Strain)
RETURN p.age as age, p.sex as sex, p.exposure_location as exposure_location, 
       s.name as strain, s.clade as clade, s.date as date
ORDER BY s.date
"""
# The keyword argument supplies the value of the query parameter of the same name.
graph.run(query, admin1=admin1).to_data_frame().head(100)

### Where did clade A originate?

In [None]:
clade = 'A'

# The query parameter uses the `$clade` syntax: the legacy `{clade}` form is
# deprecated in Neo4j 3.x and rejected by Neo4j 4.0+.
query = """
MATCH (s:Strain)--(a:Country)
WHERE s.clade STARTS WITH $clade
RETURN s.clade as clade, s.name, s.date, a.name
ORDER BY s.date
"""
# Results are ordered by strain date; only the first 100 rows are shown.
graph.run(query, clade=clade).to_data_frame().head(100)

### Find persons that imported the virus from another location

In [None]:
# Persons whose exposure location differs from the name of the Admin1 region
# they are located in, with the strain each one carries.
query = """
MATCH (c:Admin1)<-[:LOCATED_IN]-(p:Person)-[:CARRIES]->(s:Strain)
WHERE c.name <> p.exposure_location
RETURN c.name as `state/province`, p.age as age, p.sex as sex, p.exposure_location as exposure_location, 
       s.name as strain, s.clade as clade
ORDER BY p.exposure_location
"""
imported_cases = graph.run(query).to_data_frame()
imported_cases

### Strains in Sydney

In [None]:
city = 'Sydney'

# The query parameter uses the `$city` syntax: the legacy `{city}` form is
# deprecated in Neo4j 3.x and rejected by Neo4j 4.0+.
query = """
MATCH (c:City{name: $city})<-[:LOCATED_IN]-(p:Person)-[:CARRIES]->(s:Strain)
RETURN c.name as city, s.name as strain, s.clade as clade, p.exposure_location, s.date as date
ORDER BY s.date
"""
# The keyword argument supplies the value of the query parameter of the same name.
graph.run(query, city=city).to_data_frame()

In [None]:
### Stop Neo4j database when done

In [None]:
# Stop the Neo4j server via the launcher script under NEO4J_HOME.
# The path is quoted because the installation directory may contain spaces.
!"$NEO4J_HOME"/bin/neo4j stop