<a href="https://colab.research.google.com/github/danb-neo4j/patient_journey/blob/main/patientJourney_gds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Patient Journey Demo: GDS Workflow

# Import Libraries

In [1]:
import configparser
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import Image

# Show complete frames: GDS result rows contain long nested config dicts that
# pandas would otherwise truncate.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Configure seaborn in ONE call. sns.set() re-applies its own default style and
# palette, so calling it after set_style()/set_palette() silently discarded the
# "colorblind" palette that was requested.
sns.set(style='darkgrid', palette='colorblind', rc={'figure.figsize': (12, 7)})

In [2]:
# install or import Neo4j GraphDataScience library
try: 
  from graphdatascience import GraphDataScience
  print('Successfully imported GraphDataScience')
except ModuleNotFoundError:
  !pip install graphdatascience
  from graphdatascience import GraphDataScience
  print('installed and imported GraphDataScience')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting graphdatascience
  Downloading graphdatascience-1.6-py3-none-any.whl (918 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m919.0/919.0 KB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting neo4j<6.0,>=4.4.2
  Downloading neo4j-5.6.0.tar.gz (171 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.2/171.2 KB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: neo4j
  Building wheel for neo4j (pyproject.toml) ... [?25l[?25hdone
  Created wheel for neo4j: filename=neo4j-5.6.0-py3-none-any.whl size=237450 sha256=b8750a9dcb118413657ecf98c38e2e1be4288091aae66bdd0ba6e4c077bd90e6
 

# Mount Google Drive

In [3]:
# Colab-only step: mount the user's Google Drive at /content/drive so later
# cells can read files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Connect to Neo4j Database

In [5]:
# os.getcwd()

In [4]:
# Switch to the project folder on the mounted Drive. The path is absolute
# (rooted at the /content/drive mount point) so the cell can be re-run safely;
# a relative path only resolves from the initial working directory and fails
# once the directory has already been changed.
os.chdir('/content/drive/MyDrive/Colab Notebooks/gds_demos/patient_journey/')

In [6]:
# import utility function to connect to Neo4j DB
from neoUtils import read_neo4j_properties

In [7]:
# Path (relative to the working directory) of the .ini file holding the
# Neo4j host and credentials
NEO4J_PROPERTIES_FILE = 'auth/patientJourney_auth.ini'
# NEO4J_PROPERTIES_FILE = 'patientJourney_auth.ini'

# Load the connection settings through the neoUtils helper
HOST, USERNAME, PASSWORD = read_neo4j_properties(
    NEO4J_PROPERTIES_FILE=NEO4J_PROPERTIES_FILE,
)

Using HOST, USERNAME, PASSWORD from .ini file


In [8]:
# Open a GDS client session against the database (AuraDS mode enabled)
gds = GraphDataScience(
    HOST,
    auth=(USERNAME, PASSWORD),
    aura_ds=True,
)

# Asking the server for its GDS version doubles as a connectivity check
print('Neo4j GDS Version:', gds.version())

Neo4j GDS Version: 2.3.2+14


# Create Direct Relationships
We will use these relationships to create bi-partite graphs between patients and other nodes.

## Direct Patient to Encounter Type Relationship
Encounters are unique to each patient, so we create a direct link from the patient to the encounter's SNOMED_CT code, which is then used to build an aggregated bi-partite graph.

In [None]:
# Collapse Patient - Encounter - SNOMED_CT paths (matched without direction,
# via HAS_ENCOUNTER and OF_TYPE) into one ENCOUNTER_CODE relationship per
# (patient, code) pair. relCount stores how many OF_TYPE relationships were
# matched for the pair. MERGE reuses an existing relationship, so re-running
# the cell refreshes relCount instead of creating duplicates. The returned
# frame reports the number of distinct ENCOUNTER_CODE relationships touched.
gds.run_cypher('''
MATCH (p:Patient)-[e:HAS_ENCOUNTER]-(en:Encounter)-[r:OF_TYPE]-(t:SNOMED_CT)
WITH p, t, COUNT(r) AS relCount
MERGE(p)-[s:ENCOUNTER_CODE]->(t)
SET s.relCount = relCount
RETURN count(DISTINCT s) AS newRels
''')

Unnamed: 0,newRels
0,50331


## Direct Patient to Procedure Type Relationship
Bypass the Encounter node to create a direct relationship from Patients to Procedure Types based upon the SNOMED_CT code.

In [None]:
# Collapse Patient - Encounter - SNOMED_CT paths (matched without direction,
# via HAS_ENCOUNTER and HAS_PROCEDURE) into one PROCEDURE_CODE relationship
# per (patient, code) pair. relCount stores how many HAS_PROCEDURE
# relationships were matched for the pair. MERGE reuses an existing
# relationship, so re-running the cell refreshes relCount instead of creating
# duplicates. The returned frame reports the number of distinct
# PROCEDURE_CODE relationships touched.
gds.run_cypher('''
MATCH (p:Patient)-[e:HAS_ENCOUNTER]-(en:Encounter)-[r:HAS_PROCEDURE]-(t:SNOMED_CT)
WITH p, t, COUNT(r) AS relCount
MERGE(p)-[s:PROCEDURE_CODE]->(t)
SET s.relCount = relCount
RETURN count(DISTINCT s) AS newRels
''')

Unnamed: 0,newRels
0,93820


## Direct Patient to Drug Relationship
Bypass the Encounter node to create a direct relationship between Patients and the Drugs they are prescribed or administered. 

In [None]:
# Collapse Patient - Encounter - Drug paths (matched without direction, via
# HAS_ENCOUNTER and HAS_DRUG) into one DRUGS_PRESCRIBED relationship per
# (patient, drug) pair. relCount stores how many HAS_DRUG relationships were
# matched for the pair. MERGE reuses an existing relationship, so re-running
# the cell refreshes relCount instead of creating duplicates. The returned
# frame reports the number of distinct DRUGS_PRESCRIBED relationships touched.
gds.run_cypher('''
MATCH (p:Patient)-[e:HAS_ENCOUNTER]-(en:Encounter)-[r:HAS_DRUG]-(d:Drug)
WITH p, d, COUNT(r) AS relCount
MERGE(p)-[s:DRUGS_PRESCRIBED]->(d)
SET s.relCount = relCount
RETURN count(DISTINCT s) AS newRels
''')

Unnamed: 0,newRels
0,36093


# Create Additional Statistics on Patient Nodes
Use the Degree Centrality algorithm to create Patient statistics related to:
* Encounters (Total and Emergency)
* Procedures
* Drugs (administered or prescribed)

## Create Patient Statistics Projection

In [None]:
# Projection shared by the degree-centrality cells that follow. The three
# aggregated relationship types carry relCount so it can be used as a weight.
graph_name = 'patient-stats'
node_projection = ['Patient', 'Encounter', 'Emergency', 'SNOMED_CT', 'Drug']
relationship_projection = {'HAS_ENCOUNTER': {'orientation': 'NATURAL'},
                           'ENCOUNTER_CODE': {'orientation': 'NATURAL', 'properties': 'relCount'},
                           'PROCEDURE_CODE': {'orientation': 'NATURAL', 'properties': 'relCount'},
                           'DRUGS_PRESCRIBED': {'orientation': 'NATURAL', 'properties': 'relCount'}
                           }

# Projecting under a name that already exists in the graph catalog raises an
# error, so remove any leftover projection first; this keeps the cell
# re-runnable.
if gds.graph.exists(graph_name)['exists']:
    gds.graph.get(graph_name).drop()

# generate graph projection
g, _ = gds.graph.project(
          graph_name = graph_name,
          node_spec = node_projection,
          relationship_spec = relationship_projection)

Loading:   0%|          | 0/100 [00:00<?, ?%/s]

In [None]:
# pd.DataFrame(_)

## Total Encounter Count

In [None]:
# Degree centrality restricted to Patient/Encounter nodes and HAS_ENCOUNTER
# relationships; the score is written back as `totalEncounters`.
_ = gds.degree.write(
    G=g,
    nodeLabels=['Patient', 'Encounter'],
    relationshipTypes=['HAS_ENCOUNTER'],
    writeProperty='totalEncounters',
)

## Emergency Encounter Count

In [None]:
# Same degree computation, but only nodes labelled Patient or Emergency take
# part, so only HAS_ENCOUNTER relationships between those nodes are counted.
# The score is written back as `emergencyEncounters`.
_ = gds.degree.write(
    G=g,
    nodeLabels=['Patient', 'Emergency'],
    relationshipTypes=['HAS_ENCOUNTER'],
    writeProperty='emergencyEncounters',
)

## Procedures Count
Specify `relationshipWeightProperty` to add up the weights on each of the relationships. 

In [None]:
# Weighted degree over PROCEDURE_CODE: relCount is used as the relationship
# weight, and the score is written back as `procedureCount`.
_ = gds.degree.write(
    G=g,
    nodeLabels=['Patient', 'SNOMED_CT'],
    relationshipTypes=['PROCEDURE_CODE'],
    relationshipWeightProperty='relCount',
    writeProperty='procedureCount',
)

## Drugs Count

In [None]:
# Weighted degree over DRUGS_PRESCRIBED: relCount is used as the relationship
# weight, and the score is written back as `drugCount`.
_ = gds.degree.write(G=g,
                     nodeLabels=['Patient', 'Drug'],
                     relationshipTypes = ['DRUGS_PRESCRIBED'],
                     relationshipWeightProperty = 'relCount',
                     writeProperty='drugCount')

# This is the last statistic computed from the 'patient-stats' projection.
# Release it here; otherwise it stays in the graph catalog after `g` is
# rebound to the next projection. Re-run the projection cell before re-running
# any of the statistics cells.
_ = g.drop()

# Embeddings: Patient to Encounter Types

## Create Bipartite Graph Projection

In [27]:
# Bipartite projection: Patient and SNOMED_CT nodes joined by ENCOUNTER_CODE,
# projected as UNDIRECTED with relCount loaded as a relationship property.
graph_name = 'patient-encounter'
node_projection = ['Patient','SNOMED_CT']
relationship_projection = {'ENCOUNTER_CODE': {'orientation': 'UNDIRECTED', 'properties': 'relCount'}}

# Projecting under a name that already exists in the graph catalog raises an
# error, so remove any leftover projection first; this keeps the cell
# re-runnable.
if gds.graph.exists(graph_name)['exists']:
    gds.graph.get(graph_name).drop()

# generate projection
g, _ = gds.graph.project(
          graph_name = graph_name,
          node_spec = node_projection,
          relationship_spec = relationship_projection)

## Embeddings from Patient-Encounter Bi-Partite Graph

In [29]:
# FastRP embeddings (128 dimensions) on the weighted Patient/SNOMED_CT
# projection, written back as `encounterEmbedding`. The seed is fixed for
# repeatability.
_ = gds.fastRP.write(
    G=g,
    nodeLabels=['Patient', 'SNOMED_CT'],
    relationshipTypes=['ENCOUNTER_CODE'],
    relationshipWeightProperty='relCount',
    writeProperty='encounterEmbedding',
    embeddingDimension=128,
    randomSeed=42,
)

## Drop Graph Projection

In [30]:
# Release the in-memory projection held by `g`; the embeddings were already
# written back to the database by the preceding fastRP.write call.
_ = g.drop()

# Embeddings: Patient to Procedure Types

## Create Bipartite Graph Projection

In [19]:
# Bipartite projection: Patient and SNOMED_CT nodes joined by PROCEDURE_CODE,
# projected as UNDIRECTED with relCount loaded as a relationship property.
graph_name = 'patient-procedure'
node_projection = ['Patient','SNOMED_CT']
relationship_projection = {'PROCEDURE_CODE': {'orientation': 'UNDIRECTED', 'properties': 'relCount'}}

# Projecting under a name that already exists in the graph catalog raises an
# error, so remove any leftover projection first; this keeps the cell
# re-runnable.
if gds.graph.exists(graph_name)['exists']:
    gds.graph.get(graph_name).drop()

# generate projection
g, _ = gds.graph.project(
          graph_name = graph_name,
          node_spec = node_projection,
          relationship_spec = relationship_projection)

## Embeddings from Patient-Procedure Bi-Partite Graph

In [20]:
# FastRP embeddings (128 dimensions) on the weighted Patient/SNOMED_CT
# projection, written back as `procedureEmbedding`. The seed is fixed for
# repeatability.
_ = gds.fastRP.write(
    G=g,
    nodeLabels=['Patient', 'SNOMED_CT'],
    relationshipTypes=['PROCEDURE_CODE'],
    relationshipWeightProperty='relCount',
    writeProperty='procedureEmbedding',
    embeddingDimension=128,
    randomSeed=42,
)

## Drop Graph Projection

In [21]:
# Release the in-memory projection held by `g`; the embeddings were already
# written back to the database by the preceding fastRP.write call.
_ = g.drop()

# Embeddings: Patient to Drugs

## Create Bipartite Graph Projection

In [22]:
# Bipartite projection: Patient and Drug nodes joined by DRUGS_PRESCRIBED,
# projected as UNDIRECTED with relCount loaded as a relationship property.
graph_name = 'patient-drugs'
node_projection = ['Patient','Drug']
relationship_projection = {'DRUGS_PRESCRIBED': {'orientation': 'UNDIRECTED', 'properties': 'relCount'}}

# Projecting under a name that already exists in the graph catalog raises an
# error, so remove any leftover projection first; this keeps the cell
# re-runnable.
if gds.graph.exists(graph_name)['exists']:
    gds.graph.get(graph_name).drop()

# generate projection
g, _ = gds.graph.project(
          graph_name = graph_name,
          node_spec = node_projection,
          relationship_spec = relationship_projection)

Loading:   0%|          | 0/100 [00:00<?, ?%/s]

## Embeddings from Patient-Drug Bi-Partite Graph

In [23]:
# FastRP embeddings (128 dimensions) on the weighted Patient/Drug projection,
# written back as `drugsEmbedding`. The seed is fixed for repeatability.
_ = gds.fastRP.write(
    G=g,
    nodeLabels=['Patient', 'Drug'],
    relationshipTypes=['DRUGS_PRESCRIBED'],
    relationshipWeightProperty='relCount',
    writeProperty='drugsEmbedding',
    embeddingDimension=128,
    randomSeed=42,
)

## Drop Graph Projection

In [24]:
# Release the in-memory projection held by `g`; the embeddings were already
# written back to the database by the preceding fastRP.write call.
_ = g.drop()

# Patient Clustering via KNN
Run KNN on Patient nodes leveraging prior statistics and embeddings.

## Create Graph Projection

In [32]:
# Patient-only projection that loads the node properties KNN will compare:
# the raw `expenses` value plus the three embeddings written earlier.
graph_name = 'patient-knn'

# Projecting under a name that already exists in the graph catalog raises an
# error, so remove any leftover projection first; this keeps the cell
# re-runnable.
if gds.graph.exists(graph_name)['exists']:
    gds.graph.get(graph_name).drop()

g, _ = gds.graph.project(
    graph_name = graph_name,
    node_spec = ['Patient'],
    nodeProperties = ['expenses',
                      'encounterEmbedding',
                      'procedureEmbedding',
                      'drugsEmbedding'],
    relationship_spec = '*'
)

In [33]:
# Display the projection summary: `_` is the second element returned by
# gds.graph.project in the previous cell.
pd.DataFrame(_)

Unnamed: 0,0
nodeProjection,"{'Patient': {'label': 'Patient', 'properties': {'procedureEmbedding': {'defaultValue': None, 'property': 'procedureEmbedding'}, 'drugsEmbedding': {'defaultValue': None, 'property': 'drugsEmbedding'}, 'encounterEmbedding': {'defaultValue': None, 'property': 'encounterEmbedding'}, 'expenses': {'defaultValue': None, 'property': 'expenses'}}}}"
relationshipProjection,"{'__ALL__': {'orientation': 'NATURAL', 'indexInverse': False, 'aggregation': 'DEFAULT', 'type': '*', 'properties': {}}}"
graphName,patient-knn
nodeCount,5885
relationshipCount,0
projectMillis,235


## Scale Non-Embedding Properties
*Using Mean scaler so that the values are between -1 and 1 like the embeddings.*

In [34]:
# Add a Mean-scaled copy of `expenses` to the in-memory graph only (mutate),
# stored under `scaledExpenses` for use by KNN.
_ = gds.alpha.scaleProperties.mutate(
    G=g,
    nodeProperties=['expenses'],
    scaler='Mean',
    mutateProperty='scaledExpenses',
)

## Run KNN and Write to Graph

In [36]:
# Link every patient to its 25 nearest neighbours, comparing the scaled
# expenses and the three embeddings. Results are written back as PATIENT-KNN
# relationships carrying a `similarity` score.
# NOTE(review): concurrency=1 alongside randomSeed is presumably what makes
# the run reproducible; confirm against the GDS KNN documentation.
patientKNN_result = gds.knn.write(
    G=g,
    nodeProperties=[
        'scaledExpenses',
        'encounterEmbedding',
        'procedureEmbedding',
        'drugsEmbedding',
    ],
    topK=25,
    randomSeed=42,
    concurrency=1,
    writeRelationshipType='PATIENT-KNN',
    writeProperty='similarity',
)


Knn:   0%|          | 0/100 [00:00<?, ?%/s]

In [37]:
# Similarity distribution reported by the KNN run, read straight from the
# result row
patientKNN_result['similarityDistribution']

{'p1': 0.5000019073486328,
 'max': 1.0000057220458984,
 'p5': 0.9115238189697266,
 'p90': 0.9966716766357422,
 'p50': 0.9810276031494141,
 'p95': 0.9979228973388672,
 'p10': 0.9425792694091797,
 'p75': 0.9922046661376953,
 'p99': 0.9990291595458984,
 'p25': 0.9656085968017578,
 'p100': 1.0000057220458984,
 'min': 0.4926319122314453,
 'mean': 0.964299832158506,
 'stdDev': 0.06893502697243697}

In [38]:
# Release the 'patient-knn' projection; the KNN result was already written to
# the database as PATIENT-KNN relationships.
_ = g.drop()

## Create New Projection with Undirected Relationships

In [40]:
# Re-project the written PATIENT-KNN relationships as UNDIRECTED, loading the
# `similarity` score as a relationship property.
graph_name = 'patient-knn-undirected'

# Projecting under a name that already exists in the graph catalog raises an
# error, so remove any leftover projection first; this keeps the cell
# re-runnable.
if gds.graph.exists(graph_name)['exists']:
    gds.graph.get(graph_name).drop()

g, _ = gds.graph.project(
    graph_name = graph_name,
    node_spec = ['Patient'],
    relationship_spec = {'PATIENT-KNN': {'orientation': 'UNDIRECTED'}},
    relationshipProperties = ['similarity']
)

## Identify Communities via Label Propagation

In [42]:
# Label Propagation on the similarity network, weighted by `similarity`; the
# community id of each node is written back as `knnCommunity`.
_ = gds.labelPropagation.write(
    G=g,
    relationshipWeightProperty='similarity',
    writeProperty='knnCommunity',
)

In [43]:
# Display the communityCount and communityDistribution fields of the Label
# Propagation result bound to `_` in the previous cell.
pd.DataFrame(_)[0][['communityCount', 'communityDistribution']]

communityCount                                                                                                                                102
communityDistribution    {'p99': 310, 'min': 6, 'max': 370, 'mean': 57.69607843137255, 'p90': 117, 'p50': 44, 'p999': 370, 'p95': 132, 'p75': 69}
Name: 0, dtype: object

## Run Weighted Page Rank on Similarity Network

In [44]:
# PageRank over the PATIENT-KNN relationships, weighted by `similarity`; the
# score is written back as `knnSimilarityPageRank`.
_ = gds.pageRank.write(
    G=g,
    relationshipTypes=['PATIENT-KNN'],
    relationshipWeightProperty='similarity',
    writeProperty='knnSimilarityPageRank',
)

In [45]:
# Centrality distribution reported by the PageRank run, read straight from
# the result row bound to `_` in the previous cell
_['centralityDistribution']

{'p99': 1.776144027709961,
 'min': 0.3652496337890625,
 'max': 2.326749801635742,
 'mean': 0.9612404213178533,
 'p90': 1.3508892059326172,
 'p50': 0.9157199859619141,
 'p999': 2.058591842651367,
 'p95': 1.483968734741211,
 'p75': 1.1192607879638672}