In [2]:
from pathlib import Path
import pandas as pd
import yaml
from neo4j_lib import NeoApp
from IPython.display import display


In [3]:
# connect to neo4j
with Path("neo4j_config.yaml").open() as nc:
    neo_config = yaml.safe_load(nc)
    
neo_con = NeoApp(neo_config["uri"], neo_config["user"], neo_config["password"])

In [4]:
# 1. Who is most productive researcher writing papers on koalas?

query="""
MATCH (p)<-[:AUTHORED]-(a)
WITH a, COLLECT(p) as papers
ORDER BY SIZE(papers) DESC
RETURN a.name, SIZE(papers)
"""
response=neo_con.query(query)
pd.DataFrame(response).head(5)

Unnamed: 0,a.name,SIZE(papers)
0,Peter Timms,90
1,Carel Thijs,57
2,Adam Polkinghorne,55
3,P J Canfield,31
4,Stephen D Johnston,25


In [5]:
# 2. What is the most common topic in publications on koalas?
# find most common keywords

query="""
MATCH (p)-[:HAS_KEYWORD]->(k)
WITH k, COLLECT(p) as papers
ORDER BY SIZE(papers) DESC LIMIT 20
RETURN k.name, SIZE(papers)
"""
response=neo_con.query(query)
pd.DataFrame(response).head(10)

Unnamed: 0,k.name,SIZE(papers)
0,koala,66
1,Koala,51
2,Phascolarctos cinereus,41
3,Chlamydia,32
4,Chlamydia pecorum,20
5,koalas,17
6,koala retrovirus,14
7,KoRV,12
8,marsupial,11
9,Australia,8


In [32]:
# 3. What are diseases that affect the animal?
# return top 5 most common diseases and their paper count

query="""
MATCH (p)-[:ABOUT_DISEASE]->(d)
WITH d, COLLECT(p) as papers
ORDER BY SIZE(papers) DESC LIMIT 5
RETURN d.name, SIZE(papers)
"""
response=neo_con.query(query)
display(pd.DataFrame(response).head(10))

# 4. What chemical factors are related to these diseases?
query="""
MATCH (p)-[:ABOUT_DISEASE]->(d)
WITH d, COLLECT(p) as papers
WITH d ORDER BY SIZE(papers) DESC LIMIT 10
WITH collect(d) AS top_dis
UNWIND top_dis as d
MATCH (p)-[:ABOUT_DISEASE]->(d)
WITH d, COLLECT(p) as papers
UNWIND papers as p
MATCH (p)-[:ABOUT_CHEMICAL]->(c)
return c.name, COUNT(c) as c_cnt
ORDER BY c_cnt DESC
"""
response=neo_con.query(query)
display(pd.DataFrame(response).head(10))

Unnamed: 0,d.name,SIZE(papers)
0,infection,100
1,infections,53
2,Chlamydia,49
3,chlamydiosis,32
4,chlamydial infection,31


Unnamed: 0,c.name,c_cnt
0,amino acid,7
1,ivermectin,4
2,testosterone,4
3,doxycycline,4
4,enrofloxacin,4
5,florfenicol,4
6,ionomycin,3
7,beta lactam,3
8,Penicillin,3
9,penicillin G,3


In [7]:
# 5. What are the rare diseases?
# return 5 least common diseases
query="""
MATCH (p)-[:ABOUT_DISEASE]->(d)
WITH d, COLLECT(p) as papers
ORDER BY SIZE(papers) ASC LIMIT 5
RETURN d.name, SIZE(papers)
"""
response=neo_con.query(query)
pd.DataFrame(response).head(10)

Unnamed: 0,d.name,SIZE(papers)
0,ocular chlamydial disease,1
1,urogenital tract structural disease,1
2,secondary disease,1
3,chlamydial and KoRV infections,1
4,disease of the,1


In [9]:
# 6. How does the prevalence of these diseases change over time
# filter papers by date and rerun disease search

query="""
MATCH (p)-[:ABOUT_DISEASE]->(d)
WHERE p.date < 2010
WITH d, COLLECT(p) as papers
ORDER BY SIZE(papers) DESC LIMIT 10
RETURN d.name, SIZE(papers)
"""
response=neo_con.query(query)
display(pd.DataFrame(response).head(10))

query="""
MATCH (p)-[:ABOUT_DISEASE]->(d)
WHERE p.date >= 2010
WITH d, COLLECT(p) as papers
ORDER BY SIZE(papers) DESC LIMIT 10
RETURN d.name, SIZE(papers)
"""
response=neo_con.query(query)
pd.DataFrame(response).head(10)



Unnamed: 0,d.name,SIZE(papers)
0,infection,19
1,infections,14
2,conjunctivitis,10
3,inflammation,8
4,cryptococcosis,8
5,pneumonia,7
6,wheeze,7
7,chlamydial infections,7
8,atopic,7
9,chlamydial infection,6


Unnamed: 0,d.name,SIZE(papers)
0,infection,81
1,Chlamydia,45
2,infections,39
3,chlamydiosis,28
4,chlamydial disease,27
5,chlamydial infection,25
6,chlamydial,22
7,in,18
8,Chlamydia pecorum,17
9,leukemia,17


In [33]:
# 7. Find similar papers based on Keywords
query_pmid=32556174

query=f"""
MATCH (p:Paper {{pmid:{query_pmid}}})-[r:SIMILAR_KEYWORDS]-(p2:Paper)
return p.title,p2.title, p2.pmid
"""
response=neo_con.query(query)
pd.DataFrame(response).head(10)


Unnamed: 0,p.title,p2.title,p2.pmid
0,Helping koalas battle disease - Recent advance...,Molecular Diagnosis of Koala Retrovirus (KoRV)...,34065572
1,Helping koalas battle disease - Recent advance...,"Identification of A Novel Picorna-Like Virus, ...",30832350
2,Helping koalas battle disease - Recent advance...,Sequence variation of koala retrovirus transme...,25462343
3,Helping koalas battle disease - Recent advance...,A targeted approach to investigating immune ge...,35510793
4,Helping koalas battle disease - Recent advance...,Changes in Endogenous and Exogenous Koala Retr...,31243137
5,Helping koalas battle disease - Recent advance...,"Lymphoma, Koala Retrovirus Infection and Repro...",28942303
6,Helping koalas battle disease - Recent advance...,Pharmacokinetic Profile of Fentanyl in the Koa...,34944325
7,Helping koalas battle disease - Recent advance...,Unpacking the mechanisms captured by a correla...,26960136
8,Helping koalas battle disease - Recent advance...,Prevalence and clinical significance of koala ...,31162024
9,Helping koalas battle disease - Recent advance...,Is Chlamydia to Blame for Koala Reproductive C...,34578173


In [34]:
# 8. Find similar papers based on Diseases
query_pmid=32556174

query=f"""
MATCH (p:Paper {{pmid:{query_pmid}}})-[r:SIMILAR_DISEASE]->(p2:Paper)
return p.title,p2.title, p2.pmid
"""
response=neo_con.query(query)
pd.DataFrame(response).head(10)

Unnamed: 0,p.title,p2.title,p2.pmid
0,Helping koalas battle disease - Recent advance...,Assessment of anti-bovine IL4 and IFN gamma an...,15350745
1,Helping koalas battle disease - Recent advance...,Multilocus sequence analysis provides insights...,23740730
2,Helping koalas battle disease - Recent advance...,Novel molecular markers of Chlamydia pecorum g...,21496349
3,Helping koalas battle disease - Recent advance...,Prevalence of Chlamydia pecorum in Juvenile Ko...,29733767
4,Helping koalas battle disease - Recent advance...,Susceptibility to a sexually transmitted disea...,36043238
5,Helping koalas battle disease - Recent advance...,Novel sequence types of Chlamydia pecorum infe...,25647593
6,Helping koalas battle disease - Recent advance...,Koala Retrovirus in Northern Australia Shows a...,33472936
7,Helping koalas battle disease - Recent advance...,Preliminary characterisation of tumor necrosis...,23527290
8,Helping koalas battle disease - Recent advance...,Inbreeding and disease avoidance in a free-ran...,32470998
9,Helping koalas battle disease - Recent advance...,Molecular Dynamics and Mode of Transmission of...,29237837
