In [1]:
import pandas as pd
from pymed import PubMed

pubmed = PubMed(tool="PubMedSearcher", email="myemail@ccc.com")

def get_value(article, key):
    try:
        return article[key]
    except KeyError:
        print(f"No {key} found")
        return None

## PUT YOUR SEARCH TERM HERE ##
search_term = "Asthma"
results = pubmed.query(search_term, max_results=100)
articleList = []
articleInfo = []

for article in results:
# Print the type of object we've found (can be either PubMedBookArticle or PubMedArticle).
# We need to convert it to dictionary with available function

    articleDict = article.toDict()
    articleList.append(articleDict)

for article in articleList:
    pubmedId = article['pubmed_id'].partition('\n')[0]

    # Extract values for different keys
    conclusions = get_value(article, 'conclusions')
    methods = get_value(article, 'methods')
    results = get_value(article, 'results')
    copyrights = get_value(article, 'copyrights')
    doi = get_value(article, 'doi')

    # Append the information to articleInfo
    articleInfo.append({
        u'authors': get_value(article, 'authors'),
        u'title': get_value(article, 'title'),
        u'publication_date': get_value(article, 'publication_date'),
        u'abstract': get_value(article, 'abstract'),
        u'conclusions': get_value(article, 'conclusions'),
        u'methods': get_value(article, 'methods'),
        u'results': get_value(article, 'results'),
        u'pubmed_id': pubmedId,
        # u'journal': get_value(article, 'journal'),
        #u'copyrights': get_value(article, 'copyrights'),
        u'doi': get_value(article, 'doi'),
            })


# Generate Pandas DataFrame from list of dictionaries
articlesPD = pd.DataFrame.from_dict(articleInfo)
export_csv = articlesPD.to_csv (r'/Users/dominikpichler/Documents/Pet Projects/Athene/storage_paper_scraper/test.csv', index = None, header=True) 

#Print first 10 rows of dataframe
print(articlesPD.head(10))

                                             authors  \
0  [{'lastname': 'Wang', 'firstname': 'Jian', 'in...   
1  [{'lastname': 'Wang', 'firstname': 'YuanYing',...   
2  [{'lastname': 'Ezhuthachan', 'firstname': 'Idi...   
3  [{'lastname': 'Wechsler', 'firstname': 'Michae...   
4  [{'lastname': 'Kasahara', 'firstname': 'Taissa...   
5  [{'lastname': 'Robu Popa', 'firstname': 'Danie...   
6  [{'lastname': 'Yasuda', 'firstname': 'Yuto', '...   
7  [{'lastname': 'Masini', 'firstname': 'Alice', ...   
8  [{'lastname': 'Zhu', 'firstname': 'Zhihua', 'i...   
9  [{'lastname': 'Sasaki', 'firstname': 'Mari', '...   

                                               title publication_date  \
0  Risk prediction model construction for asthma ...       2024-02-23   
1  Causal Association Between Allergic Diseases a...       2024-02-23   
2  The Future of Food Allergy Management: Advance...       2024-02-23   
3  Benralizumab versus Mepolizumab for Eosinophil...       2024-02-23   
4                 

In [2]:


import pandas as pd
from itertools import combinations

df_names = pd.DataFrame(columns=['Name 1', 'Name 2','pubmedId'])

for index, row in articlesPD.iterrows():
    try:
        data = row['authors']

        # Extracting first and last names
        names = [(d['firstname'], d['lastname']) for d in data]

        # Merge first and last names
        full_names = [' '.join(name) for name in names]

        # Generate all combinations of two names
        name_combinations = list(combinations(full_names, 2))

        # Creating a DataFrame with combinations
        df_combinations = pd.DataFrame(name_combinations, columns=['Name 1', 'Name 2'])
        df_combinations['pubmedId'] = row['pubmed_id']
        
        df_names = pd.concat([df_names, df_combinations], axis=0, ignore_index=True)


    except:
        continue
(df_names)


Unnamed: 0,Name 1,Name 2,pubmedId
0,Jian Wang,Tao Jiang,38394538
1,Jian Wang,Jian-Dao Hu,38394538
2,Tao Jiang,Jian-Dao Hu,38394538
3,YuanYing Wang,ShiHao Wang,38393908
4,YuanYing Wang,JiaXin Wu,38393908
...,...,...,...
3230,Martijn A Spruit,Lowie E G W Vanfleteren,38373596
3231,Martijn A Spruit,Anouk W Vaes,38373596
3232,Dirk-Jan Slebos,Lowie E G W Vanfleteren,38373596
3233,Dirk-Jan Slebos,Anouk W Vaes,38373596


In [3]:
from py2neo import Graph, Node, Relationship
import pandas as pd

graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))

def store_df_to_neo4j(df) -> Exception: 

    df.columns = ['Person', 'Node2', 'Relationship']
    try:
        for index, row in df.iterrows():
            node1 = Node("Node", name=row['Node1'])  # Assuming your node label is "Node"
            node2 = Node("Node", name=row['Node2'])
            relationship_type = row['Relationship']
            relationship = Relationship(node1, relationship_type, node2)
            graph.create(node1 | node2 | relationship)
        return "Success"

    except Exception as e:
        print(e)
        return e

In [4]:
df = articlesPD[['pubmed_id', 'title', 'conclusions']]
df

Unnamed: 0,pubmed_id,title,conclusions
0,38394538,Risk prediction model construction for asthma ...,Subcluster T effector cells may play a key rol...
1,38393908,Causal Association Between Allergic Diseases a...,
2,38393624,The Future of Food Allergy Management: Advance...,
3,38393328,Benralizumab versus Mepolizumab for Eosinophil...,
4,38392874,IgD,
...,...,...,...
95,38373824,Airway epithelial cell response to RSV is most...,Our results highlight that the response to RSV...
96,38373823,Type-2 inflammation: a key treatable trait ass...,
97,38373820,"Respiratory symptoms, exacerbations and sleep ...",Individuals with self-reported asthma with CAL...
98,38373665,Jiawei Yanghe Decoction attenuate allergic air...,Our results suggested that the mechanisms of J...


In [5]:
print((store_df_to_neo4j(df_names)))

'Node1'
'Node1'


In [6]:
import pandas as pd
from itertools import combinations

data = [{'lastname': 'Sasaki', 'firstname': 'Mari', 'initials': 'M', 'affiliation': "University Children's Hospital Z√ºrich, Z√ºrich, Switzerland.\nDivision of Respiratory Medicine and Allergology, Department of Paediatrics, Inselspital, University of Bern, Bern, Switzerland."}, {'lastname': 'Suaini', 'firstname': 'Noor H A', 'initials': 'NHA', 'affiliation': 'Singapore Institute for Clinical Sciences (SICS), Agency for Science, Technology and Research (A*STAR), Singapore, Singapore.'}, {'lastname': 'Afghani', 'firstname': 'Jamie', 'initials': 'J', 'affiliation': 'Environmental Medicine Faculty of Medicine, University of Augsburg, Augsburg, Germany.\nZIEL-Institute for Food and Health, Technical University of Munich, Freising, Germany.\nInstitute of Environmental Medicine, Environmental Health Centre, Helmholtz Munich - German Research Centre for Environmental Health (GmbH), Neuherberg, Germany.'}, {'lastname': 'Heye', 'firstname': 'Kristina N', 'initials': 'KN', 'affiliation': "Children's Hospital of Eastern Switzerland, St. Gallen, Switzerland."}, {'lastname': "O'Mahony", 'firstname': 'Liam', 'initials': 'L', 'affiliation': 'Department of Medicine, University College Cork, National University of Ireland, Cork, Ireland.\nAPC Microbiome Ireland, University College Cork, National University of Ireland, Cork, Ireland.\nSchool of Microbiology, University College Cork, National University of Ireland, Cork, Ireland.'}, {'lastname': 'Venter', 'firstname': 'Carina', 'initials': 'C', 'affiliation': 'Pediatric Allergy and Immunology, University of Colorado/Childrens Hospital Colorado, Aurora, Colorado, USA.'}, {'lastname': 'Lauener', 'firstname': 'Roger', 'initials': 'R', 'affiliation': "Children's Hospital of Eastern Switzerland, St. Gallen, Switzerland.\nChristine K√ºhne Center for Allergy Research and Education (CK-CARE), Davos, Switzerland."}, {'lastname': 'Frei', 'firstname': 'Remo', 'initials': 'R', 'affiliation': 'Division of Respiratory Medicine and Allergology, Department of Paediatrics, Inselspital, University of Bern, Bern, Switzerland.\nChristine K√ºhne Center for Allergy Research and Education (CK-CARE), Davos, Switzerland.\nDepartment of Biomedical Research, University of Bern, Bern, Switzerland.'}, {'lastname': 'Roduit', 'firstname': 'Caroline', 'initials': 'C', 'affiliation': "University Children's Hospital Z√ºrich, Z√ºrich, Switzerland.\nDivision of Respiratory Medicine and Allergology, Department of Paediatrics, Inselspital, University of Bern, Bern, Switzerland.\nChildren's Hospital of Eastern Switzerland, St. Gallen, Switzerland.\nChristine K√ºhne Center for Allergy Research and Education (CK-CARE), Davos, Switzerland."}]

# Extracting first and last names
names = [(d['firstname'], d['lastname']) for d in data]

# Merge first and last names
full_names = [' '.join(name) for name in names]

# Generate all combinations of two names
name_combinations = list(combinations(full_names, 2))

# Creating a DataFrame with combinations
df_combinations = pd.DataFrame(name_combinations, columns=['Name 1', 'Name 2'])

print(df_combinations)


             Name 1           Name 2
0       Mari Sasaki  Noor H A Suaini
1       Mari Sasaki    Jamie Afghani
2       Mari Sasaki  Kristina N Heye
3       Mari Sasaki    Liam O'Mahony
4       Mari Sasaki    Carina Venter
5       Mari Sasaki    Roger Lauener
6       Mari Sasaki        Remo Frei
7       Mari Sasaki  Caroline Roduit
8   Noor H A Suaini    Jamie Afghani
9   Noor H A Suaini  Kristina N Heye
10  Noor H A Suaini    Liam O'Mahony
11  Noor H A Suaini    Carina Venter
12  Noor H A Suaini    Roger Lauener
13  Noor H A Suaini        Remo Frei
14  Noor H A Suaini  Caroline Roduit
15    Jamie Afghani  Kristina N Heye
16    Jamie Afghani    Liam O'Mahony
17    Jamie Afghani    Carina Venter
18    Jamie Afghani    Roger Lauener
19    Jamie Afghani        Remo Frei
20    Jamie Afghani  Caroline Roduit
21  Kristina N Heye    Liam O'Mahony
22  Kristina N Heye    Carina Venter
23  Kristina N Heye    Roger Lauener
24  Kristina N Heye        Remo Frei
25  Kristina N Heye  Caroline Roduit
2

In [7]:
df_names.columns = ['Node1', 'Node2', 'Relationship']


nodes = pd.concat([df_names['Node1'], df_names['Node2']], axis=0).unique()
nodes.shape
nodes

array(['Jian Wang', 'Tao Jiang', 'YuanYing Wang', 'ShiHao Wang',
       'JiaXin Wu', 'XinLian Liu', 'Idil D Ezhuthachan',
       'Michele Beaudoin', 'Anna Nowak-Wegrzyn', 'Taissa de M Kasahara',
       'Daniela Robu Popa', 'Oana Elena Melinte', 'Mona-Elisabeta Dobrin',
       'Andrei Tudor Cernomaz', 'Cristina Grigorescu',
       'Alexandra Floriana Nemes', 'Doina Adina Todea',
       'Damiana Maria Vulturar', 'Ionela Alina Grosu-Creangă',
       'Tiberiu Lunguleac', 'Yuto Yasuda', 'Lu Wang', 'Pasquale Chitano',
       'Alice Masini', 'Laura Dallolio', 'Francesco Sanmarchi',
       'Francesco Lovecchio', 'Mario Falato', 'Yari Longobucco',
       'Marcello Lanari', 'Zhihua Zhu', 'Mark Naunton', 'Reza Mortazavi',
       'Mari Sasaki', 'Noor H A Suaini', 'Jamie Afghani',
       'Kristina N Heye', "Liam O'Mahony", 'Carina Venter',
       'Roger Lauener', 'Remo Frei', 'Sima Beigoli', 'Fatemeh Amin',
       'Hamideh Kazemi Rad', 'Ramin Rezaee', 'Kimil Acosta-Pagán',
       'Benjamín Bolaños-

In [8]:
import networkx as nx
G = nx.Graph()




## Add nodes to the graph
for node in nodes:
    G.add_node(
        str(node)
    )

## Add edges to the graph
for index, row in df_names.iterrows():
    G.add_edge(
        str(row["Node1"]),
        str(row["Node2"]),
        title=row["Relationship"] )       #weight=row['count']/2)

In [9]:
'''communities_generator = nx.community.girvan_newman(G)
top_level_communities = next(communities_generator)
next_level_communities = next(communities_generator)
communities = sorted(map(sorted, next_level_communities))
print("Number of Communities = ", len(communities))
print(communities)
'''

'communities_generator = nx.community.girvan_newman(G)\ntop_level_communities = next(communities_generator)\nnext_level_communities = next(communities_generator)\ncommunities = sorted(map(sorted, next_level_communities))\nprint("Number of Communities = ", len(communities))\nprint(communities)\n'

In [10]:

'''
import seaborn as sns
import random
palette = "hls"

## Now add these colors to communities and make another dataframe
def colors2Community(communities) -> pd.DataFrame:
    ## Define a color palette
    p = sns.color_palette(palette, len(communities)).as_hex()
    random.shuffle(p)
    rows = []
    group = 0
    for community in communities:
        color = p.pop()
        group += 1
        for node in community:
            rows += [{"node": node, "color": color, "group": group}]
    df_colors = pd.DataFrame(rows)
    return df_colors


colors = colors2Community(communities)
colors

'''

'\nimport seaborn as sns\nimport random\npalette = "hls"\n\n## Now add these colors to communities and make another dataframe\ndef colors2Community(communities) -> pd.DataFrame:\n    ## Define a color palette\n    p = sns.color_palette(palette, len(communities)).as_hex()\n    random.shuffle(p)\n    rows = []\n    group = 0\n    for community in communities:\n        color = p.pop()\n        group += 1\n        for node in community:\n            rows += [{"node": node, "color": color, "group": group}]\n    df_colors = pd.DataFrame(rows)\n    return df_colors\n\n\ncolors = colors2Community(communities)\ncolors\n\n'

In [11]:
'''for index, row in colors.iterrows():
    G.nodes[row['node']]['group'] = row['group']
    G.nodes[row['node']]['color'] = row['color']
    G.nodes[row['node']]['size'] = G.degree[row['node']]'''

"for index, row in colors.iterrows():\n    G.nodes[row['node']]['group'] = row['group']\n    G.nodes[row['node']]['color'] = row['color']\n    G.nodes[row['node']]['size'] = G.degree[row['node']]"

In [12]:
from pyvis.network import Network

graph_output_directory = "/Users/dominikpichler/Documents/Pet Projects/Athene/docs/index.html"

net = Network(
    notebook=False,
    # bgcolor="#1a1a1a",
    cdn_resources="remote",
    height="900px",
    width="100%",
    select_menu=True,
    # font_color="#cccccc",
    filter_menu=False,
)

net.from_nx(G)
# net.repulsion(node_distance=150, spring_length=400)
net.force_atlas_2based(central_gravity=0.015, gravity=-31)
# net.barnes_hut(gravity=-18100, central_gravity=5.05, spring_length=380)
net.show_buttons(filter_=["physics"])

net.show(graph_output_directory, notebook=False)

/Users/dominikpichler/Documents/Pet Projects/Athene/docs/index.html
