In [1]:
import pandas as pd
import networkx as nx

#load the article table; the cells below read its "Substrate", "Product",
#"Organisms" and "Organisms (genus)" columns and split each value on ", "
df = pd.read_csv("defined_articles.csv")

In [2]:
#creating a bipartite network for organisms and compounds
#
#compounds (substrates and products) and organisms are the two node sets;
#directed edges run substrate -> organism -> product. Node size grows with the
#number of times a name is listed in the dataframe.

from collections import Counter

from pyvis.network import Network

#read data from file and create data structures in lists
#(one list entry per mention of a name; duplicates are counted for node size)
compound_nodes = []
organism_nodes = []
edges = []

#iterate over the rows of the dataframe
for _, row in df.iterrows():

    #get lists of substrates, products, and organisms
    substrates = row["Substrate"].split(", ")
    products = row["Product"].split(", ")
    organisms = row["Organisms"].split(", ")

    #record every name exactly once per listing in this row; doing this inside
    #the nested edge loops would multiply organism and product counts by the
    #number of substrates/organisms in the row and distort the node sizes
    compound_nodes.extend(substrates)
    compound_nodes.extend(products)
    organism_nodes.extend(organisms)

    for organism in organisms:
        #add edges from every substrate to the organism
        edges.extend((substrate, organism) for substrate in substrates)
        #add edges from the organism to every product
        edges.extend((organism, product) for product in products)

#build network
bipartiteNetwork = nx.DiGraph()

#node size = base size of 5 + number of mentions; Counter tallies the mentions
#in a single pass and yields each distinct name once
for node, mentions in Counter(compound_nodes).items():
    bipartiteNetwork.add_node(node, bipartite = 0, color = "red", size = 5 + mentions)

for node, mentions in Counter(organism_nodes).items():
    bipartiteNetwork.add_node(node, bipartite = 1, shape = 'square', size = 5 + mentions)

#repeated (source, target) pairs collapse into a single edge in a DiGraph
bipartiteNetwork.add_edges_from(edges)

#visualise network
nt = Network('1000px', '1800px', directed=True, select_menu=True)
nt.from_nx(bipartiteNetwork)
nt.show('bipartite_organisms_comounds.html')

In [3]:
#creating a bipartite network for organisms and compounds using the genus of the organisms
#
#compounds (substrates and products) and organism genera are the two node sets;
#directed edges run substrate -> genus -> product. Node size grows with the
#number of times a name is listed in the dataframe.

from collections import Counter

from pyvis.network import Network

#read data from file and create data structures in lists
#(one list entry per mention of a name; duplicates are counted for node size)
compound_nodes = []
organism_nodes = []
edges = []

#iterate over the rows of the dataframe
for _, row in df.iterrows():

    #get lists of substrates, products, and organisms (genus level)
    substrates = row["Substrate"].split(", ")
    products = row["Product"].split(", ")
    organisms = row["Organisms (genus)"].split(", ")

    #record every name exactly once per listing in this row; doing this inside
    #the nested edge loops would multiply genus and product counts by the
    #number of substrates/genera in the row and distort the node sizes
    compound_nodes.extend(substrates)
    compound_nodes.extend(products)
    organism_nodes.extend(organisms)

    for organism in organisms:
        #add edges from every substrate to the organism
        edges.extend((substrate, organism) for substrate in substrates)
        #add edges from the organism to every product
        edges.extend((organism, product) for product in products)

#build network
bipartiteNetwork_genus = nx.DiGraph()

#node size = base size of 5 + number of mentions; Counter tallies the mentions
#in a single pass and yields each distinct name once
for node, mentions in Counter(compound_nodes).items():
    bipartiteNetwork_genus.add_node(node, bipartite = 0, color = "red", size = 5 + mentions)

for node, mentions in Counter(organism_nodes).items():
    bipartiteNetwork_genus.add_node(node, bipartite = 1, shape = 'square', size = 5 + mentions)

#repeated (source, target) pairs collapse into a single edge in a DiGraph
bipartiteNetwork_genus.add_edges_from(edges)

#visualise network
nt = Network('1000px', '1800px', directed=True)
nt.from_nx(bipartiteNetwork_genus)
nt.show('bipartite_organisms_genus_comounds.html')

In [None]:
#co-occurrence graph for organisms
#
#nodes are organisms, sized by how often they are listed; an undirected edge
#links two organisms listed in the same row and its weight is the number of
#rows in which the pair occurs together.

#imports
import itertools as it
from collections import Counter

from pyvis.network import Network

#read data from file and create data structures in lists
nodes = []
edges = []

#iterate over the rows of the dataframe
for _, row in df.iterrows():

    #get list of organisms
    organisms = row["Organisms"].split(", ")

    #add all the organisms to the node list
    nodes.extend(organisms)

    if len(organisms) == 1:
        #add a self-loop if there is only a single organism listed
        edges.append((organisms[0], organisms[0]))
    else:
        #add every pair of organisms to the global edge list; each pair is stored
        #in sorted order so that (a, b) and (b, a) coming from different rows are
        #tallied as the same undirected edge instead of overwriting each other
        edges.extend(tuple(sorted(pair)) for pair in it.combinations(organisms, 2))


#build network
coOccurrenceNetwork = nx.Graph()

#add nodes w/ size (base size of 5 + number of mentions, tallied in one pass)
for node, mentions in Counter(nodes).items():
    coOccurrenceNetwork.add_node(node, size = 5 + mentions)

#add edges w/ weights (weight = number of rows containing the pair)
edge_counts = Counter(edges)
for (u, v), c in edge_counts.items():
    coOccurrenceNetwork.add_edge(u, v, weight = c)

#visualise network
nt = Network('1000px', '1800px', select_menu=True)
nt.from_nx(coOccurrenceNetwork)
nt.show('co_occurrence_network.html')