In [12]:
import pandas as pd
import plotly.graph_objects as go

# Load the article table. Later cells read its "Substrate", "Organisms",
# "Product" and "Family" columns and split each cell on ", ".
df = pd.read_csv("defined_articles.csv")

In [31]:
import plotly.express as px
import random

def pick_colour(name):
    """Return the plotly colour string to use for the organism called ``name``.

    "Escherichia" is highlighted with entry 1 of the qualitative Pastel
    palette; every other name gets entry 8 of the qualitative Alphabet palette.
    """
    # Highlighted organism first, shared default for everything else.
    if name == "Escherichia":
        return px.colors.qualitative.Pastel[1]
    return px.colors.qualitative.Alphabet[8]

In [153]:
#creating a new dataframe to use in building the sankey chart

# Colour assigned to each organism; reused for the organism's node and for
# every link that starts or ends at it.
organism_colouring = dict()

# One (Source, Target, Value, Color) record per link. The records are gathered
# in a plain list and turned into a DataFrame once at the end, because growing
# a DataFrame one row at a time with .loc copies the data on every insert.
link_rows = []

#iterate over the rows of the dataframe
for index, row in df.iterrows():

    #get lists of substrates, products, and organisms
    substrates = row["Substrate"].split(", ")
    organisms = row["Organisms"].split(", ")
    products = row["Product"].split(", ")

    #adds colour to each organism using the pick_colour function
    for organism in organisms:
        if organism not in organism_colouring:
            organism_colouring[organism] = pick_colour(organism)

    #add links from each substrate to each organism
    for substrate in substrates:
        for organism in organisms:
            link_rows.append((substrate, organism, 1, organism_colouring[organism]))

    #add links from each organism to each product
    for organism in organisms:
        for product in products:
            link_rows.append((organism, product, 1, organism_colouring[organism]))

sankey_data = pd.DataFrame(link_rows, columns=['Source', 'Target', 'Value', 'Color'])

In [154]:
#needed modification for building the plot

# Node labels: every distinct name used as a link source or target (sources
# first, in order of first appearance). Only Source and Target are scanned;
# scanning the Color column as well would add the colour strings to the list
# as extra node labels that no link refers to.
unique_source_target = list(
    pd.concat([sankey_data['Source'], sankey_data['Target']], ignore_index=True).unique()
)
mapping_dict = {k: v for v, k in enumerate(unique_source_target)}

# The figure cell passes Source/Target as positions into the label list, so
# translate names to indices. assign() returns a new frame, so sankey_data
# keeps its names and this cell can be re-run without mapping indices that
# were already mapped.
indexed_links = sankey_data.assign(
    Source=sankey_data['Source'].map(mapping_dict),
    Target=sankey_data['Target'].map(mapping_dict),
)
sankey_dict = indexed_links.to_dict(orient='list')

#setting colours for the nodes
# Organisms use their assigned colour; all other nodes fall back to grey.
node_colours = [organism_colouring.get(i, "grey") for i in unique_source_target]

In [155]:
#create diagram
import plotly.express as px

# Node appearance: labels and colours are parallel lists, one entry per node.
node_spec = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=unique_source_target,
    color=node_colours,
)

# Link definition: source/target are positions in the node label list.
link_spec = dict(
    source=sankey_dict["Source"],
    target=sankey_dict["Target"],
    value=sankey_dict["Value"],
    color=sankey_dict["Color"],
)

fig = go.Figure(data=[go.Sankey(orientation="h", node=node_spec, link=link_spec)])

# Figure-level settings: title, height and a small font.
layout = {
    "title": "Sankey Diagram for consortia uses",
    "height": 850,
    "font": {"size": 8},
}

#show figure
fig.update_layout(layout)
fig.show()

In [156]:
#save the figure as a .png file
fig.write_image("plots/sankey_diagram.png")

In [28]:
#new sankey chart for genus only
#
# NOTE: this cell reads substrate_map and product_map, which are defined in
# cells further down the notebook; those cells must be run first.

# Colour assigned to each genus; reused for the genus node and all its links.
organism_colouring = dict()

# One (Source, Target, Value, Color) record per link. The records are gathered
# in a plain list and turned into a DataFrame once at the end, because growing
# a DataFrame one row at a time with .loc copies the data on every insert.
link_rows = []

#iterate over the rows of the dataframe
for index, row in df.iterrows():

    #get lists of substrates, products, and organisms
    # (local names: all_substrates / all_products are used elsewhere in the
    # notebook for the notebook-wide lists and must not be rebound here)
    raw_substrates = row["Substrate"].split(", ")
    organisms = row["Organisms"].split(", ")
    raw_products = row["Product"].split(", ")

    #cluster the substrates and products using dictionaries
    # (a name missing from a map raises KeyError)
    substrates = [substrate_map[substrate] for substrate in raw_substrates]
    products = [product_map[product] for product in raw_products]

    #taking the first word to get the genus instead of the organisms
    genuses = [organism.split(' ')[0] for organism in organisms]

    #adds colour to each genus using the pick_colour function
    for genus in genuses:
        if genus not in organism_colouring:
            organism_colouring[genus] = pick_colour(genus)

    #add links from each substrate group to each genus
    for substrate in substrates:
        for genus in genuses:
            link_rows.append((substrate, genus, 1, organism_colouring[genus]))

    #add links from each genus to each product group
    for genus in genuses:
        for product in products:
            link_rows.append((genus, product, 1, organism_colouring[genus]))

sankey_data = pd.DataFrame(link_rows, columns=['Source', 'Target', 'Value', 'Color'])


#needed modification for building the plot

# Node labels: every distinct name used as a link source or target (sources
# first, in order of first appearance). The Color column is deliberately not
# scanned, otherwise the colour strings would become extra node labels.
unique_source_target = list(
    pd.concat([sankey_data['Source'], sankey_data['Target']], ignore_index=True).unique()
)
mapping_dict = {k: v for v, k in enumerate(unique_source_target)}

# The figure below addresses nodes by position in the label list.
sankey_data['Source'] = sankey_data['Source'].map(mapping_dict)
sankey_data['Target'] = sankey_data['Target'].map(mapping_dict)
sankey_dict = sankey_data.to_dict(orient='list')

#setting colours for the nodes
# Genus nodes use their assigned colour; all other nodes fall back to grey.
node_colours = [organism_colouring.get(i, "grey") for i in unique_source_target]

#create diagram
fig = go.Figure(data=[go.Sankey(
    orientation = "h",
    node = dict(
        pad = 15,
        thickness = 20,
        line = dict(color = "black", width = 0.5),
        label = unique_source_target,
        color = node_colours
    ),
    link = dict(
        source = sankey_dict["Source"],
        target = sankey_dict["Target"],
        value = sankey_dict["Value"],
        color = sankey_dict["Color"]
    ))])

layout = dict(
    title = "Sankey Diagram for consortia uses",
    height = 850,
    font = dict(size = 8),
)

#show figure
fig.update_layout(layout)
fig.show()
#fig.write_image("plots/sankey_diagram_genus.pdf")

In [41]:
# Flatten the ", "-separated Substrate and Product cells of df into one list
# each (duplicates kept, row order preserved).
all_substrates = [name for cell in df["Substrate"] for name in cell.split(", ")]
all_products = [name for cell in df["Product"] for name in cell.split(", ")]

# Distinct names only.
all_unique_substrates = set(all_substrates)
all_unique_products = set(all_products)

In [122]:
# Show both sets, one per line.
print(all_unique_substrates, all_unique_products, sep="\n")

{'Light', 'Xylose', 'Tryptophan', 'Cellulose', 'Lignocellulose', 'CO2', 'Syngas', 'Saccharides', 'VFA_s', 'Wastewater sludge', 'CO', 'Glucose', 'Meat extract', 'Industrial waste', 'Amino acid medium', 'Food waste', 'Glycerol', 'Phenanthrene', 'Methanol_s', 'Starch', 'Sucrose'}
{'Antilisterial compounds', '7-methylxanthine', 'L-lysine', 'Fructo-oligosaccharide', 'Caproate', 'Glycosides', 'coniferyl alcohol', 'O-methylated phenylpropanoids', 'MCFA', 'Hydrogen', 'caffeyl alcohol', '3-hydroxypropionic acid', 'Polysaccharide', 'Polyhydroxyalkanoate', 'Bacterial cellulose', 'Indigo', 'Anthocyanins', 'SCFA', '3-hydroxybenzoic acid', 'lovastatin', 'Antibodies', 'Rosmarinic acid', 'Butanol', 'Fumaric acid', 'Microbial proteases', 'Recombinant protein', 'Glucose_p', 'VFA', 'Ethanol', 'Pyranoanthocyanins', 'Itaconic acid', 'P-coumaric acid', 'Methanol_p', 'Electricity', 'Lipopeptides', 'Phenol', 'Biosurfactants', 'Resveratrol', 'Biomass', 'Cercosporin', 'Pharmaceutically active compounds', 'Caffe

In [21]:
# Second article table; its Substrate and Product columns are read below.
all_df = pd.read_csv("all_articles.csv")

# Flatten the ", "-separated cells into one list per column (duplicates kept).
all_s = [name for cell in all_df["Substrate"] for name in cell.split(", ")]
all_p = [name for cell in all_df["Product"] for name in cell.split(", ")]

# Distinct names only.
all_u_s = set(all_s)
all_u_p = set(all_p)

In [129]:
# Names that occur in all_articles.csv but not in defined_articles.csv.
print(all_u_s.difference(all_unique_substrates))
print(all_u_p.difference(all_unique_products))

{'Organic waste', 'Ethanol', 'Acetate', 'Crude oil', 'Arabinose', 'Antibiotic fermentation residue', 'Activated sludge', 'Galactose', 'Bovine milk', 'Amino acids', 'Glycerin', 'Sludge compost', 'Fructose', 'Lupin flour', 'Manure compost'}
{'Lactate', 'Caproic acid', 'Organic acids', 'Siderophores', 'Hyaluronic acid', 'Bioflocculant', 'Fermented food product', 'Lactic acid', 'Glutamic acid', 'SCCA', 'Methane', 'SCOA'}


In [3]:
# Maps each raw substrate name to the broader group used as a node label in
# the grouped Sankey charts. Insertion order is kept as-is because
# SankeyVisualisation creates its links in dict order.
substrate_map = {
    # names printed from defined_articles.csv
    "Light": "Light",
    "Xylose": "Sugars",
    "Tryptophan": "Amino acid medium",
    "Cellulose": "Lignocellulose",
    "Lignocellulose": "Lignocellulose",
    "CO2": "Gas",
    "Syngas": "Gas",
    "Saccharides": "Sugars",
    "VFA_s": "Short-chain fatty acids",
    "Wastewater sludge": "Waste",
    "CO": "Gas",
    "Glucose": "Sugars",
    "Meat extract": "Waste",
    "Industrial waste": "Waste",
    "Amino acid medium": "Amino acid medium",
    "Food waste": "Waste",
    "Glycerol": "Alcohols",
    "Phenanthrene": "Other substrate",
    "Methanol_s": "Alcohols",
    "Starch": "Sugars",
    "Sucrose": "Sugars",
    # additional names printed from all_articles.csv
    "Organic waste": "Waste",
    "Ethanol": "Alcohols",
    "Acetate": "Short-chain fatty acids",
    "Crude oil": "Crude oil",
    "Arabinose": "Sugars",
    "Antibiotic fermentation residue": "Waste",
    "Activated sludge": "Waste",
    "Galactose": "Sugars",
    "Bovine milk": "Complex media",
    "Amino acids": "Amino acid medium",
    "Glycerin": "Alcohols",
    "Sludge compost": "Waste",
    "Fructose": "Sugars",
    "Lupin flour": "Complex media",
    "Manure compost": "Waste",
}

In [182]:
# Earlier, finer-grained product grouping. The Sankey cells in this notebook
# use product_map, not this dict.
# The group label for 'Hydrogen' was misspelled 'Hydorgen gas'; corrected here.
product_map_old = {
    'Antilisterial compounds': 'Antibiotics',
    '7-methylxanthine': 'Pharmaceuticals',
    'L-lysine': 'Amino acids',
    'Fructo-oligosaccharide': 'Polysaccharides',
    'Caproate': 'Carboxylic acids',
    'Glycosides': 'Glycosides',
    'coniferyl alcohol': 'Phenols',
    'O-methylated phenylpropanoids': 'Pharmaceuticals',
    'MCFA': 'Carboxylic acids',
    'Hydrogen': 'Hydrogen gas',
    'caffeyl alcohol': 'Phenols',
    '3-hydroxypropionic acid': 'Carboxylic acids',
    'Polysaccharide': 'Polysaccharides',
    'Polyhydroxyalkanoate': 'Polyester polymers',
    'Bacterial cellulose': 'Cellulose',
    'Indigo': 'Dye',
    'Anthocyanins': 'Pigments',
    'SCFA': 'Carboxylic acids',
    '3-hydroxybenzoic acid': 'Phenols',
    'lovastatin': 'Pharmaceuticals',
    'Antibodies': 'Proteins',
    'Rosmarinic acid': 'Phenols',
    'Butanol': 'Primary alcohols',
    'Fumaric acid': 'Carboxylic acids',
    'Microbial proteases': 'Proteins',
    'Recombinant protein': 'Proteins',
    'Glucose_p': 'Metabolites to support growth',
    'VFA': 'Carboxylic acids',
    'Ethanol': 'Primary alcohols',
    'Pyranoanthocyanins': 'Phenols',
    'Itaconic acid': 'Carboxylic acids',
    'P-coumaric acid': 'Carboxylic acids',
    'Methanol_p': 'Primary alcohols',
    'Electricity': 'Electricity',
    'Lipopeptides': 'Proteins',
    'Phenol': 'Phenols',
    'Biosurfactants': 'Surfactants',
    'Resveratrol': 'Phenols',
    'Biomass': 'Biomass',
    'Cercosporin': 'Pharmaceuticals',
    'Pharmaceutically active compounds': 'Pharmaceuticals',
    'Caffeic acid': 'Phenols',
    'Vitamins': 'Metabolites to support growth',
    'Butyrate': 'Carboxylic acids',
    'Flavonoids': 'Phenols',
    'Monacolin': 'Pharmaceuticals',
    'Glionitrin B': 'Pharmaceuticals',
    'Hyaluronic acid': 'Pharmaceuticals',
    '1,3-Propanediol': 'Glycols',
    'Propionate': 'Carboxylic acids',
    'PHB': 'Polyester polymers',
    'Butyl acetate': 'Carboxylic esters',
    'Fatty acid': 'Carboxylic acids',
    'Lactate': 'Carboxylic acids',
    'Caproic acid': 'Carboxylic acids',
    'Organic acids': 'Carboxylic acids',
    'Siderophores': 'Bacterial extracellular compounds',
    'Bioflocculant': 'Bacterial extracellular compounds',
    'Fermented food product': 'Fermented food products',
    'Lactic acid': 'Carboxylic acids',
    'Glutamic acid': 'Amino acids',
    'SCCA': 'Carboxylic acids',
    'Methane': 'Methane',
    'SCOA': 'Carboxylic acids'}

In [4]:
# Maps each raw product name to the broader group used as a node label in the
# grouped Sankey charts. Insertion order is kept as-is because
# SankeyVisualisation creates its links in dict order.
# The group label for 'Hydrogen' was misspelled 'Hydorgen gas', which showed up
# verbatim as a node label in the charts; corrected here.
product_map = {
    'Antilisterial compounds': 'Antibiotics',
    '7-methylxanthine': 'Pharmaceuticals',
    'L-lysine': 'Amino acids',
    'Fructo-oligosaccharide': 'Polysaccharides',
    'Caproate': 'Carboxylic acids and esters',
    'Glycosides': 'Other',
    'coniferyl alcohol': 'Phenols',
    'O-methylated phenylpropanoids': 'Pharmaceuticals',
    'MCFA': 'Carboxylic acids and esters',
    'Hydrogen': 'Hydrogen gas',
    'caffeyl alcohol': 'Phenols',
    '3-hydroxypropionic acid': 'Carboxylic acids and esters',
    'Polysaccharide': 'Polysaccharides',
    'Polyhydroxyalkanoate': 'Polyester polymers',
    'Bacterial cellulose': 'Cellulose',
    'Indigo': 'Dyes and pigments',
    'Anthocyanins': 'Dyes and pigments',
    'SCFA': 'Carboxylic acids and esters',
    '3-hydroxybenzoic acid': 'Phenols',
    'lovastatin': 'Pharmaceuticals',
    'Antibodies': 'Proteins',
    'Rosmarinic acid': 'Phenols',
    'Butanol': 'Primary alcohols',
    'Fumaric acid': 'Carboxylic acids and esters',
    'Microbial proteases': 'Proteins',
    'Recombinant protein': 'Proteins',
    'Glucose_p': 'Metabolites to support growth',
    'VFA': 'Carboxylic acids and esters',
    'Ethanol': 'Primary alcohols',
    'Pyranoanthocyanins': 'Phenols',
    'Itaconic acid': 'Carboxylic acids and esters',
    'P-coumaric acid': 'Carboxylic acids and esters',
    'Methanol_p': 'Primary alcohols',
    'Electricity': 'Electricity',
    'Lipopeptides': 'Proteins',
    'Phenol': 'Phenols',
    'Biosurfactants': 'Other',
    'Resveratrol': 'Phenols',
    'Biomass': 'Biomass',
    'Cercosporin': 'Pharmaceuticals',
    'Pharmaceutically active compounds': 'Pharmaceuticals',
    'Caffeic acid': 'Phenols',
    'Vitamins': 'Metabolites to support growth',
    'Butyrate': 'Carboxylic acids and esters',
    'Flavonoids': 'Phenols',
    'Monacolin': 'Pharmaceuticals',
    'Glionitrin B': 'Pharmaceuticals',
    'Hyaluronic acid': 'Pharmaceuticals',
    '1,3-Propanediol': 'Glycols',
    'Propionate': 'Carboxylic acids and esters',
    'PHB': 'Polyester polymers',
    'Butyl acetate': 'Carboxylic acids and esters',
    'Fatty acid': 'Carboxylic acids and esters',
    'Lactate': 'Carboxylic acids and esters',
    'Caproic acid': 'Carboxylic acids and esters',
    'Organic acids': 'Carboxylic acids and esters',
    'Siderophores': 'Bacterial extracellular compounds',
    'Bioflocculant': 'Bacterial extracellular compounds',
    'Fermented food product': 'Fermented food products',
    'Lactic acid': 'Carboxylic acids and esters',
    'Glutamic acid': 'Amino acids',
    'SCCA': 'Carboxylic acids and esters',
    'Methane': 'Methane',
    'SCOA': 'Carboxylic acids and esters'}

In [5]:
#sankey visualisation of mapping to share with Daniel
import plotly.graph_objects as go

def SankeyVisualisation(d):
    """Display a Sankey diagram of the mapping ``d`` (item name -> group name).

    Every key of ``d`` becomes a source node, every distinct value becomes a
    target node, and each item contributes one link of weight 1.

    Keys and values are kept in two separate node sets. An item whose key
    equals its value (e.g. ``'Light': 'Light'``) is therefore drawn as a link
    between two nodes that share a label, rather than as a link from a node to
    itself. Nodes are listed in the insertion order of ``d`` (keys first, then
    values by first appearance), so the figure is built identically on every
    run; iterating a ``set`` of strings gives no such guarantee.

    Parameters
    ----------
    d : dict
        Mapping to visualise. Values must be hashable.

    Returns
    -------
    None
        The figure is shown with ``fig.show()`` and not returned.
    """
    # Source side: one node per key. Target side: one node per distinct value.
    source_labels = list(d.keys())
    target_labels = list(dict.fromkeys(d.values()))
    nodes = source_labels + target_labels

    # Target indices are offset so they follow the source nodes in `nodes`.
    source_index = {label: i for i, label in enumerate(source_labels)}
    target_index = {
        label: len(source_labels) + i for i, label in enumerate(target_labels)
    }

    # One link per item, in dict order, each with weight 1.
    link_sources = [source_index[key] for key in d]
    link_targets = [target_index[value] for value in d.values()]
    link_values = [1] * len(d)

    fig = go.Figure(data=[go.Sankey(
        node = dict(
            pad = 15,
            thickness = 40,
            line = dict(color = "black", width = 0.5),
            label = nodes
        ),
        link = dict(
            arrowlen = 20,
            source = link_sources,
            target = link_targets,
            value = link_values
        ))])

    layout = dict(
        height = 1000,
        font = dict(size = 15),
    )

    #show figure
    fig.update_layout(layout)
    fig.show()


In [23]:
# Draw one mapping diagram per grouping dictionary, substrates first.
for mapping in (substrate_map, product_map):
    SankeyVisualisation(mapping)

In [16]:
#new sankey chart for FAMILY of organisms

# Colour assigned to each family; reused for the family node and all its links.
# NOTE(review): pick_colour only special-cases the name "Escherichia". If the
# Family column holds family names, every family gets the default colour here
# -- confirm that this is intended.
organism_colouring = dict()

# One (Source, Target, Value, Color) record per link. The records are gathered
# in a plain list and turned into a DataFrame once at the end, because growing
# a DataFrame one row at a time with .loc copies the data on every insert.
link_rows = []

#iterate over the rows of the dataframe
#for index, row in all_df.iterrows():
for index, row in df.iterrows():

    #get lists of substrates, products, and families
    substrates = row["Substrate"].split(", ")
    organisms = row["Family"].split(", ")
    products = row["Product"].split(", ")

    #keep only the first word of each entry of the Family column
    families = [organism.split(' ')[0] for organism in organisms]
    #cluster the substrates and products using dictionaries
    # (a name missing from a map raises KeyError)
    new_substrates = [substrate_map[substrate] for substrate in substrates]
    new_products = [product_map[product] for product in products]

    #adds colour to each family using the pick_colour function
    for family in families:
        if family not in organism_colouring:
            organism_colouring[family] = pick_colour(family)

    #add links from each substrate group to each family
    for substrate in new_substrates:
        for family in families:
            link_rows.append((substrate, family, 1, organism_colouring[family]))

    #add links from each family to each product group
    for family in families:
        for product in new_products:
            link_rows.append((family, product, 1, organism_colouring[family]))

sankey_data = pd.DataFrame(link_rows, columns=['Source', 'Target', 'Value', 'Color'])


#needed modification for building the plot

# Node labels: every distinct name used as a link source or target (sources
# first, in order of first appearance). The Color column is deliberately not
# scanned, otherwise the colour strings would become extra node labels.
unique_source_target = list(
    pd.concat([sankey_data['Source'], sankey_data['Target']], ignore_index=True).unique()
)
mapping_dict = {k: v for v, k in enumerate(unique_source_target)}

# The figure below addresses nodes by position in the label list.
sankey_data['Source'] = sankey_data['Source'].map(mapping_dict)
sankey_data['Target'] = sankey_data['Target'].map(mapping_dict)
sankey_dict = sankey_data.to_dict(orient='list')

#setting colours for the nodes
# Family nodes use their assigned colour; all other nodes fall back to grey.
node_colours = [organism_colouring.get(i, "grey") for i in unique_source_target]

#create diagram
fig = go.Figure(data=[go.Sankey(
    orientation = "h",
    node = dict(
        pad = 15,
        thickness = 20,
        line = dict(color = "black", width = 0.5),
        label = unique_source_target,
        color = node_colours
    ),
    link = dict(
        source = sankey_dict["Source"],
        target = sankey_dict["Target"],
        value = sankey_dict["Value"],
        color = sankey_dict["Color"]
    ))])

layout = dict(
    title = "Sankey Diagram for consortia uses",
    height = 850,
    font = dict(size = 8),
)

#show figure
fig.update_layout(layout)
fig.show()

In [32]:
#sankey chart for genus of organisms, with updated groupings

# Colour assigned to each genus; reused for the genus node and all its links.
organism_colouring = dict()

# One (Source, Target, Value, Color) record per link. The records are gathered
# in a plain list and turned into a DataFrame once at the end, because growing
# a DataFrame one row at a time with .loc copies the data on every insert.
link_rows = []

#iterate over the rows of the dataframe
#for index, row in all_df.iterrows():
for index, row in df.iterrows():

    #get lists of substrates, products, and organisms
    substrates = row["Substrate"].split(", ")
    organisms = row["Organisms"].split(", ")
    products = row["Product"].split(", ")

    #taking the first word to get the genus instead of the organisms
    genuses = [organism.split(' ')[0] for organism in organisms]
    #cluster the substrates and products using dictionaries
    # (a name missing from a map raises KeyError)
    new_substrates = [substrate_map[substrate] for substrate in substrates]
    new_products = [product_map[product] for product in products]

    #adds colour to each genus using the pick_colour function
    for genus in genuses:
        if genus not in organism_colouring:
            organism_colouring[genus] = pick_colour(genus)

    #add links from each substrate group to each genus
    for substrate in new_substrates:
        for genus in genuses:
            link_rows.append((substrate, genus, 1, organism_colouring[genus]))

    #add links from each genus to each product group
    for genus in genuses:
        for product in new_products:
            link_rows.append((genus, product, 1, organism_colouring[genus]))

sankey_data = pd.DataFrame(link_rows, columns=['Source', 'Target', 'Value', 'Color'])


#needed modification for building the plot

# Node labels: every distinct name used as a link source or target (sources
# first, in order of first appearance). The Color column is deliberately not
# scanned, otherwise the colour strings would become extra node labels.
unique_source_target = list(
    pd.concat([sankey_data['Source'], sankey_data['Target']], ignore_index=True).unique()
)
mapping_dict = {k: v for v, k in enumerate(unique_source_target)}

# The figure below addresses nodes by position in the label list.
sankey_data['Source'] = sankey_data['Source'].map(mapping_dict)
sankey_data['Target'] = sankey_data['Target'].map(mapping_dict)
sankey_dict = sankey_data.to_dict(orient='list')

#setting colours for the nodes
# Genus nodes use their assigned colour; all other nodes fall back to grey.
node_colours = [organism_colouring.get(i, "grey") for i in unique_source_target]

#create diagram
fig = go.Figure(data=[go.Sankey(
    orientation = "h",
    node = dict(
        pad = 15,
        thickness = 20,
        line = dict(color = "black", width = 0.5),
        label = unique_source_target,
        color = node_colours
    ),
    link = dict(
        source = sankey_dict["Source"],
        target = sankey_dict["Target"],
        value = sankey_dict["Value"],
        color = sankey_dict["Color"]
    ))])

layout = dict(
    title = "Sankey Diagram for consortia uses",
    height = 850,
    width = 1300,
    font = dict(size = 8),
)

#show figure
fig.update_layout(layout)
fig.show()

In [33]:
# Write whichever figure is currently bound to `fig` to
# plots/sankey_genus_grouped_ecoli.pdf.
fig.write_image('plots/sankey_genus_grouped_ecoli.pdf')

In [209]:
#Plot showing most common organisms
import matplotlib.pyplot as plt

# One row per (article, family): turn each ", "-separated Family cell into a
# list and explode it. assign() returns a new frame, so df is left untouched.
o_df = df.assign(Family=df["Family"].str.split(", ")).explode("Family")

# Number of rows per family, largest count first.
organism_per_journal = (
    o_df.groupby("Family")
    .size()
    .sort_values(ascending=False)
)

organism_per_journal

# #make the plot
# organism_per_journal[:14].plot.bar()
# plt.style.use('default')

# #plt.setp(plt.gca().get_xticklabels(), rotation=60, ha="right", va="top")

# plt.ylabel("Number of publications")
# plt.show()

Family
Enterobacteriaceae        28
Clostridiaceae            17
Bacillaceae                7
Saccharomycetaceae         7
Hypocreaceae               5
Eubacteriaceae             5
Lactobacillaceae           3
Acetobacteraceae           2
Saccotheciaceae            2
Aspergillaceae             2
Enterococcaceae            2
Veillonellaceae            2
Dipodascaceae              1
Phanerochaetaceae          1
Thermoanaerobacterales     1
Synechococcaceae           1
Streptomycetaceae          1
Streptococcaceae           1
Sphingomonadaceae          1
Shewanellaceae             1
Schizophyllaceae           1
Beijerinckiaceae           1
Burkholderiaceae           1
Rhizopodaceae              1
Rhizobiaceae               1
Pseudomonas                1
Pseudomonadaceae           1
Promicromonosporaceae      1
Phaffomycetaceae           1
Desulfuromonadaceae        1
Pasteurellaceae            1
Paenibacillaceae           1
Oscillospiraceae           1
Nitrobacteraceae           1
Neocall

In [201]:
# Look up the row count of a single family by its index label.
organism_per_journal.loc['Thermoanaerobacterales']

KeyError: 'Thermoanaerobacterales'