In [1]:
import pandas as pd
import json
import os
from collections import Counter
import itertools
import re
import csv
import random
import statsmodels.stats.weightstats as sms

import matplotlib.pyplot as mpl
import seaborn as sns

import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import numpy as np

### Read Files

Read compounds found on Enceladus (Enceladus_Compounds_and_Concentrations), compounds necessary for life (Freileich), and files associated with each run.

In [2]:
#read seed set KEGG labels and names into a dictionary
with open("Enceladus_Compounds_and_Concentrations.csv") as f2:
  next(f2)
  lb = []
  reader = csv.reader(f2, skipinitialspace=True)
  for row in reader:
    lb.append(tuple([row[0], row[2]]))

lb = dict(lb)
print(lb)

{'C00011': 'CO2', 'C01407': 'C6H6', 'C00014': 'NH3', 'C00283': 'H2S', 'C00282': 'H2', 'C06548': 'C2H4O', 'C01438': 'CH4', 'C00697': 'N2', 'C06547': 'C2H4', 'C00132': 'CH4O', 'C00067': 'CH2O', 'C11505': 'C3H6', 'C01548': 'C2H2', 'C00469': 'C2H6O', 'C00001': 'H20', 'C20783': 'C3H8', 'C01326': 'HCN', 'C00237': 'CO'}


In [2]:
#Revised KEGG compounds and names
data = json.load(open("containskegg_and_majorspecies.json"))
lb = []

cpd_name_df = pd.read_csv("accessible_compounds.csv")
for cpd in data["Contains_KEGGID"]:
    df_row = cpd_name_df[cpd_name_df["Compound"] == cpd]
    lb.append(tuple([cpd, df_row.iloc[0]["Name"]]))
    
lb = dict(lb)
print(lb)
    

{u'C00027': 'Hydrogen peroxide', u'C12568': 'Potassium hydroxide', u'C18606': 'Potassium bicarbonate', u'C00283': 'Hydrogen sulfide', u'C00282': 'Hydrogen', u'C00007': 'Oxygen', u'C00288': 'HCO3-', u'C00001': 'H2O', u'C01380': 'Ethylene glycol', u'C01387': 'Octane', u'C06548': 'Ethylene oxide', u'C19316': 'Allyl chloride', u'C00741': 'Diacetyl', u'C06142': '1-Butanol', u'C01328': 'HO-', u'C00533': 'Nitric oxide', u'C01438': 'Methane', u'C06547': 'Ethylene', u'C11505': 'Propylene', u'C00238': 'Potassium cation', u'C01548': 'Acetylene', u'C00218': 'Methylamine', u'C00479': 'Propanal', u'C13563': 'Sodium chloride', u'C12603': 'Sodium bicarbonate', u'C01326': 'Hydrogen cyanide', u'C00011': 'CO2', u'C00014': 'Ammonia', u'C00132': 'Methanol', u'C01059': '2,5-Dihydroxypyridine', u'C00067': 'Formaldehyde', u'C21390': 'Butane', u'C20783': 'Propane', u'C19572': 'Silica', u'C12567': 'Magnesium oxide', u'C05979': 'Propane-1-ol', u'C12569': 'Sodium hydroxide', u'C01407': 'Benzene', u'C00266': 'Glyc

In [3]:
#Remove 11 compounds with no reactions associated with them
extra_cpds = ['C00238','C12244','C12567','C12568','C12569','C12603','C13563','C17390','C18606','C19316','C19572']
for cpd in extra_cpds:
    lb.pop(cpd)
    
print(lb)
print(len(lb))

{u'C00027': 'Hydrogen peroxide', u'C00283': 'Hydrogen sulfide', u'C00282': 'Hydrogen', u'C00007': 'Oxygen', u'C00288': 'HCO3-', u'C00001': 'H2O', u'C01380': 'Ethylene glycol', u'C01387': 'Octane', u'C06548': 'Ethylene oxide', u'C00741': 'Diacetyl', u'C06142': '1-Butanol', u'C01328': 'HO-', u'C00533': 'Nitric oxide', u'C01438': 'Methane', u'C06547': 'Ethylene', u'C11505': 'Propylene', u'C01548': 'Acetylene', u'C00218': 'Methylamine', u'C00479': 'Propanal', u'C01326': 'Hydrogen cyanide', u'C00011': 'CO2', u'C00014': 'Ammonia', u'C00132': 'Methanol', u'C01059': '2,5-Dihydroxypyridine', u'C00067': 'Formaldehyde', u'C21390': 'Butane', u'C20783': 'Propane', u'C05979': 'Propane-1-ol', u'C01407': 'Benzene', u'C00266': 'Glycolaldehyde', u'C00041': 'L-Alanine', u'C01353': 'Carbonic acid', u'C00469': 'Ethanol', u'C00189': 'Ethanolamine', u'C01330': 'Sodium cation', u'C00237': 'CO', u'C00543': 'Dimethylamine', u'C00697': 'Nitrogen'}
38


In [4]:
#define life-specifc reactions
freilich = "links/Freilich09.json"
with open(freilich) as fr:
   datajsonfr = json.load(fr)
    
lc = datajsonfr.keys()
print(lc)

[u'C00025', u'C00024', u'C00148', u'C00105', u'C00362', u'C00020', u'C00360', u'C00008', u'C15672', u'C00006', u'C00005', u'C00004', u'C00003', u'C00002', u'C00144', u'C00079', u'C00364', u'C05980', u'C00458', u'C00399', u'C01050', u'C00407', u'C00239', u'C16221', u'C00748', u'C00054', u'C00055', u'C00350', u'C00641', u'C06040', u'C00073', u'C00062', u'C00234', u'C00063', u'C00097', u'C00135', u'C00037', u'C00078', u'C00035', u'C05764', u'C00015', u'C00016', u'C00112', u'C00065', u'C00116', u'C00152', u'C00131', u'C00064', u'C00001', u'C00075', u'C00123', u'C00459', u'C05899', u'C00082', u'C00249', u'C00255', u'C00286', u'C05890', u'C05894', u'C00043', u'C00041', u'C00047', u'C00188', u'C00044', u'C00049', u'C00183']


### Open and organize expansions

Find all expansions that result in life-specific compounds 

In [5]:
#folder = "5nw", etc... (classification of each analysis)
def multiple_files(folder, n_files):
    #life reaction array
    life_reactions = []

    #files that contain expanded networks
    ex_files = []

    #Open path
    path = "results/formatted/"+folder
    files = os.listdir(path)
    
    for filename in random.sample(files, n_files):
        if ".json" in filename:
            with open(path + "/" + filename) as f:
              datajson = json.load(f)
              #Organize data by each generation
              generations = pd.DataFrame(datajson["generations"])
              generations = generations.transpose()
              generations.index = generations.index.astype(int)
              generations = generations.sort_index()

              #Test
              #generations.head()

              #List of cumulative compounds
              cum_compounds = generations.loc[: , "compounds_cumulative"]
              fc = cum_compounds.tail(1).tolist()

              fc = fc[0]
              #print(fc)

              #find all life-specifc reactions
              life = list(set(fc).intersection(lc))

              #add reactions to the list of total reactions found in previous expansions
              life_reactions.append(life)

              #Get the number of the run if the number of life reactions is greater than 0
              #Purpose - to find the seed compounds in original.dat file
              if (len(life) != 0):
                run = re.findall('\d+', filename)
                ex_files.append(int(run[0]))

    return life_reactions, ex_files


In [6]:
#label compounds from KEGG
#count the number of reactions that occur
def label_and_count(lifereactions, life_reactions):
  #read output KEGG labels and names into a dictionary##
  file = open("links/Freilich09.json")
  file_data = file.read()
  output_labels = json.loads(file_data)

  #NOTE: sum only useful for first time through - after, comment it out
  lifereactions = sum(lifereactions, [])

  #translate KEGG labels into formulas
  for l in lifereactions:
    life_reactions.append(output_labels[l])


  #count the number of times a compound occurs
  life_count = Counter(life_reactions)
  return life_count

### Find distribution of seed sets that result in expanded networks

In [7]:
#access data file
def data_file(file, labels, ex_files):
  path = file

  #list of all generated seeds
  seeds = []
  s = open(path)
  for line in s:
    seeds.append(line.split())
  s.close()

  #find all seed sets that resulted in an expanded network
  life_seeds = []
  lifeseeds = [] #properly labeled version
  for n in ex_files:
    life_seeds.append(seeds[n])
  
  #sum only works first time through
  life_seeds = sum(life_seeds, [])
  
  #translate KEGG labels into formulas
  for l in life_seeds:
    try:
        lifeseeds.append(lb[l])
    except:
        print(l + " doesn't work")

  life_seed_count = Counter(lifeseeds)
  
  return life_seed_count

In [8]:
def analyze(filename, n):
  #analyze the runs stored in a folder
  lifereactions, ex_files = multiple_files(filename, n)
  #count the number of times each compound appears
  life_count = label_and_count(lifereactions, [])
  dat_file = "seeds/rseeds_" + filename + ".txt"
  life_seed_count = data_file(dat_file, lb, ex_files)
 
  return life_seed_count

In [9]:
#analyze data folders
#first analysis
#count_10nw = analyze("10nw", 25)
#count_5nw = analyze("5nw", 25)
#count_5w = analyze("5w", 25)
#count_10w = analyze("10w", 25)
count_10nw_random = analyze("10nw_random_V2", 25)
count_10nw_fixed = analyze("10nw_fixed_V2", 25)

#life_total = count_5nw + count_5w + count_10w + count_10nw
life_total = count_10nw_random + count_10nw_fixed
life_list = []
life_list.append(life_total)

#Loop through 100 random samples (1st is above) to obtain a full sample
for i in range(99):
#     count_5nw = analyze("5nw", 25)
#     count_5w = analyze("5w", 25)
#     count_10w = analyze("10w", 25)
#     count_10nw = analyze("10nw", 25)
    count_10nw_random = analyze("10nw_random", 25)
    count_10nw_fixed = analyze("10nw_fixed", 25)

    #life = count_5nw + count_5w + count_10w + count_10nw
    life = count_10nw_random + count_10nw_fixed
    life_total += life
    life_list.append(life)
    print(str(i) + "\n-----")
    print(life)
    print("-----")
    
print("TOTAL")
print(life_total)

C00238 doesn't work
C12568 doesn't work
C12567 doesn't work
C19316 doesn't work
C19316 doesn't work
C13563 doesn't work
C12244 doesn't work
C13563 doesn't work
C12569 doesn't work
C19572 doesn't work
C12603 doesn't work
C13563 doesn't work
C19316 doesn't work
C00238 doesn't work
C17390 doesn't work
C12569 doesn't work
C12244 doesn't work
C12603 doesn't work
C19316 doesn't work
C19572 doesn't work
C00238 doesn't work
C19316 doesn't work
C12244 doesn't work
C00238 doesn't work
C12568 doesn't work
C12603 doesn't work
C12567 doesn't work
C18606 doesn't work
C12603 doesn't work
C17390 doesn't work
C18606 doesn't work
C00238 doesn't work
C17390 doesn't work
C17390 doesn't work
C12568 doesn't work
C19572 doesn't work
C17390 doesn't work
C12244 doesn't work
C19316 doesn't work
C12568 doesn't work
C18606 doesn't work
C12603 doesn't work
C18606 doesn't work
C18606 doesn't work
C12603 doesn't work
C00238 doesn't work
C18606 doesn't work
C12603 doesn't work
C12569 doesn't work
C13563 doesn't work


C19316 doesn't work
C13563 doesn't work
C19316 doesn't work
C12569 doesn't work
C12603 doesn't work
C17390 doesn't work
C19572 doesn't work
C12568 doesn't work
C12603 doesn't work
C19316 doesn't work
C00238 doesn't work
C12244 doesn't work
C18606 doesn't work
C12244 doesn't work
C12569 doesn't work
C00238 doesn't work
C17390 doesn't work
C12244 doesn't work
C13563 doesn't work
C12567 doesn't work
C17390 doesn't work
C12244 doesn't work
C12244 doesn't work
C12569 doesn't work
C12568 doesn't work
C12244 doesn't work
C12569 doesn't work
C19572 doesn't work
C17390 doesn't work
C19316 doesn't work
C00238 doesn't work
C12244 doesn't work
C19572 doesn't work
C18606 doesn't work
C19316 doesn't work
C18606 doesn't work
C17390 doesn't work
C12569 doesn't work
C18606 doesn't work
C19572 doesn't work
C19572 doesn't work
C12567 doesn't work
C18606 doesn't work
C12603 doesn't work
C00238 doesn't work
C17390 doesn't work
C00238 doesn't work
C18606 doesn't work
C13563 doesn't work
C12244 doesn't work


C12244 doesn't work
C18606 doesn't work
C19572 doesn't work
C12603 doesn't work
C12244 doesn't work
C12603 doesn't work
C17390 doesn't work
C19316 doesn't work
C12568 doesn't work
C19572 doesn't work
C13563 doesn't work
C12568 doesn't work
C13563 doesn't work
C19572 doesn't work
C12567 doesn't work
C12568 doesn't work
C17390 doesn't work
C12567 doesn't work
C19572 doesn't work
C19316 doesn't work
C17390 doesn't work
C12569 doesn't work
C18606 doesn't work
C19316 doesn't work
C18606 doesn't work
C13563 doesn't work
C19316 doesn't work
C12568 doesn't work
C12568 doesn't work
C13563 doesn't work
C00238 doesn't work
C00238 doesn't work
C19572 doesn't work
C13563 doesn't work
C19572 doesn't work
C12603 doesn't work
C19316 doesn't work
C12244 doesn't work
C19316 doesn't work
C19316 doesn't work
C12603 doesn't work
C13563 doesn't work
C19572 doesn't work
C12567 doesn't work
C12569 doesn't work
C00238 doesn't work
C12569 doesn't work
C12569 doesn't work
C13563 doesn't work
C19316 doesn't work


C19572 doesn't work
C12603 doesn't work
C12244 doesn't work
C19316 doesn't work
C17390 doesn't work
C19316 doesn't work
C13563 doesn't work
C17390 doesn't work
C12568 doesn't work
C00238 doesn't work
C12569 doesn't work
C12567 doesn't work
C18606 doesn't work
C17390 doesn't work
C12568 doesn't work
C12603 doesn't work
C18606 doesn't work
C12244 doesn't work
C17390 doesn't work
C12244 doesn't work
C19572 doesn't work
C19572 doesn't work
C17390 doesn't work
C18606 doesn't work
C19316 doesn't work
C13563 doesn't work
C12569 doesn't work
C12567 doesn't work
C18606 doesn't work
C00238 doesn't work
C12567 doesn't work
C17390 doesn't work
C18606 doesn't work
C13563 doesn't work
C12603 doesn't work
C12244 doesn't work
C12567 doesn't work
C12567 doesn't work
C00238 doesn't work
C12567 doesn't work
C00238 doesn't work
C18606 doesn't work
C12244 doesn't work
C12569 doesn't work
C17390 doesn't work
C12569 doesn't work
C12244 doesn't work
C19572 doesn't work
C12569 doesn't work
C13563 doesn't work


C00238 doesn't work
C13563 doesn't work
C12567 doesn't work
C18606 doesn't work
C18606 doesn't work
C12568 doesn't work
C19572 doesn't work
C12568 doesn't work
C19572 doesn't work
C12244 doesn't work
C12603 doesn't work
C17390 doesn't work
C12603 doesn't work
C17390 doesn't work
C19572 doesn't work
C12567 doesn't work
C17390 doesn't work
C12568 doesn't work
C12568 doesn't work
C19316 doesn't work
C12569 doesn't work
C18606 doesn't work
C19316 doesn't work
C00238 doesn't work
C12603 doesn't work
C19316 doesn't work
C13563 doesn't work
C12569 doesn't work
C12244 doesn't work
C18606 doesn't work
C12568 doesn't work
C19572 doesn't work
C19316 doesn't work
C12568 doesn't work
C12603 doesn't work
C12569 doesn't work
C12567 doesn't work
C12244 doesn't work
C17390 doesn't work
C12567 doesn't work
C12244 doesn't work
C17390 doesn't work
C12603 doesn't work
C18606 doesn't work
C12567 doesn't work
C19316 doesn't work
C17390 doesn't work
C00238 doesn't work
C13563 doesn't work
C19316 doesn't work


C12567 doesn't work
C00238 doesn't work
C12569 doesn't work
C18606 doesn't work
C19572 doesn't work
C12567 doesn't work
C19572 doesn't work
C12569 doesn't work
C19572 doesn't work
C00238 doesn't work
C19572 doesn't work
C00238 doesn't work
C12568 doesn't work
C17390 doesn't work
C12244 doesn't work
C17390 doesn't work
C12603 doesn't work
C18606 doesn't work
C17390 doesn't work
C12567 doesn't work
C00238 doesn't work
C19316 doesn't work
C12568 doesn't work
C17390 doesn't work
C00238 doesn't work
C17390 doesn't work
C00238 doesn't work
C12567 doesn't work
C12567 doesn't work
C19572 doesn't work
C13563 doesn't work
C12244 doesn't work
C19572 doesn't work
C12244 doesn't work
C19572 doesn't work
C12569 doesn't work
C12603 doesn't work
C12569 doesn't work
C00238 doesn't work
C12567 doesn't work
C12567 doesn't work
C12244 doesn't work
C12568 doesn't work
C19316 doesn't work
C12603 doesn't work
C12569 doesn't work
C18606 doesn't work
C19316 doesn't work
C13563 doesn't work
C19316 doesn't work


C12603 doesn't work
C18606 doesn't work
C13563 doesn't work
C18606 doesn't work
C12244 doesn't work
C19572 doesn't work
C19316 doesn't work
C00238 doesn't work
C17390 doesn't work
C12569 doesn't work
C18606 doesn't work
C18606 doesn't work
C19572 doesn't work
C12244 doesn't work
C00238 doesn't work
C12568 doesn't work
C18606 doesn't work
C18606 doesn't work
C18606 doesn't work
C19572 doesn't work
C19572 doesn't work
C17390 doesn't work
C12567 doesn't work
C18606 doesn't work
C12569 doesn't work
C12567 doesn't work
C12244 doesn't work
C17390 doesn't work
C12603 doesn't work
C17390 doesn't work
C18606 doesn't work
C19572 doesn't work
C17390 doesn't work
C12567 doesn't work
C12244 doesn't work
C17390 doesn't work
C00238 doesn't work
C12244 doesn't work
C12567 doesn't work
C18606 doesn't work
C00238 doesn't work
C12568 doesn't work
C12567 doesn't work
C17390 doesn't work
C12603 doesn't work
C12567 doesn't work
C13563 doesn't work
C18606 doesn't work
C12567 doesn't work
C00238 doesn't work


C12569 doesn't work
C12244 doesn't work
C12567 doesn't work
C18606 doesn't work
C12568 doesn't work
C18606 doesn't work
C12569 doesn't work
C19316 doesn't work
C18606 doesn't work
C12569 doesn't work
C19316 doesn't work
C12569 doesn't work
C12603 doesn't work
C12569 doesn't work
C19316 doesn't work
C00238 doesn't work
C12603 doesn't work
C18606 doesn't work
C17390 doesn't work
C12568 doesn't work
C19316 doesn't work
C17390 doesn't work
C19316 doesn't work
C18606 doesn't work
C12567 doesn't work
C12603 doesn't work
C18606 doesn't work
C17390 doesn't work
C12603 doesn't work
C18606 doesn't work
C12603 doesn't work
C13563 doesn't work
C12603 doesn't work
C00238 doesn't work
C17390 doesn't work
C12567 doesn't work
C19572 doesn't work
C17390 doesn't work
C18606 doesn't work
C17390 doesn't work
C18606 doesn't work
C19316 doesn't work
C19316 doesn't work
C12603 doesn't work
C12244 doesn't work
C00238 doesn't work
C12568 doesn't work
C18606 doesn't work
C12603 doesn't work
C19572 doesn't work


C18606 doesn't work
C12569 doesn't work
C19572 doesn't work
C12568 doesn't work
C12603 doesn't work
C12244 doesn't work
C12603 doesn't work
C19572 doesn't work
C00238 doesn't work
C17390 doesn't work
C12603 doesn't work
C12569 doesn't work
C12568 doesn't work
C12568 doesn't work
C12244 doesn't work
C13563 doesn't work
C12569 doesn't work
C12568 doesn't work
C13563 doesn't work
C17390 doesn't work
C12568 doesn't work
C00238 doesn't work
C19572 doesn't work
C12244 doesn't work
C12568 doesn't work
C12569 doesn't work
C19316 doesn't work
C17390 doesn't work
C19316 doesn't work
32
-----
Counter({'H2O': 34, 'CO2': 31, 'Ammonia': 30, 'Methane': 29, 'Hydrogen': 29, 'L-Alanine': 12, 'Ethylene oxide': 11, '2,5-Dihydroxypyridine': 10, 'HO-': 10, 'Formaldehyde': 10, 'Ethanolamine': 10, 'Benzene': 9, '1-Butanol': 9, 'Propane-1-ol': 9, 'Ethylene': 8, 'Hydrogen cyanide': 8, 'Propanal': 8, 'Propane': 8, 'Propylene': 8, 'Glycolaldehyde': 8, 'Hydrogen peroxide': 8, 'Ethylene glycol': 8, 'Octane': 8, 'Ox

C13563 doesn't work
C12568 doesn't work
C12569 doesn't work
C12244 doesn't work
C12244 doesn't work
C00238 doesn't work
C17390 doesn't work
C18606 doesn't work
C12603 doesn't work
C19572 doesn't work
C00238 doesn't work
C12569 doesn't work
C12603 doesn't work
C19572 doesn't work
C00238 doesn't work
C12567 doesn't work
C12244 doesn't work
C17390 doesn't work
C19316 doesn't work
C19316 doesn't work
C13563 doesn't work
C17390 doesn't work
C18606 doesn't work
C19572 doesn't work
C13563 doesn't work
C19316 doesn't work
C12244 doesn't work
C19316 doesn't work
C12244 doesn't work
36
-----
Counter({'CO2': 34, 'Ammonia': 31, 'H2O': 30, 'Hydrogen': 28, 'Methane': 27, 'Ethylene glycol': 14, 'HO-': 12, 'Hydrogen sulfide': 12, 'Sodium cation': 11, 'Oxygen': 11, 'Ethylene': 9, 'Diacetyl': 9, 'Benzene': 9, 'HCO3-': 8, '2,5-Dihydroxypyridine': 8, 'Propane': 8, 'Nitric oxide': 8, 'Glycolaldehyde': 8, 'Octane': 8, 'Propylene': 7, 'Propanal': 7, 'CO': 7, 'Ethanolamine': 7, 'Methylamine': 7, 'Acetylene': 

C12567 doesn't work
C12244 doesn't work
C12568 doesn't work
C12244 doesn't work
C00238 doesn't work
C12603 doesn't work
C12244 doesn't work
C00238 doesn't work
C17390 doesn't work
C19572 doesn't work
C12244 doesn't work
C12568 doesn't work
C17390 doesn't work
C12568 doesn't work
C12603 doesn't work
C19572 doesn't work
C12567 doesn't work
C13563 doesn't work
C00238 doesn't work
C19572 doesn't work
C12244 doesn't work
C13563 doesn't work
C13563 doesn't work
C19572 doesn't work
C19572 doesn't work
40
-----
Counter({'H2O': 31, 'Hydrogen': 31, 'Ammonia': 30, 'Methane': 29, 'CO2': 28, 'Acetylene': 13, 'Carbonic acid': 13, 'Hydrogen cyanide': 11, 'Methanol': 11, 'Propylene': 11, 'Formaldehyde': 11, 'Ethanolamine': 11, 'Methylamine': 10, '2,5-Dihydroxypyridine': 9, '1-Butanol': 9, 'CO': 9, 'Ethylene oxide': 9, 'Propane': 8, 'HO-': 8, 'Oxygen': 8, 'Sodium cation': 7, 'HCO3-': 7, 'Butane': 7, 'L-Alanine': 7, 'Octane': 7, 'Diacetyl': 6, 'Propanal': 6, 'Nitric oxide': 6, 'Hydrogen sulfide': 6, 'Et

C12567 doesn't work
C19316 doesn't work
C13563 doesn't work
C12567 doesn't work
C19572 doesn't work
C13563 doesn't work
C12567 doesn't work
C12244 doesn't work
C12568 doesn't work
C12569 doesn't work
C19316 doesn't work
C12244 doesn't work
C00238 doesn't work
C12569 doesn't work
C12567 doesn't work
C12569 doesn't work
C12244 doesn't work
C13563 doesn't work
C18606 doesn't work
C12244 doesn't work
C13563 doesn't work
C12568 doesn't work
C00238 doesn't work
C00238 doesn't work
C12567 doesn't work
C12569 doesn't work
C13563 doesn't work
C12603 doesn't work
C00238 doesn't work
C19316 doesn't work
C13563 doesn't work
C17390 doesn't work
C00238 doesn't work
C12603 doesn't work
C17390 doesn't work
C00238 doesn't work
C12568 doesn't work
C13563 doesn't work
44
-----
Counter({'Ammonia': 34, 'H2O': 30, 'Hydrogen': 30, 'CO2': 29, 'Methane': 28, 'Nitrogen': 13, 'HCO3-': 12, 'Benzene': 11, 'Ethylene glycol': 10, 'Ethylene': 9, 'Propylene': 9, 'Butane': 9, '1-Butanol': 9, 'Formaldehyde': 8, 'Ethylen

C12244 doesn't work
C00238 doesn't work
C18606 doesn't work
C12567 doesn't work
C18606 doesn't work
C19572 doesn't work
C19572 doesn't work
C13563 doesn't work
C17390 doesn't work
C19572 doesn't work
C12568 doesn't work
C13563 doesn't work
C19572 doesn't work
C17390 doesn't work
C17390 doesn't work
C12603 doesn't work
C17390 doesn't work
C12603 doesn't work
C17390 doesn't work
C00238 doesn't work
C19572 doesn't work
C12567 doesn't work
C19316 doesn't work
C13563 doesn't work
C12244 doesn't work
48
-----
Counter({'CO2': 35, 'Methane': 32, 'H2O': 32, 'Ammonia': 31, 'Hydrogen': 30, 'Hydrogen cyanide': 13, '1-Butanol': 13, 'Ethylene oxide': 13, 'Sodium cation': 11, 'Formaldehyde': 11, 'Ethanolamine': 11, 'L-Alanine': 11, 'Methanol': 10, '2,5-Dihydroxypyridine': 10, 'Ethanol': 10, 'Methylamine': 10, 'Carbonic acid': 10, 'HCO3-': 9, 'Propane': 9, 'Hydrogen peroxide': 9, 'Dimethylamine': 9, 'Nitric oxide': 8, 'HO-': 7, 'Propylene': 7, 'Glycolaldehyde': 7, 'Acetylene': 7, 'Benzene': 6, 'Diacet

C12568 doesn't work
C00238 doesn't work
C12567 doesn't work
C12568 doesn't work
C19572 doesn't work
C12244 doesn't work
C12567 doesn't work
C12603 doesn't work
C12568 doesn't work
C12603 doesn't work
C12244 doesn't work
C13563 doesn't work
C12603 doesn't work
C12569 doesn't work
C19316 doesn't work
C19572 doesn't work
C00238 doesn't work
C17390 doesn't work
C12244 doesn't work
C12567 doesn't work
C12568 doesn't work
C13563 doesn't work
C00238 doesn't work
C19572 doesn't work
C12569 doesn't work
C12244 doesn't work
C13563 doesn't work
C18606 doesn't work
C17390 doesn't work
C00238 doesn't work
C17390 doesn't work
C12569 doesn't work
C17390 doesn't work
C12603 doesn't work
C12567 doesn't work
C12569 doesn't work
C12244 doesn't work
C12244 doesn't work
52
-----
Counter({'H2O': 33, 'Ammonia': 30, 'CO2': 29, 'Hydrogen': 28, 'Methane': 26, 'Propane': 11, 'Hydrogen peroxide': 11, 'Propylene': 10, 'Benzene': 10, 'Sodium cation': 8, 'Oxygen': 8, 'Ethanolamine': 8, 'L-Alanine': 8, 'Nitrogen': 8,

C19316 doesn't work
C12603 doesn't work
C19572 doesn't work
C19572 doesn't work
C19316 doesn't work
C18606 doesn't work
C00238 doesn't work
C12568 doesn't work
C00238 doesn't work
C17390 doesn't work
C12569 doesn't work
C13563 doesn't work
C19572 doesn't work
C12568 doesn't work
C00238 doesn't work
C19316 doesn't work
C19316 doesn't work
C12244 doesn't work
C17390 doesn't work
C18606 doesn't work
C12567 doesn't work
C00238 doesn't work
C12244 doesn't work
C19572 doesn't work
C12244 doesn't work
C19316 doesn't work
C12567 doesn't work
C19316 doesn't work
C12568 doesn't work
C12567 doesn't work
C19316 doesn't work
C19316 doesn't work
C12568 doesn't work
C12603 doesn't work
C12569 doesn't work
C13563 doesn't work
C00238 doesn't work
C12603 doesn't work
C00238 doesn't work
C19572 doesn't work
C12567 doesn't work
C13563 doesn't work
C19572 doesn't work
C18606 doesn't work
C12568 doesn't work
C12567 doesn't work
C12569 doesn't work
C00238 doesn't work
C19316 doesn't work
C17390 doesn't work


C12569 doesn't work
C18606 doesn't work
C19316 doesn't work
C17390 doesn't work
C12569 doesn't work
C19316 doesn't work
C12568 doesn't work
C19572 doesn't work
C12567 doesn't work
C13563 doesn't work
C12603 doesn't work
C12567 doesn't work
C17390 doesn't work
C12603 doesn't work
C18606 doesn't work
C18606 doesn't work
C12244 doesn't work
C13563 doesn't work
C17390 doesn't work
C12567 doesn't work
C18606 doesn't work
C13563 doesn't work
C12568 doesn't work
C13563 doesn't work
C19572 doesn't work
C19572 doesn't work
C12568 doesn't work
C18606 doesn't work
C19316 doesn't work
C13563 doesn't work
C19316 doesn't work
C12244 doesn't work
C17390 doesn't work
C12244 doesn't work
C18606 doesn't work
C19316 doesn't work
C12603 doesn't work
C19572 doesn't work
C12603 doesn't work
C17390 doesn't work
C12569 doesn't work
C13563 doesn't work
C18606 doesn't work
C12569 doesn't work
C17390 doesn't work
C12567 doesn't work
C19572 doesn't work
C13563 doesn't work
C12569 doesn't work
C12603 doesn't work


C17390 doesn't work
C00238 doesn't work
C17390 doesn't work
C18606 doesn't work
C12567 doesn't work
C12603 doesn't work
C12569 doesn't work
C12244 doesn't work
C12603 doesn't work
C18606 doesn't work
C12569 doesn't work
C19572 doesn't work
C17390 doesn't work
C12568 doesn't work
C19316 doesn't work
C12603 doesn't work
C12244 doesn't work
C17390 doesn't work
C17390 doesn't work
C12567 doesn't work
C18606 doesn't work
C19572 doesn't work
C12567 doesn't work
C12244 doesn't work
C12568 doesn't work
C13563 doesn't work
C19572 doesn't work
C19572 doesn't work
C12567 doesn't work
C00238 doesn't work
C19572 doesn't work
C17390 doesn't work
C12567 doesn't work
C19572 doesn't work
C18606 doesn't work
C13563 doesn't work
C12244 doesn't work
C12568 doesn't work
C00238 doesn't work
C12603 doesn't work
C12603 doesn't work
C12244 doesn't work
C19572 doesn't work
C18606 doesn't work
C00238 doesn't work
C12569 doesn't work
C17390 doesn't work
C19316 doesn't work
C19572 doesn't work
C12568 doesn't work


C18606 doesn't work
C12603 doesn't work
C00238 doesn't work
C18606 doesn't work
C13563 doesn't work
C12603 doesn't work
C19572 doesn't work
C17390 doesn't work
C12569 doesn't work
C12603 doesn't work
C19572 doesn't work
C12567 doesn't work
C19316 doesn't work
C00238 doesn't work
C19572 doesn't work
C12567 doesn't work
C12603 doesn't work
C17390 doesn't work
C12568 doesn't work
C13563 doesn't work
C00238 doesn't work
C19572 doesn't work
C00238 doesn't work
C18606 doesn't work
C13563 doesn't work
C19572 doesn't work
C12568 doesn't work
C12569 doesn't work
C19316 doesn't work
C18606 doesn't work
C19572 doesn't work
C17390 doesn't work
C12568 doesn't work
C12567 doesn't work
C00238 doesn't work
C18606 doesn't work
C12603 doesn't work
C12603 doesn't work
C12568 doesn't work
C12603 doesn't work
C12567 doesn't work
C19316 doesn't work
C18606 doesn't work
C12569 doesn't work
C00238 doesn't work
C19316 doesn't work
C17390 doesn't work
C18606 doesn't work
C13563 doesn't work
C12567 doesn't work


C19572 doesn't work
C12603 doesn't work
C00238 doesn't work
C19572 doesn't work
C17390 doesn't work
C18606 doesn't work
C12568 doesn't work
C00238 doesn't work
C12567 doesn't work
C12569 doesn't work
C17390 doesn't work
C12569 doesn't work
C12568 doesn't work
C12603 doesn't work
C12569 doesn't work
C18606 doesn't work
C12603 doesn't work
C12567 doesn't work
C18606 doesn't work
C13563 doesn't work
C18606 doesn't work
C19316 doesn't work
C12569 doesn't work
C00238 doesn't work
C17390 doesn't work
C18606 doesn't work
C12603 doesn't work
C12569 doesn't work
C13563 doesn't work
C12568 doesn't work
C13563 doesn't work
C12603 doesn't work
C12567 doesn't work
C12244 doesn't work
C19572 doesn't work
C12567 doesn't work
C19316 doesn't work
C12603 doesn't work
C12568 doesn't work
C00238 doesn't work
C19572 doesn't work
C12567 doesn't work
C00238 doesn't work
C12603 doesn't work
C12244 doesn't work
C12568 doesn't work
C18606 doesn't work
C12603 doesn't work
C12569 doesn't work
C19316 doesn't work


C12244 doesn't work
C19572 doesn't work
C19316 doesn't work
C12603 doesn't work
C19316 doesn't work
C18606 doesn't work
C19316 doesn't work
C17390 doesn't work
C19316 doesn't work
C19316 doesn't work
C12568 doesn't work
C17390 doesn't work
C13563 doesn't work
C00238 doesn't work
C12568 doesn't work
C18606 doesn't work
C12603 doesn't work
C17390 doesn't work
C12569 doesn't work
C12567 doesn't work
C12244 doesn't work
C17390 doesn't work
C19316 doesn't work
C12568 doesn't work
C12603 doesn't work
C19316 doesn't work
C00238 doesn't work
C12244 doesn't work
C12603 doesn't work
C12569 doesn't work
C00238 doesn't work
C12567 doesn't work
C19572 doesn't work
C12603 doesn't work
C17390 doesn't work
C00238 doesn't work
C19316 doesn't work
C12569 doesn't work
C19572 doesn't work
C12244 doesn't work
C19572 doesn't work
C17390 doesn't work
C12567 doesn't work
C12603 doesn't work
C17390 doesn't work
C12569 doesn't work
C12569 doesn't work
C18606 doesn't work
C12568 doesn't work
C18606 doesn't work


C12603 doesn't work
C17390 doesn't work
C12603 doesn't work
C19316 doesn't work
C12568 doesn't work
C12244 doesn't work
C12244 doesn't work
C18606 doesn't work
C19572 doesn't work
C13563 doesn't work
C18606 doesn't work
C12567 doesn't work
C00238 doesn't work
C12244 doesn't work
C17390 doesn't work
C12568 doesn't work
C12569 doesn't work
C12567 doesn't work
C18606 doesn't work
C17390 doesn't work
C12244 doesn't work
C13563 doesn't work
C19316 doesn't work
C12567 doesn't work
C12568 doesn't work
C19316 doesn't work
C17390 doesn't work
C12568 doesn't work
C12603 doesn't work
C12567 doesn't work
C18606 doesn't work
C19316 doesn't work
C12568 doesn't work
C12603 doesn't work
C00238 doesn't work
C19572 doesn't work
C13563 doesn't work
C17390 doesn't work
C12603 doesn't work
C12244 doesn't work
C19572 doesn't work
C12244 doesn't work
C12569 doesn't work
C19572 doesn't work
C18606 doesn't work
C19316 doesn't work
C13563 doesn't work
C12603 doesn't work
C12569 doesn't work
C00238 doesn't work


C19572 doesn't work
C12567 doesn't work
C18606 doesn't work
C19316 doesn't work
C17390 doesn't work
C13563 doesn't work
C19316 doesn't work
C12569 doesn't work
C00238 doesn't work
C17390 doesn't work
C00238 doesn't work
C12569 doesn't work
C00238 doesn't work
C18606 doesn't work
C19572 doesn't work
C19572 doesn't work
C13563 doesn't work
C12567 doesn't work
C12568 doesn't work
C19316 doesn't work
C12244 doesn't work
C12567 doesn't work
C17390 doesn't work
C12244 doesn't work
C12568 doesn't work
C00238 doesn't work
C12568 doesn't work
C19572 doesn't work
C18606 doesn't work
C19316 doesn't work
C12603 doesn't work
C19316 doesn't work
C12568 doesn't work
C12244 doesn't work
C17390 doesn't work
C12569 doesn't work
C18606 doesn't work
C13563 doesn't work
C19316 doesn't work
C17390 doesn't work
C19572 doesn't work
C00238 doesn't work
C18606 doesn't work
C12567 doesn't work
C19572 doesn't work
C12567 doesn't work
C18606 doesn't work
C17390 doesn't work
C12244 doesn't work
C13563 doesn't work


C12569 doesn't work
C12567 doesn't work
C19316 doesn't work
C17390 doesn't work
C17390 doesn't work
C12567 doesn't work
C12244 doesn't work
C12569 doesn't work
C12603 doesn't work
C19572 doesn't work
C18606 doesn't work
C19572 doesn't work
C17390 doesn't work
C19572 doesn't work
C19316 doesn't work
C12569 doesn't work
C17390 doesn't work
C13563 doesn't work
C12567 doesn't work
C19316 doesn't work
C12244 doesn't work
C13563 doesn't work
C12603 doesn't work
C12244 doesn't work
C13563 doesn't work
C17390 doesn't work
C00238 doesn't work
C12567 doesn't work
C13563 doesn't work
C12568 doesn't work
C17390 doesn't work
C00238 doesn't work
C17390 doesn't work
C12567 doesn't work
C19572 doesn't work
C12603 doesn't work
C12568 doesn't work
C12568 doesn't work
C12603 doesn't work
C18606 doesn't work
C12244 doesn't work
C19572 doesn't work
C19316 doesn't work
C17390 doesn't work
C17390 doesn't work
C19316 doesn't work
C12567 doesn't work
C12244 doesn't work
C18606 doesn't work
C13563 doesn't work


C13563 doesn't work
C12567 doesn't work
C18606 doesn't work
C00238 doesn't work
C17390 doesn't work
C19316 doesn't work
C17390 doesn't work
C12568 doesn't work
C12569 doesn't work
C13563 doesn't work
C00238 doesn't work
C12568 doesn't work
C12569 doesn't work
C19572 doesn't work
C13563 doesn't work
C00238 doesn't work
C12568 doesn't work
C17390 doesn't work
C19316 doesn't work
C12603 doesn't work
C19572 doesn't work
C12567 doesn't work
C12603 doesn't work
C12569 doesn't work
C18606 doesn't work
C13563 doesn't work
C19316 doesn't work
C12244 doesn't work
C12603 doesn't work
C00238 doesn't work
C12569 doesn't work
C12568 doesn't work
C00238 doesn't work
C19316 doesn't work
C00238 doesn't work
C12244 doesn't work
C19572 doesn't work
C17390 doesn't work
C12567 doesn't work
C12567 doesn't work
C12567 doesn't work
C12568 doesn't work
C18606 doesn't work
C12568 doesn't work
C19316 doesn't work
C12603 doesn't work
C19316 doesn't work
C18606 doesn't work
C12569 doesn't work
C12568 doesn't work


C12603 doesn't work
C12569 doesn't work
C12603 doesn't work
C12244 doesn't work
C12567 doesn't work
C17390 doesn't work
C12567 doesn't work
C12244 doesn't work
C12569 doesn't work
C12244 doesn't work
C19316 doesn't work
C00238 doesn't work
C12603 doesn't work
C17390 doesn't work
C19572 doesn't work
C12567 doesn't work
C12603 doesn't work
C12569 doesn't work
C12569 doesn't work
C12567 doesn't work
C12244 doesn't work
C17390 doesn't work
C19316 doesn't work
C12569 doesn't work
C17390 doesn't work
C19316 doesn't work
C18606 doesn't work
C12569 doesn't work
C17390 doesn't work
C12603 doesn't work
C12603 doesn't work
C18606 doesn't work
C00238 doesn't work
C17390 doesn't work
C12568 doesn't work
C12568 doesn't work
C12568 doesn't work
C00238 doesn't work
C12244 doesn't work
C00238 doesn't work
C19316 doesn't work
C12603 doesn't work
C17390 doesn't work
C00238 doesn't work
C12569 doesn't work
C00238 doesn't work
C12568 doesn't work
C12567 doesn't work
C12569 doesn't work
C12603 doesn't work


In [11]:
#For each element: have a numpy array of the expansion sizes (for average and 95% confidence intervals)
count = 0
for cpd in life_total:
    count += 1
    print(cpd)
    cpd_array = []
    for l in life_list:
        cpd_array.append(int(l[cpd]))
    cpd_array = np.asarray(cpd_array)
    print("Average: " + str(np.average(cpd_array)))
    print("Confidence interval: " + str(sms.DescrStatsW(cpd_array).tconfint_mean(alpha=0.05))+ "\n")
    
print(count)

Ethylene
Average: 7.02
Confidence interval: (6.544740908452543, 7.495259091547456)

Sodium cation
Average: 6.91
Confidence interval: (6.410972335508798, 7.4090276644912025)

Hydrogen cyanide
Average: 6.94
Confidence interval: (6.446804054016447, 7.433195945983554)

Methanol
Average: 6.85
Confidence interval: (6.387123428148282, 7.312876571851717)

HCO3-
Average: 6.46
Confidence interval: (5.978555496321954, 6.941444503678046)

Diacetyl
Average: 6.45
Confidence interval: (5.963661351451594, 6.936338648548406)

2,5-Dihydroxypyridine
Average: 6.71
Confidence interval: (6.270100824504582, 7.149899175495418)

Propane
Average: 7.77
Confidence interval: (7.298205263552427, 8.241794736447572)

HO-
Average: 7.42
Confidence interval: (6.973919899228234, 7.866080100771766)

Propylene
Average: 7.21
Confidence interval: (6.766499387580785, 7.653500612419215)

Formaldehyde
Average: 7.19
Confidence interval: (6.677914195980376, 7.7020858040196245)

Ethanol
Average: 7.3
Confidence interval: (6.8310418