In [1]:
import pandas as pd
import json
import os
from collections import Counter
import itertools
import re
import csv
import random
import statsmodels.stats.weightstats as sms

import matplotlib.pyplot as mpl
import seaborn as sns

import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import numpy as np

### Read Files

Read the compounds found on Enceladus (Enceladus_Compounds_and_Concentrations), the compounds necessary for life (Freilich), and the files associated with each run.

In [2]:
#Build the seed-set lookup: KEGG compound ID -> label for that compound
with open("Enceladus_Compounds_and_Concentrations.csv") as f2:
  next(f2)  #skip the header line before handing the file to the CSV parser
  reader = csv.reader(f2, skipinitialspace=True)
  #column 0 holds the KEGG ID, column 2 the label stored against it;
  #a repeated ID keeps the value from its last row
  lb = dict((row[0], row[2]) for row in reader)

print(lb)

{'C00011': 'CO2', 'C01407': 'C6H6', 'C00014': 'NH3', 'C00283': 'H2S', 'C00282': 'H2', 'C06548': 'C2H4O', 'C01438': 'CH4', 'C00697': 'N2', 'C06547': 'C2H4', 'C00132': 'CH4O', 'C00067': 'CH2O', 'C11505': 'C3H6', 'C01548': 'C2H2', 'C00469': 'C2H6O', 'C00001': 'H20', 'C20783': 'C3H8', 'C01326': 'HCN', 'C00237': 'CO'}


In [29]:
#Revised KEGG compounds and names
#NOTE: this cell rebinds `lb`, replacing the lookup built from the Enceladus CSV.
#Read the JSON through a context manager so the file handle is closed
#deterministically instead of being left to the garbage collector.
with open("containskegg_and_majorspecies.json") as kegg_json:
    data = json.load(kegg_json)

cpd_name_df = pd.read_csv("accessible_compounds.csv")

#Map each KEGG ID listed under "Contains_KEGGID" to the "Name" of its first
#matching row in accessible_compounds.csv. A compound with no matching row
#still raises IndexError from .iloc[0], exactly as before.
lb = []
for cpd in data["Contains_KEGGID"]:
    df_row = cpd_name_df[cpd_name_df["Compound"] == cpd]
    lb.append((cpd, df_row.iloc[0]["Name"]))

lb = dict(lb)
print(lb)
    

{u'C00027': 'Hydrogen peroxide', u'C12568': 'Potassium hydroxide', u'C18606': 'Potassium bicarbonate', u'C00283': 'Hydrogen sulfide', u'C00282': 'Hydrogen', u'C00007': 'Oxygen', u'C00288': 'HCO3-', u'C00001': 'H2O', u'C01380': 'Ethylene glycol', u'C01387': 'Octane', u'C06548': 'Ethylene oxide', u'C19316': 'Allyl chloride', u'C00741': 'Diacetyl', u'C06142': '1-Butanol', u'C01328': 'HO-', u'C00533': 'Nitric oxide', u'C01438': 'Methane', u'C06547': 'Ethylene', u'C11505': 'Propylene', u'C00238': 'Potassium cation', u'C01548': 'Acetylene', u'C00218': 'Methylamine', u'C00479': 'Propanal', u'C13563': 'Sodium chloride', u'C12603': 'Sodium bicarbonate', u'C01326': 'Hydrogen cyanide', u'C00011': 'CO2', u'C00014': 'Ammonia', u'C00132': 'Methanol', u'C01059': '2,5-Dihydroxypyridine', u'C00067': 'Formaldehyde', u'C21390': 'Butane', u'C20783': 'Propane', u'C19572': 'Silica', u'C12567': 'Magnesium oxide', u'C05979': 'Propane-1-ol', u'C12569': 'Sodium hydroxide', u'C01407': 'Benzene', u'C00266': 'Glyc

In [3]:
#define life-specific compounds
#The keys of the Freilich09 JSON object are KEGG compound IDs; `lc` is the
#collection that multiple_files intersects each expansion with.
freilich = "links/Freilich09.json"
with open(freilich) as fr:
    datajsonfr = json.loads(fr.read())

lc = datajsonfr.keys()
print(lc)

[u'C00025', u'C00024', u'C00148', u'C00105', u'C00362', u'C00020', u'C00360', u'C00008', u'C15672', u'C00006', u'C00005', u'C00004', u'C00003', u'C00002', u'C00144', u'C00079', u'C00364', u'C05980', u'C00458', u'C00399', u'C01050', u'C00407', u'C00239', u'C16221', u'C00748', u'C00054', u'C00055', u'C00350', u'C00641', u'C06040', u'C00073', u'C00062', u'C00234', u'C00063', u'C00097', u'C00135', u'C00037', u'C00078', u'C00035', u'C05764', u'C00015', u'C00016', u'C00112', u'C00065', u'C00116', u'C00152', u'C00131', u'C00064', u'C00001', u'C00075', u'C00123', u'C00459', u'C05899', u'C00082', u'C00249', u'C00255', u'C00286', u'C05890', u'C05894', u'C00043', u'C00041', u'C00047', u'C00188', u'C00044', u'C00049', u'C00183']


### Open and organize expansions

Find all expansions that result in life-specific compounds 

In [10]:
#folder = "5nw", etc... (classification of each analysis)
def multiple_files(folder, n_files, life_compounds=None):
    """Sample expansion runs from a results folder and find their life-specific compounds.

    Parameters
    ----------
    folder : str
        Name of the sub-folder of ``results/formatted/`` to read
        (e.g. "5nw", "10nw_random").
    n_files : int
        Number of run files to draw at random, without replacement.
    life_compounds : iterable of str, optional
        KEGG compound IDs to look for. When omitted, the notebook-level
        ``lc`` is used, which is what the function always did before.

    Returns
    -------
    life_reactions : list of list of str
        One entry per sampled run: the compounds of that run's final
        ``compounds_cumulative`` list that are also in ``life_compounds``
        (order within an entry is arbitrary, it comes from a set).
    ex_files : list of int
        For every run with at least one such compound, the first integer
        found in its file name.

    Raises
    ------
    ValueError
        From ``random.sample`` when the folder holds fewer than ``n_files``
        JSON files.
    """
    if life_compounds is None:
        life_compounds = lc

    #Open path
    path = "results/formatted/" + folder

    #Filter BEFORE sampling: previously the sample was drawn from every
    #directory entry and non-JSON entries were skipped afterwards, so a stray
    #file silently reduced the number of runs analysed. Sorting makes the draw
    #reproducible under a fixed random seed (os.listdir order is arbitrary).
    json_files = sorted(name for name in os.listdir(path) if ".json" in name)

    #life-specific compounds found in each sampled run
    life_reactions = []
    #run numbers of the files that contain expanded networks
    ex_files = []

    for filename in random.sample(json_files, n_files):
        with open(path + "/" + filename) as f:
            datajson = json.load(f)

        #Organize data by each generation; keys are cast to int so that
        #generation 10 sorts after generation 2
        generations = pd.DataFrame(datajson["generations"]).transpose()
        generations.index = generations.index.astype(int)
        generations = generations.sort_index()

        #Cumulative compounds of the last generation
        fc = generations["compounds_cumulative"].iloc[-1]

        #find all life-specific compounds reached by this run
        life = list(set(fc).intersection(life_compounds))
        life_reactions.append(life)

        #Record the run number so the seed compounds can be looked up later
        if life:
            #raw string: '\d' in a plain literal is an invalid escape sequence
            run = re.findall(r'\d+', filename)
            ex_files.append(int(run[0]))

    return life_reactions, ex_files


In [5]:
#label compounds from KEGG
#count the number of reactions that occur
def label_and_count(lifereactions, life_reactions):
  """Translate KEGG IDs through links/Freilich09.json and count the results.

  Parameters
  ----------
  lifereactions : list of list of str
      One list of KEGG IDs per run, as returned by multiple_files. It must be
      nested: an already flattened list is not accepted.
  life_reactions : list
      Accumulator. The translated value of every ID is appended to it IN
      PLACE, so entries already present are counted as well.

  Returns
  -------
  collections.Counter
      Number of occurrences of each entry of ``life_reactions`` after the
      new values have been appended.

  Raises
  ------
  KeyError
      If an ID is not a key of links/Freilich09.json.
  """
  #read output KEGG labels and names into a dictionary; the context manager
  #closes the file, which the previous bare open() never did
  with open("links/Freilich09.json") as label_file:
    output_labels = json.load(label_file)

  #flatten lazily instead of sum(list_of_lists, []), which copies the growing
  #list once per run
  for kegg_id in itertools.chain.from_iterable(lifereactions):
    life_reactions.append(output_labels[kegg_id])

  #count the number of times each entry occurs
  return Counter(life_reactions)

### Find distribution of seed sets that result in expanded networks

In [15]:
#access data file
def data_file(file, labels, ex_files):
  """Count the labelled seed compounds of the selected runs.

  Parameters
  ----------
  file : str
      Path of the seed file. Each line is split on whitespace into one seed
      set; line k (0-based) is the seed set looked up for run number k.
  labels : dict or None
      Maps a seed identifier to the label to count. ``None`` falls back to
      the notebook-level ``lb``, the lookup this function used before it
      honoured its ``labels`` argument.
  ex_files : iterable of int
      Run numbers whose seed sets should be counted.

  Returns
  -------
  collections.Counter
      Occurrences of each label over all selected seed sets.

  Raises
  ------
  IndexError
      If a run number has no corresponding line in the seed file.
  """
  if labels is None:
    labels = lb

  #list of all generated seeds; the with-block closes the file even on error
  with open(file) as seed_file:
    seeds = [line.split() for line in seed_file]

  #translate the seeds of every selected run into labels
  lifeseeds = []
  for n in ex_files:
    for seed in seeds[n]:
      try:
        #use the mapping that was passed in, not the global `lb`
        lifeseeds.append(labels[seed])
      except KeyError:
        #only a missing label is expected here; the former bare `except:`
        #also hid genuine errors
        print(seed + " doesn't work")

  return Counter(lifeseeds)

In [12]:
def analyze(filename, n):
  """Sample ``n`` runs of one analysis folder and count the seeds of the successful ones.

  Parameters
  ----------
  filename : str
      Name of the analysis folder (e.g. "10nw_random"). It selects both the
      results folder read by multiple_files and the matching seed file
      ``seeds/rseeds_<filename>.txt``.
  n : int
      Number of runs to sample.

  Returns
  -------
  collections.Counter
      Whatever data_file returns for the runs that multiple_files reported
      as containing life-specific compounds.
  """
  #analyze the runs stored in a folder; only the run numbers are needed here.
  #The former label_and_count call re-read a JSON file on every invocation
  #and its result was never used, so it has been dropped.
  _, ex_files = multiple_files(filename, n)

  dat_file = "seeds/rseeds_" + filename + ".txt"
  return data_file(dat_file, lb, ex_files)

In [30]:
#analyze data folders
#An earlier analysis sampled the "5nw", "5w", "10w" and "10nw" folders instead;
#the current one uses the two 10nw variants below.
N_SAMPLES = 100       #number of independent random samples to draw
RUNS_PER_FOLDER = 25  #runs sampled from each folder for one sample

#first sample
count_10nw_random = analyze("10nw_random", RUNS_PER_FOLDER)
count_10nw_fixed = analyze("10nw_fixed", RUNS_PER_FOLDER)
life = count_10nw_random + count_10nw_fixed

#Keep the running total in its OWN Counter. Previously the first sample and
#the total were the same object; where Counter's += works in place (Python 3)
#that turns life_list[0] into the grand total and corrupts the per-sample
#statistics computed from life_list.
life_list = [life]
life_total = Counter(life)

#Loop through the remaining random samples (1st is above) to obtain a full sample
for i in range(N_SAMPLES - 1):
    count_10nw_random = analyze("10nw_random", RUNS_PER_FOLDER)
    count_10nw_fixed = analyze("10nw_fixed", RUNS_PER_FOLDER)

    life = count_10nw_random + count_10nw_fixed
    life_total += life
    life_list.append(life)
    print(str(i) + "\n-----")
    print(life)
    print("-----")

print("TOTAL")
print(life_total)

0
-----
Counter({'Ammonia': 31, 'H2O': 31, 'CO2': 29, 'Hydrogen': 28, 'Methane': 27, 'Propylene': 15, 'Magnesium oxide': 11, 'Formaldehyde': 11, 'Oxygen': 11, 'Glycolaldehyde': 11, 'Potassium cation': 10, 'L-Alanine': 10, 'Polybutene': 9, 'Sodium hydroxide': 9, 'Nitric oxide': 9, 'CO': 9, '3-Buten-1-amine': 9, 'Ethylene': 8, 'Methanol': 8, '2,5-Dihydroxypyridine': 8, 'Hydrogen peroxide': 8, 'Carbonic acid': 8, 'HO-': 7, 'Ethanol': 7, '1-Butanol': 7, 'Silica': 7, 'Ethylene oxide': 7, 'Sodium bicarbonate': 6, 'Acetylene': 6, 'HCO3-': 6, 'Nitrogen': 6, 'Ethylene glycol': 6, 'Propane-1-ol': 6, 'Potassium hydroxide': 5, 'Sodium cation': 5, 'Hydrogen cyanide': 5, 'Sodium chloride': 5, 'Diacetyl': 5, 'Propane': 5, 'Methylamine': 5, 'Allyl chloride': 5, 'Dimethylamine': 5, 'Octane': 5, 'Butane': 4, 'Benzene': 4, 'Ethanolamine': 3, 'Propanal': 3, 'Potassium bicarbonate': 3, 'Hydrogen sulfide': 2})
-----
1
-----
Counter({'Ammonia': 34, 'CO2': 33, 'Hydrogen': 30, 'H2O': 29, 'Methane': 27, 'Allyl 

10
-----
Counter({'H2O': 31, 'Methane': 29, 'CO2': 28, 'Hydrogen': 28, 'Ammonia': 27, 'HO-': 11, 'Formaldehyde': 11, 'Sodium chloride': 10, 'Potassium bicarbonate': 10, 'Benzene': 10, 'Acetylene': 10, 'Octane': 10, '2,5-Dihydroxypyridine': 9, '1-Butanol': 9, 'Hydrogen peroxide': 9, 'Potassium cation': 9, 'Polybutene': 8, 'Hydrogen cyanide': 8, 'Propanal': 8, '3-Buten-1-amine': 8, 'Oxygen': 8, 'Ethylene oxide': 8, 'Ethylene glycol': 8, 'Magnesium oxide': 7, 'Diacetyl': 7, 'Hydrogen sulfide': 7, 'Methylamine': 7, 'Sodium hydroxide': 7, 'Nitrogen': 7, 'Propane-1-ol': 7, 'Methanol': 6, 'HCO3-': 6, 'Propylene': 6, 'Butane': 6, 'Glycolaldehyde': 6, 'L-Alanine': 6, 'Potassium hydroxide': 5, 'Sodium cation': 5, 'Silica': 5, 'Propane': 5, 'Nitric oxide': 5, 'Ethanol': 5, 'Allyl chloride': 5, 'Ethanolamine': 5, 'Dimethylamine': 5, 'Sodium bicarbonate': 4, 'Carbonic acid': 4, 'CO': 3, 'Ethylene': 2})
-----
11
-----
Counter({'CO2': 31, 'Ammonia': 31, 'H2O': 29, 'Methane': 28, 'Hydrogen': 27, 'Magn

20
-----
Counter({'Methane': 31, 'H2O': 30, 'CO2': 30, 'Hydrogen': 30, 'Ammonia': 29, 'Carbonic acid': 12, 'Nitrogen': 11, 'Acetylene': 10, 'Ethylene glycol': 10, 'Potassium hydroxide': 9, 'Sodium cation': 9, 'Hydrogen sulfide': 9, 'Potassium cation': 9, 'Sodium chloride': 8, 'Silica': 8, 'HO-': 8, 'Formaldehyde': 8, '3-Buten-1-amine': 8, 'Nitric oxide': 8, 'Oxygen': 8, 'Allyl chloride': 8, 'L-Alanine': 8, 'Magnesium oxide': 7, 'Sodium hydroxide': 7, 'Sodium bicarbonate': 7, 'CO': 7, 'Ethanol': 7, 'Glycolaldehyde': 7, 'Polybutene': 6, 'Methanol': 6, 'Propane': 6, 'Propylene': 6, '1-Butanol': 6, 'Benzene': 6, 'Ethylene oxide': 6, 'HCO3-': 6, 'Hydrogen peroxide': 6, 'Ethylene': 5, 'Diacetyl': 5, 'Propanal': 5, 'Butane': 5, 'Ethanolamine': 5, 'Propane-1-ol': 5, 'Hydrogen cyanide': 4, 'Methylamine': 4, '2,5-Dihydroxypyridine': 4, 'Dimethylamine': 4, 'Octane': 4, 'Potassium bicarbonate': 3})
-----
21
-----
Counter({'Methane': 31, 'Ammonia': 31, 'H2O': 28, 'Hydrogen': 28, 'CO2': 27, 'Potassi

30
-----
Counter({'H2O': 32, 'Ammonia': 32, 'CO2': 31, 'Methane': 30, 'Hydrogen': 30, 'Formaldehyde': 11, 'Ethylene glycol': 11, 'Diacetyl': 10, 'Ethanol': 10, '3-Buten-1-amine': 10, 'Sodium hydroxide': 10, 'Magnesium oxide': 9, 'Sodium cation': 9, 'Hydrogen cyanide': 9, 'CO': 9, 'L-Alanine': 9, 'Ethylene oxide': 9, 'Hydrogen peroxide': 9, 'Carbonic acid': 9, 'Propane-1-ol': 9, 'Silica': 8, 'Sodium bicarbonate': 8, 'Methylamine': 8, 'Hydrogen sulfide': 8, 'Dimethylamine': 8, 'Nitric oxide': 7, '1-Butanol': 7, 'Allyl chloride': 7, 'Propanal': 7, 'Acetylene': 7, 'Octane': 7, 'Potassium hydroxide': 6, '2,5-Dihydroxypyridine': 6, 'Propane': 6, 'HO-': 6, 'Propylene': 6, 'Oxygen': 6, 'Glycolaldehyde': 6, 'Benzene': 6, 'Nitrogen': 6, 'HCO3-': 5, 'Butane': 5, 'Ethanolamine': 5, 'Ethylene': 4, 'Methanol': 4, 'Potassium bicarbonate': 4, 'Polybutene': 3, 'Sodium chloride': 3, 'Potassium cation': 3})
-----
31
-----
Counter({'Hydrogen': 31, 'Ammonia': 31, 'CO2': 29, 'H2O': 29, 'Methane': 27, '3-But

40
-----
Counter({'H2O': 30, 'CO2': 30, 'Hydrogen': 30, 'Methane': 28, 'Ammonia': 28, 'Propylene': 15, 'Potassium hydroxide': 13, 'Oxygen': 13, 'Magnesium oxide': 12, 'L-Alanine': 11, 'Sodium hydroxide': 11, 'HO-': 10, 'CO': 10, 'Glycolaldehyde': 10, 'Allyl chloride': 10, 'Nitrogen': 10, 'Formaldehyde': 9, '3-Buten-1-amine': 9, 'Hydrogen sulfide': 9, 'Benzene': 9, 'Dimethylamine': 9, 'Hydrogen peroxide': 9, 'Ethylene glycol': 9, 'Hydrogen cyanide': 8, '2,5-Dihydroxypyridine': 8, 'Ethanol': 8, 'Ethylene oxide': 8, 'Octane': 8, 'Polybutene': 7, 'HCO3-': 7, 'Ethanolamine': 7, 'Propanal': 7, 'Acetylene': 7, 'Diacetyl': 6, 'Methylamine': 6, 'Potassium bicarbonate': 6, 'Sodium cation': 5, 'Methanol': 5, 'Butane': 5, 'Carbonic acid': 5, 'Sodium chloride': 4, 'Nitric oxide': 4, '1-Butanol': 4, 'Silica': 4, 'Potassium cation': 4, 'Propane-1-ol': 4, 'Ethylene': 3, 'Propane': 3, 'Sodium bicarbonate': 3})
-----
41
-----
Counter({'Ammonia': 32, 'Methane': 29, 'H2O': 28, 'CO2': 28, 'Hydrogen': 28, '

50
-----
Counter({'Ammonia': 32, 'CO2': 31, 'Hydrogen': 31, 'H2O': 29, 'Methane': 28, 'Ethanolamine': 12, 'Methylamine': 11, 'L-Alanine': 11, 'Hydrogen peroxide': 11, 'Sodium chloride': 10, 'Diacetyl': 10, 'Propylene': 10, 'Polybutene': 9, 'Sodium cation': 9, 'Sodium bicarbonate': 9, '3-Buten-1-amine': 9, 'Butane': 9, 'Potassium cation': 9, 'Hydrogen sulfide': 9, 'Propanal': 9, 'Ethylene glycol': 9, 'Propane-1-ol': 9, '2,5-Dihydroxypyridine': 8, 'Potassium bicarbonate': 8, 'Allyl chloride': 8, 'Silica': 8, 'Benzene': 8, 'Dimethylamine': 8, 'Carbonic acid': 8, 'Ethylene': 7, 'Hydrogen cyanide': 7, 'Ethanol': 7, 'Oxygen': 7, 'Magnesium oxide': 6, 'Potassium hydroxide': 6, 'HO-': 6, 'Formaldehyde': 6, 'Ethylene oxide': 6, 'Nitrogen': 6, 'Octane': 6, 'Propane': 5, 'CO': 5, 'Acetylene': 5, 'Sodium hydroxide': 5, 'HCO3-': 4, 'Nitric oxide': 4, '1-Butanol': 4, 'Methanol': 3, 'Glycolaldehyde': 3})
-----
51
-----
Counter({'Hydrogen': 33, 'Ammonia': 32, 'H2O': 31, 'Methane': 30, 'CO2': 28, 'Sodi

60
-----
Counter({'H2O': 34, 'Methane': 30, 'CO2': 28, 'Hydrogen': 28, 'Ammonia': 28, 'Carbonic acid': 15, 'HO-': 13, 'Dimethylamine': 12, 'Nitrogen': 12, 'Octane': 12, 'Polybutene': 11, 'Hydrogen cyanide': 11, 'Propanal': 11, 'Sodium hydroxide': 11, 'CO': 10, 'HCO3-': 9, 'Propylene': 9, 'Benzene': 9, '1-Butanol': 9, 'Ethanolamine': 9, 'Ethylene glycol': 9, 'Ethylene': 8, 'Formaldehyde': 8, '3-Buten-1-amine': 8, 'Potassium cation': 8, 'Glycolaldehyde': 8, 'Methylamine': 8, 'Potassium hydroxide': 7, 'Propane': 7, 'Butane': 7, 'L-Alanine': 7, 'Potassium bicarbonate': 7, 'Sodium bicarbonate': 6, 'Oxygen': 6, 'Hydrogen sulfide': 6, 'Allyl chloride': 6, '2,5-Dihydroxypyridine': 6, 'Magnesium oxide': 5, 'Sodium chloride': 5, 'Methanol': 5, 'Ethylene oxide': 5, 'Propane-1-ol': 5, 'Sodium cation': 4, 'Silica': 4, 'Acetylene': 4, 'Diacetyl': 3, 'Ethanol': 3, 'Hydrogen peroxide': 3, 'Nitric oxide': 1})
-----
61
-----
Counter({'CO2': 31, 'Hydrogen': 31, 'Methane': 30, 'H2O': 30, 'Ammonia': 30, 'D

70
-----
Counter({'Ammonia': 34, 'Hydrogen': 33, 'CO2': 29, 'H2O': 28, 'Methane': 27, 'Diacetyl': 12, 'Octane': 12, 'HO-': 11, 'Benzene': 11, 'Dimethylamine': 11, 'Carbonic acid': 10, 'Acetylene': 10, '2,5-Dihydroxypyridine': 10, 'Nitrogen': 10, 'Magnesium oxide': 9, 'Silica': 9, 'Ethanol': 9, 'Hydrogen peroxide': 9, 'Propane-1-ol': 9, 'Sodium bicarbonate': 8, '1-Butanol': 8, 'L-Alanine': 8, 'Sodium hydroxide': 8, 'Propanal': 7, 'Propylene': 7, 'Ethanolamine': 7, 'Methylamine': 7, 'Ethylene': 6, 'Polybutene': 6, 'Hydrogen cyanide': 6, 'Sodium chloride': 6, 'Methanol': 6, 'HCO3-': 6, 'Propane': 6, 'Nitric oxide': 6, '3-Buten-1-amine': 6, 'Oxygen': 6, 'Hydrogen sulfide': 6, 'Ethylene oxide': 6, 'Potassium cation': 6, 'Sodium cation': 5, 'Formaldehyde': 5, 'Butane': 5, 'Allyl chloride': 5, 'Potassium hydroxide': 4, 'Glycolaldehyde': 4, 'Potassium bicarbonate': 4, 'Ethylene glycol': 4, 'CO': 3})
-----
71
-----
Counter({'CO2': 35, 'H2O': 33, 'Methane': 33, 'Hydrogen': 33, 'Ammonia': 29, 'Pr

80
-----
Counter({'Ammonia': 33, 'H2O': 30, 'Methane': 29, 'CO2': 29, 'Hydrogen': 27, 'Ethylene glycol': 12, 'Dimethylamine': 12, 'Butane': 10, 'Methylamine': 10, 'Hydrogen peroxide': 10, 'Polybutene': 9, 'CO': 9, 'Carbonic acid': 9, 'Potassium cation': 9, 'Propane': 8, 'Benzene': 8, 'Glycolaldehyde': 8, 'Hydrogen sulfide': 8, 'Allyl chloride': 8, 'Magnesium oxide': 7, 'Potassium hydroxide': 7, 'Hydrogen cyanide': 7, 'Potassium bicarbonate': 7, 'Sodium bicarbonate': 7, 'Propylene': 7, 'Formaldehyde': 7, 'Ethanolamine': 7, '2,5-Dihydroxypyridine': 7, 'Acetylene': 7, 'Sodium cation': 6, 'HO-': 6, 'Nitric oxide': 6, '1-Butanol': 6, 'Oxygen': 6, 'Ethylene': 5, 'Sodium chloride': 5, 'Methanol': 5, 'Sodium hydroxide': 5, 'Diacetyl': 5, 'Propanal': 5, 'Ethanol': 5, '3-Buten-1-amine': 5, 'HCO3-': 5, 'Nitrogen': 5, 'Octane': 5, 'Propane-1-ol': 5, 'Silica': 4, 'L-Alanine': 4, 'Ethylene oxide': 4})
-----
81
-----
Counter({'H2O': 34, 'Ammonia': 31, 'Methane': 30, 'Hydrogen': 28, 'CO2': 27, 'Ethano

90
-----
Counter({'Hydrogen': 31, 'H2O': 30, 'CO2': 29, 'Methane': 28, 'Ammonia': 28, 'Carbonic acid': 13, 'Propane': 12, 'L-Alanine': 12, 'Sodium cation': 11, 'Potassium cation': 11, 'Potassium hydroxide': 10, 'HO-': 10, 'Oxygen': 10, 'Acetylene': 10, 'Ethylene oxide': 10, 'Dimethylamine': 10, 'Ethylene': 9, 'Ethanolamine': 9, 'Potassium bicarbonate': 9, 'Butane': 9, 'Hydrogen sulfide': 9, 'Nitrogen': 9, 'Propylene': 8, 'Benzene': 8, 'Silica': 8, 'Hydrogen cyanide': 7, 'Propanal': 7, 'CO': 7, 'Glycolaldehyde': 7, 'Sodium hydroxide': 7, 'Propane-1-ol': 7, 'Methanol': 6, 'Methylamine': 6, 'Hydrogen peroxide': 6, 'Magnesium oxide': 5, 'Polybutene': 5, '2,5-Dihydroxypyridine': 5, 'Formaldehyde': 5, '3-Buten-1-amine': 5, '1-Butanol': 5, 'Ethanol': 5, 'Allyl chloride': 5, 'Ethylene glycol': 5, 'Sodium chloride': 4, 'Diacetyl': 4, 'Sodium bicarbonate': 4, 'Nitric oxide': 4, 'Octane': 4, 'HCO3-': 2})
-----
91
-----
Counter({'Hydrogen': 32, 'CO2': 31, 'Ammonia': 29, 'H2O': 28, 'Methane': 27, '

In [31]:
#Per compound: mean count over all samples and the 95% confidence interval of that mean
for cpd in life_total:
    print(cpd)
    #one value per sample in life_list; a Counter reports 0 for a compound it never saw
    cpd_array = np.asarray([int(sample[cpd]) for sample in life_list])
    mean_text = str(np.average(cpd_array))
    print("Average: " + mean_text)
    interval = sms.DescrStatsW(cpd_array).tconfint_mean(alpha=0.05)
    print("Confidence interval: " + str(interval) + "\n")

Ethylene
Average: 6.89
Confidence interval: (6.389539922866823, 7.390460077133176)

Magnesium oxide
Average: 7.6
Confidence interval: (7.113150545041968, 8.086849454958031)

Polybutene
Average: 6.89
Confidence interval: (6.436216826888917, 7.343783173111082)

Potassium hydroxide
Average: 6.36
Confidence interval: (5.882620211918406, 6.837379788081595)

Sodium cation
Average: 7.07
Confidence interval: (6.573240763738481, 7.56675923626152)

Hydrogen cyanide
Average: 7.14
Confidence interval: (6.716005726133075, 7.563994273866925)

Sodium chloride
Average: 6.19
Confidence interval: (5.700954058168288, 6.6790459418317125)

Methanol
Average: 6.98
Confidence interval: (6.471589076239621, 7.48841092376038)

Sodium hydroxide
Average: 7.2
Confidence interval: (6.705049332428365, 7.694950667571636)

Diacetyl
Average: 6.17
Confidence interval: (5.693007744620132, 6.6469922553798675)

Ethanolamine
Average: 7.4
Confidence interval: (6.89786997927567, 7.90213002072433)

Propanal
Average: 6.44
Confid