In [1]:
from datetime import datetime # to retrieve the current time and data
import math # how to use math.pow(value, power)
import matplotlib.pyplot as plt # generate plots
import numpy as np
import os # to join strings into directory paths
import pandas as pd # dataframes
import random # generate random numbers
import statistics # to calculate the standard deviation
from scipy import stats # p value calculation - http://python-ds.com/python-p-value
from scipy.optimize import curve_fit


In [2]:
original_measurement_filename = 'original_measurements.txt'
well_description_file = "each_well_description.txt"

spore_count_list = [.1, 1, 10, 100]
alanine_list = [0, 1]


In [3]:
def is_number(a):
    """
    # will be True also for 'NaN'
    """

    try:
        number = float(a)
        return("yes")
    
    except ValueError:
        return("no")


In [4]:
def list_all_well_names():
    """
    Read and sort all the wellnames from the metadata file
    """
    
    # Initialize list of all wellnames 
    list_of_well_names = []
    
    # Open the file describing each well used in the experiment
    with open(well_description_file,'r') as ReadFile:    
        for row in ReadFile:
            row = row.split(",")
            
            # If the row of the file begin with a number
            # Add the second item in the comma separated list to a running list
            a = is_number(row[0]) 
            if a == "yes":
                list_of_well_names.append(row[1])
                
    
    # Return the list of well names
    return(list_of_well_names)
                    

In [5]:
def read_original_measurements():
    """
    
    """
    measurements = []
    readings = []
    
    with open(original_measurement_filename,'r') as ReadFile: 
        
        for row in ReadFile:
            
            row = row.split("\t")
             
            for item in row: 
                
                a = is_number(item)
                    
                if a == "yes":
                    measurements.append(float(item))
                    
                if ":" in str(item):
                    measurements.append(item)
                    
    return(measurements)


In [6]:
def build_dataframe_of_measurements():
    """
    
    """
    measurements = read_original_measurements()
    
    times = []
    readings = []
    for i in range(len(measurements)):
        
        if ":" in str(measurements[i]):
            
            time_clock = measurements[i]
            time_split = time_clock.split(":")
            
            if len(time_split) == 2:
                time_hour = float(time_split[0])*1/60 + float(time_split[1])*1/60*1/60 
            
            if len(time_split) == 3:
                time_hour = float(time_split[0]) + float(time_split[1])*1/60 + float(time_split[2])*1/60*1/60 
            
            times.append(time_hour)
            
            well_readings = [time_hour]
            
            # well_readings = measurements[i+1:i+98]
            
            for item in measurements[i+1:i+98]:
                well_readings.append(item)
            
            readings.append(well_readings)
        
    df = pd.DataFrame(readings)
    
    column_names = ['Time', 'Temperature']
    list_of_well_names = list_all_well_names()
    for well_name in list_of_well_names:
        column_names.append(well_name)
        
    df.columns = column_names

    return(df)

In [7]:
def calculate_coefficient_of_well(time, measurements):
    """
    
    """
    
    coefficient_list = []
    for i in range(len(time)):
        
        if time[i] >= 10 and time[i] <= 20:
        
            coefficient = time[i] * math.log( measurements[i] / min(measurements), 10)
            coefficient_list.append(coefficient)
    
    coefficient_avg = sum(coefficient_list) /len(coefficient_list)
    
    return coefficient_avg

In [8]:
def retrieve_well_description(well_number):
    """
    From the well number, 
    refer to a refereence file also saved in the program folder,
    return the well name, spore count, and alanine molarity
    """
    
     
    well_name, spore_count, alanine_mM  = 0, 0, 0
    
    with open(well_description_file,'r') as ReadFile: 
        
        for row in ReadFile:
            row = row.split(",")
            
            a = is_number(row[0]) 
            
            if a == "yes":
                if int(row[0]) == int(well_number):
                    
                    well_name = row[1]
                    spore_count = float(row[2])
                    alanine_mM = float(row[3])    
    
    return well_name, spore_count, alanine_mM 
    

In [9]:
def retrieve_well_description(well_number):
    """
    From the well number, 
    refer to a refereence file also saved in the program folder,
    return the well name, spore count, and alanine molarity
    """
    
     
    well_name, spore_count, alanine_mM  = 0, 0, 0
    
    with open(well_description_file,'r') as ReadFile: 
        
        for row in ReadFile:
            row = row.split(",")
            
            a = is_number(row[0]) 
            
            if a == "yes":
                if int(row[0]) == int(well_number):
                    
                    well_name = row[1]
                    spore_count = row[2]
                    alanine_mM = row[3]    
    
    return well_name, spore_count, alanine_mM 
    

In [10]:
def assign_color_experiment_parameters(well_num):
    """
    
    """
    color_set = [1,1,1]
    
    well_name, spore_count, alanine_mM = retrieve_well_description(well_num)
    
    # Adjust color for spore count variable
    if float(spore_count) == 100: 
        color_set[1] = 0.05
        
    elif float(spore_count) == 10: 
        color_set[1] = 0.3
    
    elif float(spore_count) == 1: 
        color_set[1] = 0.6
        
    elif float(spore_count) == .1: 
        color_set[1] = 0.95
        
    # Adjust color for alanine variable
    if float(alanine_mM) == 1: 
        color_set[0] = 0.95
        color_set[2] = 0.05

    else: 
        color_set[0] = 0.05
        color_set[2] = 0.95
            
    return(color_set)
    

In [11]:
def build_dataframe_of_coefficients(df_measurements):
    """
    
    """
    
    df_row = []
    
    for i in range(len(df_measurements.iloc[0,:])):
        if i >= 2: 
            coefficient = calculate_coefficient_of_well(df_measurements.iloc[:,0], df_measurements.iloc[:,i])
        
            well_name, spore_count, alanine_mM = retrieve_well_description(i+1)
            
            description_and_coefficients = [well_name, spore_count, alanine_mM, coefficient]
            
            df_row.append(description_and_coefficients)
            
    
    df_coefficients = pd.DataFrame(df_row)
    df_coefficients.columns = ["Well Name", "Spore Count (mln)", "Alanine (mM)", "Coefficient (1/hr)"]
    
    return df_coefficients

In [12]:
def plot_coefficients(df_coefficients):
    """
    
    """
    # Initiate a new figure  
    fig_rows, fig_columns, subplot_number = 2, 2, 0
    fig = plt.figure(figsize=(fig_columns*8, fig_rows*6))
         
   
    spore_count = df_coefficients.iloc[:,1]
    alanine_count = df_coefficients.iloc[:,2]
    coefficient = df_coefficients.iloc[:,3]
            
    subplot_number += 1
    plt.subplot(fig_rows, fig_columns, subplot_number)
    for alanine in alanine_list: 
        summary_spore, summary_coefficient = [], []
        for i in range(len(spore_count)):
            if float(alanine) == float(alanine_count[i]):
                summary_coefficient.append(coefficient[i])
                summary_spore.append(float(spore_count[i]))
        plt.scatter(summary_spore, summary_coefficient, label = str(str(alanine) + " Alanine (mM)"))
      
    plt.xscale('log')
    plt.xlim([.005, 150])
    plt.xlabel("Spore Count (mln)")
    plt.ylabel("Growth Coefficients (1/hr)")
    plt.title("The Effect of Alanine Concentration")
    plt.legend()
    
    
    subplot_number += 1
    plt.subplot(fig_rows, fig_columns, subplot_number)
    for alanine in alanine_list: 
        summary_spore, summary_coefficient = [], []
        
        for spore in spore_count_list: 
        
            for i in range(len(spore_count)):
            
                if float(alanine) == float(alanine_count[i]) and float(spore) == float(spore_count[i]):
                    summary_coefficient.append(coefficient[i])
                    summary_spore.append(spore_count[i])
        
            plt.scatter(summary_spore, summary_coefficient, label = str(str(alanine) + " Alanine (mM)"))
            # avg = sum(summary_coefficient) / len(summary_coefficient)
            # plt.scatter(, avg, label = str("Avg" + str(round(avg,4))), s = 120, marker = 'X')
    
    plt.xscale('log')
    plt.xlim([.005, 150])
    plt.xlabel("Spore Count (mln)")
    plt.ylabel("Growth Coefficients (1/hr)")
    plt.title("The Effect of Alanine Concentration")
    # plt.legend()
    
    subplot_number += 1
    plt.subplot(fig_rows, fig_columns, subplot_number)
    for alanine in alanine_list: 
        summary_spore, summary_coefficient = [], []
        
        for spore in spore_count_list: 
        
            for i in range(len(spore_count)):
            
                if float(alanine) == float(alanine_count[i]) and float(spore) == (spore_count[i]):
                    summary_coefficient.append(coefficient[i])
                    summary_spore.append(spore_count[i])
        
            plt.boxplot(summary_coefficient, spore_count_list.index(spore))
           
    # plt.xscale('log')
    # plt.xlim([.005, 150])
    plt.xlabel("Spore Count (mln)")
    plt.ylabel("Growth Coefficients (1/hr)")
    plt.title("The Effect of Alanine Concentration")
    # plt.legend()
    plt.show()

                

In [13]:
def plot_control_group(df_measurements, df_coefficients, i, group_type):
    """
    
    """
    well_name, spore_count, alanine_mM = retrieve_well_description(i+1)
     
    if group_type != "same": 
            if float(alanine_mM) > 0: 
                alanine_mM = 0 

            else: 
                alanine_mM = 1
                
    color_set = [1, .95, .95]
    if float(alanine_mM) == 0: 
        color_set = [.9, .95, 1]
                    
    description = ( str(spore_count) + '  mln Spores + '+ str(alanine_mM) + ' mM Alanine')
  
    
    time_all, well_readings_all = [], []
    
    for j in range(len(df_coefficients.iloc[:,0])):
                
        spore = df_coefficients.iloc[j,1]
        alanine = df_coefficients.iloc[j,2]
                
        if float(spore) == float(spore_count) and float(alanine) == float(alanine_mM):
            
            time = df_measurements.iloc[:,0]
            well_readings = df_measurements.iloc[:,j+2]
            
            for item in time:
                time_all.append(item)
            
            for item in well_readings:
                well_readings_all.append(item)
        
    plt.scatter(time_all, well_readings_all, c = color_set, label = description)
        

In [14]:
def calculate_best_fit_line(df_measurements, df_coefficients, i):
    """
    
    """
    
    time = df_measurements.iloc[:,0]
    well_readings = df_measurements.iloc[:,i]
    
    well_name, spore_count, alanine_mM = retrieve_well_description(i+1)
    description = ( str(spore_count) + '  mln Spores + '+ str(alanine_mM) + ' mM Alanine')
  
    growth_coefficient = df_coefficients.iloc[i-2,3]
    xx = np.linspace(min(time), max(time), 50)
    
    xx_matched, yy = [], []
    for x in xx: 
    
        y = min(well_readings) *math.exp(x*growth_coefficient)
        
        if y < 1.5*max(well_readings):
            xx_matched.append(x)
            yy.append(y)
        
    color_set = assign_color_experiment_parameters(i-1)  
    plt.scatter(time, well_readings, c = color_set, label = "Measurements")
    plt.plot(xx_matched,yy, c = color_set, linestyle = ':', label = "Best Fit")
        
    return(xx, yy)

In [15]:
def plot_measurements_from_each_well():
    """
    
    """
    
    # Build a dataframe of the timestamped measurements, temperature, and well readings
    df_measurements = build_dataframe_of_measurements()
    
    # Build dataframe with growth coefficient 
    df_coefficients = build_dataframe_of_coefficients(df_measurements)
    
      
        
    for i in range(len(df_measurements.iloc[0,:])):
        if i >= 2: 
            
            # Initiate a new figure  
            fig_rows, fig_columns, subplot_number = 3, 3, 0
            fig = plt.figure(figsize=(fig_columns*8, fig_rows*6))
       
            while subplot_number < fig_columns * fig_rows:
    
                subplot_number += 1
                plt.subplot(fig_rows, fig_columns, subplot_number)
            
                if subplot_number%fig_columns == 1: 
 
                    time = df_measurements.iloc[:,0]
                    well_readings = df_measurements.iloc[:,i]
            
                    color_set = assign_color_experiment_parameters(i-1) 
                    well_name, spore_count, alanine_mM = retrieve_well_description(i+1)
                    description = ('Well ' + str(well_name) + ' (' +  str(spore_count) + '  mln Spores + '+ str(alanine_mM) + ' mM Alanine)')
     
                    if subplot_number > 2:
                        plot_control_group(df_measurements, df_coefficients, i, 'not same')
            
                    if subplot_number > 1: 
                        plot_control_group(df_measurements, df_coefficients, i, 'same')
            
                    if subplot_number > 0:
                        plt.scatter(time, well_readings, c = color_set, label = description)
            
                    plt.xlabel("Time Lapsed (hours)")
                    plt.ylabel("Kinetic Absorbance 650 nm")
                    plt.title("Measurements of " + description)
                    plt.legend()
                    
                
                if subplot_number%fig_columns == 2: 
                    
                    
                
                if subplot_number%fig_columns == 0: 
                    
                    xx, yy = calculate_best_fit_line(df_measurements, df_coefficients, i)
                    
                    plt.xlabel("Time Lapsed (hours)")
                    plt.ylabel("Kinetic Absorbance 650 nm")
                    plt.title("Measurements of " + description)
                    plt.legend()
                
                

            save_figure_name = os.path.join( "Figures", "Each Well", well_name + '.png')
            plt.savefig(save_figure_name, bbox_inches='tight')
            
            save_figure_name = os.path.join( "Report", well_name + '.png')
            plt.savefig(save_figure_name, bbox_inches='tight')
  
            plt.show()
            
    

IndentationError: expected an indented block (<ipython-input-15-5fabe443dbfe>, line 54)

In [None]:
def main():
    """
    
    """
    
    # Is the program executing?
    print("Executing program.")
    
     
    # Build a dataframe of the timestamped measurements, temperature, and well readings
    df_measurements = build_dataframe_of_measurements()
    
    # Build dataframe with growth coefficient 
    df_coefficients = build_dataframe_of_coefficients(df_measurements)
    
    # Plot the measurements and growth coefficients from each well
    plot_measurements_from_each_well()
    
    # Plot the growth coefficients
    plot_coefficients(df_coefficients)
    
    
    
   
     

In [None]:


if __name__ == "__main__":
    main()
    
    