In [1]:
from datetime import datetime # to retrieve the current time and data
import math # how to use math.pow(value, power)
import matplotlib.pyplot as plt # generate plots
import numpy as np
import os # to join strings into directory paths
import pandas as pd # dataframes
import random # generate random numbers
import statistics # to calculate the standard deviation
from scipy import stats # p value calculation - http://python-ds.com/python-p-value
from scipy.optimize import curve_fit


In [2]:
original_measurement_filename = 'exp_11-22-19_progress.txt'
original_measurement_filename = 'original_measurements.txt'
# original_measurement_filename = 'a.txt'


In [3]:
def is_number(a):
    """
    # will be True also for 'NaN'
    """

    try:
        number = float(a)
        return("yes")
    
    except ValueError:
        return("no")


In [4]:
def read_original_measurements():
    """
    
    """
    measurements = []
    readings = []
    
    with open(original_measurement_filename,'r') as ReadFile: 
        
        for row in ReadFile:
            
            row = row.split("\t")
             
            for item in row: 
                
                a = is_number(item)
                    
                if a == "yes":
                    measurements.append(float(item))
                    
                if ":" in str(item):
                    measurements.append(item)
                    
    return(measurements)


In [5]:
def build_dataframe(measurements):
    """
    
    """
    
    times = []
    readings = []
    for i in range(len(measurements)):
        
        if ":" in str(measurements[i]):
            
            time_clock = measurements[i]
            time_split = time_clock.split(":")
            
            if len(time_split) == 2:
                time_hour = float(time_split[0])*1/60 + float(time_split[1])*1/60*1/60 
            
            if len(time_split) == 3:
                time_hour = float(time_split[0]) + float(time_split[1])*1/60 + float(time_split[2])*1/60*1/60 
            
            times.append(time_hour)
            
            well_readings = measurements[i+2:i+98]
            
            readings.append(well_readings)
        
    df = pd.DataFrame(readings)
    
    return(df, times)

In [6]:
def name_well(i):
    """
    
    """
    letter_list = ['A','B','C','D','E','F','G','H','I']
    col_num = int(i/12)
    row_num = int(i%12)
    well_name = str(letter_list[col_num]) + str(row_num+1)
    
    return(well_name)
       

In [7]:
def find_extreme(b):
    """
    
    """
    
    if len(b) > 0:
        mean_b = sum(b)/len(b)
    
    else:
        mean_b = 0
    
    max_b, min_b, index_max_b, index_min_b = mean_b, mean_b, 0, 0
    
    for i in range(len(b)):
        
        if b[i] < min_b:
            min_b = b[i]
            index_min_b = i
            
        if b[i] > max_b:
            max_b = b[i]
            index_max_b = i
            
    return(max_b, min_b, index_max_b, index_min_b)


In [8]:
def truncate_list(b, times):
    """
    
    """
    max_b, min_b, index_max_b, index_min_b = find_extreme(b)
    
    truncated_b, ln_times = [], []
    
    for i in range(len(b)):
        if i >= index_min_b and i <= index_max_b:
            truncated_b.append(b[i])
            ln_times.append(times[i])
            
    return(truncated_b, ln_times )
        

In [9]:
def natural_log_list(b):
    """
    
    """
    
    ln_b = []
    
    for i in range(len(b)):
        value = b[i] / min(b)
        value = math.log(value)
        ln_b.append(value)
        
    return(ln_b)


In [10]:
def truncate_to_slope(ln_b, ln_times):
    """
    
    """
    max_b, min_b, index_max_b, index_min_b = find_extreme(ln_b)
    
    truncated_to_slope_b, truncated_to_slope_times = [], []
    
    for i in range(len(ln_b)):
        
        if ln_b[i] <= max_b:
            
            print(str(ln_b[i] / max_b))
            
            """
            if min_b == 0:
                min_for_slope = 0.99*max_b
                
            else:
                min_for_slope = 2*min_b 
            """
              
            if ln_b[i] >= min_for_slope:
                
                print(str(ln_b[i] / min_for_slope))
                    
                truncated_to_slope_b.append(ln_b[i])
                truncated_to_slope_times.append(ln_times[i]) 
            
    return(truncated_to_slope_b, truncated_to_slope_times )
        
    

In [11]:
def plot_each_well(df, times):
    """
    
    """
    
    for i in range(len(df.iloc[0, :])):
        
        well_name = name_well(i)
            
        # Initiate a new figure  
        fig_rows, fig_columns, subplot_number = 1, 2, 0
        fig = plt.figure(figsize=(fig_columns*6, fig_rows*6))
    
        b = df.iloc[:, i]
        
        max_b, min_b, index_max_b, index_min_b = find_extreme(b)
        truncated_b, ln_times = truncate_list(b, times)
        ln_b = natural_log_list(truncated_b)
        truncated_to_slope_b, truncated_to_slope_times = truncate_to_slope(ln_b, ln_times)
  
        subplot_number += 1
        plt.subplot(fig_rows, fig_columns, subplot_number)
        plt.scatter(times, b,  c = [.8,.8,.8], label = 'All')
        plt.scatter(ln_times, truncated_b, c = [.8,0,0], label = 'Truncated')
        
        plt.xlabel("Time Lapsed (minutes)")
        plt.ylabel("Kinetic Absorbance 650 nm")
        description = ('Well Name ' + str(well_name) + ' (Well No. ' + str(i+1) + ')')
        plt.title(description)
        plt.xlim([0, max(times)])
        plt.legend(loc='center left')
        
        subplot_number += 1
        plt.subplot(fig_rows, fig_columns, subplot_number)
        plt.scatter(ln_times, ln_b, c = [0,.7,.8], label = 'ln(value / intial value)')
        plt.scatter(truncated_to_slope_times, truncated_to_slope_b, c = [0,0,.8], label = 'Log Phase Growth')
        
        slope_list = []
        included_well_list = []
        included_wellname_list = []
        
        if len(ln_times) > 10:
            poly_fit = np.poly1d(np.polyfit(ln_times, ln_b, 1))
            xx = np.linspace(min(ln_times), max(ln_times), 50)
            equation_text = str("y = " + str(round(poly_fit[1],3)) + "x + " + str(round(poly_fit[0],3))) 
            print(poly_fit)
            print(equation_text)
            plt.plot(xx, poly_fit(xx), c=[0,1,0], linestyle='-', label = equation_text)
            
            slope_list.append(poly_fit[1])
            included_well_list.append(i+1)
            included_wellname_list.append(well_name)
            
        if len(truncated_to_slope_b) > 10:
            poly_fit = np.poly1d(np.polyfit(truncated_to_slope_times, truncated_to_slope_b, 1))
            xx = np.linspace(min(truncated_to_slope_times), max(truncated_to_slope_times), 50)
            equation_text = str("y = " + str(round(poly_fit[1],3)) + "x + " + str(round(poly_fit[0],3))) 
            print(poly_fit)
            print(equation_text)
            plt.plot(xx, poly_fit(xx), c=[0,1,1], linestyle='-', label = equation_text)
 
        plt.xlabel("Time Lapsed (minutes)")
        plt.ylabel("Kinetic Absorbance 650 nm")
        description = ('Well Name ' + str(well_name) + ' (Well No. ' + str(i+1) + ')')
        plt.title(description)
        plt.xlim([0, max(times)])
        plt.legend(loc='center left')
        
        save_figure_name = os.path.join( "Figures", well_name + '.jpg')
        plt.savefig(save_figure_name, bbox_inches='tight')
       
        plt.show()
        
    plt.scatter(included_well_list, slope_list)
    plt.xlabel("Well No.")
    plt.ylabel("Slope ")
    plt.show()
    
    save_figure_name = os.path.join( "Figures", 'Summary' + '.jpg')
    plt.savefig(save_figure_name, bbox_inches='tight')


In [12]:
def main():
    """
    
    """ 
    
    measurements = read_original_measurements()
    
    df, times = build_dataframe(measurements)

    plot_each_well(df, times)
    
    
    
    

In [13]:
if __name__ == "__main__":
    main()
    

0.0


NameError: name 'min_for_slope' is not defined