In [None]:
#import relevant packages
%matplotlib inline
import numpy as np
import pandas as pd #to pull data from excel files
from scipy import stats
import matplotlib.pyplot as plt #plotting 

In [None]:
def AMA_parser(file_name, skiprows, ylabel, discard_chunk, discard_misc, blanks, data_labels, plot_title, time_int = 0.25, output = "output_file.png", 
               empties = True, time_point = False, subtraction = True, averaging = False, 
               growth_curves = False, boxplots = True, save = False):
    """Load a plate-reader export, blank-correct it and optionally plot it.

    The file is read as a tab-delimited, UTF-16 table. The first `skiprows`
    lines and the last four lines are skipped as metadata.

    Parameters
    ----------
    file_name : str
        Path of the export to read.
    skiprows : int
        Number of leading metadata lines to skip.
    ylabel : str
        y-axis label of the boxplot (only used when `boxplots` is true).
    discard_chunk : sequence of two str
        First and last column of a contiguous (inclusive) run of wells to drop.
    discard_misc : list of str
        Further individual wells to drop.
    blanks : sequence of three str
        Wells holding blank media; they are renamed "Blank 1".."Blank 3".
    data_labels : list of str
        One label per triplicate.
        * averaging: label k names columns 3k..3k+2, so the blank triplicate
          must be listed first.
        * boxplots: label k names columns 3k+3..3k+5, so the blank triplicate
          must NOT be listed and the first label is the WT reference.
    plot_title : str
        Title of the boxplot.
    time_int : float
        Spacing between consecutive reads, in hours.
    output : str
        File the current figure is written to when `save` is true.
    empties : bool
        When true, drop the temperature column, the outer ring of wells
        (rows A and H, columns 1 and 12) and the wells named by
        `discard_chunk` / `discard_misc`.
        NOTE(review): the user-specified discards are only applied when
        `empties` is true; the original comment suggests they were meant to
        apply always - confirm before changing.
    time_point : float
        Time (hours) at which the boxplot is evaluated. The default `False`
        compares equal to 0, i.e. the first read.
    subtraction : bool
        Subtract the mean of the three blanks from every column after the
        first one. The first column is dropped (presumably the reader's own
        time stamp - confirm against the export) and the remaining columns
        are relabelled 0, 1, 2, ...
    averaging : bool
        Replace each consecutive triplicate by its mean, labelled with
        `data_labels`.
    growth_curves : bool
        Scatter-plot every data column against time.
    boxplots : bool
        Boxplot of each triplicate as a percentage of the mean WT reading at
        `time_point`. Relies on the integer column labels produced by
        `subtraction=True` with `averaging=False`.
    save : bool
        Save the current matplotlib figure to `output`.

    Returns
    -------
    pandas.DataFrame
        The processed table, including the added "Time (hours)" column.

    Raises
    ------
    ValueError
        If `boxplots` is true and no read lies at `time_point`.
    """

    # skipfooter is only supported by the python parsing engine
    df = pd.read_table(file_name, encoding = "utf-16", skiprows = skiprows, skipfooter = 4, engine = "python")

    if empties:
        inner_rows = "BCDEFG"
        outer_ring = (["A%d" % col for col in range(1, 13)]
                      + ["H%d" % col for col in range(1, 13)]
                      + [row + "1" for row in inner_rows]
                      + [row + "12" for row in inner_rows])

        df = df.drop(["Temperature(¡C)"] + outer_ring, axis = 1)
        # label slice is inclusive of both end points
        df = df.drop(df.loc[:, discard_chunk[0]:discard_chunk[1]].columns, axis = 1)
        df = df.drop(discard_misc, axis = 1)

    # Rename the blank wells named by the user
    df = df.rename(columns = {blanks[0]: "Blank 1", blanks[1]: "Blank 2", blanks[2]: "Blank 3"})

    # Subtract the mean of the three blanks from every well
    if subtraction:
        blank = (df["Blank 1"] + df["Blank 2"] + df["Blank 3"]) / 3

        # Column 0 is left out, and the wells are relabelled positionally so
        # that triplicate k sits in columns 3k..3k+2 (blanks are 0..2).
        df = df.iloc[:, 1:].sub(blank, axis = 0)
        df.columns = range(df.shape[1])

    # Average triplicate data
    if averaging:
        means = [(df.iloc[:, 3 * k] + df.iloc[:, 3 * k + 1] + df.iloc[:, 3 * k + 2]) / 3
                 for k in range(len(data_labels))]
        df = pd.DataFrame(means, index = data_labels).T

    # Time axis in hours; multiplying avoids the rounding drift of repeated
    # addition, which would make the time_point lookup below miss its row.
    df["Time (hours)"] = np.arange(len(df)) * time_int

    # Optional growth curves: one scatter plot per data column
    if growth_curves:
        for column in [c for c in df.columns if c != "Time (hours)"]:
            df.plot.scatter(x = "Time (hours)", y = column)

    # Boxplots of every triplicate as % of WT (blank triplicate is left out)
    if boxplots:
        at_time_point = np.isclose(df["Time (hours)"].values, float(time_point))
        t = df.index[at_time_point]

        if len(t) == 0:
            raise ValueError("time_point %r is not on the time axis (step %r h, %d reads)"
                             % (time_point, time_int, len(df)))

        # Columns 3..5 hold the WT triplicate used as the 100 % reference
        WT = (df.loc[t, 3].values + df.loc[t, 4].values + df.loc[t, 5].values) / 3

        percent_of_wt = []
        for k in range(len(data_labels)):
            replicates = [100 * df.loc[t, 3 * k + offset].values / WT for offset in (3, 4, 5)]
            percent_of_wt.append(np.hstack(replicates).tolist())

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.boxplot(percent_of_wt)
        # Labels are set after drawing: the `labels=` keyword of boxplot is
        # deprecated in recent Matplotlib releases.
        ax.set_xticklabels(data_labels, rotation = 60)
        ax.set_ylabel(ylabel)
        ax.set_ylim(0, 125)
        ax.set_title(plot_title)
        fig.tight_layout()

    if save:
        # Saves whichever figure is current (the boxplot when one was drawn)
        plt.savefig(output)

    return df

In [None]:
#User inputs
#############################################################################################

#plate reader export to analyse
file_name = "170517_AMA_chMRP126_dose_curve.txt"

#unused wells given as a range: every column from the first well through the second is dropped (inclusive)
discard_chunk = ["C11","G11"]

#further unused wells lying outside discard_chunk
discard_misc = ["B11"]

#the three wells containing blank media
blanks = ["B2", "B3", "B4"]

#one label per triplicate, in plate order, starting with the WT reference
#the blank is NOT listed for the boxplot run - AMA_parser skips the blank triplicate itself
data_labels = ["WT S. epi", "2.5 uM chMRP126", "5 uM chMRP126", "7.5 uM chMRP126", "10 uM chMRP126"]

#file the boxplot is saved to
output = "170524_chMRP126_dose_curve_plot.png"

#boxplot title and y-axis label (the label should agree with time_point below)
plot_title = "Chicken MRP126 antimicrobial activity vs. S. epidermidis"

ylabel = "% of WT growth at 12 hours"

#boxplot of every triplicate relative to WT at the 12 hour read, saved to `output`
ch_data = AMA_parser(file_name, skiprows = 2, ylabel = ylabel,
                     discard_chunk = discard_chunk, discard_misc = discard_misc,
                     blanks = blanks, data_labels = data_labels, plot_title = plot_title,
                     output = output, time_point = 12, save = True)

ch_data

In [None]:
#second pass over the same export: triplicate-averaged growth curves instead of the boxplot
#the blank IS listed here because averaging starts at the first (blank) triplicate
data_labels = ["Blank", "WT S. epi", "2.5 uM chMRP126", "5 uM chMRP126", "7.5 uM chMRP126", "10 uM chMRP126"]

#ylabel is only drawn on the boxplot, but it is a required positional argument:
#leaving it out shifts every later argument by one slot and the call fails with a TypeError
ch_data2 = AMA_parser(file_name, 2, ylabel, discard_chunk, discard_misc, blanks, data_labels, plot_title, averaging = True,
                 growth_curves = True, boxplots = False)

ch_data2