In [20]:
# imports
import pandas as pd
import numpy as np
import glob, os
import matplotlib.pyplot as plt

In [24]:
# fetch excel file names
path = "IJ3"
excel_files = []
for file in os.listdir(path):
    if file.endswith(".xlsx"):
        excel_files.append(os.path.join(path, file))
print(excel_files)

['IJ3/IJ2_1_lyo.xlsx', 'IJ3/IJ2_1.xlsx', 'IJ3/IJ3.xlsx', 'IJ3/IJ2_1_v2.xlsx']


In [25]:
# read in excel files into dataframes
dfs = []
for excel_file in excel_files:
    print(excel_file)
    xls = pd.ExcelFile(excel_file)
    dfs.append(pd.read_excel(xls, 'Raw Data', header=23)[23:].reset_index().drop('index', axis=1))

IJ3/IJ2_1_lyo.xlsx
IJ3/IJ2_1.xlsx
IJ3/IJ3.xlsx
IJ3/IJ2_1_v2.xlsx


In [26]:
# truncate start data
truncate_index = []
for df in dfs:
    for i in range(len(df)):
        if (df.iloc[i]['Response Trace 4'] > -100):
            truncate_index.append(i)
            break
for i in range(len(dfs)):
    dfs[i] = dfs[i][truncate_index[i]:].dropna().reset_index().drop('index', axis=1)

In [27]:
dfs_new = []
for j in range(len(dfs)):
    rt_corrected = []
    for i in range(len(dfs[j])):
        x = dfs[j]['RT (mins)'][i]
        y_curr = dfs[j]['Response Trace 4'][i]
        rt_corrected.append(y_curr - (dfs[j].loc[0]['Response Trace 4'] + dfs[j].loc[len(dfs[j]) - 1]['Response Trace 4'])/(35)*x)
    dfs_new.append(pd.concat([dfs[j], pd.DataFrame({'RT Corrected': rt_corrected})], axis=1, sort=False))

In [28]:
# plot data on separate plots and save as png 
for i in range(len(dfs)):
    plt.figure()
    #plt.plot(dfs[i]['RT (mins)'], dfs[i]['Response Trace 1'], label='RT 1')
    #plt.plot(dfs[i]['RT (mins)'], dfs[i]['Response Trace 2'], label='RT 2')
    #plt.plot(dfs[i]['RT (mins)'], dfs[i]['Response Trace 3'], label='RT 3')
    #plt.plot(dfs[i]['RT (mins)'], dfs[i]['Response Trace 4'], 'k-', label='RT 4')
    plt.plot(dfs_new[i]['RT (mins)'], dfs_new[i]['RT Corrected'], 'k-', lw=0.5, label='RT Corrected')
    plt.xlabel("time (mins)")
    plt.ylabel("nRIU")
    #plt.legend()
    #plt.grid()
    plt.savefig(excel_files[i].replace(".xlsx", ""), dpi=300, bbox_inches='tight')
plt.close('all')

In [29]:
# plot all data on 1 overlay plot and save as png 
plt.figure()
plt.xlabel("time (mins)")
plt.ylabel("nRIU")
#plt.grid(b=None)
for i in range(len(dfs)):
    plt.plot(dfs_new[i]['RT (mins)'], dfs_new[i]['RT Corrected'], '-', lw=0.5, label=excel_files[i].replace(".xlsx", ""))
plt.legend()
plt.savefig(path + '/overlay', dpi=300, bbox_inches='tight')
plt.close('all')

In [30]:
# zoomed in region of overlay plot saved as a png 
L_cut = 2500
R_cut = 3050
plt.figure()
plt.xlabel("time (mins)")
plt.ylabel("nRIU")
#plt.grid(b=None)
for i in range(len(dfs)):
    plt.plot(dfs_new[i]['RT (mins)'][L_cut:R_cut], dfs_new[i]['RT Corrected'][L_cut:R_cut], '-', lw=0.5, label=excel_files[i].replace(".xlsx", ""))
plt.legend()
plt.savefig(path + '/zoomed_overlay', dpi=300, bbox_inches='tight')
plt.close('all')

In [36]:
# integrate zoomed region (i.e. peaks) to get area ratios
for i in range(len(dfs)):
    print(excel_files[i].replace(".xlsx", "") + ' Integrated Peak Area:', np.sum(dfs_new[i]['RT Corrected'][L_cut:R_cut]))

IJ3/IJ2_1_lyo Integrated Peak Area: 591054.9354821619
IJ3/IJ2_1 Integrated Peak Area: 540565.0381226097
IJ3/IJ3 Integrated Peak Area: 230695.35791081557
IJ3/IJ2_1_v2 Integrated Peak Area: 632872.6023414368


In [42]:
#IJ3 Specific: take ratio of pure RHP peak : Leftover RHP peak areas
print('% RHP Lost =', (1 - 230695.35791081557/ ((591054.9354821619+540565.0381226097+632872.6023414368)/3))*100 )
  

% RHP Lost = 60.77704813457117
