In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pyflowbat as pfb

In [None]:
# Apply the style spec that PyFlowBAT exposes as `std_pfb_style` to every
# figure created after this cell.
# NOTE(review): `_std_vals` is a private module (leading underscore) - confirm
# whether the package offers a public accessor for this style.
plt.style.use(pfb.pyflowbat._std_vals.std_pfb_style)

In [None]:
# PyFlowBAT results table. The parity loop further down reads its `line`,
# `MEFLs` and `MEPTRs` columns and plots them on the y-axis.
pfb_data = pd.read_csv(filepath_or_buffer="ebrc-example-output.csv")

In [None]:
# Manually analysed (FlowJo) reference table. The parity loop further down
# reads its `line`, `MEFLs` and `MEPTRs` columns and plots them on the x-axis.
flowjo_data = pd.read_csv(filepath_or_buffer="flowjo-analysis-formatted.csv")

In [None]:
# Lookup from the numeric cell-line ID (the value stored in the `line` column
# of both data tables) to the label shown in each plot title.
# Insertion order matters: it is the order in which the lines are listed.
cellline_dict = dict([
    (373, "no synTF"),
    (664, "VP64 ZF1"),
    (665, "VPR ZF1"),
    (666, "p65 ZF1"),
    (667, "p65trunc2 ZF1"),
    (668, "VP64 ZF6"),
    (669, "VP64 ZF8"),
    (670, "VP64 ZF9"),
    (671, "VP64 ZF10"),
    (672, "VP64 ZF15"),
])

In [None]:
import numpy as np

# Parity analysis: for every cell line and each calibrated unit, fit the
# PyFlowBAT values against the manual (FlowJo) values with a through-origin
# OLS regression, record the slope and adjusted R^2, and save a log-log
# parity plot as `parityplot_<line>_<col>.png`.
slopes = []
rsquared_adjs = []

# Iterating the dict visits the same IDs, in the same (insertion) order, as a
# hard-coded list would, and keeps the loop in sync with the plot labels.
for cellline in cellline_dict:
    print(f"cell line: {cellline}")
    for col in ["MEFLs", "MEPTRs"]:
        print(f"col: {col}")
        # x = manual analysis, y = PyFlowBAT (matches the axis labels below).
        # Single-step `.loc[rows, col]` avoids chained indexing.
        xdata = flowjo_data.loc[flowjo_data['line'] == cellline, col].to_numpy()
        ydata = pfb_data.loc[pfb_data['line'] == cellline, col].to_numpy()
        # Samples are paired by position.
        # NOTE(review): assumes both CSVs list the samples of a cell line in
        # the same order - TODO confirm.
        if xdata.shape != ydata.shape:
            raise ValueError(
                f"line {cellline}, {col}: {xdata.size} manual values vs "
                f"{ydata.size} PyFlowBAT values; cannot pair samples"
            )
        # statsmodels' signature is OLS(endog, exog): the response (y) comes
        # first. No constant is added, so the fit is y = slope * x, which is
        # exactly the line drawn and labelled below. Without a constant,
        # statsmodels reports R^2 relative to the uncentered total sum of
        # squares.
        model = sm.OLS(ydata, xdata)
        results = model.fit()
        slope = results.params[0]
        slopes.append(slope)
        rsquared_adj = results.rsquared_adj
        rsquared_adjs.append(rsquared_adj)
        print(f"slope: {slope}")
        print(f"adj r^2: {rsquared_adj}")

        fig, ax = plt.subplots()
        # Reference diagonal (perfect agreement) and the fitted line.
        ax.plot(xdata, xdata, 'k:', label="y=x", zorder=0, linewidth=2)
        ax.plot(
            xdata,
            xdata * slope,
            'b',
            label=f"y={slope:.2f}x; rsquared_adj={rsquared_adj:.3f}",
            zorder=1,
            linewidth=2,
        )
        ax.scatter(xdata, ydata, color='red', marker='o', zorder=2)
        ax.set_yscale('log')
        ax.set_xscale('log')
        ax.set_title(f"{cellline_dict[cellline]}: {col} expression")
        ax.set_ylabel("PyFlowBAT quantified expression")
        ax.set_xlabel("Manual analysis quantified expression")
        # Legend sits below the axes so it never covers data points.
        ax.legend(
            loc='upper center',
            bbox_to_anchor=(0.5, -0.125),
            fancybox=True,
            shadow=True,
            ncol=5,
        )
        fig.savefig(f"parityplot_{cellline}_{col}.png", dpi=500, bbox_inches="tight")
    print()
        

In [None]:
# Summary statistics of the fitted slopes collected by the parity loop.
# NOTE(review): `std` is the population form (NumPy default ddof=0) while the
# SEM is built from the sample form (ddof=1) - confirm this mix is intended.
slopes_mean = np.mean(slopes)
slopes_std = np.std(slopes)
slopes_sem = np.std(slopes, ddof=1) / np.sqrt(np.size(slopes))
print(f"Slopes:\nmean: {slopes_mean}\nstd: {slopes_std}\nsem: {slopes_sem}")

In [None]:
# Summary statistics of the adjusted R^2 values collected by the parity loop.
# NOTE(review): `std` is the population form (NumPy default ddof=0) while the
# SEM is built from the sample form (ddof=1) - confirm this mix is intended.
r2_adj_mean = np.mean(rsquared_adjs)
r2_adj_std = np.std(rsquared_adjs)
r2_adj_sem = np.std(rsquared_adjs, ddof=1) / np.sqrt(np.size(rsquared_adjs))
print(f"Adjusted R^2's:\nmean: {r2_adj_mean}\nstd: {r2_adj_std}\nsem: {r2_adj_sem}")