In [None]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
treatments = ["Untreated", "Myriocin", "Rapamycin", "BPS"]
conds = ["WT", "NCR1", "NPC2"]


def path2cat(fpath):
    """Derive the "<treatment> <condition>" category label from a file path.

    The path is matched case-insensitively against every name in
    ``treatments`` and ``conds``. When several names occur in the path, the
    one listed last wins. When none occurs, the treatment falls back to
    "Untreated" and the condition to "WT".

    Args:
        fpath: File name or path to classify.

    Returns:
        The label as a string, e.g. "Myriocin NCR1".
    """
    lowered = fpath.lower()

    # Collect every known name found in the path, in list order.
    treatment_hits = [name for name in treatments if name.lower() in lowered]
    cond_hits = [name.upper() for name in conds if name.lower() in lowered]

    # The last hit takes precedence; otherwise use the defaults.
    treatment = treatment_hits[-1] if treatment_hits else "Untreated"
    cond = cond_hits[-1] if cond_hits else "WT"
    return f"{treatment} {cond}"

In [None]:
# One zeroed counter per "<treatment> <condition>" pair.
json_data = {
    t + " " + c: {"Healthy":0, "Reject":0, "Sick":0}
    for t in treatments
    for c in conds
}

# Add the counts stored in every JSON file of the working directory to the
# category that the file name maps to.
for f in os.listdir("."):
    if ".json" not in f:
        continue
    with open(f, 'r') as fp:
        cat = path2cat(f)
        data = json.load(fp)
    for k, v in data.items():
        json_data[cat][k] += v

In [None]:
# Display the per-category counts held in json_data.
json_data

In [None]:
# RUN TO COMPENSATE FOR CONFUSION MATRIX
def get_delta(data):
    """Compute count corrections that compensate for the classifiers' error rates.

    Args:
        data: Mapping with numeric 'Healthy', 'Reject' and 'Sick' counts.
            Only 'Reject' and 'Sick' are read.

    Returns:
        Dict with keys 'h', 's' and 'r' holding the amounts to add to the
        'Healthy', 'Sick' and 'Reject' counts respectively. The values may be
        fractional and negative.
    """
    # NOTE(review): 'r' is never adjusted, so the reject count itself stays
    # uncorrected even though images are moved in and out of the valid
    # classes below. Confirm that this is intended.
    delta = {'h': 0, 'r': 0, 's': 0}

    # --- Compensate for rejection errors ---------------------------------
    # Fraction of the invalid images that actually get rejected.
    r_sens = 0.87538
    # Estimated number of invalid images that were not rejected and were
    # therefore counted as valid.
    missing_rej = (data['Reject']/r_sens) * (1-r_sens)
    # Remove them from the valid classes with a hard-coded 30/70
    # healthy/sick split. NOTE(review): provenance of the split is not shown.
    delta['h'] -= missing_rej * 0.3
    delta['s'] -= missing_rej * 0.7

    # Fraction of the non-rejected images that are actually valid.
    r_spec = 0.91774
    # Estimated number of rejected images which should have been valid.
    wrongly_rej = (data['Reject']/r_spec) * (1-r_spec)
    # Give them back to the valid classes with a hard-coded 10/90 split.
    # This must use wrongly_rej: reusing missing_rej here would apply the
    # sensitivity-based estimate a second time.
    delta['h'] += wrongly_rej * 0.1
    delta['s'] += wrongly_rej * 0.9

    # --- Compensate for classification errors ----------------------------
    # Fraction of the sick images that are detected as sick.
    c_sens = 0.743
    # Estimated sick images that were labelled healthy: move them to sick.
    missing_sick = (data['Sick']/c_sens) * (1-c_sens)
    delta['h'] -= missing_sick
    delta['s'] += missing_sick

    c_spec = 0.915
    # Estimated images wrongly labelled sick: move them back to healthy.
    wrong_sick = (data['Sick']/c_spec) * (1-c_spec)
    delta['h'] += wrong_sick
    delta['s'] -= wrong_sick
    return delta
# Replace every sample's counts with the compensated ones, truncated to int.
# NOTE: json_data is overwritten in place, so running this cell a second time
# applies the compensation on top of already-compensated counts.
for sample, data in json_data.items():
    delta = get_delta(data)
    corrected = {}
    for label, key in (("Healthy", "h"), ("Reject", "r"), ("Sick", "s")):
        corrected[label] = int(data[label] + delta[key])
    json_data[sample] = corrected

In [None]:
import numpy as np
# from confusion matrix
tn, fn, fp, tp = (6757, 865, 581, 2143)
healthy_prop = (tn + fn)/(tn + fn + tp + fp) # negatives are healthy


def sample_distribution(n=10000):
    """Simulate one sample of ``n`` images and return its healthy proportion.

    Each image is independently healthy with probability ``healthy_prop``.

    Args:
        n: Number of simulated images.

    Returns:
        Fraction of the ``n`` simulated images that came out healthy.
    """
    healthy = np.random.uniform(0, 1, n) <= healthy_prop
    return healthy.mean()


def generate_p_value(ref_dif, n=100000, sample_size=10000):
    """Empirical p-value for a difference between two healthy proportions.

    Answers: what is the probability that two random samples drawn from the
    reference distribution differ by at least ``ref_dif`` by chance alone?

    Args:
        ref_dif: Reference (absolute) difference in healthy proportion.
        n: Number of simulated sample pairs.
        sample_size: Number of images in each simulated sample.

    Returns:
        Fraction of the ``n`` simulated pairs whose absolute difference in
        healthy proportion is at least ``ref_dif``.
    """
    # The number of healthy images among sample_size independent draws is
    # binomial, so all n sample pairs can be drawn in two vectorised calls
    # instead of a Python loop that draws 2 * sample_size uniforms per pair.
    first = np.random.binomial(sample_size, healthy_prop, size=n) / sample_size
    second = np.random.binomial(sample_size, healthy_prop, size=n) / sample_size
    return np.count_nonzero(np.abs(first - second) >= ref_dif) / n


# Number of valid (Healthy + Sick) images per sample, computed once so the
# nested loop below does not recompute it for every pair.
valid_counts = {
    name: counts['Healthy'] + counts['Sick']
    for name, counts in json_data.items()
}

# Healthy proportion of every sample that has at least one valid image.
# Samples without valid images are skipped: their proportion is undefined and
# would otherwise raise ZeroDivisionError.
healthy_props = {}
for name, total in valid_counts.items():
    if total > 0:
        healthy_props[name] = json_data[name]['Healthy'] / total
    else:
        print(f"{name} skipped: no valid images")

for sample, sample_prop in healthy_props.items():
    # Compare the sample against the reference healthy proportion.
    sample_size = valid_counts[sample]
    dif = abs(sample_prop - healthy_prop)
    p = generate_p_value(dif, sample_size=sample_size)
    print(f"{sample} p-value: {p}")

    # just calc for all, and we filter later
    for other_sample, other_prop in healthy_props.items():
        if other_sample == sample:
            continue
        dif = abs(sample_prop - other_prop)
        other_size = valid_counts[other_sample]
        # The simulation uses the smaller of the two sample sizes.
        p2 = generate_p_value(dif, sample_size=min(sample_size, other_size))
        print(f"{sample} vs {other_sample} p-value: {p2}")

In [None]:
attributes = ["Healthy","Sick"]
labels = ["fully fused", "partially fused"]
conditions = json_data.keys()

# Legend text for each plotted class.
label_map = {
    "Healthy": "Fully fused",
    "Sick": "Partially fused",
}

# For each plotted class, its share of the Healthy + Sick total, with one
# entry per condition in json_data order.
values = {status: [] for status in ("Healthy", "Sick")}
for condition, data in json_data.items():
    data_sum = data["Healthy"] + data["Sick"]
    for k in values:
        if k in data:
            values[k].append(data[k]/data_sum)

width = 0.5  # the width of the bars
x = np.arange(0, 2*len(conditions), 2)  # the label locations

fig, ax = plt.subplots(layout='constrained', figsize=(10,5))

# One bar series per class, each shifted right by one bar width.
for offset_mul, (status, count) in enumerate(values.items()):
    rects = ax.bar(x + width*offset_mul, count, width, label=label_map[status])
    #ax.bar_label(rects, padding=3)

ax.set_title('Partially and fully fused vacuole distribution for multiple conditions (expected error compensated)')
ax.set_xlabel("Sample condition")
ax.set_ylabel('Class proportion')
# Centre each tick between the two bars of its condition.
ax.set_xticks(x + width/2, conditions,rotation=25)
ax.set_ylim(0, 1)
ax.legend(loc='upper left')

plt.savefig("healthy-sick-compensated.svg")
plt.show()


In [None]:
# Healthy and Sick as a share of the valid (Healthy + Sick) images per
# condition. 'Rejected' holds the raw reject count and is not plotted.
# The frame is built in one go from a list of rows instead of being grown
# (and re-sorted) one row at a time; rows follow json_data order.
rows = []
for acq, data in json_data.items():
    acq_sum = data["Healthy"] + data["Sick"]
    rows.append([acq, data["Healthy"]/acq_sum, data["Reject"], data["Sick"]/acq_sum])
df = pd.DataFrame(rows, columns=['acq','Healthy','Rejected','Sick'])

df_plot = df.plot(x="acq", y=["Healthy","Sick"], kind="bar", figsize=(20, 6), title="Healthy & Sick proportions")
print(df_plot)

In [None]:
# Share of each class among all images (Healthy + Sick + Reject) per condition.
# The frame is built in one go from a list of rows instead of being grown
# (and re-sorted) one row at a time.
# Rows are listed last-condition-first, which is the bar order this cell has
# always produced.
# NOTE(review): the "Healthy & Sick proportions" cell plots the conditions in
# json_data order instead - confirm which order is wanted here.
rows = []
for acq, data in reversed(list(json_data.items())):
    acq_sum = data["Healthy"] + data["Sick"] + data["Reject"]
    rows.append([acq, data["Healthy"]/acq_sum, data["Reject"]/acq_sum, data["Sick"]/acq_sum])
df = pd.DataFrame(rows, columns=['acq','Healthy','Rejected','Sick'])

df_plot = df.plot(x="acq", y=["Healthy","Sick","Rejected"], kind="bar", figsize=(20, 6), title="Class proportions")
df_plot.get_figure().savefig("all.png")

In [None]:
# Healthy share of the valid (Healthy + Sick) images per condition.
# 'Rejected' and 'Sick' are computed against the same total but not plotted.
# The frame is built in one go from a list of rows instead of being grown
# (and re-sorted) one row at a time.
# Rows are listed last-condition-first, which is the bar order this cell has
# always produced.
# NOTE(review): the "Healthy & Sick proportions" cell plots the conditions in
# json_data order instead - confirm which order is wanted here.
rows = []
for acq, data in reversed(list(json_data.items())):
    acq_sum = data["Healthy"] + data["Sick"]
    rows.append([acq, data["Healthy"]/acq_sum, data["Reject"]/acq_sum, data["Sick"]/acq_sum])
df = pd.DataFrame(rows, columns=['acq','Healthy','Rejected','Sick'])

# NOTE(review): the title says "(%)" but the plotted values are fractions in
# the 0-1 range - confirm whether the title or the scale should change.
df_plot = df.plot(x="acq", y=["Healthy"], kind="bar", figsize=(20, 6), title="Healthy (%)")
df_plot.get_figure().savefig("healthy-sick-diff.png")