## Covid cell infection statistics calculation

### folder and file variables

In [None]:
import os
# Raw images live in `image_folder`; derived outputs go to a sibling
# directory whose name is the image folder plus the "_processed" suffix.
image_folder = '/home/haoxu/data/Images'
processed_folder = f"{image_folder}_processed"

# Plate-map spreadsheet (relative path, resolved against the working directory).
condition_xlsx_file = '../../data/Plate map test plates 2020-11-25.xlsx'

# Quantification CSV inside the processed folder; later cells load it with pd.read_csv.
quantify_csv_file = os.path.join(processed_folder, 'quantify_all.csv')

# Last expression of the cell: displays whether the plate-map file can be found.
os.path.exists(condition_xlsx_file)

### run segmentation and quantification

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from cellpose_applications.covid_screening.covid_screening_quantify import quantify_all_fovs, combine_all_quantify

In [None]:
#quantify_all_fovs(image_folder)

### combine csv data of all images

In [None]:
#combine_all_quantify(processed_folder)

### write the treatments to data

In [None]:
import os
import pandas as pd
import numpy as np
from cellpose_applications.covid_screening.covid_screening_quantify import write_conditions

In [None]:
# write the conditions to csv
# Passes the quantification CSV path and the plate-map spreadsheet path that
# were defined in the first cell.
# NOTE(review): presumably write_conditions adds the per-well treatment
# columns to the CSV in place -- confirm against its implementation.
write_conditions(quantify_csv_file, condition_xlsx_file)

### define cell infection

In [None]:
import pandas as pd
import os
from imageio import imread
from matplotlib import pyplot as plt
from skimage import segmentation, morphology, exposure
import numpy as np

In [None]:
# Load the quantification table; later cells read its `last4percentmean`
# and `well_fov` columns.
df = pd.read_csv(quantify_csv_file)

In [None]:
# Histogram of the `last4percentmean` column (per the original note: the top
# 4 percent of the infection channel). It is used to choose the infection
# threshold by eye; adjust `range` for better visualization.
plt.hist(df["last4percentmean"].to_list(), bins=800, range=(0, 15000))

In [None]:
#plt.hist(df.cell_mean.to_list(), range=(0, 6000), bins=800)

In [None]:
#plt.hist(df.cell_integ.to_list(), range=(0, 4E6), bins=800)

In [None]:
# define the threshold value based on the plot above
# Cells whose `last4percentmean` is strictly greater than this value are
# flagged as infected below; the same value is later passed to plate_plots
# as `infection_threshold`.
threshold_value = 4200

In [None]:
# define which cells are infected
# `Infected` is 1 where last4percentmean is strictly above the threshold and
# 0 everywhere else (including rows where the value is missing).
infected_flag = (df["last4percentmean"] > threshold_value).astype("int64")
df = df.assign(Infected=infected_flag)

### verify cell infection

In [None]:
# Distinct `well_fov` identifiers found in the table. Going through a set
# removes duplicates, so the resulting order is arbitrary.
well_fovs = list(set(df["well_fov"].to_list()))
#well_fovs

In [None]:
import random
from cellpose_applications.covid_screening.covid_screening_quantify import show_infections

In [None]:
# show a random sample to see if the infected cells are correctly picked out.
# if not, fine tune the threshold_value and re-run the cells above
# Pick one field of view at random; swap in a literal id (such as the one in
# the trailing comment) to inspect a specific image instead.
well_fov = random.choice(well_fovs) #'r14c24f02'
print(well_fov)
# NOTE(review): show_infections is a project helper; it presumably returns an
# image array with the cells flagged in df['Infected'] highlighted -- confirm.
img = show_infections(df, well_fov, dim_er=True, contour=True, show_er=True)
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(40, 40))
# Divide by the maximum so the brightest value maps to 1.0 for display.
axes.imshow(img/img.max())

### plot plate map for infections

In [None]:
from cellpose_applications.covid_screening.covid_screening_quantify import plate_plots

In [None]:
# Uses the threshold chosen above as the infection cut-off.
# NOTE(review): presumably plate_plots also writes the 'infection_data.csv'
# that the next section reads from processed_folder -- confirm in the helper.
plate_plots(processed_folder, infection_threshold=threshold_value)

### get analysis result

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Per-well infection table stored in the processed folder; the cells below
# read its `well_id`, `Compound` and `infection_rates` columns.
infection_data = os.path.join(processed_folder, 'infection_data.csv')
df_infection = pd.read_csv(infection_data)
#df_infection

In [None]:
# plot the layout for treatments for the plate
# Build a 16 x 24 grid (one entry per well) holding the compound name of each
# well; wells without a row in df_infection keep the placeholder 'Empty'.
#
# dtype=object is required here: without it np.full infers a fixed-width
# 5-character string dtype from 'Empty', and any longer compound name would be
# silently truncated to its first 5 characters on assignment.
compound_array = np.full((16, 24), 'Empty', dtype=object)
for row in range(1, 17):
    for column in range(1, 25):
        # Well ids are 1-based and zero-padded, e.g. 'r01c01' ... 'r16c24'.
        well_id = f"r{row:02d}c{column:02d}"
        df_well = df_infection[df_infection.well_id == well_id]
        if df_well.empty:
            continue
        # Use the first matching row; str() keeps every grid entry a string,
        # as it was with the original string-typed array.
        compound_array[row - 1, column - 1] = str(df_well.Compound.iloc[0])

# Draw the plate layout: a 16 x 24 grid of tick-less axes, each showing the
# compound name of its well centred in the cell. Row letters (A-P) label the
# first column and column numbers (1-24) label the bottom row.
fig, axs = plt.subplots(16, 24, figsize=(28, 16))
rows = 'abcdefghijklmnop'.upper()
for row, row_label in enumerate(rows):
    for column in range(24):
        ax = axs[row, column]
        ax.text(
            0.5,
            0.5,
            compound_array[row, column],
            transform=ax.transAxes,  # (0.5, 0.5) is the centre in axes coordinates
            fontsize=12,
            horizontalalignment='center',
            verticalalignment='center',
        )
        ax.set_xticks([])
        ax.set_yticks([])
        if column == 0:
            ax.set_ylabel(row_label, rotation=0, fontsize=25, verticalalignment='center')
        if row == 15:
            ax.set_xlabel(str(column + 1), rotation=0, fontsize=25)
plt.tight_layout()
plt.show()

### calculate z-prime statistic

In [None]:
# Z'-factor between two groups of DMSO wells:
#     z = 1 - 3 * (std_infection + std_control) / |mean_infection - mean_control|
# DMSO wells in the plate columns listed below form the control group; all
# other DMSO wells form the infection group.
DMSO_control_columns = ['23', '24']

df_DMSO = df_infection.loc[df_infection['Compound'] == 'DMSO']
# Well ids look like 'r01c23'; the characters from index 4 on are the
# two-digit column number. .assign returns a new frame, so the slice of
# df_infection is never modified in place.
df_DMSO = df_DMSO.assign(
    DMSO_control=df_DMSO.well_id.apply(
        lambda item: 1 if item[4:] in DMSO_control_columns else 0
    )
)

# Control group: DMSO wells located in the control columns.
wells_noninfection = df_DMSO[df_DMSO["DMSO_control"] == 1]
wells_noninfection_rates = wells_noninfection.infection_rates.to_list()
DMSO_control_rates_stat = dict(
    mean=np.mean(wells_noninfection_rates),
    std=np.std(wells_noninfection_rates),  # np.std default: population std (ddof=0)
)

# Infection group: every other DMSO well.
wells_infection = df_DMSO[df_DMSO["DMSO_control"] == 0]
wells_infection_rates = wells_infection.infection_rates.to_list()
DMSO_infection_rates_stat = dict(
    mean=np.mean(wells_infection_rates),
    std=np.std(wells_infection_rates),
)

print('DMSO_infection_rates_stat: ', DMSO_infection_rates_stat)
print('DMSO_control_rates_stat: ', DMSO_control_rates_stat)

spread = 3 * (DMSO_infection_rates_stat['std'] + DMSO_control_rates_stat['std'])
separation = np.abs(DMSO_infection_rates_stat['mean'] - DMSO_control_rates_stat['mean'])
z_factor = 1 - spread / separation
print('z factor: ', z_factor)