In [1]:
# How do aggregates differ across time and space?
# Aggregates have regions of dead and alive cells - a couple of images to show that - shown
# Aggregates are made up of multiple nuclei - image to show that (Python or something) - shown
# Aggregate composition may differ across treatment types and times
# Across time and treatment type, nuclei may be of different intensities, distributed across the cells

# Are these linear relationships or not?
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import tiffile as tiff
import glob
import re
import random
from skimage.feature import SIFT as sift
import itertools
import anndata as ad
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import datetime

# Seed Python's built-in `random` module so any use of it is repeatable.
# NOTE(review): this does not seed NumPy or scikit-learn random state — confirm
# whether those libraries also need an explicit seed for this analysis.
random.seed(0)


def blockshaped(arr, nrows, ncols):
    """
    Cut a 2D array into non-overlapping tiles of shape (nrows, ncols).

    The result has shape (n, nrows, ncols) with n * nrows * ncols == arr.size.
    Tiles are ordered row-major over the tile grid (left to right, then top to
    bottom), and each tile keeps the spatial layout it had inside `arr`.

    Raises AssertionError when the array height is not a multiple of `nrows`
    or the width is not a multiple of `ncols`.
    """
    height, width = arr.shape
    assert height % nrows == 0, f"{height} rows is not evenly divisible by {nrows}"
    assert width % ncols == 0, f"{width} cols is not evenly divisible by {ncols}"

    # axes after the first reshape: (tile_row, row_in_tile, tile_col, col_in_tile)
    tiles_down = height // nrows
    grid = arr.reshape(tiles_down, nrows, -1, ncols)
    # bring the two tile-index axes together, then flatten them into one
    tile_major = grid.swapaxes(1, 2)
    return tile_major.reshape(-1, nrows, ncols)


def entropy(binary_image, block_size=100):
    """
    Compute per-block entropy terms for the spatial distribution of a binary mask.

    The mask is tiled into square blocks of side `block_size` (via `blockshaped`).
    For each block, p = (positive pixels in block) / (positive pixels in image),
    and the block contributes -p * log(p). Blocks with no positive pixels
    contribute 0.

    Parameters
    ----------
    binary_image : 2D array
        Mask whose nonzero/True entries are counted. Both dimensions must be
        divisible by `block_size` (enforced by `blockshaped`).
    block_size : int, default 100
        Side length, in pixels, of the square blocks.

    Returns
    -------
    list
        One entropy term per block, in the order produced by `blockshaped`.
        Summing the list gives the total entropy over blocks.
    """
    # "* 1" turns a boolean mask into integers before summing
    total_cells = np.sum(binary_image * 1)
    blocks = blockshaped(binary_image, block_size, block_size)

    list_of_entropies = []
    for block in blocks:
        cells_in_block = np.sum(block * 1)
        if cells_in_block != 0:
            # a non-empty block implies total_cells > 0, so the division is safe
            fraction = cells_in_block / total_cells
            list_of_entropies.append(-(fraction * np.log(fraction)))
        else:
            # empty block: treat 0 * log(0) as 0
            list_of_entropies.append(0)
    return list_of_entropies


def sorted_nicely( l ):
    """Return a new list with the strings of `l` in natural (human) order.

    Runs of digits are compared as integers and everything else as text, so
    'a2' sorts before 'a10'.
    """
    def natural_key(item):
        # the capturing group keeps the digit runs in the split result
        pieces = re.split('([0-9]+)', item)
        return [int(piece) if piece.isdigit() else piece for piece in pieces]

    return sorted(l, key=natural_key)



# Pixel-intensity threshold; matches the 3.5 cutoff applied to the red channel in this notebook.
pixel_thresh = 3.5
# NOTE(review): not referenced anywhere in the visible notebook, and the name looks
# like a typo of "interval" — confirm its purpose before renaming or removing it.
lop_off_inteval = 116

In [2]:
# plate rows to use; each is combined with the even-numbered columns 2..12
high_et = ['B','D','F','H','J']

# one list of well ids per plate column, e.g. ['B2', 'D2', 'F2', 'H2', 'J2']
rasa_slider = [
    [row_letter + str(column) for row_letter in high_et]
    for column in range(2, 14, 2)
]

print(rasa_slider)

# keep only two columns for the comparison: the second and the second-to-last
low_high_rasa = [rasa_slider[1], rasa_slider[-2]]

# use very small subset of wells for proof of concept
# low_high_rasa = [['B4', 'D4'], ['B10', 'D10']]

# integer donor IDs to include in the analysis
donor_ids = [1,2,3,4]
# donor_ids = [1]

[['B2', 'D2', 'F2', 'H2', 'J2'], ['B4', 'D4', 'F4', 'H4', 'J4'], ['B6', 'D6', 'F6', 'H6', 'J6'], ['B8', 'D8', 'F8', 'H8', 'J8'], ['B10', 'D10', 'F10', 'H10', 'J10'], ['B12', 'D12', 'F12', 'H12', 'J12']]


In [3]:
# display the two selected well groups (second and second-to-last entries of rasa_slider)
low_high_rasa

[['B4', 'D4', 'F4', 'H4', 'J4'], ['B10', 'D10', 'F10', 'H10', 'J10']]

In [4]:
def plot_representative_fn(well_id, donor_id, time_point, trim=100, lab_folder_path='/gladstone/engelhardt/lab/', save_path=None, red_threshold=3.5):
    '''
    Plot the phase (bright field) image of one well with the thresholded red
    channel mask drawn beneath it.

    Parameters
    ----------
    well_id : str
        Well identifier, e.g. 'B4'. Files are selected when their path contains
        `well_id + "_"`.
    donor_id : int
        Donor number used to build the `Donor{}` image directory.
    time_point : int
        Index into the naturally sorted list of image files for this well.
    trim : int, default 100
        Number of pixels cropped from every edge before plotting. 0 (or None)
        plots the full frame.
    lab_folder_path : str
        Root folder that contains `MarsonLabIncucyteData/`.
    save_path : str or None
        When given, the figure is written to this path at 300 dpi.
    red_threshold : float, default 3.5
        Red-channel pixels strictly above this value form the mask.

    Raises
    ------
    IndexError
        If no file exists for this well/donor at index `time_point`.
    '''
    # build the glob patterns for both channels of this donor
    registered_dir = lab_folder_path + "MarsonLabIncucyteData/AnalysisFiles/4DonorAssay/registered_images/Donor{}/".format(donor_id)
    files_phase = glob.glob(registered_dir + "phase_registered/*tif")
    files_red = glob.glob(registered_dir + "red_registered/*tif")

    # keep the files of the requested well, ordered so that list index == time point
    # NOTE(review): the match is a substring test on the full path — confirm no
    # directory component can contain e.g. "B4_".
    well_tag = well_id + "_"
    sorted_file_list_phase = sorted_nicely([s for s in files_phase if well_tag in s])
    sorted_file_list_red = sorted_nicely([s for s in files_red if well_tag in s])

    # load both channels at the requested time point
    phase_image = tiff.imread(sorted_file_list_phase[time_point])
    red_image = tiff.imread(sorted_file_list_red[time_point])

    # threshold the red channel into a binary mask
    red_frame = red_image > red_threshold

    # rescale the phase image to the full 0-255 range
    phase_frame = cv.normalize(phase_image, None, 0, 255, cv.NORM_MINMAX).astype('uint8')

    # honour the caller's trim; a plain [0:-0] slice would be empty, so treat
    # a zero/None trim as "no cropping"
    crop = slice(trim, -trim) if trim else slice(None)

    # red mask first, then the semi-transparent phase image on top
    plt.figure(figsize=(6,6))
    plt.imshow(red_frame[crop, crop], cmap='Reds', alpha = 1.0)
    plt.imshow(phase_frame[crop, crop], cmap='gray', alpha = .75)
    plt.title('Well: {}, Donor: {}, Time: {}'.format(well_id,donor_id,time_point), size = 12)
    # remove the axes ticks and labels
    plt.axis('off')

    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()


# plot_representative_fn(well_id='B4', donor_id=1, time_point=25, save_path=None)

In [5]:


# Build one AnnData object per image (rows = SIFT keypoints of the phase image,
# columns = the 128 SIFT descriptor values) and concatenate them into `adata`.
adata_list = []

# set the appropriate lab folder path based on the location of the data
# lab_folder_path = '/Volumes/Engelhardt-Lab/'
lab_folder_path = '/gladstone/engelhardt/lab/'

for donor_number in donor_ids:
    donor_location_phase = lab_folder_path + "MarsonLabIncucyteData/AnalysisFiles/4DonorAssay/registered_images/Donor{}/phase_registered/*tif".format(donor_number)
    donor_location_red = lab_folder_path + "MarsonLabIncucyteData/AnalysisFiles/4DonorAssay/registered_images/Donor{}/red_registered/*tif".format(donor_number)

    print(donor_location_phase)
    files_phase = glob.glob(donor_location_phase)
    files_red = glob.glob(donor_location_red)

    # rasa_status is the index of the well group inside low_high_rasa
    # (0 for the first group, 1 for the second). It is taken from enumerate so
    # every well of a group gets the same label; the counter used to be bumped
    # once per well, which labelled the wells 0..9 instead.
    for rasa_status, rasa_selection in enumerate(low_high_rasa):
        for sliding_rasa in rasa_selection:

            # find all the file names for the red and phase channels of this donor and well,
            # sorted naturally so that list position corresponds to time point
            well_tag = sliding_rasa + "_"
            sorted_file_list_phase = sorted_nicely([s for s in files_phase if well_tag in s])
            sorted_file_list_red = sorted_nicely([s for s in files_red if well_tag in s])
            print(sliding_rasa,donor_number,len(sorted_file_list_red))

            # both channels are indexed with the same t, so only iterate over
            # time points that exist in both lists
            max_times = min(len(sorted_file_list_phase), len(sorted_file_list_red))

            # loop through all the time points
            for t in range(max_times):

                # load the images
                resized_latish_phase = tiff.imread(sorted_file_list_phase[t])
                resized_latish_red = tiff.imread(sorted_file_list_red[t])

                # threshold the red channel into a binary mask
                red_frame = resized_latish_red > pixel_thresh

                # normalize intensity of the phase image
                phase_frame = cv.normalize(resized_latish_phase, None, 0, 255, cv.NORM_MINMAX).astype('uint8')

                # entropy is computed on the mask with 16 px removed from every edge
                # (presumably so both dimensions divide into the 100 px entropy
                # blocks — TODO confirm); the area uses the full, uncropped mask
                red_resized = red_frame[16:-16,16:-16]
                red_entropy = np.sum(entropy(red_resized))
                p_areas = np.sum((red_frame * 1).ravel())

                # detect SIFT keypoints and extract descriptors from the phase image
                descriptor_extractor = sift()
                descriptor_extractor.detect_and_extract(phase_frame)
                keypoints1 = descriptor_extractor.keypoints
                descriptors1 = descriptor_extractor.descriptors
                scales1 = descriptor_extractor.octaves

                # create AnnData object from the SIFT descriptors
                temp_adata = ad.AnnData(X=descriptors1)

                # add metadata to the AnnData object (scalars are broadcast to every keypoint)
                temp_adata.obs['donor_id'] = donor_number
                temp_adata.obs['time'] = t
                temp_adata.obs['rasa'] = rasa_status
                temp_adata.obs['well_id'] = sliding_rasa
                temp_adata.obs['entropy'] = red_entropy
                temp_adata.obs['p_areas'] = p_areas
                temp_adata.obs['filename'] = sorted_file_list_phase[t]
                # NOTE(review): this column stores the extractor's `octaves`
                # attribute, not its `scales` attribute — confirm that is intended.
                temp_adata.obs['scales'] = scales1
                # NOTE(review): scikit-image documents keypoints as (row, col), so
                # 'x' would hold the row coordinate and 'y' the column — confirm
                # against downstream usage before relying on these names.
                temp_adata.obs['x'] = keypoints1[:,0]
                temp_adata.obs['y'] = keypoints1[:,1]

                # add a sift_ prefix to the variable names
                temp_adata.var_names = ['sift_{}'.format(i) for i in range(128)]

                # store the adata object for this image
                adata_list.append(temp_adata)


# concatenate all the adata objects by row; every per-image object numbers its
# observations from 0, so append the image's position in adata_list to each
# name to keep obs names unique
adata = ad.concat(adata_list, join='outer', index_unique='-')

adata

/gladstone/engelhardt/lab/MarsonLabIncucyteData/AnalysisFiles/4DonorAssay/registered_images/Donor1/phase_registered/*tif
B4 1 67


D4 1 67


F4 1 67


H4 1 67


J4 1 67


B10 1 67


D10 1 67


F10 1 67


H10 1 67


J10 1 67


/gladstone/engelhardt/lab/MarsonLabIncucyteData/AnalysisFiles/4DonorAssay/registered_images/Donor2/phase_registered/*tif
B4 2 66


D4 2 66


F4 2 66


H4 2 66


J4 2 66


B10 2 66


D10 2 66


F10 2 66


H10 2 67


J10 2 67


/gladstone/engelhardt/lab/MarsonLabIncucyteData/AnalysisFiles/4DonorAssay/registered_images/Donor3/phase_registered/*tif
B4 3 68


D4 3 68


F4 3 68


H4 3 68


J4 3 68


B10 3 68


D10 3 68


F10 3 68


H10 3 68


J10 3 68


/gladstone/engelhardt/lab/MarsonLabIncucyteData/AnalysisFiles/4DonorAssay/registered_images/Donor4/phase_registered/*tif
B4 4 68


D4 4 68


F4 4 68


H4 4 68


J4 4 68


B10 4 68


D10 4 68


F10 4 68


H10 4 68


J10 4 68


  utils.warn_names_duplicates("obs")


AnnData object with n_obs × n_vars = 7202103 × 128
    obs: 'donor_id', 'time', 'rasa', 'well_id', 'entropy', 'p_areas', 'filename', 'scales', 'x', 'y'

In [6]:
# show the shape of the adata.obs rows (one row per SIFT keypoint) with well_id 'B4'
# at the first time point (time == 0); the filter does not restrict donor_id,
# so rows from every donor are counted
adata.obs[(adata.obs['well_id'] == 'B4') & (adata.obs['time'] == 0)].shape

(4731, 10)

In [7]:
# standardize each SIFT descriptor dimension (zero mean, unit variance) so that
# no single dimension dominates the PCA
scaler = StandardScaler()
X = scaler.fit_transform(adata.X)

# run pca using 30 components; random_state is fixed because scikit-learn may
# pick a randomized SVD solver for data of this size, and random.seed() does
# not control scikit-learn's random state
pca = PCA(n_components=30, random_state=0)
x_new = pca.fit_transform(X)

# add the pca coordinates to the adata object (one row per keypoint)
adata.obsm['X_pca'] = x_new



In [8]:
# extract the current date in the format YYYYMMDD; it is used to stamp the output file name
current_date = datetime.datetime.now().strftime("%Y%m%d")

# save the adata object under the same lab folder the images were read from
# (with lab_folder_path = '/gladstone/engelhardt/lab/' this is the same file as before)
output_path = lab_folder_path + 'adamw/saft_figuren/analysis/adata_{}.h5ad'.format(current_date)
adata.write(output_path)