In [14]:
from umap import UMAP
from PIL import Image
import joblib
import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import SpectralClustering
import copy
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [3]:
class Cells:
    """Per-cell access to a labelled segmentation mask and an attached MSI tensor.

    The image at ``mask_path`` is used as a label image: pixel value ``k + 1``
    marks the pixels of cell ``k`` (every method takes the 0-based
    ``cell_serial`` and compares the mask against ``cell_serial + 1``).

    Attributes:
        mask_path: path the mask was read from.
        cell_masks: torch tensor holding the label image; indexed as
            ``(row, column)`` — assumed 2-D, TODO confirm for other inputs.
        cell_number: largest label value found in the mask.
        masks: list of ``cell_number`` zeros. Nothing in this class writes to
            it; kept only so existing code that reads it keeps working.
        MSI: tensor set by ``add_profile`` (``None`` until then). Indexed as
            ``MSI[:, y, x]``, i.e. axis 0 is treated as the channel axis.
    """

    def __init__(self, mask_path):
        """Read the label image at ``mask_path`` and record the largest label."""
        self.mask_path = mask_path
        # Read the pixels inside a context manager so the file handle is
        # released immediately instead of staying open with the Image object.
        with Image.open(mask_path) as mask_image:
            label_array = np.array(mask_image)
        self.cell_masks = torch.from_numpy(label_array)
        # The maximum label is all that is needed; no need to sort the image.
        self.cell_number = int(label_array.max())
        self.masks = [0] * self.cell_number
        self.MSI = None

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _check_serial(cell_serial):
        """Keep the original contract: ``cell_serial`` must be a plain ``int``."""
        assert type(cell_serial) == int, 'cell_serial must be a plain int'

    def _scale_factor(self):
        """Return ``int(mask width / MSI width)``.

        The same factor is applied to rows and columns, so this assumes the
        vertical scale equals the horizontal one — TODO confirm for new data.
        """
        if self.MSI is None:
            raise ValueError('No MSI profile attached; call add_profile() first')
        factor = int(self.cell_masks.shape[1] / self.MSI.shape[2])
        if factor < 1:
            raise ValueError('MSI is wider than the mask; cannot derive a scale factor')
        return factor

    def _cell_pixels(self, cell_serial):
        """Return the ``(N, 2)`` tensor of ``(row, column)`` mask positions of one cell."""
        positions = torch.nonzero(self.cell_masks == cell_serial + 1)
        if positions.shape[0] == 0:
            # Without this check the min/max reductions below fail with an
            # unhelpful "empty tensor" error.
            raise ValueError(
                f'Cell {cell_serial} (label {cell_serial + 1}) has no pixels in the mask'
            )
        return positions

    def _scatter(self, positions, values):
        """Write ``values`` (channels x N) into a zero tensor cropped to the bounding box.

        ``positions`` is ``(N, 2)`` with unique rows, so a single indexed
        assignment is equivalent to writing the pixels one at a time.
        """
        rows, cols = positions[:, 0], positions[:, 1]
        row0, col0 = rows.min(), cols.min()
        height = int(rows.max() - row0) + 1
        width = int(cols.max() - col0) + 1
        # torch.zeros uses the default dtype, exactly as before.
        profile = torch.zeros(self.MSI.shape[0], height, width)
        # Indexed assignment does not cast implicitly, so cast explicitly.
        profile[:, rows - row0, cols - col0] = values.to(profile.dtype)
        return profile

    def _profile_from_pixels(self, positions, factor):
        """Build the full-resolution profile for already-located mask pixels."""
        values = self.MSI[:, positions[:, 0] // factor, positions[:, 1] // factor] / (factor ** 2)
        return self._scatter(positions, values)

    # --------------------------------------------------------------- public API

    def cell_mask(self, cell_serial):
        """Return the boolean mask of cell ``cell_serial`` (computed on every call)."""
        return self.cell_masks == cell_serial + 1

    def add_profile(self, data):
        """Attach ``data`` as the MSI tensor, print a confirmation, return ``None``."""
        self.MSI = data
        print('Profile added')

    def cell_profile(self, cell_serial):
        """Return the cell's MSI profile at mask resolution, cropped to its bounding box.

        Each mask pixel ``(y, x)`` of the cell receives
        ``MSI[:, y // factor, x // factor] / factor**2``; positions inside the
        bounding box that do not belong to the cell stay zero.

        Returns:
            Tensor of shape ``(MSI.shape[0], box_height, box_width)`` in
            torch's default dtype.

        Raises:
            AssertionError: ``cell_serial`` is not a plain ``int``.
            ValueError: no MSI attached, or the cell has no pixels.
        """
        factor = self._scale_factor()
        self._check_serial(cell_serial)
        return self._profile_from_pixels(self._cell_pixels(cell_serial), factor)

    def ave_profile(self, cell_serial):
        """Return the per-channel sum of ``cell_profile`` divided by the cell's pixel count.

        Raises:
            AssertionError: ``cell_serial`` is not a plain ``int``.
            ValueError: no MSI attached, or the cell has no pixels.
        """
        factor = self._scale_factor()
        self._check_serial(cell_serial)
        # Locate the pixels once and reuse them for both the profile and the size.
        positions = self._cell_pixels(cell_serial)
        size = positions.shape[0]
        return torch.sum(self._profile_from_pixels(positions, factor), dim=(1, 2)) / size

    def cell_position(self, cell_serial):
        """Return ``((y_min, y_max, x_min, x_max), size)`` for one cell.

        The bounds are 0-dim tensors in mask coordinates; ``size`` is the
        number of mask pixels carrying the cell's label.

        Raises:
            ValueError: the cell has no pixels.
        """
        positions = self._cell_pixels(cell_serial)
        size = positions.shape[0]
        rows, cols = positions[:, 0], positions[:, 1]
        return (rows.min(), rows.max(), cols.min(), cols.max()), size

    def abstract_profile(self, cell_serial):
        """Return the cell's profile at MSI resolution, cropped to its bounding box.

        Mask positions are floor-divided by ``factor`` and de-duplicated; each
        resulting MSI pixel ``(y, x)`` receives
        ``MSI[:, y, x] * (count / factor**2)``, where ``count`` is the number
        of the cell's mask pixels that fall into it.

        Returns:
            Tensor of shape ``(MSI.shape[0], box_height, box_width)`` in
            torch's default dtype.

        Raises:
            AssertionError: ``cell_serial`` is not a plain ``int``.
            ValueError: no MSI attached, or the cell has no pixels.
        """
        factor = self._scale_factor()
        self._check_serial(cell_serial)
        coarse = self._cell_pixels(cell_serial) // factor
        positions, counts = torch.unique(coarse, dim=0, return_counts=True)
        # Multiply in the dtype that `MSI * python_float` would use, so the
        # vectorised product matches the former per-pixel scalar product.
        compute_dtype = torch.result_type(self.MSI, 1.0)
        weights = (counts.to(torch.float64) / (factor ** 2)).to(compute_dtype)
        values = self.MSI[:, positions[:, 0], positions[:, 1]].to(compute_dtype) * weights
        return self._scatter(positions, values)


In [6]:
# Open the segmentation mask image and keep a float copy of its pixel values.
# `mask` is later passed to get_image/segmentation1, which compare it against
# 1-based cell labels.
mask_img = Image.open(r'E:\CloudDrive\OneDrive - genseccoltd\Projects\cellpose\gancao30\mask\aufn moved 8 bit_cp_masks 7262.png')
mask = np.array(mask_img).astype(float)

In [7]:
# Load the saved bundle and keep element [1] — presumably the `data` entry of the
# (size_list, data, data_dr, Profile_list_dr) bundle named in the path; confirm.
# NOTE(review): joblib.load unpickles the file, so only load files you trust.
data = joblib.load(r'E:\CloudDrive\OneDrive - genseccoltd\Projects\vscode-jupyter\gancaofusion-data\size_list, data, data_dr, Profile_list_dr')[1]

In [10]:
# Build the per-cell accessor from the segmentation mask, then attach `data`
# (converted to a tensor) as its MSI profile. add_profile prints 'Profile added'.
cells = Cells(r'E:\CloudDrive\OneDrive - genseccoltd\Projects\cellpose\gancao30\mask\aufn moved 8 bit_cp_masks 7262.png')
cells.add_profile(torch.tensor(data))

Profile added


In [19]:
# Average profile of every cell: one row per cell, one column per profile channel.
n_cells = 7262
ave_rows = []

for i in range(n_cells):
    ave_rows.append(cells.ave_profile(i).numpy())
    
    if (i+1)%1000 == 0:
        print(i, ' done')

# Build the frame once from a numeric array. Filling a pre-allocated all-NaN
# frame row by row leaves every column with dtype object.
df = pd.DataFrame(np.stack(ave_rows).astype(np.float64))
assert df.shape == (n_cells, 901), df.shape

999  done
1999  done
2999  done
3999  done
4999  done
5999  done
6999  done


In [21]:
# Persist the per-cell average-profile frame so the loop above need not be re-run.
joblib.dump(df, r'E:\CloudDrive\OneDrive - genseccoltd\Projects\vscode-jupyter\gancaofusion-data\df_901_ave_profile')

['E:\\CloudDrive\\OneDrive - genseccoltd\\Projects\\vscode-jupyter\\gancaofusion-data\\df_901_ave_profile']

In [22]:
# Load the same bundle file again, this time keeping element [2] — presumably the
# `data_dr` entry named in the path; confirm.
# NOTE: `cells` is rebound here, so from this point on it carries `data_dr`
# as its MSI profile instead of `data`.
data_dr = joblib.load(r'E:\CloudDrive\OneDrive - genseccoltd\Projects\vscode-jupyter\gancaofusion-data\size_list, data, data_dr, Profile_list_dr')[2]
cells = Cells(r'E:\CloudDrive\OneDrive - genseccoltd\Projects\cellpose\gancao30\mask\aufn moved 8 bit_cp_masks 7262.png')
cells.add_profile(torch.tensor(data_dr))

Profile added


In [23]:
# Average profile of every cell for the profile currently attached to `cells`:
# one row per cell, one column per profile channel.
n_cells = 7262
ave_rows_dr = []

for i in range(n_cells):
    ave_rows_dr.append(cells.ave_profile(i).numpy())
    
    if (i+1)%1000 == 0:
        print(i, ' done')

# Build the frame once from a numeric array. Filling a pre-allocated all-NaN
# frame row by row leaves every column with dtype object.
df_dr = pd.DataFrame(np.stack(ave_rows_dr).astype(np.float64))
assert df_dr.shape == (n_cells, 10), df_dr.shape

999  done
1999  done
2999  done
3999  done
4999  done
5999  done
6999  done


In [24]:
# Persist the 10-column per-cell average-profile frame.
joblib.dump(df_dr, r'E:\CloudDrive\OneDrive - genseccoltd\Projects\vscode-jupyter\gancaofusion-data\df_10_ave_profile')

['E:\\CloudDrive\\OneDrive - genseccoltd\\Projects\\vscode-jupyter\\gancaofusion-data\\df_10_ave_profile']

In [26]:
# Display the frame's (rows, columns) shape as a quick sanity check.
df_dr.shape

(7262, 10)

In [30]:
def get_image(mask, arr, cell_number):
    """Paint each labelled cell of ``mask`` with its value from ``arr``.

    Args:
        mask: label image; a pixel equal to ``i + 1`` belongs to cell ``i``.
        arr: 1-D sequence of numeric values; ``arr[i]`` is the value for cell ``i``.
        cell_number: number of cells to paint (cells ``0 .. cell_number - 1``).

    Returns:
        float64 array with ``mask``'s shape. Pixels whose label is not an
        integer in ``1 .. cell_number`` are 0.

    Raises:
        IndexError: ``arr`` holds fewer than ``cell_number`` values.
    """
    labels = np.asarray(mask)
    values = np.atleast_1d(np.asarray(arr, dtype=float))
    n_cells = max(int(cell_number), 0)
    if values.shape[0] < n_cells:
        raise IndexError(
            f'arr holds {values.shape[0]} values but cell_number is {n_cells}'
        )

    # Lookup table: slot 0 is the background (0), slot k is the value of cell k - 1.
    lookup = np.zeros(n_cells + 1, dtype=float)
    lookup[1:] = values[:n_cells]

    # One pass over the image instead of one full-image comparison per cell.
    with np.errstate(invalid='ignore'):  # NaN/inf labels are rejected just below
        index = labels.astype(np.intp)
    # Only exact integer labels inside 1..n_cells are painted; all else maps to slot 0.
    paintable = (labels == index) & (index >= 1) & (index <= n_cells)
    return lookup[np.where(paintable, index, 0)]

def segmentation1(dr_df, cluster_numbers, mask, cell_number):
    """Cluster the cells with K-means once per requested cluster count.

    Args:
        dr_df: per-cell feature table handed to ``KMeans.fit``.
        cluster_numbers: iterable of cluster counts to try.
        mask: label image forwarded to ``get_image``.
        cell_number: number of cells forwarded to ``get_image``.

    Returns:
        List with one image per entry of ``cluster_numbers``, in the same
        order. Each image is ``get_image`` applied to the K-means labels
        shifted by +1, so that 0 stays free for unlabelled pixels.
    """

    def _label_image(k):
        # Fixed random_state keeps the clustering reproducible between runs.
        fitted = KMeans(n_clusters=k, random_state=0).fit(dr_df)
        shifted_labels = fitted.labels_ + 1
        return get_image(mask, shifted_labels, cell_number)

    return [_label_image(k) for k in cluster_numbers]

In [None]:
# K-means label images for every cluster count from 3 to 18 (range(3, 19)),
# computed from the 10-column average profiles over the 7262 cells.
segs = segmentation1(df_dr, range(3,19), mask, 7262)

In [None]:
# 4 x 4 grid, one panel per cluster count (3..18) held in `segs`.
fig, axes = plt.subplots(4,4
                       ,figsize=(40,35)
                       ,subplot_kw = {"xticks":[],"yticks":[]} 
                       )
for i, ax in enumerate(axes.flat):
    # Panel i shows segs[i], which has i + 3 clusters labelled 1..i+3 plus
    # background 0, hence i + 4 discrete colour levels.
    n_levels = i + 4
    # `segs` comes from the previous cell; the `data` array loaded earlier has
    # no `seg_pics_group_1` attribute. plt.get_cmap replaces the deprecated
    # plt.cm.get_cmap.
    picture = ax.imshow(segs[i], cmap=plt.get_cmap('tab20', n_levels))
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("bottom", size="5%", pad=0.05)
    plt.colorbar(picture, cax=cax, ticks=range(n_levels), orientation = "horizontal")
plt.tight_layout()