In [33]:
# import itertools
import os
import sys
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io

from collections import defaultdict
from tqdm.auto import tqdm
from joblib import Parallel, delayed
import re
import h5py
import napari
import tifffile as tiff
import seaborn as sns
import pickle

In [34]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded automatically before each cell is executed.
# NOTE(review): %load_ext prints an "already loaded" notice when this cell is
# re-run in a live kernel (see the captured output); %reload_ext would avoid it.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# Project root is the parent of the notebook's working directory;
# local modules are imported from <root>/src.
p_dir = Path.cwd().parents[0].absolute()
module_path = str(p_dir.joinpath("src"))

# Register the folder only once so re-running the cell does not duplicate it.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [36]:
# Data and figure folders sit in the parent of the notebook's working directory.
data_dir = Path.cwd().parents[0].joinpath('data').absolute()
fig_dir = Path.cwd().parents[0].joinpath('figure').absolute()

# Image metadata table (imgs_reg_norm.csv) of the 9PPI cell-culture dataset.
df_meta_path = data_dir.joinpath('9PPI Cell Culture', 'Whole', 'metadata', 'imgs_reg_norm.csv')
df_imgs = pd.read_csv(df_meta_path)

# Motif counts 

In [37]:
import networkx as nx
from sklearn.preprocessing import OneHotEncoder

def read_pkl(path):
    """Load and return the object stored in the pickle file at ``path``.

    Only use on trusted files: unpickling can execute arbitrary code.
    """
    with open(path, mode='rb') as handle:
        return pickle.load(handle)

In [38]:
# Get encoder
PPI_save_path =  data_dir / '9PPI Cell Culture' / 'Whole' / 'PPI'

# Read PPI info.
# Only files ending in ``.csv`` are loaded, and in sorted order: directory
# listing order is arbitrary, while the row order of ``df`` decides the order
# of ``df['PPI'].unique()`` and therefore the motif column numbering computed
# later in the notebook.
dfs = [pd.read_csv(csv_path) for csv_path in sorted(PPI_save_path.glob('*.csv'))]

df = pd.concat(dfs)

# Keep only the nine PPI pairs analysed in this notebook
SELECTED_PPIS = ['Cyclin D1 & CDK2', 'NF-Kb & p-P90rsk', 'Sox2 & Oct4',
       'Bim & Tom20', 'Mcl-1 & BAK', 'p-ERK & c-MYC', 'TEAD1 & YAP',
       'Cyclin E & CDK4', 'P-AKT & mTOR']
df = df[df.PPI.isin(SELECTED_PPIS)]

# Create label: one-hot encode the PPI name of every row
enc = OneHotEncoder(handle_unknown='ignore')
ppi_column = df['PPI'].to_numpy().reshape(-1, 1)
enc.fit(ppi_column)
labels = enc.transform(ppi_column).toarray().astype(np.uint8)

In [39]:
# PPI name -> its one-hot vector (uint8), in order of first appearance in df
mappings = {
    ppi: enc.transform(np.array([[ppi]])).toarray().astype(np.uint8)[0]
    for ppi in df['PPI'].unique()
}

In [40]:
# Number of rows per (Condition, FOV, PPI, Cyto id); rows with id 0 are dropped
# (presumably rows not assigned to any cell - confirm against the segmentation).
g = df.groupby(['Condition', 'FOV', 'PPI', 'Cyto']).size()
df_cell = g.reset_index(name='Count_cell')
df_cell = df_cell[df_cell['Cyto'] != 0].rename(columns={'Cyto': 'Id'})

# Same count keyed by the Nuclei id, again dropping id 0
g = df.groupby(['Condition', 'FOV', 'PPI', 'Nuclei']).size()
df_nuclei = g.reset_index(name='Count_nuclei')
df_nuclei = df_nuclei[df_nuclei['Nuclei'] != 0].rename(columns={'Nuclei': 'Id'})


In [41]:
# Attach nuclear counts to every per-cell row; rows without a match get 0
df_all = pd.merge(
    df_cell,
    df_nuclei,
    how='left',
    on=['Condition', 'FOV', 'PPI', 'Id'],
).fillna(0)

In [42]:
# Nuclear counts back to integers (the merge + fillna step yields floats),
# then cytoplasmic count = whole-cell count minus nuclear count.
df_all = df_all.assign(
    Count_nuclei=lambda d: d['Count_nuclei'].astype(int),
    Count_cyto=lambda d: d['Count_cell'] - d['Count_nuclei'],
)

In [43]:
# Keep a cell only when
#   * its total count over all PPIs is above min_count, and
#   * every single PPI count of that cell is below max_count
min_count = 100
max_count = 400

cell_keys = ['Condition', 'FOV', 'Id']

total_per_cell = df_all.groupby(cell_keys)['Count_cell'].transform('sum')
df_all = df_all[total_per_cell > min_count]

peak_per_cell = df_all.groupby(cell_keys)['Count_cell'].transform('max')
df_all = df_all[peak_per_cell < max_count]

# Summary statistics of the per-cell totals after filtering
df_all.groupby(cell_keys).sum().describe()

Unnamed: 0,Count_cell,Count_nuclei,Count_cyto
count,1340.0,1340.0,1340.0
mean,336.203731,122.502985,213.700746
std,142.753623,50.508549,118.238184
min,102.0,0.0,-32.0
25%,225.5,90.0,125.0
50%,315.0,119.0,194.0
75%,426.0,151.0,286.25
max,1202.0,412.0,1202.0


In [44]:
# Drop rows whose nuclear count exceeds the whole-cell count (negative cytoplasmic count)
df_all = df_all.loc[df_all['Count_cyto'].ge(0)]

In [45]:
# One row per remaining cell, with all three key columns cast to strings
df_cell = df_all[['Condition', 'FOV', 'Id']].drop_duplicates().astype(str)

In [46]:
import PLA

# Locations of the stored graphs and contours
graph_save_path = data_dir.joinpath('9PPI Cell Culture', 'Whole', 'graphs', 'raw')
contour_save_path = data_dir.joinpath('9PPI Cell Culture', 'Whole', 'contours')

# PLA.cell_data takes the contour folder first, then the graph folder
cell_data = PLA.cell_data(contour_save_path, graph_save_path)

Loading graphs
Loading contours


In [47]:
# Look up the cell_data entry of every selected cell ('Id' on the left matches 'Cell' on the right)
df_new = pd.merge(
    df_cell,
    cell_data.df,
    how='left',
    left_on=['Condition', 'FOV', 'Id'],
    right_on=['Condition', 'FOV', 'Cell'],
)

In [48]:
from itertools import combinations_with_replacement
from grandiso import find_motifs

def generate_list_motifs(PPI_pairs, n=3):
    '''
    Enumerate candidate motifs built from the given PPI names.

    Returns a list of tuples: every combination with replacement of
    PPI_pairs of size 2, then size 3, ... up to and including size n.
    The list is empty when n < 2.
    '''
    return [
        combo
        for size in range(2, n + 1)
        for combo in combinations_with_replacement(PPI_pairs, size)
    ]

def count_motifs(network, comb, mappings=mappings):
    '''
    Count the matches of every labelled motif in one graph.

    For each tuple in comb a complete graph with one node per entry is built;
    node i stores mappings[entry_i] (converted to a list) in its "labels"
    attribute. The motif is then searched in network with grandiso's
    find_motifs.

    Parameters
    ----------
    network : networkx graph searched for the motifs
    comb : iterable of tuples of PPI names (keys of mappings)
    mappings : dict, PPI name -> array-like with a .tolist() method
        (defaults to the notebook-level mappings at definition time)

    Returns
    -------
    motifs : list of networkx graphs, one per entry of comb
    counts : list of int, number of matches per motif, same order as comb

    NOTE(review): find_motifs reports node mappings, so motifs with repeated
    labels are probably counted once per symmetric assignment - confirm if
    unique occurrences are wanted.
    '''
    motifs = []
    counts = []
    for c in comb:
        motif = nx.complete_graph(len(c))
        # Named node_labels so it does not shadow the notebook-level `labels`
        node_labels = {i: mappings[marker].tolist() for i, marker in enumerate(c)}
        nx.set_node_attributes(motif, node_labels, "labels")

        motifs.append(motif)
        # count_only=True returns the number of matches directly instead of
        # building the full list of mappings just to take its length
        counts.append(find_motifs(motif, network, count_only=True))

    return motifs, counts

In [None]:
PPI_pairs = list(mappings.keys())
comb = generate_list_motifs(PPI_pairs, n=3)

# Parallel per-cell results: entry i of counts_all belongs to ids[i] / conditions[i]
counts_all = []
ids = []
conditions = []
for _, row in tqdm(df_new.iterrows(), total=len(df_new), desc='Counting motifs'):
    # Info (columns addressed by name rather than by position)
    condition, fov, cell = row['Condition'], row['FOV'], row['Id']
    cell_id = f'{condition}_{fov}_{cell}'

    # df_new comes from a left merge, so a cell without a match has no Path
    if pd.isna(row['Path']):
        print(f'No graph found for {cell_id}; skipping')
        continue

    # Get graph
    network = read_pkl(row['Path'])

    # Report empty or disconnected graphs (nx.is_connected raises on an empty graph)
    if network.number_of_nodes() == 0 or not nx.is_connected(network):
        print(row)

    # Get PPI counts; self-loops are removed before matching
    network.remove_edges_from(list(nx.selfloop_edges(network)))
    _, counts = count_motifs(network, comb)

    # Append all three lists together so they stay aligned
    counts_all.append(counts)
    ids.append(cell_id)
    conditions.append(condition)


In [None]:
# One row per cell id, one column per motif (numbered as in comb), plus the condition
df_count_motifs = pd.DataFrame(counts_all, index=ids).assign(Condition=conditions)

# Keep only columns with at least one non-zero entry
has_nonzero = df_count_motifs.ne(0).any(axis=0)
df_count_motifs = df_count_motifs.loc[:, has_nonzero]

# Motif definitions: row k lists the PPI names of motif k (PPI3 stays empty for pairs)
df_labels = pd.DataFrame(comb, columns=['PPI1', 'PPI2', 'PPI3'])

In [54]:
# Display the motif-count table: index = cell id, columns = retained motif numbers + Condition
df_count_motifs

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,201,202,203,204,205,206,207,208,209,Condition
HCC827Ctrl_FW1_15,406,65,83,43,0,60,7,3,3,42,...,0,0,0,1,0,0,0,2,0,HCC827Ctrl
HCC827Ctrl_FW1_16,146,55,45,12,4,45,4,0,0,54,...,0,0,0,0,0,0,0,0,0,HCC827Ctrl
HCC827Ctrl_FW1_17,204,62,52,48,0,33,2,0,0,28,...,0,0,0,0,0,0,0,0,0,HCC827Ctrl
HCC827Ctrl_FW1_18,606,144,133,112,0,15,0,6,0,48,...,0,0,0,0,0,0,0,0,0,HCC827Ctrl
HCC827Ctrl_FW1_19,300,62,67,55,0,47,0,0,0,24,...,0,0,0,0,0,0,0,0,0,HCC827Ctrl
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCC827Osim_FW2_343,650,113,153,55,0,17,0,3,0,86,...,0,0,0,0,0,0,0,0,0,HCC827Osim
HCC827Osim_FW2_344,428,108,75,49,0,22,0,1,0,70,...,0,0,0,0,0,0,0,0,0,HCC827Osim
HCC827Osim_FW2_345,564,97,158,32,0,24,0,0,0,22,...,0,0,0,0,0,0,0,0,0,HCC827Osim
HCC827Osim_FW2_182,6,0,2,0,0,1,3,0,29,2,...,0,0,0,0,0,0,0,0,2622,HCC827Osim


In [None]:
# Motif counts per cell.
# NOTE(review): index=False drops the row index of df_count_motifs, which holds
# the '<Condition>_<FOV>_<Id>' cell identifiers - confirm that downstream code
# does not need them before relying on this file.
save_path = data_dir.joinpath('9PPI Cell Culture', 'Whole', 'metadata', 'motifs.csv')
df_count_motifs.to_csv(save_path, index=False)

# Motif definitions; the row index written here is the motif number used as
# column name in the counts table.
save_path = data_dir.joinpath('9PPI Cell Culture', 'Whole', 'metadata', 'motifs_labels.csv')
df_labels.to_csv(save_path)

In [None]:
# Show the path of the last file written (save_path was last set to the motif labels CSV)
save_path