Merge pull request #8 from aron0093/development

Release v0.2.0
aron0093 · Jul 18, 2023 · c6e8e31 · c6e8e31
2 parents 5bc826f + 2178db2
commit c6e8e31
Show file tree

Hide file tree

Showing 10 changed files with 350 additions and 487 deletions.
diff --git a/README.md b/README.md
@@ -17,12 +17,13 @@ cytopath depends on *scvelo* to process the data and may require additional depe
 * python>=3.7
 * numpy
 * scipy
-* pandas==1.4.4
+* pandas==1.4.1
 * anndata
 * scvelo>=0.1.25
 * joblib
 * fastdtw
 * hausdorff
+* scikit-learn==0.22.2
 * networkit
 * numba
 * tqdm

diff --git a/cytopath/markov_chain_sampler.py b/cytopath/markov_chain_sampler.py
@@ -87,8 +87,8 @@ def iterate_state_probability(adata, matrix_key='T_forward', init=None, stationa
     return state_history, state_history_max_iter, convergence_check
 
 def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'louvain', max_steps=10, min_sim_ratio=0.6, rounds_limit=10, traj_number=50, sim_number=500,
-                end_point_probability=0.99, root_cell_probability=0.99, end_points=None, root_cells=None, end_clusters=None, root_clusters=None, min_clusters=3, tol=1e-3,
-                normalize=False, unique=True, num_cores=1, copy=False):
+             end_point_probability=0.99, root_cell_probability=0.99, end_points=None, root_cells=None, end_clusters=None, root_clusters=None, min_clusters=2, 
+             max_iter=200, tol=1e-3, normalize=False, unique=True, num_cores=1, copy=False):
 
     """Markov sampling of cell sequences starting from defined root cells to defined terminal regions based on a cell-cell transition probability matrix.
     Arguments
@@ -123,8 +123,10 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
         Cluster IDs to be considered end points. Precendence over end_point_probability.
     root_clusters: `list` (default: None)
         Cluster IDs to be considered root states. Precendence over root_state_probability.
-    min_clusters: `integer` (default:3)
+    min_clusters: `integer` (default:2)
         Minium number of clusters covered by each simulation. (cluster_key)
+    max_iter: `integer` (default: 200)
+        Maximum simulation iterations to test to auto-select max_steps.
     tol: `float` (default:1e-3)
         Convergence criteria for Markov sampling.
     normalize: 'Boolean' (default: False)
@@ -235,7 +237,7 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
                 end_points = np.asarray(np.where(np.asarray(adata.obs["end_points"]) >= end_point_probability))[0]
                 #TODO
                 '''
-                 # Recluster end points if based on probability
+                # Recluster end points if based on probability
                 end_points_adata = adata[end_points]
                 scvelo.tl.louvain(end_points_adata)
                 adata.obs['updated_'+cluster_key] = adata.obs[cluster_key].astype(str).values
@@ -255,6 +257,9 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
     adata.uns['run_info']['end_points'] = end_points
     end_point_clusters = adata.obs[cluster_key][end_points].astype(str).values
     end_clusters_ = np.unique(end_point_clusters)
+
+    if len(end_clusters_)==0:
+        raise ValueError('Terminal cluster set is empty.')
 
     # Adjust simulation parameters based on data
     if auto_adjust:
@@ -270,7 +275,7 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
         max_steps = iterate_state_probability(adata, matrix_key=matrix_key, 
                                               init = (adata.obs['root_cells']/adata.obs['root_cells'].sum()).values, 
                                               stationary=(adata.obs['end_points']/adata.obs['end_points'].sum()).values, 
-                                              max_iter=200, tol=tol)[-1]
+                                              max_iter=max_iter, tol=tol)[-1]
         print('Number of initial simulation steps (max_steps) set to {}'.format(max_steps)) 
 
     # Initialize all empty lists
@@ -284,7 +289,6 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
     ratio_obtained = 0
 
     count = 0 # Iterate sampling runs
-    old_step_increment=0
     # Resample samples until the number of trajectories for each end point has been reached.
     while (min(traj_num) < traj_number): # or (ratio_obtained < 0.1)
 
@@ -354,7 +358,7 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
         ratio_min = np.round(min(traj_num)/traj_number, 4)
 
         if count==rounds_limit+1:
-            if (min(traj_num) < min_sim_ratio*traj_number) or (ratio_obtained < 0.1):
+            if (min(traj_num) < min_sim_ratio*traj_number): #or (ratio_obtained < 0.1):
 
                 print('{} % of required simulations obtained for lagging end point {}.'.format(np.round(ratio_min*100, 2),
                                                                                                             end_clusters_[np.argmin(traj_num)]))

diff --git a/cytopath/plotting.py b/cytopath/plotting.py
@@ -2,8 +2,10 @@
 from .plotting_functions.plot_sampling import pl_cytopath_sampling
 from .plotting_functions.plot_radial_heatmap import radial_heatmap
 
-def plot_trajectories(data, directory='', basis='', size=10, figsize=(12,3), save_type='svg', smoothing=False):
-    pl_cytopath_alignment(data, folder=directory, basis=basis, size=size, figsize=figsize, smoothing=smoothing, save_type=save_type)
+def plot_trajectories(data, basis='', smoothing=False, figsize=(15,4), size=3, 
+                      show=True, save=False, save_type='png', directory=''):
+    pl_cytopath_alignment(data, basis=basis, smoothing=smoothing, figsize=figsize, size=size, 
+                          show=show, save=save, save_type=save_type, folder=directory,)
 
 def plot_sampling(data, directory='', basis=''):
     pl_cytopath_sampling(data, folder=directory, basis=basis)
diff --git a/cytopath/plotting_functions/plot_alignment.py b/cytopath/plotting_functions/plot_alignment.py
@@ -1,8 +1,32 @@
 import numpy as np
+
 from scipy import spatial
+
 import matplotlib.pyplot as plt
 
-def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_type='svg', size = 10, smoothing=False):
+def fft_smoothing(coords):
+
+    #TODO: More relevant procedure required
+    signal = coords[:,0] + 1j*coords[:,1]
+
+    # FFT and frequencies
+    fft = np.fft.fft(signal)
+    freq = np.fft.fftfreq(signal.shape[-1])
+
+    # filter
+    cutoff = 0.1
+    fft[np.abs(freq) > cutoff] = 0
+
+    # IFFT
+    signal_filt = np.fft.ifft(fft)
+
+    coords[:,0] = signal_filt.real
+    coords[:,1] = signal_filt.imag
+
+    return coords
+
+def pl_cytopath_alignment(adata, basis="umap", smoothing=False, figsize=(15,4), size = 3, 
+                          show=True, save=False,save_type='png', folder=""):
 
     map_state = adata.obsm['X_'+basis]
     av_allign_score_glob=[]
@@ -12,9 +36,11 @@ def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_t
     fate_prob = adata.uns['trajectories']['cell_fate_probability']
 
     sequence=0
-
+
+    # TODO: Separate per step average alignment score calculation from plotting
     for end_point_cluster in adata.uns['run_info']["end_point_clusters"]:
-        trajectories = adata.uns['trajectories']["cells_along_trajectories_each_step"][np.where(adata.uns['trajectories']["cells_along_trajectories_each_step"]["End point"]==end_point_cluster)[0]]
+        trajectories = adata.uns['trajectories']["cells_along_trajectories_each_step"]\
+                       [np.where(adata.uns['trajectories']["cells_along_trajectories_each_step"]["End point"]==end_point_cluster)[0]]
         for i in range(adata.uns['run_info']['trajectory_count'][end_point_cluster]):
 
             av_trajectories=trajectories[np.where(trajectories["Trajectory"]==i)[0]]
@@ -24,8 +50,6 @@ def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_t
                 av_allign_score[l]=np.average((av_trajectories[np.where(av_trajectories["Step"]==l)[0]]["Allignment Score"]))
                 std_allign_score[l]=np.std((av_trajectories[np.where(av_trajectories["Step"]==l)[0]]["Allignment Score"]))
 
-            # TODO: Separate per step average alignment score calculation from plotting
-
             # Plotting
             path = folder+"_end_point_"+end_point_cluster+"_cytopath_"+str(i)+\
                  "occurance"+str(adata.uns['run_info']["trajectories_sample_counts"][end_point_cluster][i])+"."+save_type
@@ -38,49 +62,33 @@ def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_t
             ax1.set_xlabel('Steps')
 
             # Plot step size for aligned cells
-            sc_step = ax2.scatter(map_state[:,0], map_state[:,1], alpha=0.6, s=size, color="grey")
-            sc_step = ax2.scatter(map_state[:,0], map_state[:,1], alpha=0.9, s=size, c=step_time[sequence,:], cmap='YlGnBu')
+            sc_step = ax2.scatter(map_state[:,0], map_state[:,1], alpha=0.6, s=size, color="whitesmoke")
+            sc_step = ax2.scatter(map_state[:,0], map_state[:,1], alpha=0.9, s=size, 
+                                  vmin=0, vmax=np.nanmax(step_time), c=step_time[sequence,:], cmap='YlGnBu')
             fig.colorbar(sc_step, ax=ax2, label='Step time')
 
             ax2.set_ylabel(basis.upper()+' 2')
             ax2.set_xlabel(basis.upper()+' 1')
 
+            ax2.set_title('End point: {}-{} Support: {}/{}'.format(end_point_cluster, i, 
+                                                                adata.uns['run_info']['trajectories_sample_counts'][end_point_cluster][i],
+                                                                int(adata.uns['samples']['cell_sequences'].shape[0]/\
+                                                                adata.uns['run_info']['end_point_clusters'].shape[0])))
+
             # Plot alignment score
-            sc_score = ax3.scatter(map_state[:,0], map_state[:,1], alpha=0.6, s=size, color="grey")
-            sc_score = ax3.scatter(map_state[:,0], map_state[:,1], alpha=0.9, s=size, c=fate_prob[sequence,:], cmap='YlGnBu')
+            sc_score = ax3.scatter(map_state[:,0], map_state[:,1], alpha=0.6, s=size, color="whitesmoke")
+            sc_score = ax3.scatter(map_state[:,0], map_state[:,1], alpha=0.9, s=size, 
+                                   vmin=0, vmax=1, c=fate_prob[sequence,:], cmap='Reds')
             fig.colorbar(sc_score, ax=ax3, label='Cell fate probability')
 
             ax3.set_ylabel(basis.upper()+' 2')
             ax3.set_xlabel(basis.upper()+' 1')
 
             # Plot trajectory
             if basis in adata.uns['run_info']['trajectory_basis']:
-
                 coords = np.array(adata.uns['trajectories']['trajectories_coordinates'][end_point_cluster]['trajectory_'+str(i)+'_coordinates'])
 
-                if smoothing == True:
-                    #TODO: More relevant procedure required
-                    signal = coords[:,0] + 1j*coords[:,1]
-
-                    # FFT and frequencies
-                    fft = np.fft.fft(signal)
-                    freq = np.fft.fftfreq(signal.shape[-1])
-
-                    # filter
-                    cutoff = 0.1
-                    fft[np.abs(freq) > cutoff] = 0
-
-                    # IFFT
-                    signal_filt = np.fft.ifft(fft)
-
-                    coords[:,0] = signal_filt.real
-                    coords[:,1] = signal_filt.imag
-
-                ax2.plot(coords[:, 0], coords[:, 1], color='black')
-                ax3.plot(coords[:, 0], coords[:, 1], color='black')
-
             elif ('pca' in adata.uns['run_info']['trajectory_basis']) and (basis != 'pca'):
-
                 coords_ = np.array(adata.uns['trajectories']['trajectories_coordinates'][end_point_cluster]['trajectory_'+str(i)+'_coordinates'])
 
                 cell_sequences=[]
@@ -89,31 +97,23 @@ def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_t
 
                 coords = map_state[cell_sequences]
 
-                if smoothing == True:
-                    #TODO: More relevant procedure required
-                    signal = coords[:,0] + 1j*coords[:,1]
-
-                    # FFT and frequencies
-                    fft = np.fft.fft(signal)
-                    freq = np.fft.fftfreq(signal.shape[-1])
-
-                    # filter
-                    cutoff = 0.1
-                    fft[np.abs(freq) > cutoff] = 0
-
-                    # IFFT
-                    signal_filt = np.fft.ifft(fft)
-
-                    coords[:,0] = signal_filt.real
-                    coords[:,1] = signal_filt.imag
+            if smoothing == True:
+                coords = fft_smoothing(coords)
+
+            ax2.plot(coords[:, 0], coords[:, 1], color='black')
+            ax3.plot(coords[:, 0], coords[:, 1], color='black')
 
-                ax2.plot(coords[:, 0], coords[:, 1], color='black')
-                ax3.plot(coords[:, 0], coords[:, 1], color='black')
+            plt.tight_layout()
 
-            fig.savefig(path, bbox_inches='tight', dpi=300)
+            if save:
+                fig.savefig(path, bbox_inches='tight', dpi=300)
+            if show:
+                plt.show()
             # End plotting
 
             sequence+=1
 
             av_allign_score_glob.append(av_allign_score)                                
-            std_allign_score_glob.append(std_allign_score)
+            std_allign_score_glob.append(std_allign_score)
+
+