Skip to content

Commit

Permalink
Merge pull request #8 from aron0093/development
Browse files Browse the repository at this point in the history
Release v0.2.0
  • Loading branch information
aron0093 committed Jul 18, 2023
2 parents 5bc826f + 2178db2 commit c6e8e31
Show file tree
Hide file tree
Showing 10 changed files with 350 additions and 487 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ cytopath depends on *scvelo* to process the data and may require additional depe
* python>=3.7
* numpy
* scipy
* pandas==1.4.4
* pandas==1.4.1
* anndata
* scvelo>=0.1.25
* joblib
* fastdtw
* hausdorff
* scikit-learn==0.22.2
* networkit
* numba
* tqdm
Expand Down
18 changes: 11 additions & 7 deletions cytopath/markov_chain_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ def iterate_state_probability(adata, matrix_key='T_forward', init=None, stationa
return state_history, state_history_max_iter, convergence_check

def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'louvain', max_steps=10, min_sim_ratio=0.6, rounds_limit=10, traj_number=50, sim_number=500,
end_point_probability=0.99, root_cell_probability=0.99, end_points=None, root_cells=None, end_clusters=None, root_clusters=None, min_clusters=3, tol=1e-3,
normalize=False, unique=True, num_cores=1, copy=False):
end_point_probability=0.99, root_cell_probability=0.99, end_points=None, root_cells=None, end_clusters=None, root_clusters=None, min_clusters=2,
max_iter=200, tol=1e-3, normalize=False, unique=True, num_cores=1, copy=False):

"""Markov sampling of cell sequences starting from defined root cells to defined terminal regions based on a cell-cell transition probability matrix.
Arguments
Expand Down Expand Up @@ -123,8 +123,10 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
Cluster IDs to be considered end points. Precendence over end_point_probability.
root_clusters: `list` (default: None)
Cluster IDs to be considered root states. Precendence over root_state_probability.
min_clusters: `integer` (default:3)
min_clusters: `integer` (default:2)
Minium number of clusters covered by each simulation. (cluster_key)
max_iter: `integer` (default: 200)
Maximum simulation iterations to test to auto-select max_steps.
tol: `float` (default:1e-3)
Convergence criteria for Markov sampling.
normalize: 'Boolean' (default: False)
Expand Down Expand Up @@ -235,7 +237,7 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
end_points = np.asarray(np.where(np.asarray(adata.obs["end_points"]) >= end_point_probability))[0]
#TODO
'''
# Recluster end points if based on probability
# Recluster end points if based on probability
end_points_adata = adata[end_points]
scvelo.tl.louvain(end_points_adata)
adata.obs['updated_'+cluster_key] = adata.obs[cluster_key].astype(str).values
Expand All @@ -255,6 +257,9 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
adata.uns['run_info']['end_points'] = end_points
end_point_clusters = adata.obs[cluster_key][end_points].astype(str).values
end_clusters_ = np.unique(end_point_clusters)

if len(end_clusters_)==0:
raise ValueError('Terminal cluster set is empty.')

# Adjust simulation parameters based on data
if auto_adjust:
Expand All @@ -270,7 +275,7 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
max_steps = iterate_state_probability(adata, matrix_key=matrix_key,
init = (adata.obs['root_cells']/adata.obs['root_cells'].sum()).values,
stationary=(adata.obs['end_points']/adata.obs['end_points'].sum()).values,
max_iter=200, tol=tol)[-1]
max_iter=max_iter, tol=tol)[-1]
print('Number of initial simulation steps (max_steps) set to {}'.format(max_steps))

# Initialize all empty lists
Expand All @@ -284,7 +289,6 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
ratio_obtained = 0

count = 0 # Iterate sampling runs
old_step_increment=0
# Resample samples until the number of trajectories for each end point has been reached.
while (min(traj_num) < traj_number): # or (ratio_obtained < 0.1)

Expand Down Expand Up @@ -354,7 +358,7 @@ def sampling(data, auto_adjust=True, matrix_key = 'T_forward', cluster_key = 'lo
ratio_min = np.round(min(traj_num)/traj_number, 4)

if count==rounds_limit+1:
if (min(traj_num) < min_sim_ratio*traj_number) or (ratio_obtained < 0.1):
if (min(traj_num) < min_sim_ratio*traj_number): #or (ratio_obtained < 0.1):

print('{} % of required simulations obtained for lagging end point {}.'.format(np.round(ratio_min*100, 2),
end_clusters_[np.argmin(traj_num)]))
Expand Down
6 changes: 4 additions & 2 deletions cytopath/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
from .plotting_functions.plot_sampling import pl_cytopath_sampling
from .plotting_functions.plot_radial_heatmap import radial_heatmap

def plot_trajectories(data, directory='', basis='', size=10, figsize=(12,3), save_type='svg', smoothing=False):
pl_cytopath_alignment(data, folder=directory, basis=basis, size=size, figsize=figsize, smoothing=smoothing, save_type=save_type)
def plot_trajectories(data, basis='', smoothing=False, figsize=(15,4), size=3,
show=True, save=False, save_type='png', directory=''):
pl_cytopath_alignment(data, basis=basis, smoothing=smoothing, figsize=figsize, size=size,
show=show, save=save, save_type=save_type, folder=directory,)

def plot_sampling(data, directory='', basis=''):
pl_cytopath_sampling(data, folder=directory, basis=basis)
106 changes: 53 additions & 53 deletions cytopath/plotting_functions/plot_alignment.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,32 @@
import numpy as np

from scipy import spatial

import matplotlib.pyplot as plt

def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_type='svg', size = 10, smoothing=False):
def fft_smoothing(coords):

#TODO: More relevant procedure required
signal = coords[:,0] + 1j*coords[:,1]

# FFT and frequencies
fft = np.fft.fft(signal)
freq = np.fft.fftfreq(signal.shape[-1])

# filter
cutoff = 0.1
fft[np.abs(freq) > cutoff] = 0

# IFFT
signal_filt = np.fft.ifft(fft)

coords[:,0] = signal_filt.real
coords[:,1] = signal_filt.imag

return coords

def pl_cytopath_alignment(adata, basis="umap", smoothing=False, figsize=(15,4), size = 3,
show=True, save=False,save_type='png', folder=""):

map_state = adata.obsm['X_'+basis]
av_allign_score_glob=[]
Expand All @@ -12,9 +36,11 @@ def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_t
fate_prob = adata.uns['trajectories']['cell_fate_probability']

sequence=0


# TODO: Separate per step average alignment score calculation from plotting
for end_point_cluster in adata.uns['run_info']["end_point_clusters"]:
trajectories = adata.uns['trajectories']["cells_along_trajectories_each_step"][np.where(adata.uns['trajectories']["cells_along_trajectories_each_step"]["End point"]==end_point_cluster)[0]]
trajectories = adata.uns['trajectories']["cells_along_trajectories_each_step"]\
[np.where(adata.uns['trajectories']["cells_along_trajectories_each_step"]["End point"]==end_point_cluster)[0]]
for i in range(adata.uns['run_info']['trajectory_count'][end_point_cluster]):

av_trajectories=trajectories[np.where(trajectories["Trajectory"]==i)[0]]
Expand All @@ -24,8 +50,6 @@ def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_t
av_allign_score[l]=np.average((av_trajectories[np.where(av_trajectories["Step"]==l)[0]]["Allignment Score"]))
std_allign_score[l]=np.std((av_trajectories[np.where(av_trajectories["Step"]==l)[0]]["Allignment Score"]))

# TODO: Separate per step average alignment score calculation from plotting

# Plotting
path = folder+"_end_point_"+end_point_cluster+"_cytopath_"+str(i)+\
"occurance"+str(adata.uns['run_info']["trajectories_sample_counts"][end_point_cluster][i])+"."+save_type
Expand All @@ -38,49 +62,33 @@ def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_t
ax1.set_xlabel('Steps')

# Plot step size for aligned cells
sc_step = ax2.scatter(map_state[:,0], map_state[:,1], alpha=0.6, s=size, color="grey")
sc_step = ax2.scatter(map_state[:,0], map_state[:,1], alpha=0.9, s=size, c=step_time[sequence,:], cmap='YlGnBu')
sc_step = ax2.scatter(map_state[:,0], map_state[:,1], alpha=0.6, s=size, color="whitesmoke")
sc_step = ax2.scatter(map_state[:,0], map_state[:,1], alpha=0.9, s=size,
vmin=0, vmax=np.nanmax(step_time), c=step_time[sequence,:], cmap='YlGnBu')
fig.colorbar(sc_step, ax=ax2, label='Step time')

ax2.set_ylabel(basis.upper()+' 2')
ax2.set_xlabel(basis.upper()+' 1')

ax2.set_title('End point: {}-{} Support: {}/{}'.format(end_point_cluster, i,
adata.uns['run_info']['trajectories_sample_counts'][end_point_cluster][i],
int(adata.uns['samples']['cell_sequences'].shape[0]/\
adata.uns['run_info']['end_point_clusters'].shape[0])))

# Plot alignment score
sc_score = ax3.scatter(map_state[:,0], map_state[:,1], alpha=0.6, s=size, color="grey")
sc_score = ax3.scatter(map_state[:,0], map_state[:,1], alpha=0.9, s=size, c=fate_prob[sequence,:], cmap='YlGnBu')
sc_score = ax3.scatter(map_state[:,0], map_state[:,1], alpha=0.6, s=size, color="whitesmoke")
sc_score = ax3.scatter(map_state[:,0], map_state[:,1], alpha=0.9, s=size,
vmin=0, vmax=1, c=fate_prob[sequence,:], cmap='Reds')
fig.colorbar(sc_score, ax=ax3, label='Cell fate probability')

ax3.set_ylabel(basis.upper()+' 2')
ax3.set_xlabel(basis.upper()+' 1')

# Plot trajectory
if basis in adata.uns['run_info']['trajectory_basis']:

coords = np.array(adata.uns['trajectories']['trajectories_coordinates'][end_point_cluster]['trajectory_'+str(i)+'_coordinates'])

if smoothing == True:
#TODO: More relevant procedure required
signal = coords[:,0] + 1j*coords[:,1]

# FFT and frequencies
fft = np.fft.fft(signal)
freq = np.fft.fftfreq(signal.shape[-1])

# filter
cutoff = 0.1
fft[np.abs(freq) > cutoff] = 0

# IFFT
signal_filt = np.fft.ifft(fft)

coords[:,0] = signal_filt.real
coords[:,1] = signal_filt.imag

ax2.plot(coords[:, 0], coords[:, 1], color='black')
ax3.plot(coords[:, 0], coords[:, 1], color='black')

elif ('pca' in adata.uns['run_info']['trajectory_basis']) and (basis != 'pca'):

coords_ = np.array(adata.uns['trajectories']['trajectories_coordinates'][end_point_cluster]['trajectory_'+str(i)+'_coordinates'])

cell_sequences=[]
Expand All @@ -89,31 +97,23 @@ def pl_cytopath_alignment(adata, basis="umap", folder="", figsize=(12,3), save_t

coords = map_state[cell_sequences]

if smoothing == True:
#TODO: More relevant procedure required
signal = coords[:,0] + 1j*coords[:,1]

# FFT and frequencies
fft = np.fft.fft(signal)
freq = np.fft.fftfreq(signal.shape[-1])

# filter
cutoff = 0.1
fft[np.abs(freq) > cutoff] = 0

# IFFT
signal_filt = np.fft.ifft(fft)

coords[:,0] = signal_filt.real
coords[:,1] = signal_filt.imag
if smoothing == True:
coords = fft_smoothing(coords)

ax2.plot(coords[:, 0], coords[:, 1], color='black')
ax3.plot(coords[:, 0], coords[:, 1], color='black')

ax2.plot(coords[:, 0], coords[:, 1], color='black')
ax3.plot(coords[:, 0], coords[:, 1], color='black')
plt.tight_layout()

fig.savefig(path, bbox_inches='tight', dpi=300)
if save:
fig.savefig(path, bbox_inches='tight', dpi=300)
if show:
plt.show()
# End plotting

sequence+=1

av_allign_score_glob.append(av_allign_score)
std_allign_score_glob.append(std_allign_score)
std_allign_score_glob.append(std_allign_score)


Loading

0 comments on commit c6e8e31

Please sign in to comment.