-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
CD4/Monocyte Analysis Updates for Revision
- Loading branch information
Showing
44 changed files
with
3,855 additions
and
343 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Run the Hotspot script once per value of the `k` wildcard, writing each
# result to its own k_sensitivity/k_{k}/ directory.
rule runHotspot_k_sensitivity:
    input:
        loom=data(data_file),
        latent=rules.SCVI_hvg.output.latent,
    params:
        model='danb',
        # Filled from the `k` wildcard in the output path.
        # NOTE(review): a wildcard placeholder in params presumably reaches
        # the script as a string -- confirm runHotspot.py converts it to int.
        n_neighbors='{k}',
        n_cells_min=10,
    output:
        results="k_sensitivity/k_{k}/hotspot.txt"
    script: "../../pipelineScripts/hotspot/runHotspot.py"
|
||
# Values substituted for the `k` wildcard of runHotspot_k_sensitivity.
k_values = [5, 10, 30, 50, 100, 300, 500, 1000]


# Aggregate target: requests the Hotspot result file for every k in k_values.
rule runHotspot_k_sensitivity_all:
    input:
        expand(rules.runHotspot_k_sensitivity.output.results, k=k_values)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Sub-workflow whose working directory is the 10x PBMC (with proteins) data
# folder.
subworkflow data:
    workdir:
        "../../data/10x_PBMC_w_proteins/"


# Input loom paths; `{rate}` is a wildcard left unresolved here.
# NOTE(review): presumably consumed by the included Snakefile_downsampling
# and resolved relative to the `data` sub-workflow -- confirm there.
data_file = "cd4/downsampled_{rate}/data.loom"
data_file_test = "cd4/data_test.loom"


include: "../Snakefile_downsampling"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../pipelineScripts/ |
185 changes: 185 additions & 0 deletions
185
Transcriptomics/Figures/CD4_Correlation/ModuleConsistency.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
import os | ||
import pandas as pd | ||
from tqdm import tqdm | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
|
||
# Emit SVG text as real text elements instead of converting glyphs to paths,
# so labels stay editable in a vector-graphics editor.
plt.rcParams['svg.fonttype'] = 'none'


# Pick which set of results to plot - CD4 or Monocytes.
# Switch by swapping which of the two lines below is commented out; the
# savefig filename at the end of this script is toggled the same way and
# should be kept in sync by hand.
# base_dir = '../../CD4_w_protein'
base_dir = '../../Mono_w_protein'


# One entry per module-detection method. 'Train' and 'Test' are the paths of
# the module-assignment tables produced on the two data splits; the loading
# code below reads them with the first column as index and uses their
# `Cluster` column.
datasets = [
    {
        'Name': 'Hotspot',
        'Train': os.path.join(base_dir, 'train/hotspot_hs/modules.txt'),
        'Test': os.path.join(base_dir, 'test/hotspot_hs/modules.txt'),
    },
    {
        'Name': 'WGCNA',
        'Train': os.path.join(base_dir, 'train/wgcna_hs/modules.txt'),
        'Test': os.path.join(base_dir, 'test/wgcna_hs/modules.txt'),
    },
    {
        'Name': 'ICA5',
        'Train': os.path.join(base_dir, 'train/ica5/modules.txt'),
        'Test': os.path.join(base_dir, 'test/ica5/modules.txt'),
    },
    # ICA8 is intentionally left out: we don't need so many ICA versions,
    # and this one isn't that different from ICA10.
    # {
    #     'Name': 'ICA8',
    #     'Train': os.path.join(base_dir, 'train/ica8/modules.txt'),
    #     'Test': os.path.join(base_dir, 'test/ica8/modules.txt'),
    # },
    {
        'Name': 'ICA10',
        'Train': os.path.join(base_dir, 'train/ica10/modules.txt'),
        'Test': os.path.join(base_dir, 'test/ica10/modules.txt'),
    },
    {
        'Name': 'Grnboost',
        'Train': os.path.join(base_dir, 'train/grnboost/modules.txt'),
        'Test': os.path.join(base_dir, 'test/grnboost/modules.txt'),
    },
]
|
||
# Attach the gene -> module-label mapping of each split to every dataset
# entry, under the 'TrainDict' and 'TestDict' keys.
for data in datasets:
    train_table = pd.read_table(data['Train'], index_col=0)
    test_table = pd.read_table(data['Test'], index_col=0)

    # The table index becomes the dict key, the `Cluster` column the value.
    data.update({
        'TrainDict': train_table.Cluster.to_dict(),
        'TestDict': test_table.Cluster.to_dict(),
    })
|
||
|
||
def eval_module_consistency(data):
    """Store the symmetric train/test module consistency of one dataset.

    Reads the gene -> module-label dicts ``data['TrainDict']`` and
    ``data['TestDict']``, scores them in both directions with
    ``eval_module_consistency_inner`` and writes the mean of the two scores
    to ``data['Consistency']``.  Nothing is returned; ``data`` is updated
    in place.
    """
    train_labels = data['TrainDict']
    test_labels = data['TestDict']

    # The inner score is directional (pairs found in the first argument that
    # are kept in the second), so average both directions.
    train_to_test = eval_module_consistency_inner(train_labels, test_labels)
    test_to_train = eval_module_consistency_inner(test_labels, train_labels)

    data['Consistency'] = (train_to_test + test_to_train) / 2
|
||
|
||
def eval_module_consistency_inner(dict_a, dict_b):
    """Return the share of same-module gene pairs in A that B keeps together.

    Only genes present in both mappings are considered.  The label ``-1``
    marks an unassigned gene; such a gene never forms a pair.

    Parameters
    ----------
    dict_a, dict_b : dict
        Map gene -> module label.  Labels must be hashable.

    Returns
    -------
    float
        (number of gene pairs that share a module in both A and B) divided
        by (number of gene pairs that share a module in A).  ``nan`` when no
        pair of shared genes shares a module in A, because the ratio is
        undefined in that case.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    all_genes = set(dict_a.keys()) & set(dict_b.keys())

    def is_assigned(label):
        # -1 means "no module".  `label == label` also rejects NaN, which
        # never compares equal to itself and so can never match another gene.
        return label != -1 and label == label

    # Count genes per module in A, and per (module in A, module in B)
    # combination.  Two genes are "together in both" exactly when they fall
    # in the same combined group, so no pairwise loop over genes is needed.
    genes_per_a_module = Counter()
    genes_per_joint_module = Counter()
    for gene in all_genes:
        label_a = dict_a[gene]
        if not is_assigned(label_a):
            continue
        genes_per_a_module[label_a] += 1

        label_b = dict_b[gene]
        if is_assigned(label_b):
            genes_per_joint_module[label_a, label_b] += 1

    # A group of n genes contains n * (n - 1) / 2 unordered pairs.
    denom = sum(n * (n - 1) // 2 for n in genes_per_a_module.values())
    num = sum(n * (n - 1) // 2 for n in genes_per_joint_module.values())

    if denom == 0:
        return float('nan')

    return num / denom
|
||
|
||
def eval_num_modules(data):
    """Store the mean number of modules over the train and test splits.

    Reads the gene -> module-label dicts ``data['TrainDict']`` and
    ``data['TestDict']`` and writes the average number of distinct module
    labels to ``data['NumModules']``.  The label ``-1`` marks unassigned
    genes and is not counted as a module.  Nothing is returned; ``data`` is
    updated in place.
    """
    def count_modules(assignments):
        labels = pd.Series(assignments)
        # Filter out the unassigned marker explicitly.  Subtracting one from
        # the unique count instead would undercount whenever a split has no
        # gene labelled -1.
        return labels[labels != -1].unique().size

    train_modules = count_modules(data['TrainDict'])
    test_modules = count_modules(data['TestDict'])

    data['NumModules'] = (train_modules + test_modules) / 2
|
||
|
||
def eval_num_assigned(data):
    """Store the mean number of assigned genes over the train and test splits.

    A gene counts as assigned when its label in ``data['TrainDict']`` /
    ``data['TestDict']`` differs from ``-1``.  The average of the two split
    counts is written to ``data['NumAssigned']``.  Nothing is returned;
    ``data`` is updated in place.
    """
    train_count, test_count = (
        pd.Series(data[split]).ne(-1).sum()
        for split in ('TrainDict', 'TestDict')
    )

    data['NumAssigned'] = train_count / 2 + test_count / 2
|
||
|
||
# Load both splits of every method and compute its three summary metrics.
# Each evaluator writes its result back into the dataset entry.
evaluators = (eval_module_consistency, eval_num_modules, eval_num_assigned)

for data in tqdm(datasets):
    train_table = pd.read_table(data['Train'], index_col=0)
    test_table = pd.read_table(data['Test'], index_col=0)

    # The table index becomes the dict key, the `Cluster` column the value.
    data.update({
        'TrainDict': train_table.Cluster.to_dict(),
        'TestDict': test_table.Cluster.to_dict(),
    })

    for evaluate in evaluators:
        evaluate(data)
|
||
|
||
# %% Consolidate into a nice dataframe
# One row per method, keeping only the name and the three computed metrics.
columns = ['Name', 'Consistency', 'NumModules', 'NumAssigned']

results = pd.DataFrame(
    [[entry[column] for column in columns] for entry in datasets],
    columns=columns,
)
|
||
|
||
# %% Plot it
# Fixed left-to-right method order; one palette color per method.
order = ['ICA5', 'ICA10', 'Grnboost', 'WGCNA', 'Hotspot']
colors = sns.color_palette('deep')[:len(order)]
plot_data = results.set_index('Name').loc[order]

fig, axs = plt.subplots(1, 3, figsize=(12, 4))

# One bar panel per metric: (column of plot_data, y-axis label, panel title).
panels = [
    (
        'Consistency',
        'Proportion of Gene Pairs Which\nReplicate Across Data Split',
        'Reproducibility',
    ),
    ('NumModules', 'Modules', '# Modules'),
    ('NumAssigned', 'Genes', '# Genes Assigned'),
]

for ax, (metric, y_label, panel_title) in zip(axs, panels):
    plt.sca(ax)

    plt.bar(plot_data.index, plot_data[metric], alpha=0.9, color=colors)
    plt.xticks(rotation=45)
    plt.ylabel(y_label)
    plt.title(panel_title)
    # Draw the dashed horizontal grid lines behind the bars.
    plt.gca().set_axisbelow(True)
    plt.grid(color='#CCCCCC', lw=0.5, axis='y', ls=(0, (5, 5)))

plt.subplots_adjust(bottom=.25, wspace=0.4, left=0.1, right=0.9)
# Output name for the CD4 run; swap with the line below when base_dir is
# switched to the CD4 results.
# plt.savefig('CD4_Module_TrainTest.svg')
plt.savefig('Monocyte_Module_TrainTest.svg')
# plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.