Skip to content

Commit

Permalink
SlideSeq Panel Updates for revision
Browse files Browse the repository at this point in the history
  • Loading branch information
deto committed Dec 17, 2020
1 parent 53376b9 commit 98c6b16
Show file tree
Hide file tree
Showing 14 changed files with 1,181 additions and 56 deletions.
24 changes: 12 additions & 12 deletions SlideSeq/Figures/Supp1/IDR_Curves.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ pair_fun <- function(file_pair, stat_col, stat_colb=stat_col){
# %% Run for Hotspot pairs

files <- c(
"../Puck_180819_9/hotspot/hotspot_300.txt",
"../Puck_180819_10/hotspot/hotspot_300.txt",
"../Puck_180819_11/hotspot/hotspot_300.txt",
"../Puck_180819_12/hotspot/hotspot_300.txt"
"../Puck_180819_9/hotspot/hotspot.txt",
"../Puck_180819_10/hotspot/hotspot.txt",
"../Puck_180819_11/hotspot/hotspot.txt",
"../Puck_180819_12/hotspot/hotspot.txt"
)

stat_col <- "Z"
Expand Down Expand Up @@ -76,10 +76,10 @@ jsonlite::write_json(


combos <- list(
c("../Puck_180819_9/hotspot/hotspot_300.txt", "../Puck_180819_9/spatialDE/spatialDE_fixed.txt"),
c("../Puck_180819_10/hotspot/hotspot_300.txt", "../Puck_180819_10/spatialDE/spatialDE_fixed.txt"),
c("../Puck_180819_11/hotspot/hotspot_300.txt", "../Puck_180819_11/spatialDE/spatialDE_fixed.txt"),
c("../Puck_180819_12/hotspot/hotspot_300.txt", "../Puck_180819_12/spatialDE/spatialDE_fixed.txt")
c("../Puck_180819_9/hotspot/hotspot.txt", "../Puck_180819_9/spatialDE/spatialDE_fixed.txt"),
c("../Puck_180819_10/hotspot/hotspot.txt", "../Puck_180819_10/spatialDE/spatialDE_fixed.txt"),
c("../Puck_180819_11/hotspot/hotspot.txt", "../Puck_180819_11/spatialDE/spatialDE_fixed.txt"),
c("../Puck_180819_12/hotspot/hotspot.txt", "../Puck_180819_12/spatialDE/spatialDE_fixed.txt")
)

stat_colA <- "Z"
Expand Down Expand Up @@ -121,10 +121,10 @@ idr_group <- function(files, stat_col) {
}

files <- c(
"../Puck_180819_9/hotspot/hotspot_300.txt",
"../Puck_180819_10/hotspot/hotspot_300.txt",
"../Puck_180819_11/hotspot/hotspot_300.txt",
"../Puck_180819_12/hotspot/hotspot_300.txt"
"../Puck_180819_9/hotspot/hotspot.txt",
"../Puck_180819_10/hotspot/hotspot.txt",
"../Puck_180819_11/hotspot/hotspot.txt",
"../Puck_180819_12/hotspot/hotspot.txt"
)

stat_col <- "Z"
Expand Down
20 changes: 6 additions & 14 deletions SlideSeq/Figures/Supp1/plotMeanVar.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams['svg.fonttype'] = 'none'


ds = loompy.connect("../../../data/SlideSeq/Puck_180819_12/data.loom", mode="r")

scaled = ds.layer['scaled'][:, :]
counts = ds.layer[''][:, :]
symbol = ds.row_attrs['Symbol'][:]
scaled = scaled / 45 * 10000
scaled = scaled / 45 * 10000 # Scaled is counts/45, but we want counts/10k here

ds.close()

Expand All @@ -28,17 +30,6 @@
disp_c = (var_c - mu_c) / (mu_c**2)
fano_c = var_c / mu_c


# %% Plot

plt.figure()
plt.plot(mu, fano, 'o', ms=1)
plt.xscale('log')
plt.yscale('log')
plt.xlabel(r'Mean ($\frac{Counts}{10k}$)')
plt.ylabel('Variance / Mean')
plt.show()

# %% Load hotspot results

hs_file = "../../Puck_180819_12/hotspot/hotspot.txt"
Expand All @@ -56,15 +47,16 @@
color_too_low = '#DDDDDD'
color_not_sig = '#CCCCCC'

plt.figure(figsize=(4, 4))
plt.figure(figsize=(6, 2.5))
plt.plot(mu[hs_xx], fano[hs_xx], 'o', ms=1, color=color_too_low, rasterized=True)
plt.plot(mu[~hs_xx], fano[~hs_xx], 'o', ms=1, color=color_not_sig, rasterized=True)
ll = plt.plot(mu[hs_ii], fano[hs_ii], 'o', ms=1, color=color_hs, rasterized=True)[0]
plt.xscale('log')
plt.yscale('log')
plt.xlabel(r'Mean ($\frac{Counts}{10k}$)')
plt.ylabel('Variance / Mean')
plt.legend([ll], ['HS-Selected'], markerscale=3, loc='lower right')
plt.legend([ll], ['Hotspot-Selected'], markerscale=3, loc='lower right')
plt.subplots_adjust(left=.2, bottom=.2)
# plt.show()
plt.savefig('slideseq_meanvar.svg', dpi=300)
plt.close()
25 changes: 0 additions & 25 deletions SlideSeq/Figures/Supp1/plotPR.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,31 +98,6 @@ def getMarkers(df, N):
plt.ylim(0, 1)
plt.show()

# %% Ok - this is very promising. What if we just looked at average expression tho...
pucks = [
"Puck_180819_9",
"Puck_180819_10",
"Puck_180819_11",
"Puck_180819_12",
]

puck_means = {}
for puck in pucks:
ls = loompy.connect("../../../data/SlideSeq/{}/data.loom".format(puck), mode="r")
counts = ls.layers['scaled'][:, :]
gene_info = ls.ra['EnsID', 'Symbol']
ls.close()

# Have to do this because data_slideseq makes it a numpy array
gene_info = pd.DataFrame(
gene_info, columns=['EnsID', 'Symbol']).set_index('EnsID')
counts = pd.DataFrame(counts, index=gene_info.index)

# need counts, latent, and num_umi
valid_genes = (counts > 0).sum(axis=1) > 50
gene_means = counts.loc[valid_genes].mean(axis=1)
puck_means[puck] = gene_means
del counts

# %%

Expand Down
121 changes: 121 additions & 0 deletions SlideSeq/Figures/Supp1/plotPR_k_sensitivity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, auc
from functools import reduce
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import loompy

plt.rcParams['svg.fonttype'] = 'none'

# %% First get the markers from DropViz data

marker_de = pd.read_table("../../DropViz/ranksums_markers_1vAll.txt")

groups = {
l: marker_de.loc[marker_de.level == l].set_index('gene')
for l in marker_de.level.unique()
}


def getMarkers(df, N):

markers = df.loc[df.FDR < .1].sort_values('AUC').index.tolist()
if len(markers) > N:
markers = markers[:N]

return markers


N = 100
markers = {l: getMarkers(v, N) for l, v in groups.items()}

all_markers = reduce(
lambda x, y: x | set(y), markers.values(), set()
)

all_markers_norm = set([x.upper() for x in all_markers])

print(len(all_markers))
print(len(all_markers_norm))


# %% Define some lists...

k_values = [5, 10, 30, 50, 100, 300, 500, 1000]

hs_files = {
k: "../../Puck_180819_12/k_sensitivity/k_{k}/hotspot.txt".format(k=k) for k in k_values
}


# %% Parse data for AUC and ROC curves

aucs = []

for k in hs_files.keys():
ff = hs_files[k]
res = pd.read_table(ff, index_col=0)

y_score = res.Z.values
y_true = np.array(
[x.upper() in all_markers_norm for x in res.index]
).astype('int')

auc_i = roc_auc_score(y_true, y_score)

aucs.append([k, auc_i])

aucs = pd.DataFrame(aucs, columns=['K', 'AUROC'])

# %%

plt.figure()
ax = plt.gca()
plt.plot(aucs['K'], aucs['AUROC'], 'o-')
plt.xscale('log')
plt.xticks(aucs['K'].unique())
ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
ax.xaxis.set_ticks([], minor=True)
plt.xlabel('# of Neighbors')
plt.ylabel('Area Under ROC Curve')
plt.gca().set_axisbelow(True)
plt.grid(color='#CCCCCC', ls=(0, (5, 5)), lw=.5)
plt.savefig('AU_ROC_k_sensitivity.svg', dpi=300)

# %% What about the precision recall curve?

auprs = []

for k in hs_files.keys():
ff = hs_files[k]
res = pd.read_table(ff, index_col=0)

y_score = res.Z.values
y_true = np.array(
[x.upper() in all_markers_norm for x in res.index]
).astype('int')

precision, recall, thresholds = precision_recall_curve(y_true, y_score)

aupr = auc(recall, precision)
auprs.append([k, aupr])


auprs = pd.DataFrame(auprs, columns=['K', 'AUPRC'])

# %%

plt.figure()
ax = plt.gca()
plt.plot(auprs['K'], auprs['AUPRC'], 'o-')
plt.xscale('log')
plt.xticks(auprs['K'].unique())
ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
ax.xaxis.set_ticks([], minor=True)
plt.ylabel('Area Under\nPrecision-Recall Curve')
plt.xlabel('# of Neighbors')
plt.gca().set_axisbelow(True)
plt.grid(color='#CCCCCC', ls=(0, (5, 5)), lw=.5)
plt.savefig('AU_PR_k_sensitivity.svg', dpi=300)
146 changes: 146 additions & 0 deletions SlideSeq/Figures/Supp5_HVG_vs_HS/hvg_vs_hs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import loompy
from bio_utils.plots import hover_plot
from matplotlib.colors import LinearSegmentedColormap
plt.rcParams['svg.fonttype'] = 'none'

# %% Load results

hs = pd.read_table("../../Puck_180819_12/hotspot/hotspot.txt", index_col=0)
hvg = pd.read_table("../../Puck_180819_12/genes/hvg_info.txt", index_col=0)
hvg = hvg.loc[hs.index]


# %% Plot one vs other

plot_data = hs[['Z']].join(hvg[['gene.dispersion.scaled']])

plt.figure(figsize=(5, 5))

plt.plot(
plot_data['gene.dispersion.scaled'],
plot_data['Z'],
'o', ms=2, rasterized=True
)

plt.gca().set_axisbelow(True)
plt.xlabel('Scaled Dispersion')
plt.ylabel('Autocorrelation Z')
plt.grid(color='#BBBBBB', ls=(0, (5, 5)), lw=.5)
plt.subplots_adjust(left=0.15, right=1-0.15, bottom=0.15, top=1-0.15)

plt.ylim(-10, 260) # Zoom in to see the points better
plt.xlim(-4, 8)
plt.savefig('autocorrelation_vs_hvg.svg', dpi=300)

# %%

# hover_plot(
# plot_data['gene.dispersion.scaled'],
# plot_data['Z'],
# plot_data.index,
# 'o', ms=2
# )
# plt.show()

# %% Plot rank vs rank


# %%

loom_file = "../../../data/SlideSeq/Puck_180819_12/data.loom"
with loompy.connect(loom_file, 'r') as ds:
barcodes = ds.ca['Barcode'][:]
scaled = ds.layers['scaled'][:, :]
gene_info = ds.ra['EnsID', 'Symbol']
num_umi = ds.ca['NumUmi'][:]

# Have to do this because data_slideseq makes it a numpy array
gene_info = pd.DataFrame(
gene_info, columns=['EnsID', 'Symbol']).set_index('EnsID')
scaled = pd.DataFrame(scaled, index=gene_info.index, columns=barcodes)

positions = pd.read_table(
"../../Puck_180819_12/positions/positions.txt", index_col=0
)

positions = positions.loc[scaled.columns]

# %%
gene_cmap = LinearSegmentedColormap.from_list(
"grays", ['#dddddd', '#000000']
)


def plot_gene(gene, positions, scaled, ax=None):

if ax is None:
ax = plt.gca()
else:
plt.sca(ax)

vals = np.log2(scaled.loc[gene]+1)

vmin = 0
vmax = np.percentile(vals[vals > 0], 80)

plot_data = pd.DataFrame({
'X': positions.Comp1,
'Y': positions.Comp2,
'Expression': vals,
}).sort_values('Expression')

plot_data['S'] = 0.5
plot_data.loc[plot_data.Expression > 0, 'S'] = 1

plt.scatter(
x=plot_data.X,
y=plot_data.Y,
c=plot_data.Expression,
s=plot_data.S,
vmin=vmin, vmax=vmax, cmap=gene_cmap,
alpha=0.7, rasterized=True, edgecolors='none',
)
for sp in ax.spines.values():
sp.set_visible(False)

plt.xticks([])
plt.yticks([])
plt.title(gene)


# %%

# top_genes_hs = hs.Z.sort_values(ascending=False).index[0:6]

# top_genes_hvg = hvg['gene.dispersion.scaled'].sort_values(ascending=False).index[0:6]
# top_genes_hvg = hvg.loc[hvg['gene.mean'] > 1] \
# .sort_values('gene.dispersion.scaled', ascending=False).index[0:6]


top_genes_hs = ['Plp1', 'Enpp2', 'Cartpt', 'Pcp2', 'Car8', 'Calb1']
top_genes_hvg = ['Ube2q1', 'Rexo4', 'Letm1', 'Cnksr2', 'Xrn2', 'Prmt5']


fig, axs = plt.subplots(2, 3, figsize=(6.5, 4))


for gene, ax in zip(top_genes_hs, axs.ravel()):

plot_gene(gene, positions, scaled, ax=ax)

# plt.show()
plt.savefig('top_genes_hs.svg', dpi=600)

# %%

fig, axs = plt.subplots(2, 3, figsize=(6.5, 4))


for gene, ax in zip(top_genes_hvg, axs.ravel()):

plot_gene(gene, positions, scaled, ax=ax)

plt.savefig('top_genes_hvg.svg', dpi=600)
Loading

0 comments on commit 98c6b16

Please sign in to comment.