In [None]:
import matplotlib as mpl
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline

In [None]:
# Start from seaborn's "ticks" theme; the Matplotlib settings below are
# applied on top of it.
sns.set_style("ticks")

# Apply all Matplotlib overrides in a single update so the notebook's
# figure styling can be read in one place.
mpl.rcParams.update({
    # Disable top and right spines.
    "axes.spines.top": False,
    "axes.spines.right": False,

    # Display and save figures at higher resolution for presentations and
    # manuscripts.
    "savefig.dpi": 300,
    "figure.dpi": 120,

    # Display text at sizes large enough for presentations and manuscripts.
    "font.weight": "normal",
    "axes.labelweight": "normal",
    "font.size": 14,
    "axes.labelsize": 14,
    "legend.fontsize": 10,
    "xtick.labelsize": 14,
    "ytick.labelsize": 14,
    "axes.titlesize": 14,

    # Do not route text rendering through an external LaTeX installation.
    "text.usetex": False,
})

## Define inputs, outputs, and parameters

In [None]:
# Input file paths taken from the `snakemake.input` object. `snakemake` is not
# defined anywhere in this notebook; it is presumably injected by the Snakemake
# workflow that executes the notebook (confirm against the calling rule).
# Each path is read with `pd.read_csv` in the "Load data" section: one summary
# table plus one table per embedding method (PCA, MDS, t-SNE, UMAP).
scatterplot_metadata = snakemake.input.scatterplot_metadata
scatterplot_pca = snakemake.input.scatterplot_pca
scatterplot_mds = snakemake.input.scatterplot_mds
scatterplot_tsne = snakemake.input.scatterplot_tsne
scatterplot_umap = snakemake.input.scatterplot_umap

In [None]:
# Output path that the final four-panel figure is saved to.
scatterplot_chart = snakemake.output.scatterplot

## Load data

In [None]:
# Summary table. The plotting cell selects rows by the "embedding" column
# ('pca', 'mds', 't-sne', 'umap') and reads their "mean" and "std" values
# for the panel titles.
metadata = pd.read_csv(scatterplot_metadata)

In [None]:
# Display the summary table in full (presumably one row per embedding -- confirm).
metadata

In [None]:
# Table behind the PCA panel. Columns read by the plotting cell:
# "genetic", "euclidean", "LOWESS_x", and "LOWESS_y".
pca = pd.read_csv(scatterplot_pca)

In [None]:
# Preview the first rows of the PCA table.
pca.head()

In [None]:
# (rows, columns) of the PCA table.
pca.shape

In [None]:
# Table behind the MDS panel. Columns read by the plotting cell:
# "genetic", "euclidean", "LOWESS_x", and "LOWESS_y".
mds = pd.read_csv(scatterplot_mds)

In [None]:
# Preview the first rows of the MDS table.
mds.head()

In [None]:
# Table behind the t-SNE panel. Columns read by the plotting cell:
# "genetic", "euclidean", "LOWESS_x", and "LOWESS_y".
tsne = pd.read_csv(scatterplot_tsne)

In [None]:
# Preview the first rows of the t-SNE table.
tsne.head()

In [None]:
# Table behind the UMAP panel. Columns read by the plotting cell:
# "genetic", "euclidean", "LOWESS_x", and "LOWESS_y".
umap = pd.read_csv(scatterplot_umap)

In [None]:
# Preview the first rows of the UMAP table.
umap.head()

## Plot scatterplots per embedding

In [None]:
def plot_distance_panel(
    ax,
    title,
    embedding_key,
    distances,
    summary,
    tick_step=40.0,
    tick_padding=10,
):
    """Draw one genetic-vs-Euclidean distance panel for a single embedding.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw into.
    title : str
        Human-readable embedding name shown at the start of the panel title.
    embedding_key : str
        Value of the "embedding" column in ``summary`` that identifies this
        embedding's row (for example ``'t-sne'``).
    distances : pandas.DataFrame
        Table with "genetic", "euclidean", "LOWESS_x", and "LOWESS_y" columns.
    summary : pandas.DataFrame
        Table with "embedding", "mean", and "std" columns; the first matching
        row supplies the values printed in the title.
    tick_step : float, optional
        Spacing between x-axis ticks.
    tick_padding : float, optional
        Amount added to the largest genetic distance when computing the end
        of the tick range, so a tick near the maximum is not cut off by the
        half-open ``np.arange`` interval.

    Raises
    ------
    ValueError
        If ``summary`` has no row for ``embedding_key``.
    """
    summary_rows = summary.loc[summary["embedding"] == embedding_key]
    if summary_rows.empty:
        # Fail with a message naming the missing embedding instead of the
        # bare IndexError that `.values[0]` would raise on an empty selection.
        raise ValueError(
            f"No row with embedding == '{embedding_key}' found in the metadata table."
        )
    mean_value = summary_rows["mean"].values[0]
    std_value = summary_rows["std"].values[0]

    # Raw points first, then the smoothed curve on top. The "LOESS" label is
    # only visible if a legend is later added to these axes.
    ax.plot(distances["genetic"], distances["euclidean"], "o", alpha=0.25)
    ax.plot(distances["LOWESS_x"], distances["LOWESS_y"], label="LOESS")

    ax.set_xlabel("Genetic distance")
    ax.set_ylabel("Euclidean distance")

    # Series.min()/.max() skip missing values, unlike the builtin min()/max(),
    # whose result on a Series containing NaN depends on element order.
    genetic = distances["genetic"]
    ax.set_xticks(np.arange(genetic.min(), genetic.max() + tick_padding, tick_step))

    ax.set_title(f"{title} ($R^2={mean_value:.3f} +/- {std_value:.3f}$)")


fig = plt.figure(figsize=(8, 8), constrained_layout=False)
gs = gridspec.GridSpec(2, 2, figure=fig, hspace=0.4, wspace=0.4)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, 0])
ax4 = fig.add_subplot(gs[1, 1])

# (axes, panel title, key in metadata["embedding"], distance table)
panels = [
    (ax1, "PCA", "pca", pca),
    (ax2, "MDS", "mds", mds),
    (ax3, "t-SNE", "t-sne", tsne),
    (ax4, "UMAP", "umap", umap),
]

for panel_ax, panel_title, panel_key, panel_distances in panels:
    plot_distance_panel(panel_ax, panel_title, panel_key, panel_distances, metadata)

# tight_layout only adjusts axes that already exist, so it must run after the
# panels are created and labelled. Calling it on the empty figure (as before)
# adjusted nothing. The explicit hspace/wspace on the GridSpec still control
# the spacing between panels.
fig.tight_layout(pad=0.5)

fig.savefig(scatterplot_chart, dpi=300)