In [None]:
# Reload imported modules automatically before each cell runs, so that edits
# to the local `per_analysis` package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Optional: uncomment to render inline figures as SVG.
# %config InlineBackend.figure_formats = ['svg']

In [None]:
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy.ndimage import gaussian_filter

sys.path.append("../")

from per_analysis import config
from per_analysis.figures import cmap, cplot, savefig, WarpedAxes
from per_analysis.stats import estimate_mode_1d
from per_analysis.timeseries import asymmetric_hysteresis

In [None]:
# Apply the project's matplotlib style sheet to every figure in this notebook.
# The path is relative, so the notebook must be run from its own directory.
plt.style.use("../styles/custom.mplstyle")

## Load data

In [None]:
# Tracking data for all flies; the path is relative to the notebook directory.
# `csv_file` is reused later to derive the distance-matrix cache path.
csv_file = "../data/WT_50%.csv"

# Flies excluded from the whole analysis, with the reason for each exclusion.
discard = (
    "20190904-1",   # trial 10 only has 36 frames
    "20190904-10",  # inaccurate tracking (e.g., 20190904-10-007 to 20190904-10-09)
    "20190905-5",   # fly fell off in trial 6
)

df = pd.read_csv(csv_file)

# Take an explicit copy of the filtered rows. Later cells add and overwrite
# columns of `df`; doing that on a boolean-mask slice is ambiguous (view vs.
# copy) and makes pandas emit SettingWithCopyWarning.
df = df[~df.fly.isin(discard)].copy()

# Continuous time axis that runs across trials.
# NOTE(review): 15 and 20 are presumably the trial duration (s) and the frame
# rate (Hz), i.e. config.TRIAL_DURATION and config.FRAME_RATE — confirm before
# replacing the literals with the config values.
df["time"] = df.trial * 15 + df.frame / 20
df

Subtract each fly's baseline (the estimated mode) from every angle

In [None]:
# Every column whose name contains "angle" is treated as a joint angle.
angles = [column for column in df.columns if "angle" in column]

# Step 1: estimate the baseline of each angle for each fly as the mode of that
# fly's angle values. All baselines are estimated before anything is
# subtracted, so every estimate is based on the unmodified data.
baseline = {}
for angle in angles:
    baseline_per_fly = {}
    for fly, df_fly in df.groupby("fly"):
        baseline_per_fly[fly] = estimate_mode_1d(df_fly[angle], dx=1)
    baseline[angle] = baseline_per_fly

# Step 2: shift each fly's angle trace so that its baseline sits at zero.
for angle, baseline_per_fly in baseline.items():
    for fly in df.fly.unique():
        is_fly = df.fly == fly
        df.loc[is_fly, angle] -= baseline_per_fly[fly]

## PER detection

In [None]:
# Detect PER intervals for each fly and gather them in one table with the
# columns `fly`, `start` and `stop`.
#
# The per-fly frames are collected in a list and concatenated once:
# `DataFrame.append` was removed in pandas 2.0, and growing a frame inside a
# loop copies all previously collected rows on every iteration.
per_frames = []

for fly, df_fly in df.groupby("fly"):
    # Smooth the rostrum angle before detection.
    y = gaussian_filter(df_fly.rostrum_angle, 5, mode="nearest")
    # NOTE(review): the meaning of the three numeric arguments (presumably
    # hysteresis thresholds on the angle) is defined in
    # per_analysis.timeseries — confirm there.
    intervals = asymmetric_hysteresis(y, 5, 10, 15)
    # Later cells use start/stop as positional (iloc) bounds into this fly's
    # own rows, not as labels of the global `df` index.
    df_fly_per = pd.DataFrame(intervals, columns=("start", "stop"))
    df_fly_per["fly"] = fly
    per_frames.append(df_fly_per)

if per_frames:
    df_per = pd.concat(per_frames, ignore_index=True)
else:
    # No flies at all: keep the expected schema instead of failing in concat.
    df_per = pd.DataFrame(columns=["start", "stop", "fly"])

# Order the columns alphabetically: fly, start, stop.
df_per = df_per.sort_index(axis=1)
df_per

Smooth every angle trace with a Gaussian filter (applied separately to each fly)

In [None]:
# Replace every angle trace by its Gaussian-smoothed version, one fly at a
# time so that the filter never mixes samples from different flies.
# mode="nearest" pads the ends of each trace with its edge value.
for fly, df_fly in df.groupby("fly"):
    fly_rows = df.fly == fly
    for angle in angles:
        df.loc[fly_rows, angle] = gaussian_filter(df_fly[angle], sigma=2, mode="nearest")

Plot the PER detection results for each fly.
Figures are saved in `../figures/per_detection`.

In [None]:
# Style for the small time markers: the vertical line itself is hidden (lw=0)
# and only a caret-up marker (matplotlib marker code 6) is drawn. With
# markevery=2 only the first of the line's two end points receives a marker.
vline_kws = {
    "marker": 6,
    "markersize": 3,
    "markeredgewidth": 0,
    "markevery": 2,
    "clip_on": False,
    "mfc": "k",
    "lw": 0,
}

# One figure per fly: rostrum angle over the 405 time units of the recording,
# split into three panels of 135, each holding three segments of 45.
for fly, df_fly in df.groupby("fly"):
    ax = WarpedAxes(bins=np.linspace(0, 405, 4), figsize=(10, 3))
    ax.plot(df_fly.time, df_fly.rostrum_angle, lw=1, c="k")
    ax.set_xmargin(0)
    ax.set_xticks(np.arange(0, 405, 45) + 22.5)

    odor_names = df_fly.odor.unique()

    for i, axi in enumerate(ax):
        panel_start = i * 135
        segment_starts = np.arange(0, 135, 45) + panel_start

        # Label each 45-long segment at its centre with the odor name, using
        # the odors in order of first appearance for this fly.
        axi.set_xticks(segment_starts + 22.5)
        axi.set_xticklabels(odor_names[i * 3:i * 3 + 3])
        axi.xaxis.tick_top()
        axi.tick_params(axis="x", length=0)

        # Dashed separators at the start of every segment.
        for x in segment_starts:
            axi.axvline(x, lw=1, c="k", ls="--", alpha=.5)

        # Two markers in each of the nine 15-long trials of the panel.
        # NOTE(review): offsets 2 and 4 presumably mark odor onset and offset
        # within a trial — confirm against config.ODOR_INTERVAL.
        for j in range(9):
            trial_start = j * 15 + panel_start
            axi.axvline(trial_start + 2, **vline_kws)
            axi.axvline(trial_start + 4, **vline_kws)

    # Shade the detected PER intervals (frame positions converted via the frame rate).
    fly_pers = df_per[df_per.fly == fly]
    for row in fly_pers.itertuples():
        ax.axvspan(row.start / config.FRAME_RATE, row.stop / config.FRAME_RATE, alpha=.2)

    plt.tight_layout()
    savefig(f"../figures/per_detection/{fly}.png")
    plt.close()

In [None]:
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

### PCA
Dimension reduction from 3 dimensions (3 angles) to 2 dimensions.

In [None]:
n_components = 2
pc_columns = [f"pc{i}" for i in range(n_components)]

# Standardize each angle (zero mean, unit variance) before PCA so that no
# single angle dominates the components merely because of its scale.
pipeline = make_pipeline(StandardScaler(), PCA(n_components))

# Fit on the angles of all flies together and project onto the components.
Z = pipeline.fit_transform(df[angles])

# Store the scores as columns pc0, pc1, ... of `df`.
for pc_column, scores in zip(pc_columns, Z.T):
    df[pc_column] = scores

Extract PCs and angles during PERs

In [None]:
def _as_object_array(sequences):
    """Pack a list of arrays into a 1-D object array with one array per element.

    `np.array(sequences, dtype=object)` only yields such a ragged container when
    the arrays differ in length. If all of them happen to have the same shape
    (or there is just one), it silently returns an N-D object array of scalars
    instead. Filling a pre-allocated array gives the same layout in every case.
    """
    packed = np.empty(len(sequences), dtype=object)
    for k, sequence in enumerate(sequences):
        packed[k] = sequence
    return packed


per_pcs = []
per_angles = []

# `df_per` is iterated fly by fly in groupby order. This assumes its rows are
# already grouped by fly in that same order, so that element k of the lists
# below belongs to row k of `df_per`.
for fly, df_fly_per in df_per.groupby("fly"):
    df_fly = df[df.fly == fly]

    for row in df_fly_per.itertuples():
        # start/stop are positional bounds into this fly's rows.
        per_window = df_fly.iloc[row.start:row.stop]
        per_pcs.append(per_window[pc_columns].values)
        # NOTE(review): assumes the first two angle columns are the haustellum
        # and rostrum angles, in that order (the later plots label them so).
        per_angles.append(per_window[angles[:2]].values)

# One (n_frames, 2) array per PER; the number of frames differs between PERs.
per_pcs = _as_object_array(per_pcs)
per_angles = _as_object_array(per_angles)

Define PERs that are initiated within 5 s after odor onset as odor-associated PERs
(i.e., only PERs initiated between t = 2 s and t = 7 s of a trial are counted as odor-associated).

In [None]:
# Longest delay after odor onset at which a PER still counts as
# odor-associated (same time unit as config.ODOR_INTERVAL).
max_per_delay = 5

# The first entry of ODOR_INTERVAL is used as the odor onset within a trial.
odor_start = config.ODOR_INTERVAL[0]
latest_per_start = odor_start + max_per_delay

# (earliest, latest) PER start time within a trial.
odor_associated_per_interval = (odor_start, latest_per_start)
odor_associated_per_interval

Assign odor identity to each PER

In [None]:
# PERs that do not start inside the odor-associated window keep an empty label.
df_per["odor"] = ""

earliest_start, latest_start = odor_associated_per_interval

for fly, df_fly_per in df_per.groupby("fly"):
    df_fly = df[df.fly == fly]

    for row in df_fly_per.itertuples():
        # PER start time relative to the beginning of its trial.
        t_start = (row.start / config.FRAME_RATE) % config.TRIAL_DURATION

        # Both bounds are exclusive.
        if not (earliest_start < t_start < latest_start):
            continue

        # Label the PER with the odor recorded at its first frame.
        df_per.loc[row.Index, "odor"] = df_fly.odor.iloc[row.start]

df_per.head(5)

Plot principal component (PC) trajectory grouped by odor for individual flies.
Figures are saved in `../figures/odor_trajectory_pcs`.

In [None]:
from per_analysis.figures import plot_per_trajectory

# Use a single font size for all text elements of the trajectory figures.
# This changes the global rcParams, so it also affects every later figure.
fontsize = 12
plt.rcParams.update({
    "font.size": fontsize,
    "axes.titlesize": fontsize,
    "axes.labelsize": fontsize,
    "xtick.labelsize": fontsize,
    "ytick.labelsize": fontsize,
})

# Upper end of the time-to-color mapping: the 90th percentile of the PER
# durations (frame count divided by the frame rate).
per_durations = [len(trajectory) for trajectory in per_pcs]
t_max = np.quantile(per_durations, .9) / config.FRAME_RATE

# Common axis limits for all flies: min/max of each PC over all PERs.
all_pcs = np.concatenate(per_pcs)
xlim, ylim = np.array([all_pcs.min(0), all_pcs.max(0)]).T

for fly, df_fly_per in df_per.groupby("fly"):
    fig = plot_per_trajectory(
        df_fly_per, per_pcs, t_max,
        figsize=(4.5, 4.5), spines=False,
        xlim=xlim, ylim=ylim,
        xlabel="PC1", ylabel="PC2",
    )
    fig.subplots_adjust(wspace=0, hspace=0.5)
    savefig(f"../figures/odor_trajectory_pcs/{fly}.png")
    # Close each figure so that the loop does not accumulate open figures.
    plt.close()

Summary of PC trajectory grouped by odors

In [None]:
# Same plot as for the individual flies, but with the PERs of all flies pooled.
fig = plot_per_trajectory(
    df_per, per_pcs, t_max,
    figsize=(4.5, 4.5), spines=False,
    xlim=xlim, ylim=ylim,
    xlabel="PC1", ylabel="PC2",
)
fig.subplots_adjust(wspace=0, hspace=0.5)
savefig("../figures/odor_trajectory_pcs.svg")

Plot angle trajectory grouped by odor for individual flies.
Figures are saved in `../figures/odor_trajectory_angles`.

In [None]:
# Common axis limits for all flies: min/max of each angle over all PERs.
# This overwrites the xlim/ylim that were used for the PC plots.
all_angles = np.concatenate(per_angles)
xlim, ylim = np.array([all_angles.min(0), all_angles.max(0)]).T

for fly, df_fly_per in df_per.groupby("fly"):
    fig = plot_per_trajectory(
        df_fly_per, per_angles, t_max,
        figsize=(4.5, 4.5), spines=False,
        xlim=xlim, ylim=ylim,
        xlabel="Haustellum angle", ylabel="Rostrum angle",
    )
    fig.subplots_adjust(wspace=0, hspace=0.5)
    savefig(f"../figures/odor_trajectory_angles/{fly}.png")
    # Close each figure so that the loop does not accumulate open figures.
    plt.close()

Summary of angle trajectory grouped by odors

In [None]:
# Angle trajectories of all flies pooled, grouped by odor.
fig = plot_per_trajectory(
    df_per, per_angles, t_max,
    figsize=(4.5, 4.5), spines=False,
    xlim=xlim, ylim=ylim,
    xlabel="Haustellum angle", ylabel="Rostrum angle",
)
fig.subplots_adjust(wspace=0, hspace=0.5)

# Draw an L-shaped scale bar of `theta` degrees on the axes with index 6.
# NOTE(review): index 6 is presumably the bottom-left panel of the grid
# created by plot_per_trajectory — confirm.
ax = fig.axes[6]
theta = 45
y_mixed = ax.get_yaxis_transform()  # x in axes fraction, y in data units
x_mixed = ax.get_xaxis_transform()  # x in data units, y in axes fraction

# Vertical bar along the left edge, horizontal bar along the bottom edge.
ax.plot([0, 0], [0, theta], transform=y_mixed, c="k", lw=2)
ax.plot([0, theta], [0, 0], transform=x_mixed, c="k", lw=2)

# Labels centred on each bar.
ax.text(theta / 2, -0.04, f"{theta}°", ha="center", va="top", transform=x_mixed)
ax.text(0, theta / 2, f"{theta}°", ha="right", va="center", rotation=90, transform=y_mixed)
savefig("../figures/odor_trajectory_angles.svg")

Compute distance between each pair of PERs.
Dynamic time warping (DTW) distance is chosen because PER sequences may have different lengths.

In [None]:
from pathlib import Path
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
from tqdm import tqdm

n_per = len(df_per)

# The cached matrix is validated by its shape only. Set this to True after
# changing the data or the PER detection in a way that keeps the number of
# PERs unchanged; otherwise a stale matrix would be reused silently.
recompute_matrix = False

# The cache is stored next to the CSV file, with the suffix replaced by .npy.
distance_matrix_path = Path(csv_file).with_suffix(".npy")

D = None

if distance_matrix_path.exists() and not recompute_matrix:
    cached = np.load(distance_matrix_path)

    # Reuse the cache only if it matches the current number of PERs.
    if cached.shape == (n_per, n_per):
        D = cached

if D is None:
    D = np.zeros((n_per, n_per))

    for i in tqdm(range(n_per)):
        # Only the strict upper triangle is computed and mirrored. The distance
        # of a PER to itself is zero, so the diagonal keeps its initial value;
        # this saves n_per DTW calls and guarantees an exactly zero diagonal,
        # which the later distance-matrix consumers require.
        for j in range(i + 1, n_per):
            D[i, j] = D[j, i] = fastdtw(per_pcs[i], per_pcs[j], radius=2, dist=euclidean)[0]

    np.save(distance_matrix_path, D)

## PERMANOVA analysis
See
- https://en.wikipedia.org/wiki/Permutational_analysis_of_variance
- Anderson, Marti J. “A new method for non-parametric multivariate analysis of variance.” Austral Ecology 26.1 (2001): 32-46.

Test for difference in PER between 9 odors

In [None]:
from skbio.stats.distance import permanova, DistanceMatrix

# Odor label of every PER ("" for PERs that are not odor-associated).
odors = df_per.odor.values

permutations = 1000

# Keep only the PERs labelled with one of the odors known to the config; this
# drops the PERs with an empty label.
known_odors = list(config.ODOR_ID_TO_NAME.values())
cond = np.isin(odors, known_odors)

# NOTE(review): the p-value comes from random permutations and no seed is set
# here, so it can differ slightly between runs.
results_9_odors = permanova(
    distance_matrix=DistanceMatrix(D[cond][:, cond]),
    grouping=odors[cond],
    permutations=permutations,
)
results_9_odors

Test for difference in PER between 6 odors (all except Air, Water, MO)

In [None]:
# Restrict the test to the PERs labelled with one of these six odors.
six_odors = ("EBR", "2-PT", "IPA", "MCH", "BNZ", "OCT")
cond = np.isin(odors, six_odors)

# NOTE(review): unseeded permutation test; p-values can vary between runs.
results_6_odors = permanova(
    distance_matrix=DistanceMatrix(D[cond][:, cond]),
    grouping=odors[cond],
    permutations=permutations,
)
results_6_odors

Test for difference in PER between EBR and BNZ

In [None]:
# Restrict the test to the PERs labelled EBR or BNZ.
odor_pair = ("EBR", "BNZ")
cond = np.isin(odors, odor_pair)

# NOTE(review): unseeded permutation test; p-values can vary between runs.
results_ebr_bnz = permanova(
    distance_matrix=DistanceMatrix(D[cond][:, cond]),
    grouping=odors[cond],
    permutations=permutations,
)
results_ebr_bnz

Pairwise PERMANOVA

In [None]:
# Lower-triangular matrix of pairwise p-values: P[i, j] is filled for j < i
# only, all other entries stay NaN. No multiple-comparison correction is
# applied in this cell.
P = np.full((9, 9), np.nan)

# More permutations than in the global tests above. This rebinds the
# notebook-level `permutations` variable.
permutations = 10000

# Odor names for the IDs 1..9, so that row/column k corresponds to ID k + 1.
odor_names = [config.ODOR_ID_TO_NAME[odor_id] for odor_id in range(1, 10)]

for i in tqdm(range(9)):
    for j in range(i):
        # Keep only the PERs that belong to one of the two odors of this pair.
        cond = np.isin(odors, (odor_names[i], odor_names[j]))
        results = permanova(
            distance_matrix=DistanceMatrix(D[cond][:, cond]),
            grouping=odors[cond],
            permutations=permutations,
        )
        P[i, j] = results["p-value"]

Plot p-value matrix as a heatmap.
- \* : $p \leq 0.05$
- \*\* : $p \leq 0.01$
- \*\*\* : $p \leq 0.001$

In [None]:
from matplotlib import cm
from per_analysis.figures.utils import add_sub_axes

odor_labels = list(config.ODOR_ID_TO_NAME.values())
tick_positions = np.arange(9)

fig, ax = plt.subplots()
mappable = ax.imshow(P, cmap=cm.Reds, vmin=0, vmax=1)
ax.set_xticks(tick_positions)
ax.set_xticklabels(odor_labels, rotation=90)
ax.set_yticks(tick_positions)
ax.set_yticklabels(odor_labels)

# Significance marks, checked from the strictest cutoff to the loosest so
# that every cell receives at most one annotation.
significance_marks = ((0.001, "***"), (0.01, "**"), (0.05, "*"))

for i in range(9):
    for j in range(i):
        p = P[i, j]
        for cutoff, stars in significance_marks:
            if p <= cutoff:
                ax.text(j, i, stars, ha="center", va="center")
                break

for spine in ax.spines.values():
    spine.set_visible(False)

# Crop the last column and the first row: they hold no lower-triangle entry.
ax.set_xlim(-.5, 7.5)
ax.set_ylim(8.5, .5)
ax.tick_params(axis="both", length=0)

# Small colorbar placed in the empty upper-right part of the matrix.
cax = add_sub_axes(ax, [0.74375, .625, .025, .25])
cb = fig.colorbar(mappable, cax=cax)
cax.set_title("$p$-value", pad=10)
cb.set_ticks([0, 1])
savefig("../figures/permanova_p_value.svg")

## Clustering PER (exploratory)
Perform hierarchical clustering on the distance matrix using Ward's method.
Cut the dendrogram at half of the maximum pairwise distance to obtain the clusters.

In [None]:
import scipy.cluster.hierarchy as sch
from scipy.spatial.distance import squareform

# Condensed (upper-triangle) form of the square distance matrix, as expected
# by scipy's linkage.
d = squareform(D)

# NOTE(review): scipy documents Ward linkage as correctly defined only for
# Euclidean distances, whereas `d` holds DTW distances. Acceptable for an
# exploratory analysis, but interpret the linkage heights with care.
L = sch.linkage(d, method="ward")

# Cut the dendrogram at half of the largest pairwise distance.
threshold = d.max() / 2

# Zero-based cluster label for every PER (fcluster numbers clusters from 1).
labels = sch.fcluster(L, threshold, criterion="distance") - 1

# Display the threshold. As a bare expression in the middle of the cell it
# was never shown; only the last expression of a cell is displayed.
threshold

Plot the sorted distance matrix with the dendrogram

In [None]:
import seaborn as sns
from matplotlib.colors import to_hex

# Number of clusters obtained by cutting the dendrogram at `threshold`.
n_clusters = sch.fcluster(L, threshold, criterion="distance").max()

# One HUSL color per cluster for the dendrogram branches below the threshold.
# This changes scipy's global link color palette.
cluster_colors = [to_hex(color) for color in sns.palettes.husl_palette(n_clusters)]
sch.set_link_color_palette(cluster_colors)

# Gap between the dendrogram and the heatmap; the dendrogram is narrowed by
# the same amount.
wspace = .02

with plt.rc_context({'lines.linewidth': 1}):
    fig, axes = plt.subplots(
        1, 2, figsize=(5, 4),
        gridspec_kw=dict(width_ratios=(.2 - wspace, .8), wspace=wspace),
    )

    # Left panel: dendrogram with its leaves pointing towards the heatmap.
    ax = axes[0]
    dend = sch.dendrogram(
        L, ax=ax, orientation='left', color_threshold=threshold,
        no_labels=True, above_threshold_color="gray",
    )
    ax.invert_yaxis()
    ax.axis('off')
    ax.axvline(threshold, c="k", ls="--")

    # Right panel: distance matrix with rows and columns in dendrogram leaf order.
    ax = axes[1]
    argsort = dend["leaves"]
    D_sorted = D[argsort][:, argsort]
    mappable = ax.imshow(D_sorted, cmap="magma_r")
    ax.axis("off")
    fig.subplots_adjust(left=0, bottom=0, right=1, top=1)

    # Colorbar just outside the right edge of the heatmap.
    cax = add_sub_axes(ax, [1.02, 0, .02, .2])
    cax.set_title("Distance", loc="left")
    fig.colorbar(mappable, cax=cax)
    savefig("../figures/clustering/heatmap_dendrogram.svg")

Plot angle trajectory by cluster

In [None]:
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable

# Map the time since PER onset to a color; times beyond t_max are clipped.
norm = Normalize(vmin=0, vmax=t_max, clip=True)
n_col = 3
n_row = int(np.ceil(n_clusters / n_col))

# squeeze=False keeps `axes` two-dimensional even when there is only one row
# (three clusters or fewer). Without it plt.subplots returns a 1-D array and
# the `axes[-1, 0]` lookup below raises an IndexError.
fig, axes = plt.subplots(n_row, n_col, sharex="all", sharey="all", squeeze=False,
                         figsize=(n_col * 1.5, n_row * 1.5))

# One panel per cluster, showing all angle trajectories assigned to it.
for k in range(n_clusters):
    ax = axes.ravel()[k]

    for X in per_angles[labels == k]:
        cplot(X, c=np.arange(len(X)) / config.FRAME_RATE, norm=norm, cmap=cmap.rainbow, ax=ax, linewidths=.5)

# Hide the frame of every panel, including the unused ones of the last row.
for ax in axes.ravel():
    ax.axis("off")

# L-shaped scale bar of `theta` degrees in the bottom-left panel.
ax = axes[-1, 0]
theta = 45
ax.plot([0, 0], [0, theta], transform=ax.get_yaxis_transform(), c="k", lw=2)
ax.plot([0, theta], [0, 0], transform=ax.get_xaxis_transform(), c="k", lw=2)
ax.text(theta / 2, -0.04, f"{theta}°", ha="center", va="top", transform=ax.get_xaxis_transform())
ax.text(0, theta / 2, f"{theta}°", ha="right", va="center", rotation=90, transform=ax.get_yaxis_transform())
fig.subplots_adjust(wspace=0, hspace=0, bottom=0, left=0, right=1, top=1)

# Colorbar to the right of the panels, built from the same norm and colormap
# as the trajectories.
cax = fig.add_axes([1, .3, .02, .4])
mappable = ScalarMappable(cmap=cmap.rainbow, norm=norm)
cb = fig.colorbar(mappable, cax=cax)
cb.set_label("Time (s)", rotation=270, va="bottom")
fig.supxlabel("Haustellum angle", x=.5, y=-0.02, va="top")
fig.supylabel("Rostrum angle", x=-0.02, y=.5, ha="right")
savefig("../figures/clustering/angles.svg")