In [1]:
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_formats = ["svg"]

import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy.ndimage import gaussian_filter, percentile_filter

sys.path.append("../")

from per_analysis import config
from per_analysis.figures import cmap, cplot, qplot, scale_bar, \
    plot_per_trajectory2
from per_analysis.timeseries import hysteresis, dtw

In [2]:
# Keep rendered DataFrames short: only a handful of rows are shown per table
pd.set_option("display.max_rows", 5)

# Apply the project-wide matplotlib style sheet to every figure below
plt.style.use("../styles/custom.mplstyle")

In [3]:
def preprocess(df, percentile, size, sigma, discard=()):
    """Drop excluded flies, add a time column, and baseline-correct/smooth the angles.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least the columns ``fly``, ``trial`` and ``frame``; every
        column whose name contains ``"angle"`` is filtered.
    percentile : scalar
        Percentile passed to ``scipy.ndimage.percentile_filter`` for the baseline.
    size : scalar
        Baseline window; multiplied by ``config.FRAME_RATE`` before being used
        as the filter size (presumably seconds -> frames; confirm in ``config``).
    sigma : scalar
        Standard deviation of the Gaussian smoothing kernel.
    discard : iterable, optional
        Values of ``fly`` to remove before processing.

    Returns
    -------
    pandas.DataFrame
        A filtered copy of ``df``; the input frame is not modified.
    """
    kept = df[~df.fly.isin(discard)].copy()
    kept["time"] = kept.trial * config.TRIAL_DURATION + kept.frame / config.FRAME_RATE

    angle_names = [name for name in kept.columns if "angle" in name]
    window = size * config.FRAME_RATE

    # Each fly is filtered on its own so the filters never mix data across flies
    for fly_id, fly_rows in kept.groupby("fly"):
        fly_mask = kept.fly == fly_id

        for name in angle_names:
            signal = fly_rows[name]

            # running percentile acts as a slowly varying baseline estimate
            baseline = percentile_filter(signal, percentile, window)

            # subtract the baseline, then smooth what is left
            kept.loc[fly_mask, name] = gaussian_filter(signal - baseline, sigma, mode="nearest")

    return kept


def detect_per(df, angle, low, high):
    """Detect PER intervals for every fly by hysteresis thresholding of one angle.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame-level table with a ``fly`` column and the column named by ``angle``.
    angle : str
        Name of the column that is thresholded.
    low, high : scalar
        Lower and upper thresholds forwarded unchanged to ``hysteresis``.

    Returns
    -------
    pandas.DataFrame
        One row per detected interval with the columns ``fly``, ``start`` and
        ``stop`` (in that order) and a fresh 0..n-1 index. ``start``/``stop``
        are whatever ``hysteresis`` returns for the per-fly series. When nothing
        is detected, an empty frame with the same three columns is returned.
    """
    per_fly = []

    for fly, df_fly in df.groupby("fly"):
        # get the PER intervals by hysteresis thresholding
        intervals = hysteresis(df_fly[angle], low, high)

        if len(intervals) > 0:
            df_fly_per = pd.DataFrame(intervals, columns=["start", "stop"])
            df_fly_per["fly"] = fly
            per_fly.append(df_fly_per)

    if not per_fly:
        # Keep the schema so that callers can still group by "fly"
        return pd.DataFrame(columns=["fly", "start", "stop"])

    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # growing the frame inside the loop
    df_per = pd.concat(per_fly, ignore_index=True)

    return df_per.sort_index(axis=1)


def assign_per_stimulus(df_per, odor_per_interval, df_frames=None):
    """Label PER events in place with the stimulus present at their onset frame.

    An event receives ``stimulus`` and ``percentage`` values (copied from the
    frame-level row at its ``start`` position within the fly's rows) when either

    * the stimulus at the onset frame is ``"Sucrose"``, or
    * the onset time within the trial,
      ``(start / config.FRAME_RATE) % config.TRIAL_DURATION``, lies strictly
      inside ``odor_per_interval``.

    All other events are left unassigned.

    Parameters
    ----------
    df_per : pandas.DataFrame
        PER events with the columns ``fly`` and ``start``. Modified in place:
        the columns ``stimulus`` and ``percentage`` are written row by row.
    odor_per_interval : tuple
        ``(lower, upper)`` bounds of the accepted onset time within a trial.
    df_frames : pandas.DataFrame, optional
        Frame-level table with the columns ``fly``, ``stimulus`` and
        ``percentage``. When omitted, the module-level ``df`` is used, which is
        what this function always did before the argument existed.

    Returns
    -------
    None
    """
    if df_frames is None:
        # Backward-compatible fallback to the notebook-level frame table
        df_frames = df

    lower, upper = odor_per_interval[0], odor_per_interval[1]

    for fly, df_fly_per in df_per.groupby("fly"):
        df_fly = df_frames[df_frames.fly == fly]

        for i, row in df_fly_per.iterrows():
            # `start` is a position within this fly's rows, hence iloc
            onset = df_fly.iloc[row["start"]]
            t_start = (row["start"] / config.FRAME_RATE) % config.TRIAL_DURATION

            if onset.stimulus == "Sucrose" or (lower < t_start < upper):
                df_per.loc[i, "stimulus"] = onset.stimulus
                df_per.loc[i, "percentage"] = onset.percentage


def angles_pca(angles, n_components):
    """Standardize the angle columns and project them onto principal components.

    Parameters
    ----------
    angles : array-like
        Samples in rows, one angle per column.
    n_components : int
        Number of principal components passed to ``sklearn.decomposition.PCA``.

    Returns
    -------
    Output of ``fit_transform`` of the scaler + PCA pipeline, i.e. the samples
    expressed in principal-component coordinates.
    """
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA

    # Standardize each column first, then apply PCA to the standardized data
    scale_then_project = make_pipeline(StandardScaler(), PCA(n_components))

    # Fit on the given samples and return their projection
    return scale_then_project.fit_transform(angles)


def extract_per_angles_pcs(df, df_per, angle_columns=None, pc_columns=None):
    """Cut the angle and principal-component trajectory of every PER event.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame-level table with a ``fly`` column plus the requested columns.
    df_per : pandas.DataFrame
        PER events with the columns ``fly``, ``start`` and ``stop``; ``start``
        and ``stop`` are positions within the rows of the corresponding fly
        (``stop`` exclusive).
    angle_columns : sequence of str, optional
        Columns that make up the angle trajectory. Defaults to the first two
        entries of the module-level ``angle_cols``.
    pc_columns : sequence of str, optional
        Columns that make up the PC trajectory. Defaults to the module-level
        ``pc_cols``.

    Returns
    -------
    per_angles, per_pcs : numpy.ndarray
        1-D object arrays with one 2-D array (frames x columns) per event.
        Events are emitted in ``df_per.groupby("fly")`` order, so the arrays
        line up with the rows of ``df_per`` only if ``df_per`` is already
        ordered by fly, as produced by ``detect_per``.
    """
    if angle_columns is None:
        # Backward-compatible fallback to the notebook-level column lists
        angle_columns = angle_cols[:2]
    if pc_columns is None:
        pc_columns = pc_cols

    angle_columns = list(angle_columns)
    pc_columns = list(pc_columns)

    angle_segments = []
    pc_segments = []

    for fly, df_fly_per in df_per.groupby("fly"):
        df_fly = df[df.fly == fly]

        for row in df_fly_per.itertuples():
            segment = df_fly.iloc[row.start : row.stop]
            angle_segments.append(segment[angle_columns].values)
            pc_segments.append(segment[pc_columns].values)

    def _as_object_array(segments):
        # np.array(segments, dtype=object) would merge equal-length segments
        # into a single 3-D array; filling element by element always yields a
        # 1-D array of per-event arrays.
        out = np.empty(len(segments), dtype=object)
        for position, segment in enumerate(segments):
            out[position] = segment
        return out

    return _as_object_array(angle_segments), _as_object_array(pc_segments)


def visualize_clusters(per_angles, labels, t_max=3, n_col=3, w=1.5, h=1.5, bar_length=45, rasterized=True):
    """Plot all trajectories of each cluster in its own panel, colored by elapsed time.

    Parameters
    ----------
    per_angles : numpy.ndarray
        1-D object array holding one 2-D trajectory array per PER event; it is
        indexed with a boolean mask derived from ``labels``.
    labels : array-like
        Cluster label of every trajectory, aligned with ``per_angles``.
    t_max : scalar, optional
        Upper end of the color scale; the color value of sample ``n`` is
        ``n / config.FRAME_RATE`` and values above ``t_max`` are clipped.
    n_col : int, optional
        Number of panel columns; the number of rows follows from the number of
        distinct labels.
    w, h : scalar, optional
        Width and height of a single panel in inches.
    bar_length : scalar, optional
        Length of both scale bars (drawn with the unit ``"°"``).
    rasterized : bool, optional
        Forwarded to ``set_rasterized`` of every trajectory artist.

    Returns
    -------
    None
        The figure is created through pyplot and left as the current figure.
    """
    from matplotlib.cm import ScalarMappable
    from matplotlib.colors import Normalize

    labels = np.asarray(labels)
    unique_labels = np.unique(labels)  # np.unique returns sorted values
    n_clusters = len(unique_labels)

    norm = Normalize(vmin=0, vmax=t_max, clip=True)
    n_row = int(np.ceil(n_clusters / n_col))

    # squeeze=False keeps `axes` two-dimensional even when the grid has a single
    # row or column; otherwise `axes[-1, 0]` below raises IndexError.
    fig, axes = plt.subplots(n_row, n_col, sharex="all", sharey="all",
                             figsize=(n_col * w, n_row * h), squeeze=False)
    panels = axes.ravel()

    for panel, k in zip(panels, unique_labels):
        for X in per_angles[labels == k]:
            elapsed = np.arange(len(X)) / config.FRAME_RATE
            artist = cplot(X, c=elapsed, norm=norm, cmap=cmap.rainbow, ax=panel, linewidths=.5)
            artist.set_rasterized(rasterized)

    # Hide the frames of all panels, including the unused ones
    for panel in panels:
        panel.axis("off")

    # Scale bars go into the bottom-left panel, anchored at the overall minimum
    ax = axes[-1, 0]

    x0, y0 = np.min([i.min(0) for i in per_angles], 0)
    scale_bar(x0, bar_length, unit="°", text_pad=-.04, axis="x", ax=ax)
    scale_bar(y0, bar_length, unit="°", axis="y", ax=ax)

    fig.subplots_adjust(wspace=0, hspace=0, bottom=0, left=0, right=1, top=1)

    # Colorbar sits just outside the right edge of the panel grid
    cax = fig.add_axes([1, .3, .02, .4])
    mappable = ScalarMappable(cmap=cmap.rainbow, norm=norm)
    cb = fig.colorbar(mappable, cax=cax)
    cb.set_label("Time (s)", rotation=270, va="bottom")
    fig.supxlabel("Haustellum angle", x=.5, y=-0.02, va="top")
    fig.supylabel("Rostrum angle", x=-0.02, y=.5, ha="right")

### Data preprocessing

In [4]:
# Flies excluded from the analysis (tracking errors, missing data, etc.)
discard = (
    "20210827-12",
    "20210827-13",
    "20210827-14",
    "20210827-37",
    "20190904-1",
    "20190904-10",
    "20190905-5",
)

# Load the raw table and preprocess it: drop the excluded flies, subtract a
# running 25th-percentile baseline and smooth with a Gaussian (sigma=1)
df = preprocess(
    pd.read_csv("../data/data.csv"),
    percentile=25,
    size=5,
    sigma=1,
    discard=discard,
)

# Project the three angle columns onto their first two principal components
angle_cols = ["haustellum_angle", "rostrum_angle", "labella_angle"]
pc_cols = ["pc1", "pc2"]
df[pc_cols] = angles_pca(df[angle_cols], n_components=2)
df

Unnamed: 0,fly,stimulus,trial,frame,haustellum_angle,rostrum_angle,labella_angle,percentage,time,pc1,pc2
7836,20190904-2,EBR,0,0,0.961496,1.388101,-0.119086,50.0,0.00,-0.667417,0.106704
7837,20190904-2,EBR,0,1,0.942490,1.619320,-0.251009,50.0,0.05,-0.664107,0.107258
...,...,...,...,...,...,...,...,...,...,...,...
215734,20210827-36,Sucrose,2,298,-0.041171,0.505626,0.619724,1.0,44.90,-0.715403,0.025043
215735,20210827-36,Sucrose,2,299,-0.721209,-0.021923,0.466575,1.0,44.95,-0.776523,-0.002453


### PER detection

In [5]:
# Detect PER intervals per fly by hysteresis thresholding of the rostrum angle
# (low/high thresholds of 5 and 15, presumably in degrees - TODO confirm)
df_per = detect_per(df, angle="rostrum_angle", low=5, high=15)
# Adds the `stimulus` and `percentage` columns to df_per in place; events that are
# neither sucrose-evoked nor starting between 2 and 7 (time within the trial) stay NaN
assign_per_stimulus(df_per, odor_per_interval=(2, 7))
# One trajectory per PER event, in angle space and in PC space
per_angles, per_pcs = extract_per_angles_pcs(df, df_per)
df_per

Unnamed: 0,fly,start,stop,stimulus,percentage
0,20190904-2,74,123,EBR,50.0
1,20190904-2,300,325,,
...,...,...,...,...,...
1743,20210827-36,75,129,Sucrose,1.0
1744,20210827-36,235,262,Sucrose,1.0


### Compute DTW distance matrix

In [None]:
# time window for dynamic time warping, in frames
w = 5

# The pairwise DTW computation is quadratic in the number of PER events, so the
# resulting matrix is cached on disk, keyed by the window size.
D_path = f"../data/D{w}.npy"
n_per = len(df_per)

try:
    D = np.load(D_path)
except FileNotFoundError:
    D = None

# Recompute when there is no cache, or when the cached matrix was built for a
# different set of PER events (its shape no longer matches df_per).
if D is None or D.shape != (n_per, n_per):
    D = np.zeros((n_per,) * 2)

    # DTW distance is filled in symmetrically, so only j >= i is evaluated
    for i in range(n_per):
        for j in range(i, n_per):
            D[i, j] = D[j, i] = dtw(per_pcs[i], per_pcs[j], w)

    # np.save needs the array as its second argument
    np.save(D_path, D)

### Clustering
Clusters were obtained from the distance matrix using the K-Medoids algorithm, which allows the use of arbitrary dissimilarity measures. The number of clusters was determined from the inertia curve by the elbow method.

In [None]:
from sklearn_extra.cluster import KMedoids

# Candidate numbers of clusters and number of repetitions (one random seed each)
n_clusters_range = np.arange(2, 10)
n_rep = 5

# Rows: repetitions (seed = row index); columns: candidate numbers of clusters
inertia_runs = np.empty((n_rep, len(n_clusters_range)))

for seed in range(n_rep):
    for col, n_candidates in enumerate(n_clusters_range):
        model = KMedoids(n_candidates, metric="precomputed", method="pam", random_state=seed)
        inertia_runs[seed, col] = model.fit(D).inertia_

# Average the inertia over the repetitions for every candidate
inertia = np.mean(inertia_runs, axis=0)

In [None]:
# Inertia as a function of the number of clusters (used for the elbow method)
fig, ax = plt.subplots(figsize=(2, 1.5))
ax.plot(n_clusters_range, inertia, marker=".", lw=1, markersize=3, c="k")

# Only the left and bottom spines are kept
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Only the shape of the curve matters, so the y axis carries no ticks
ax.set_xticks(n_clusters_range)
ax.set_yticks([])
ax.set_xlabel("K")
ax.set_ylabel("Inertia")

Plot all trajectories in each cluster

In [None]:
# Number of clusters used for the final model (presumably the elbow of the
# inertia curve - confirm against the plot)
n_clusters = 5
# Fixed random_state so that the cluster assignment is reproducible
km = KMedoids(n_clusters, metric="precomputed", method="pam", random_state=0).fit(D)
# Color scale is clipped at 2 (labelled "Time (s)" on the colorbar)
visualize_clusters(per_angles, km.labels_, t_max=2)

Plot trajectories of cluster medoids

In [None]:
fig, ax = plt.subplots(figsize=(2.5, 2.5))

# One trajectory per cluster medoid, drawn in the color-cycle color of its cluster
medoid_trajectories = per_angles[km.medoid_indices_]

for i, X in enumerate(medoid_trajectories):
    color = f"C{i}"
    qplot(*X.T, lw=1, c=color)

    # One-based cluster number, placed next to the upper-right corner of the
    # trajectory's bounding box
    x_max, y_max = X.max(0)
    ax.text(x_max + 2, y_max - 2, i+1, color=color)

for side in ("top", "right"):
    ax.spines[side].set_visible(False)

ax.set_xlabel("Haustellum angle (°)")
ax.set_ylabel("Rostrum angle (°)")

For each odor, choose the PERs that are closest to the cluster medoids as exemplars

In [None]:
# Minimum duration (stop - start) of a PER that may serve as an exemplar
min_length = 5
df_per["cluster"] = km.labels_
idx_exemplars = []

for (stimulus, k), group in df_per.groupby(["stimulus", "cluster"]):
    candidates = group[(group["stop"] - group["start"]) >= min_length]

    # skip stimulus/cluster combinations with fewer than 5 remaining PERs
    if len(candidates) < 5:
        continue

    # distance from the medoid of cluster k to every candidate; keep the closest
    to_medoid = D[km.medoid_indices_[k]][candidates.index]
    nearest = np.argmin(to_medoid)
    idx_exemplars.append(candidates.index.values[nearest])

In [None]:
def plot_odor_per_exemplars(per_angles, df_per, idx, wspace=0,  hspace=.5, figsize=(4.5, 4.5), include_sucrose=False):
    """Plot the selected exemplar PER trajectories in a grid of stimulus panels.

    Parameters
    ----------
    per_angles : numpy.ndarray
        Array of per-event angle trajectories, indexable by ``idx``.
    df_per : pandas.DataFrame
        PER events; the rows at positions ``idx`` are plotted.
    idx : sequence of int
        Positions of the exemplar events in ``df_per`` / ``per_angles``.
    wspace, hspace : scalar, optional
        Spacing forwarded to ``fig.subplots_adjust``.
    figsize : tuple, optional
        Figure size forwarded to ``plot_per_trajectory2``.
    include_sucrose : bool, optional
        If true, a fourth column is used and the sucrose events are relabelled
        ``"0.25% Sucrose"`` / ``"1% Sucrose"`` by their ``percentage``; rows
        whose stimulus is not part of the panel order are dropped.

    Returns
    -------
    None
    """
    df_per_odor = df_per.iloc[idx].copy()

    if not include_sucrose:
        ncol = 3
        order = config.ODOR_COL_ORDER
    else:
        ncol = 4
        odors = config.ODOR_COL_ORDER
        order = odors[:3] + ("0.25% Sucrose",) + \
                odors[3:6] + ("1% Sucrose",) + \
                odors[6:9]

        # Split the generic "Sucrose" label by concentration
        is_sucrose = df_per_odor.stimulus == "Sucrose"
        for concentration, label in ((0.25, "0.25% Sucrose"), (1.0, "1% Sucrose")):
            df_per_odor.loc[is_sucrose & (df_per_odor.percentage == concentration), "stimulus"] = label

        df_per_odor = df_per_odor[df_per_odor.stimulus.isin(order)]

    # Common axis limits: bounding box over all selected trajectories
    stacked = np.concatenate(per_angles[idx])
    xlim, ylim = np.array([stacked.min(0), stacked.max(0)]).T

    fig = plot_per_trajectory2(df_per_odor, per_angles, figsize=figsize,
                               spines=False, xlim=xlim, ylim=ylim,
                               xlabel="Haustellum angle",
                               ylabel="Rostrum angle",
                               quiver=True, order=order, ncol=ncol)
    fig.subplots_adjust(wspace=wspace, hspace=hspace)

    # Scale bars are drawn on the bottom-left panel, made current via plt.sca
    panel_grid = np.array(fig.axes).reshape(-1, ncol)
    plt.sca(panel_grid[-1, 0])
    scale_bar(xlim[0], 45, "°", pad=-.05, text_pad=-.08, axis="x", lw=1)
    scale_bar(ylim[0], 45, "°", pad=-.05, text_pad=-.05, axis="y", lw=1)

Plot cluster exemplars for each odor

In [None]:
# Odor panels only (include_sucrose defaults to False -> three-column layout)
plot_odor_per_exemplars(per_angles, df_per, idx_exemplars)

Plot cluster exemplars for each odor and for sucrose

In [None]:
# Same exemplars, with an extra column for the 0.25% and 1% sucrose panels
plot_odor_per_exemplars(per_angles, df_per, idx_exemplars, include_sucrose=True)