In [None]:
import sklearn
import pandas as pd
import numpy as np
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the "tips" example dataset through seaborn; later cells reuse `tips`.
tips = sns.load_dataset("tips")

# Baseline: plain swarm plot of the total_bill column, no annotations yet.
sns.swarmplot(x=tips["total_bill"])

In [None]:
from matplotlib.patches import Circle

# First attempt at highlighting quantile markers: draw a Circle patch around
# the swarm marker found at each quantile position.
fig, ax = plt.subplots(figsize=(12, 8))
ax = sns.swarmplot(x=tips["total_bill"], s=10)

# Read the marker centres back from the first collection on the axes and
# order them by x so that row position corresponds to rank.
offsets = ax.collections[0].get_offsets()
coll = pd.DataFrame(offsets, columns=['x', 'y']).sort_values(by='x')

ring_colour = sns.color_palette()[9]
n_points = coll.shape[0]
for quantile in [0.1, 0.5, 0.9]:
    idx_q = round(n_points * quantile)
    xy = coll.iloc[idx_q, :]
    circ = Circle((xy['x'], xy['y']), radius=0.4, fill=False, edgecolor=ring_colour)
    ax.add_patch(circ)

In [None]:
def get_aspect(ax):
    """Return how wide one x data unit is drawn relative to one y data unit.

    The value is (inches per x unit) / (inches per y unit) for the given axes,
    derived from the figure size, the fraction of the figure the axes occupy,
    and the current x/y limits. An ellipse whose data-space width is
    ``height / get_aspect(ax)`` therefore has equal extent in inches along
    both directions.

    Parameters
    ----------
    ax : object exposing ``get_figure()``, ``get_position()``, ``get_xlim()``
        and ``get_ylim()`` (a matplotlib Axes).

    Returns
    -------
    float
        Ratio of the drawn width of an x unit to the drawn height of a y unit.
    """
    fig_w, fig_h = ax.get_figure().get_size_inches()
    # Bbox.bounds is (x0, y0, width, height): the last two entries are already
    # the axes' extent in figure fractions, NOT the right/top edges, so they
    # must not have x0/y0 subtracted from them.
    _, _, frac_w, frac_h = ax.get_position().bounds
    # sorted() keeps the spans positive even when an axis is inverted.
    x_lo, x_hi = sorted(ax.get_xlim())
    y_lo, y_hi = sorted(ax.get_ylim())
    width_per_unit = frac_w * fig_w / (x_hi - x_lo)
    height_per_unit = frac_h * fig_h / (y_hi - y_lo)
    return width_per_unit / height_per_unit

In [None]:
from matplotlib.patches import Ellipse

# Second attempt: use an Ellipse whose width is scaled by the axes aspect so
# the ring is not stretched by the unequal x/y data scales.
fig, ax = plt.subplots(figsize=(12, 8))
ax = sns.swarmplot(x=tips["total_bill"], s=10)

offsets = ax.collections[0].get_offsets()
coll = pd.DataFrame(offsets, columns=['x', 'y']).sort_values(by='x')

aspect = get_aspect(ax)

radius = 0.05
ring_style = dict(fill=False, edgecolor=sns.color_palette()[9], linewidth=4, alpha=0.8)
for quantile in [0.1, 0.5, 0.9]:
    idx_q = round(len(coll) * quantile)
    xy = coll.iloc[idx_q, :]
    centre = (xy['x'], xy['y'])
    circ = Ellipse(centre, height=radius, width=radius / aspect, **ring_style)
    ax.add_patch(circ)

In [None]:
from matplotlib.patches import Ellipse

# Same overlay as before, restyled: black rings with an explicit zorder.
fig, ax = plt.subplots(figsize=(12, 8))
ax = sns.swarmplot(x=tips["total_bill"], s=10)

offsets = ax.collections[0].get_offsets()
coll = pd.DataFrame(offsets, columns=['x', 'y']).sort_values(by='x')

aspect = get_aspect(ax)

radius = 0.05
ring_style = dict(fill=False, edgecolor='black', linewidth=3, alpha=0.8, zorder=4)
for quantile in [0.1, 0.5, 0.9]:
    idx_q = round(len(coll) * quantile)
    xy = coll.iloc[idx_q, :]
    centre = (xy['x'], xy['y'])
    circ = Ellipse(centre, height=radius, width=radius / aspect, **ring_style)
    ax.add_patch(circ)

In [None]:
from matplotlib.patches import Ellipse

# Refinement: instead of ringing whichever marker happens to sit at the
# quantile rank, ring the marker with the smallest y among the markers whose x
# is (almost) the same as the quantile marker's x.
fig, ax = plt.subplots(figsize=(12,8))

ax = sns.swarmplot(x=tips["total_bill"], s=10)

# Marker centres read back from the plot, ordered by x. Note that the index
# keeps the ORIGINAL row labels, so labels and positions no longer coincide.
coll = pd.DataFrame(ax.collections[0].get_offsets(), columns=['x', 'y']).sort_values(by='x')

aspect = get_aspect(ax)
radius = 0.04
for quantile in [0.1, 0.5, 0.9]:
    idx_q = round(coll.shape[0] * quantile)
    xy = coll.iloc[idx_q, :]
    # Markers within a third of the ring width of the quantile marker's x.
    subplot = coll[(coll['x'] - xy['x']).abs() * 3 <= radius / aspect]
    # idxmin() returns an index LABEL, so the row must be fetched with .loc;
    # positional .iloc would pick an unrelated row of the sorted frame.
    idx = subplot['y'].idxmin()
    xy = coll.loc[idx, :]
    circ = Ellipse((xy['x'], xy['y']), height=radius, width=radius/aspect, fill=False, edgecolor='black', linewidth=3, alpha=0.8, zorder=4)
    ax.add_patch(circ)

In [None]:
from matplotlib.patches import Ellipse

# Ring the selected marker for each quantile and add a text label plus a
# dashed vertical connector below it.
fig, ax = plt.subplots(figsize=(12,8))

ax = sns.swarmplot(x=tips["total_bill"], s=10)

# Marker centres read back from the plot, ordered by x. The index keeps the
# original row labels, so labels and positions no longer coincide.
coll = pd.DataFrame(ax.collections[0].get_offsets(), columns=['x', 'y']).sort_values(by='x')

aspect = get_aspect(ax)
radius = 0.05
for quantile in [0.1, 0.5, 0.9]:
    idx_q = round(coll.shape[0] * quantile)
    xy = coll.iloc[idx_q, :]
    # Markers within a third of the ring width of the quantile marker's x.
    subplot = coll[(coll['x'] - xy['x']).abs() * 3 <= radius / aspect]
    # idxmin() yields an index LABEL; look it up with .loc (positional .iloc
    # would return a different row of the sorted frame).
    idx = subplot['y'].idxmin()
    xy = coll.loc[idx, :]
    circ = Ellipse((xy['x'], xy['y']), height=radius, width=radius/aspect, fill=False, edgecolor='black', linewidth=2.5, zorder=4)
    ax.add_patch(circ)
    # Label and connector are positioned in multiples of the ring height,
    # offset towards smaller y from the ringed marker.
    ax.text(xy['x'], xy['y'] - 3.25 * radius, s='{} (q={})'.format(xy['x'], quantile), ha='center', fontsize=11)
    ax.vlines(xy['x'], xy['y'] - 2.75 * radius, xy['y'] - 0.75 * radius, lw=3, linestyle='--', color='black')

In [None]:
def annotated_swarm(data, quantiles=(0.1, 0.5, 0.9), s=10, figsize=(12,8)):
    """Draw a swarm plot of ``data`` and ring/label one marker per quantile.

    Parameters
    ----------
    data : values passed to ``sns.swarmplot`` as ``x``.
    quantiles : iterable of float
        Fractions of the sorted markers to annotate. Values whose rank falls
        outside the data (e.g. ``1.0``) are clamped to the first/last marker.
    s : number
        Marker size forwarded to ``sns.swarmplot``; the ring height is
        ``s / 200`` in y data units.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    None
        The plot is drawn on a newly created figure.
    """
    radius = s / 200
    fig, ax = plt.subplots(figsize=figsize)

    ax = sns.swarmplot(x=data, s=s, ax=ax)

    # Marker centres read back from the plot, ordered by x. The index keeps
    # the original row labels, so use .loc for labels and .iloc for ranks.
    coll = pd.DataFrame(ax.collections[0].get_offsets(), columns=['x', 'y']).sort_values(by='x')
    last_pos = coll.shape[0] - 1

    aspect = get_aspect(ax)
    for quantile in quantiles:
        # round(n * q) equals n for q == 1.0, which is one past the end.
        idx_q = max(0, min(round(coll.shape[0] * quantile), last_pos))
        xy = coll.iloc[idx_q, :]
        # Markers within a third of the ring width of the quantile marker's x;
        # of those, annotate the one with the smallest y.
        subplot = coll[(coll['x'] - xy['x']).abs() * 3 <= radius / aspect]
        idx = subplot['y'].idxmin()
        xy = subplot.loc[idx, :]
        circ = Ellipse((xy['x'], xy['y']), height=radius, width=radius/aspect, fill=False, edgecolor='black', linewidth=2.5, zorder=4)
        ax.add_patch(circ)
        # Label and dashed connector, offset from the ring in ring heights.
        ax.text(xy['x'], xy['y'] - 3.25 * radius, s='{} (q={})'.format(xy['x'], quantile), ha='center', fontsize=11)
        ax.vlines(xy['x'], xy['y'] - 2.75 * radius, xy['y'] - 0.75 * radius, lw=3, linestyle='--', color='black')

In [None]:
# Try the helper on the total_bill column of the tips data.
annotated_swarm(tips['total_bill'])

In [None]:
# Peek at the first rows of tips to see which other columns are available.
tips.head()

In [None]:
# Same helper, applied to the tip column.
annotated_swarm(tips['tip'])

In [None]:
def annotated_swarm(data, quantiles=(0.1, 0.5, 0.9), s=10, figsize=(12,8)):
    """Draw a swarm plot of ``data`` and ring/label one marker per quantile.

    Each label shows the x value of the ringed marker in compact general
    format, followed by the quantile it stands for.

    Parameters
    ----------
    data : values passed to ``sns.swarmplot`` as ``x``.
    quantiles : iterable of float
        Fractions of the sorted markers to annotate. Values whose rank falls
        outside the data (e.g. ``1.0``) are clamped to the first/last marker.
    s : number
        Marker size forwarded to ``sns.swarmplot``; the ring height is
        ``s / 200`` in y data units.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    None
        The plot is drawn on a newly created figure.
    """
    radius = s / 200
    fig, ax = plt.subplots(figsize=figsize)

    ax = sns.swarmplot(x=data, s=s, ax=ax)

    # Marker centres read back from the plot, ordered by x. The index keeps
    # the original row labels, so use .loc for labels and .iloc for ranks.
    coll = pd.DataFrame(ax.collections[0].get_offsets(), columns=['x', 'y']).sort_values(by='x')
    last_pos = coll.shape[0] - 1

    aspect = get_aspect(ax)
    for quantile in quantiles:
        # round(n * q) equals n for q == 1.0, which is one past the end.
        idx_q = max(0, min(round(coll.shape[0] * quantile), last_pos))
        xy = coll.iloc[idx_q, :]
        # Markers within a third of the ring width of the quantile marker's x;
        # of those, annotate the one with the smallest y.
        subplot = coll[(coll['x'] - xy['x']).abs() * 3 <= radius / aspect]
        idx = subplot['y'].idxmin()
        xy = subplot.loc[idx, :]
        circ = Ellipse((xy['x'], xy['y']), height=radius, width=radius/aspect, fill=False, edgecolor='black', linewidth=2.5, zorder=4)
        ax.add_patch(circ)
        # '{:g}' keeps up to 6 significant digits and drops trailing zeros;
        # '.2g' turned e.g. 4050 into '4e+03' and 17.81 into '18'.
        ax.text(xy['x'], xy['y'] - 3.25 * radius, s='{:g} (q={})'.format(xy['x'], quantile), ha='center', fontsize=11)
        ax.vlines(xy['x'], xy['y'] - 2.75 * radius, xy['y'] - 0.75 * radius, lw=3, linestyle='--', color='black')

In [None]:
# Re-run on the tip column with the redefined helper (formatted labels).
annotated_swarm(tips['tip'])

In [None]:
# Load the "iris" example dataset through seaborn.
iris = sns.load_dataset('iris')

In [None]:
# Annotated swarm of the sepal_length column of iris.
annotated_swarm(iris['sepal_length'])

In [None]:
# Load the "penguins" example dataset through seaborn and preview its first rows.
penguins = sns.load_dataset('penguins')
penguins.head()

In [None]:
# Annotated swarm of the body_mass_g column of penguins.
annotated_swarm(penguins['body_mass_g'])

In [None]:
def annotated_swarm(data, quantiles=(0.1, 0.5, 0.9), s=10, figsize=(12,8)):
    """Draw a hue-coloured swarm of ``data`` with ringed, labelled quantiles.

    A fully transparent swarm plot is drawn first, only to obtain the marker
    positions. The markers are then re-drawn as a scatter plot coloured by
    their x value, and one marker per requested quantile is ringed and
    labelled.

    Parameters
    ----------
    data : values passed to ``sns.swarmplot`` as ``x``.
    quantiles : iterable of float
        Fractions of the sorted markers to annotate. Values whose rank falls
        outside the data (e.g. ``1.0``) are clamped to the first/last marker.
    s : number
        Marker size forwarded to ``sns.swarmplot``; the ring height is
        ``s / 200`` in y data units and the scatter markers use ``s * 10``.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    The axes the plot was drawn on, so callers can set labels or titles.
    """
    radius = s / 200
    fig, ax = plt.subplots(figsize=figsize)

    # alpha=0: this swarm is invisible and serves only to place the markers.
    ax = sns.swarmplot(x=data, s=s, ax=ax, alpha=0)

    # Marker centres read back from the plot, ordered by x. The index keeps
    # the original row labels, so use .loc for labels and .iloc for ranks.
    coll = pd.DataFrame(ax.collections[0].get_offsets(), columns=['x', 'y']).sort_values(by='x')
    last_pos = coll.shape[0] - 1

    aspect = get_aspect(ax)
    for quantile in quantiles:
        # round(n * q) equals n for q == 1.0, which is one past the end.
        idx_q = max(0, min(round(coll.shape[0] * quantile), last_pos))
        xy = coll.iloc[idx_q, :]
        # Markers within a third of the ring width of the quantile marker's x;
        # of those, annotate the one with the smallest y.
        subplot = coll[(coll['x'] - xy['x']).abs() * 3 <= radius / aspect]
        idx = subplot['y'].idxmin()
        xy = subplot.loc[idx, :]
        circ = Ellipse((xy['x'], xy['y']), height=radius, width=radius/aspect, fill=False, edgecolor='black', linewidth=2.5, zorder=5)
        ax.add_patch(circ)
        # '{:g}' keeps up to 6 significant digits and drops trailing zeros;
        # '.2g' turned e.g. 4050 into '4e+03' and 17.81 into '18'.
        ax.text(xy['x'], xy['y'] - 3.25 * radius, s='{:g} (q={})'.format(xy['x'], quantile), ha='center', fontsize=11)
        ax.vlines(xy['x'], xy['y'] - 2.75 * radius, xy['y'] - 0.75 * radius, lw=3, linestyle='--', color='black', zorder=5)
    # Visible markers, drawn below the annotations (zorder 4 < 5).
    # NOTE(review): s*10 presumably matches the swarm's marker area only at the
    # default s=10 (if swarmplot squares its size) - confirm before varying s.
    sns.scatterplot(data=coll, x='x', y='y', hue='x', zorder=4, ax=ax, s=s*10)
    # Rebuild the legend from the handles/labels currently on the axes.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels)
    ax.set_ylabel('')
    return ax

In [None]:
# Final version on the penguins data; the returned axes is used to set the x label.
ax = annotated_swarm(penguins['body_mass_g'])
ax.set_xlabel('Body Mass')

In [None]:
# Final version on the tips data, with a labelled x axis; plt.show() renders the figure.
ax = annotated_swarm(tips['total_bill'])
ax.set_xlabel("Total Bill ($)")
plt.show()