In [12]:
import numpy as np
import pandas as pd
from scipy.spatial import ConvexHull

from bokeh.plotting import figure
from bokeh.layouts import gridplot
from bokeh.io import show, output_notebook
output_notebook()

# Learn convex hulls

In [13]:
# Generate dataset:

eg_dfs = []
for i in range(3):
    points = np.random.rand(30, 2)   # 30 random points in 2-D
    hull = ConvexHull(points)
    eg_df = pd.DataFrame(points, columns=["X", "Y"])
    eg_df["ID"] = i
    
    eg_dfs.append(eg_df)

eg_concat_df = pd.concat(eg_dfs)

In [14]:
def compute_eg_convex_hull(df):
    hull = ConvexHull(df[['X','Y']])
    return pd.Series({"hull_obj": hull, 
                      "hull_area": hull.area, 
                      "hull_vertices": hull.vertices,
                      "len_hull_vertices": len(hull.vertices)})

In [15]:
# Split, apply, combine:
grouped = eg_concat_df.groupby(["ID"])
eg_hulls_df = grouped.apply(compute_eg_convex_hull).reset_index()

In [16]:
eg_hulls_df

Unnamed: 0,ID,hull_obj,hull_area,hull_vertices,len_hull_vertices
0,0,<scipy.spatial.qhull.ConvexHull object at 0x7f...,3.119855,"[19, 14, 29, 6, 20, 11, 0, 22]",8
1,1,<scipy.spatial.qhull.ConvexHull object at 0x7f...,3.213713,"[12, 9, 16, 23, 25, 6, 11, 1, 26, 20, 2]",11
2,2,<scipy.spatial.qhull.ConvexHull object at 0x7f...,3.281091,"[5, 28, 24, 3, 17, 10, 9]",7


In [17]:
# First one:
df_0 = eg_concat_df.loc[eg_concat_df["ID"]==0]
hull_0 = ConvexHull(df_0[["X", "Y"]])
x = df_0.iloc[hull_0.vertices]["X"]
y = df_0.iloc[hull_0.vertices]["Y"]

p1 = figure(background_fill_color="#efe8e2",
           width=200,
           height=200)
p1.patch(x, y, alpha=0.5)

# Second one:
df_1 = eg_concat_df.loc[eg_concat_df["ID"]==1]
hull_1 = ConvexHull(df_1[["X", "Y"]])
x = df_1.iloc[hull_1.vertices]["X"]
y = df_1.iloc[hull_1.vertices]["Y"]

p2 = figure(background_fill_color="#efe8e2",
           width=200,
           height=200)
p2.patch(x, y, alpha=0.5)

# Third one:
df_2 = eg_concat_df.loc[eg_concat_df["ID"]==2]
hull_2 = ConvexHull(df_2[["X", "Y"]])
x = df_2.iloc[hull_2.vertices]["X"]
y = df_2.iloc[hull_2.vertices]["Y"]

p3 = figure(background_fill_color="#efe8e2",
           width=200,
           height=200)
p3.patch(x, y, alpha=0.5)

from bokeh.layouts import gridplot
# show(row(p1, p2, p3))
show(gridplot([[p1, p2, p3]]), ncols=3)

In [37]:
all_vertices = []
for _, row in eg_hulls_df.iterrows():
    vertices = row["hull_obj"].vertices
    all_vertices.append(vertices)

# Replace with zip!
ps = []
for name, group in grouped:
    x_vertices = group.iloc[all_vertices[name]]["X"]
    y_vertices = group.iloc[all_vertices[name]]["Y"]

    p = figure(background_fill_color="#efe8e2",
           width=200,
           height=200)
    p.patch(x_vertices, y_vertices, alpha=0.5)
    p.circle(group["X"], group["Y"])
    ps.append(p)
    
show(gridplot([ps]), ncols=3)

Sometimes, the key to your `groupby` object will be sorted according to multiple columns, in which case, the key will be a tuple, rather than an integer. To get around this situation, we can use the handy `enumerate` function. Observe the parantheses:

In [36]:
# TODO: zip my iterables!

all_vertices = []
for _, row in eg_hulls_df.iterrows():
    vertices = row["hull_obj"].vertices
    all_vertices.append(vertices)

ps = []
for i, (name, group) in enumerate(grouped):
    x_vertices = group.iloc[all_vertices[i]]["X"]
    y_vertices = group.iloc[all_vertices[i]]["Y"]

    p = figure(background_fill_color="#efe8e2",
           width=200,
           height=200)
    p.patch(x_vertices, y_vertices, alpha=0.5)
    p.circle(group["X"], group["Y"])
    ps.append(p)
    
show(gridplot([ps]), ncols=3)