In [1]:
from typing import Any, Optional
import numpy as np
from time import time

import pandas as pd
import scipy
from tqdm import tqdm

from common.evaluate import evaluate_pose_error_J3d_P2d
from paik.solver import NSF, PAIK, Solver, get_solver
from ikp import get_robot, numerical_inverse_kinematics_batch, compute_mmd, gaussian_kernel, inverse_multiquadric_kernel

import torch

# Seed NumPy's global RNG and PyTorch's default generator with the same fixed
# value so that repeated runs of the notebook draw the same random numbers.
np.random.seed(0)
torch.manual_seed(0)

ikflow/config.py | Using device: 'cuda:0'


<torch._C.Generator at 0x7fe0984e1050>

In [None]:
from functools import partial
import os
from numpy import ndarray
from tqdm import tqdm, trange
import itertools
from tqdm.contrib import itertools as tqdm_itertools

from paik.file import load_pickle, save_pickle
from latent_space_sampler import Retriever


def solver_batch(solver, P, num_sols, std=0.001, retriever: Optional[Retriever] = None, J_ref=None, radius=0.0, num_clusters=30, use_samples=int(5e6), verbose=False, retr_type='cluster'):
    """Generate ``num_sols`` IK solutions for every pose in ``P`` with ``solver``.

    The call is dispatched on the solver type:

    * ``PAIK``: ``solver.base_std`` is set to ``std``, partition labels are
      obtained from ``J_ref`` and solutions are generated for the tiled poses.
    * ``NSF`` without a retriever: ``solver.base_std`` is set to ``std`` and
      the solver samples ``num_sols`` solutions per pose on its own.
    * ``NSF`` with a retriever: latents are drawn from the retriever selected
      by ``retr_type`` and decoded for the tiled poses.
    * any other solver: poses are solved one at a time through
      ``solver.generate_ik_solutions(p, num_sols, latent_distribution=...,
      latent_scale=std, ...)`` (presumably an IKFlow-style solver -- TODO confirm).

    Args:
        solver: IK solver instance (see dispatch above).
        P: poses, indexed as ``(num_poses, pose_dim)``.
        num_sols: number of solutions to generate per pose.
        std: latent standard deviation / scale handed to the solver.
        retriever: optional latent retriever, only used with ``NSF``.
        J_ref: optional reference joint configurations (partition-label
            reference for ``PAIK``, seeds for the retrievers).
        radius: forwarded to the retriever.
        num_clusters: forwarded to the cluster retriever as ``n_clusters``.
        use_samples: forwarded to the cluster/random retriever as ``max_samples``.
        verbose: forwarded to ``generate_ik_solutions`` where supported.
        retr_type: ``'cluster'``, ``'random'`` or ``'numerical'``.

    Returns:
        The solutions reshaped to ``(num_sols, num_poses, -1)``.

    Raises:
        ValueError: if a retriever is used with an unknown ``retr_type``
            (previously this surfaced as an UnboundLocalError on ``latents``).
    """
    num_poses = P.shape[0]
    # Tile the poses num_sols times: (num_sols, num_poses, m) -> (num_sols*num_poses, m)
    P_num_sols = np.expand_dims(P, axis=0).repeat(num_sols, axis=0)
    P_num_sols = P_num_sols.reshape(-1, P.shape[-1])

    if isinstance(solver, PAIK):
        solver.base_std = std
        F = solver.get_reference_partition_label(P=P, J=J_ref, num_sols=num_sols)
        J_hat = solver.generate_ik_solutions(P=P_num_sols, F=F, verbose=verbose)
    elif isinstance(solver, NSF):
        if retriever is None:
            solver.base_std = std
            J_hat = solver.generate_ik_solutions(P=P, num_sols=num_sols)
        else:
            if retr_type == 'cluster':
                latents = retriever.cluster_retriever(seeds=J_ref, num_poses=num_poses, num_sols=num_sols, max_samples=use_samples, radius=radius, n_clusters=num_clusters)
            elif retr_type == 'random':
                latents = retriever.random_retriever(seeds=J_ref, num_poses=num_poses, max_samples=use_samples, num_sols=num_sols, radius=radius)
            elif retr_type == 'numerical':
                latents = retriever.numerical_retriever(poses=P, seeds=J_ref, num_sols=num_sols, radius=radius)
            else:
                # Fail loudly instead of hitting an unbound `latents` below.
                raise ValueError(
                    f"Unknown retr_type {retr_type!r}; expected 'cluster', 'random' or 'numerical'"
                )
            J_hat = solver.generate_ik_solutions(P=P_num_sols, latents=latents, verbose=verbose)
    else:
        # Generic solver: solve pose by pose and collect into a pre-allocated array.
        J_hat = np.empty((num_sols, num_poses, solver.robot.n_dofs))
        # NOTE: the poses are moved to 'cuda' unconditionally, as in the original code.
        P_torch = torch.tensor(P, dtype=torch.float32).to('cuda')
        for i, p in enumerate(P_torch):
            solutions = solver.generate_ik_solutions(
                p,
                num_sols,
                latent_distribution='gaussian',
                latent_scale=std,
                clamp_to_joint_limits=False,
            )
            J_hat[:, i] = solutions.detach().cpu().numpy()
    # return shape: (num_sols, num_poses, n)
    return J_hat.reshape(num_sols, num_poses, -1)


def random_ikp(robot, P: np.ndarray, solve_fn_batch: Any, num_poses: int, num_sols: int, J_hat_num: Optional[np.ndarray] = None):
    """Solve IK for the poses ``P`` and summarise accuracy, timing and MMD.

    Args:
        robot: robot model; ``robot.n_dofs`` is used to validate the solver output.
        P: target poses, one row per pose.
        solve_fn_batch: callable invoked as ``solve_fn_batch(P=P, num_sols=num_sols)``;
            must return an array of shape ``(num_sols, num_poses, robot.n_dofs)``.
        num_poses: expected number of poses.
        num_sols: number of solutions requested per pose.
        J_hat_num: optional reference solutions indexed like the solver output;
            when given, the per-pose MMD between the two solution sets is
            computed with a Gaussian and an inverse-multiquadric kernel.

    Returns:
        ``(J_hat, results)`` where ``results`` maps the key
        ``f'{num_poses}_{num_sols}'`` to a dict with ``l2_mm``, ``ang_deg``,
        ``num_sols_time_ms``, ``mmd_guassian`` and ``mmd_imq`` (the MMD entries
        are NaN when ``J_hat_num`` is None).
    """
    t_start = time()
    # solver output shape: (num_sols, num_poses, n_dofs)
    J_hat = solve_fn_batch(P=P, num_sols=num_sols)
    assert J_hat.shape == (num_sols, num_poses, robot.n_dofs), \
        f"J_hat shape {J_hat.shape} is not correct"

    l2, ang = evaluate_pose_error_J3d_P2d(robot, J_hat, P, return_all=True)

    # Wall-clock time per pose (solving + error evaluation): rounded in seconds
    # first, then converted to milliseconds.
    num_sols_time_ms = round((time() - t_start) / len(P), 3) * 1000

    metrics = {
        "l2_mm": np.nanmean(l2) * 1000,
        "ang_deg": np.rad2deg(np.nanmean(ang)),
        "num_sols_time_ms": num_sols_time_ms,
    }

    if J_hat_num is not None:
        # Row 0: Gaussian kernel, row 1: inverse multiquadric kernel.
        per_pose_mmd = np.empty((2, num_poses))
        for pose_idx in range(num_poses):
            for row, kernel in enumerate((gaussian_kernel, inverse_multiquadric_kernel)):
                per_pose_mmd[row, pose_idx] = compute_mmd(
                    J_hat[:, pose_idx], J_hat_num[:, pose_idx], kernel=kernel
                )
        mmd_guassian = per_pose_mmd[0].mean()
        mmd_imq = per_pose_mmd[1].mean()
    else:
        mmd_guassian = np.nan
        mmd_imq = np.nan

    metrics['mmd_guassian'] = mmd_guassian
    metrics['mmd_imq'] = mmd_imq

    return J_hat, {f'{num_poses}_{num_sols}': metrics}

def nested_dict_to_2d_dict(nested_dict: dict):
    """Flatten the first level of nesting in ``nested_dict``.

    Every value that is itself a dict is expanded into entries keyed
    ``f"{outer_key}_{inner_key}"``; all other values are copied unchanged.
    Only one level is flattened, so deeper dicts remain as values.
    """
    flat = {}
    for outer_key, entry in nested_dict.items():
        if not isinstance(entry, dict):
            flat[outer_key] = entry
            continue
        flat.update({f"{outer_key}_{inner_key}": inner for inner_key, inner in entry.items()})
    return flat


def random_ikp_with_mmd(record_dir, robot_name, num_poses, num_sols, paik_std_list, radius_list, num_clusters_list, work_dir='/home/luca/paik'):
    """Benchmark PAIK and NSF (with and without latent retrievers) on random poses.

    Random poses are sampled for the robot, a numerical IK baseline is computed
    first, and every solver configuration is then evaluated with ``random_ikp``
    against that baseline (pose error, timing, MMD). The results dict is
    pickled after every configuration and finally written as a CSV table.

    Args:
        record_dir: directory receiving the ``.pkl`` checkpoint and the ``.csv`` summary.
        robot_name: robot identifier passed to ``get_robot``.
        num_poses: number of random poses to sample.
        num_sols: number of IK solutions per pose.
        paik_std_list: latent std values tried for PAIK and for NSF without retriever.
        radius_list: retriever radii to try.
        num_clusters_list: cluster counts to try with the cluster retriever.
        work_dir: directory the pretrained solvers are loaded from (defaults to
            the previously hard-coded location).
    """
    robot = get_robot(robot_name)
    nsf = get_solver(arch_name="nsf", robot=robot, load=True, work_dir=work_dir)
    retriever = Retriever(nsf)
    max_samples = int(5e6)
    retriever.init([max_samples], num_clusters_list)
    paik = get_solver(arch_name="paik", robot=robot, load=True, work_dir=work_dir)

    file_path = f"{record_dir}/random_ikp_with_mmd_{robot_name}_{num_poses}_{num_sols}.pkl"

    # Results always start empty (no cached pickle is loaded); the membership
    # checks below only matter if a resume-from-pickle step is added here.
    results = {}
    if 'P' in results:
        P = results['P']
    else:
        _, P = nsf.robot.sample_joint_angles_and_poses(n=num_poses)

    print("Start numerical IK...")
    # num's variable: num_poses, num_sols
    num_solver_batch = partial(numerical_inverse_kinematics_batch, solver=nsf)
    J_hat_num, results['num'] = random_ikp(robot, P, num_solver_batch, num_poses=num_poses, num_sols=num_sols)
    save_pickle(file_path, results)
    print(f"Results numerical IK are saved in {file_path}")

    def _evaluate(name, solve_fn):
        # Run one configuration (unless already recorded) and checkpoint the pickle.
        if name not in results:
            _, results[name] = random_ikp(robot, P, solve_fn, num_poses=num_poses, num_sols=num_sols, J_hat_num=J_hat_num)
            save_pickle(file_path, results)

    print("Start paik...")
    # paik's variable: num_poses, num_sols, std
    for std in tqdm(paik_std_list):
        _evaluate(f'paik_{std}_gaussian', partial(solver_batch, solver=paik, std=std))
    print(f"Results paik are saved in {file_path}")

    print("Start nsf w/o retreiver...")
    # nsf's variable: std
    for std in tqdm(paik_std_list):
        _evaluate(f'nsf_gaussian_{std}', partial(solver_batch, solver=nsf, std=std, retriever=None))

    print("Start nsf with cluster retriever...")
    # nsf's variable: num_poses, num_sols, max_samples, radius, num_clusters
    for radius, num_clusters in tqdm_itertools.product(radius_list, num_clusters_list):
        _evaluate(
            f'nsf_cluster_{radius}_{num_clusters}',
            partial(solver_batch, solver=nsf, radius=radius, num_clusters=num_clusters, retriever=retriever, use_samples=max_samples, retr_type='cluster'),
        )
    print(f"Results nsf with cluster retriever are saved in {file_path}")

    print("Start nsf with random retriever...")
    # nsf's variable: num_poses, num_sols, max_samples, radius
    # The sample budget used to read `num_clusters` leaked from the loop above
    # (a NameError when that loop never ran). Use the last configured cluster
    # count explicitly -- the same value the leaked variable held -- and never
    # go below num_sols.
    random_budget = num_clusters_list[-1] if len(num_clusters_list) > 0 else num_sols
    use_samples = max(min(max_samples, random_budget), num_sols)
    for radius in tqdm(radius_list):
        _evaluate(
            f'nsf_random_{radius}',
            partial(solver_batch, solver=nsf, radius=radius, retriever=retriever, use_samples=use_samples, retr_type='random'),
        )

    print("Start nsf with numerical retriever...")
    # nsf's variable: num_poses, num_sols, max_samples, radius
    for radius in radius_list:
        _evaluate(
            f'nsf_numerical_{radius}',
            partial(solver_batch, solver=nsf, radius=radius, retriever=retriever, retr_type='numerical', J_ref=None),
        )

    # One row per "<configuration>_<num_poses>_<num_sols>", one column per metric.
    ret_results = nested_dict_to_2d_dict(results)
    df = pd.DataFrame(ret_results).T
    # round to 4 decimal places
    df = df.round(4)
    print(df)
    csv_path = f"{record_dir}/random_ikp_with_mmd_evaluation_results_{robot_name}_{num_poses}_{num_sols}.csv"
    df.to_csv(csv_path)
    print(f"Results are saved in {csv_path}")

In [6]:
from common.config import Config_IKP
# Redirect the experiment config to the local store before reading
# config.record_dir, which is passed on as the output directory.
config = Config_IKP()
config.workdir = '/mnt/d/pads/Documents/paik_store'

# Keyword arguments forwarded to random_ikp_with_mmd; 'robot_name' is
# overwritten for every robot in the loop below.
kwarg = dict(
    record_dir=config.record_dir,
    robot_name='panda',
    num_poses=100,  # 300, 500, 1000
    num_sols=100,  # 300, 500, 1000
    paik_std_list=[0.001, 0.1, 0.25, 0.5, 0.7, 0.9],  # 0.001, 0.1, 0.25, 0.5, 0.7
    radius_list=[0.001, 0.1, 0.25, 0.5, 0.7, 0.9],  # 0, 0.1, 0.3, 0.5, 0.7, 0.9
    num_clusters_list=[70],  # 13, 16, 19, 25, 30, 40
)

robot_names = ["panda"]  # "panda", "fetch", "fetch_arm", "atlas_arm", "atlas_waist_arm", "baxter_arm"

for robot_name in robot_names:
    print(f"Start to evaluate {robot_name}...")
    kwarg.update(robot_name=robot_name)
    random_ikp_with_mmd(**kwarg)

Start to evaluate panda...
WorldModel::LoadRobot: /home/luca/.cache/jrl/temp_urdfs/panda_arm_hand_formatted_link_filepaths_absolute.urdf
joint mimic: no multiplier, using default value of 1 
joint mimic: no offset, using default value of 0 
URDFParser: Link size: 17
URDFParser: Joint size: 12
LoadAssimp: Loaded model /home/luca/miniconda3/lib/python3.9/site-packages/jrl/urdfs/panda/meshes/visual/link0.dae (59388 verts, 20478 tris)
LoadAssimp: Loaded model /home/luca/miniconda3/lib/python3.9/site-packages/jrl/urdfs/panda/meshes/visual/link1.dae (37309 verts, 12516 tris)
LoadAssimp: Loaded model /home/luca/miniconda3/lib/python3.9/site-packages/jrl/urdfs/panda/meshes/visual/link2.dae (37892 verts, 12716 tris)
LoadAssimp: Loaded model /home/luca/miniconda3/lib/python3.9/site-packages/jrl/urdfs/panda/meshes/visual/link3.dae (42512 verts, 14233 tris)
LoadAssimp: Loaded model /home/luca/miniconda3/lib/python3.9/site-packages/jrl/urdfs/panda/meshes/visual/link4.dae (43520 verts, 14620 tris)
L

  0%|          | 0/1 [00:00<?, ?it/s]

Start to initialize numerical retriever...
[SUCCESS] load from /home/luca/paik/weights/panda/0904-1939
[SUCCESS] load best date 0904-1939 with l2 0.00297 from /home/luca/paik/weights/panda/best_date_paik.csv.
Start numerical IK...
Results numerical IK are saved in /mnt/d/pads/Documents/paik_store/record/2024_11_12/random_ikp_with_mmd_panda_100_100.pkl
Start nsf w/o retreiver...


100%|██████████| 3/3 [00:00<00:00,  3.81it/s]
100%|██████████| 3/3 [00:00<00:00,  5.15it/s]
100%|██████████| 3/3 [00:00<00:00,  5.14it/s]
100%|██████████| 3/3 [00:00<00:00,  5.17it/s]
100%|██████████| 3/3 [00:00<00:00,  5.14it/s]
100%|██████████| 3/3 [00:00<00:00,  5.18it/s]
100%|██████████| 6/6 [00:05<00:00,  1.04it/s]

Start nsf with cluster retriever...





  0%|          | 0/6 [00:00<?, ?it/s]

Start to cluster retriever with max_samples: 5000000, num_poses: 100, num_sols: 100, radius: 0.001, n_clusters: 70
Start to cluster retriever with max_samples: 5000000, num_poses: 100, num_sols: 100, radius: 0.1, n_clusters: 70
Start to cluster retriever with max_samples: 5000000, num_poses: 100, num_sols: 100, radius: 0.25, n_clusters: 70
Start to cluster retriever with max_samples: 5000000, num_poses: 100, num_sols: 100, radius: 0.5, n_clusters: 70
Start to cluster retriever with max_samples: 5000000, num_poses: 100, num_sols: 100, radius: 0.7, n_clusters: 70
Start to cluster retriever with max_samples: 5000000, num_poses: 100, num_sols: 100, radius: 0.9, n_clusters: 70
Results nsf with cluster retriever are saved in /mnt/d/pads/Documents/paik_store/record/2024_11_12/random_ikp_with_mmd_panda_100_100.pkl
Start nsf with random retriever...


  0%|          | 0/6 [00:00<?, ?it/s]

Start to random retriever with max_samples: 100, num_poses: 100, num_sols: 100, radius: 0.001


 17%|█▋        | 1/6 [00:00<00:04,  1.08it/s]

Start to random retriever with max_samples: 100, num_poses: 100, num_sols: 100, radius: 0.1


 33%|███▎      | 2/6 [00:01<00:03,  1.08it/s]

Start to random retriever with max_samples: 100, num_poses: 100, num_sols: 100, radius: 0.25


 50%|█████     | 3/6 [00:02<00:02,  1.08it/s]

Start to random retriever with max_samples: 100, num_poses: 100, num_sols: 100, radius: 0.5


 67%|██████▋   | 4/6 [00:03<00:01,  1.08it/s]

Start to random retriever with max_samples: 100, num_poses: 100, num_sols: 100, radius: 0.7


 83%|████████▎ | 5/6 [00:04<00:00,  1.08it/s]

Start to random retriever with max_samples: 100, num_poses: 100, num_sols: 100, radius: 0.9


100%|██████████| 6/6 [00:05<00:00,  1.08it/s]


Start nsf with numerical retriever...
Start to numerical retriever...
Start to numerical retriever...
Start to numerical retriever...
Start to numerical retriever...
Start to numerical retriever...
Start to numerical retriever...
                                l2_mm  ang_deg  num_sols_time_ms  \
num_100_100                    0.4187   0.0517              49.0   
nsf_gaussian_0.001_100_100     3.0865   3.1889              11.0   
nsf_gaussian_0.1_100_100       2.8854   1.8070               9.0   
nsf_gaussian_0.25_100_100      3.2234   1.7870               9.0   
nsf_gaussian_0.5_100_100       3.6623   1.8971               9.0   
nsf_gaussian_0.7_100_100       4.2836   2.1465               9.0   
nsf_gaussian_0.9_100_100       5.5797   2.7152               9.0   
nsf_cluster_0.001_70_100_100   3.4817   1.7853               9.0   
nsf_cluster_0.1_70_100_100     3.5516   1.8551               9.0   
nsf_cluster_0.25_70_100_100    3.4773   1.8048               9.0   
nsf_cluster_0.5_70_100

In [None]:
import pandas as pd
import numpy as np

# Load the summary table for panda with 1000 poses / 1000 solutions; the first
# CSV column holds the underscore-joined row labels and becomes the index.
df = pd.read_csv(f'{config.record_dir}/random_ikp_with_mmd_evaluation_results_panda_1000_1000.csv', index_col=0)
# Split every label on '_' into one index level per token.
mi = df.index.str.split('_', expand=True)
df_mi = df.set_index(mi)
# set index names
# NOTE(review): this assumes every label splits into (at most) these six
# fields. Labels built by the current random_ikp_with_mmd (e.g.
# 'nsf_gaussian_<std>_<num_poses>_<num_sols>' or 'paik_<std>_gaussian_...')
# would put non-numeric tokens into these columns and break the float cast
# below -- confirm against the CSV that is actually loaded.
df_mi.index.names = ['solver', 'max_samples', 'radius', 'num_clusters', 'num_poses', 'num_sols']
# Turn the index levels into ordinary columns (mutates df_mi in place).
df_mi.reset_index(inplace=True)

# Rows of the numerical solver are labelled 'num_<num_poses>_<num_sols>', so
# after the split their two values sit in the max_samples / radius columns.
# swap values in the columns of max_samples and num_poses for solver num
df_mi.loc[df_mi.solver == 'num', ['max_samples', 'num_poses']] = df_mi.loc[df_mi.solver == 'num', ['num_poses', 'max_samples']].values
# swap values in the columns of radius and num_sols for solver num
df_mi.loc[df_mi.solver == 'num', ['radius', 'num_sols']] = df_mi.loc[df_mi.solver == 'num', ['num_sols', 'radius']].values

# The split produces strings; cast the parameter columns to float so they can
# be compared and plotted numerically.
df_mi[['max_samples', 'radius', 'num_clusters', 'num_poses', 'num_sols']] = df_mi[['max_samples', 'radius', 'num_clusters', 'num_poses', 'num_sols']].astype(float)
df_mi

In [None]:
# Keep only the NSF rows. NSF results vary over max_samples, radius and
# num_clusters; the plots below hold the remaining variables (e.g. num_poses,
# num_sols) fixed.
df_nsf = df_mi.loc[df_mi['solver'] == 'nsf']

import seaborn as sns
import matplotlib.pyplot as plt


def plot_nsf_linechart(df, hue, x_label, y_labels=('l2_mm', 'mmd_imq')):
    """Draw one line chart per metric in ``y_labels``, stacked vertically.

    Args:
        df: data frame containing the ``x_label``, ``hue`` and ``y_labels`` columns.
        hue: column whose distinct values become separate lines.
        x_label: column plotted on the x-axis; rows are sorted by it first.
        y_labels: metric columns, one subplot each (any non-zero number).

    Raises:
        ValueError: if ``y_labels`` is empty.
    """
    y_labels = list(y_labels)
    if not y_labels:
        raise ValueError("y_labels must contain at least one column name")

    x_title = x_label.replace("_", " ").title()
    df_sorted = df.sort_values(x_label)  # returns a new frame; the caller's df is untouched

    # Size the grid from y_labels (6 inches per row, i.e. 10x12 for the default
    # two metrics); squeeze=False keeps `axs` two-dimensional even for one row.
    fig, axs = plt.subplots(len(y_labels), 1, figsize=(10, 6 * len(y_labels)), squeeze=False)
    for ax, y_label in zip(axs[:, 0], y_labels):
        sns.lineplot(data=df_sorted, x=x_label, y=y_label, hue=hue, marker='o', ax=ax)
        y_title = y_label.replace('_', ' ').upper()
        ax.set_title(f'NSF {y_title} vs {x_title}')
        ax.set_xlabel(x_title)
        ax.set_ylabel(y_title)
        ax.grid()

    plt.show()

# Each entry describes one figure: (row selector, hue column, x-axis column).
# Every figure shows l2_mm and mmd_imq (the defaults of plot_nsf_linechart).
nsf_plot_specs = [
    # x = max_samples, one line per num_clusters; radius=0, num_poses=1000, num_sols=1000
    (lambda d: (d.num_poses == 1000) & (d.num_sols == 1000) & (d.radius == 0),
     'num_clusters', 'max_samples'),
    # x = radius, one line per num_clusters; max_samples=5000000, num_poses=1000, num_sols=1000
    (lambda d: (d.num_poses == 1000) & (d.num_sols == 1000) & (d.max_samples == 5000000),
     'num_clusters', 'radius'),
    # x = max_samples, one line per radius; num_clusters=25, num_poses=1000, num_sols=1000
    (lambda d: (d.num_poses == 1000) & (d.num_sols == 1000) & (d.num_clusters == 25),
     'radius', 'max_samples'),
    # x = num_poses, one line per num_sols; max_samples=5000000, radius=0.5, num_clusters=25
    (lambda d: (d.max_samples == 5000000) & (d.radius == 0.5) & (d.num_clusters == 25),
     'num_sols', 'num_poses'),
    # same selection as the first figure (two sub-plots: l2_mm and mmd_imq over max_samples)
    (lambda d: (d.num_poses == 1000) & (d.num_sols == 1000) & (d.radius == 0),
     'num_clusters', 'max_samples'),
]

for select_rows, hue, x_label in nsf_plot_specs:
    # Work on a fresh copy of the NSF rows, then keep only the selected slice.
    df_cp = df_nsf.copy()
    df_cp = df_cp[select_rows(df_cp)]
    plot_nsf_linechart(df_cp, hue=hue, x_label=x_label)


In [None]:
# df_mi has a plain integer index after reset_index(), so the label lookup
# df_mi.loc['paik'] raises KeyError; select the PAIK rows through the
# 'solver' column instead (same pattern as the NSF filter).
df_mi[df_mi.solver == 'paik']

In [None]:
from flatdict import FlatterDict

# Scratch experiment: flatten a nested dict/list structure with flatdict's
# FlatterDict using '_' as the key delimiter, then print it as a plain dict.
nested_dict = {
    'a': 1,
    'c': {'a': 2, 'b': {'x': 3, 'y': 4, 'z': 5}},
    'd': [6, 7, 8],
}
flat_dict = FlatterDict(nested_dict, delimiter='_')
print(dict(flat_dict))


In [None]:
# show method == random
# NOTE(review): at this point in the notebook `df` is still the frame loaded
# with pd.read_csv earlier; the "method" column is only created when `df` is
# rebuilt from the retriever sweep records further down, where this same cell
# is repeated. On a fresh top-to-bottom run this presumably raises KeyError --
# confirm and drop this copy.
df[df["method"] == "random"].describe()

In [None]:
import itertools
import os


robot_name = 'panda'

# Hyper-parameter grid of the retriever sweep.
max_samples_list = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]
radius_list = [0.0, 0.01, 0.1, 0.5, 1.0, 2]
j_ref_list = [None]
use_cluster_list = [False, True]
n_clusters_list = [5, 10, 20, 30, 50, 100, 200]

# Combine as an iterator
combinations = itertools.product(
    max_samples_list,
    radius_list,
    j_ref_list,
    use_cluster_list,
    n_clusters_list,
)

for com in combinations:
    # The settings are published as upper-case globals before each run.
    # NOTE(review): print_retriever() and test_random_ikp_with_mmd() are not
    # defined in the visible part of the notebook; presumably they read these
    # globals -- confirm they still exist.
    MAX_SAMPLES, RADIUS, J_REF, USE_CLUSTER, N_CLUSTERS = com
    print_retriever()

    # Skip (a) non-cluster runs for every cluster count except the first one in
    # the grid, and (b) cluster runs with fewer than 1000 samples per cluster.
    if (not USE_CLUSTER and N_CLUSTERS > n_clusters_list[0]) or (
        USE_CLUSTER and MAX_SAMPLES < N_CLUSTERS * 1000
    ):
        continue

    # One output directory per configuration.
    record_dir = (
        '/home/luca/paik/record/retriever/'
        + ('cluster' if USE_CLUSTER else 'random')
        + f'_sam{MAX_SAMPLES}_r{RADIUS}_clstr{N_CLUSTERS}'
    )

    os.makedirs(record_dir, exist_ok=True)
    test_random_ikp_with_mmd(robot_name, "diag_normal", 150, 150, [0.01], record_dir, verbose=False)

In [75]:
# Read the results of the retriever sweep back from the record directories and
# build one summary row (column means + sweep settings) per configuration.
combinations = itertools.product(max_samples_list, radius_list, j_ref_list, use_cluster_list, n_clusters_list)

df_list = []

for com in combinations:

    MAX_SAMPLES, RADIUS, J_REF, USE_CLUSTER, N_CLUSTERS = com

    record_dir = f'/home/luca/paik/record/retriever/'

    if USE_CLUSTER:
        record_dir += 'cluster'
    else:
        record_dir += 'random'

    record_dir += f'_sam{MAX_SAMPLES}_r{RADIUS}_clstr{N_CLUSTERS}'

    csv_path = f"{record_dir}/ikp_{robot_name}_150_150_0.01_nsf_diag_normal.csv"

    # Skip configurations that were never run (no directory) as well as runs
    # that created their directory but never wrote the CSV; checking only the
    # directory made read_csv fail on such incomplete runs.
    if not os.path.exists(csv_path):
        continue

    df_file = pd.read_csv(csv_path)
    # convert to pd series with mean of df columns as keys
    # add max_samples, radius, j_ref, use_cluster, n_clusters as keys
    series = df_file.mean(numeric_only=True)
    series["max_samples"] = MAX_SAMPLES
    series["radius"] = RADIUS
    series["j_ref"] = J_REF is not None
    series["method"] = "cluster" if USE_CLUSTER else "random"
    series["n_clusters"] = N_CLUSTERS

    df_list.append(series)

# NOTE: this rebinds `df`, replacing the frame read from the evaluation CSV earlier.
df = pd.DataFrame(df_list)

In [None]:
# Summary statistics of the sweep rows obtained with the random retriever.
df.loc[df["method"] == "random"].describe()

In [None]:
# cluster's variables: max_samples, radius, n_clusters

# Line plot for the cluster method at radius = 0.0:
#   x-axis: max_samples
#   y-axis: mmd_imq
#   one line per number of clusters
df_cluster = df[(df["method"] == "cluster") & (df["radius"] == 0.0)].sort_values(by="n_clusters")

import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
sns.lineplot(x="max_samples", y="mmd_imq", hue="n_clusters", marker="o", data=df_cluster)
# put the x ticks exactly at the swept max_samples values
plt.xticks(max_samples_list)
plt.title("MMD_IMQ with different number of clusters")
plt.show()

# Observation: n_clusters dominates.

In [None]:
# cluster's variables: max_samples, radius, n_clusters

# Line plot for the cluster method at max_samples = 100000:
#   x-axis: radius
#   y-axis: mmd_imq
#   one line per number of clusters
df_cluster = df[(df["method"] == "cluster") & (df["max_samples"] == 100000)].sort_values(by="radius")

# print the row(s) with the smallest mmd_imq, transposed for readability
print(df_cluster[df_cluster["mmd_imq"] == df_cluster["mmd_imq"].min()].T)

plt.figure(figsize=(10, 6))
sns.lineplot(x="radius", y="mmd_imq", hue="n_clusters", marker="o", data=df_cluster)
# put the x ticks exactly at the swept radius values
plt.xticks(radius_list)
plt.title("Cluster's MMD_IMQ with different radius")
plt.show()

# Observation: the radius has a sweet spot near 0.5.

In [None]:
# random's variables: max_samples, radius

# Line plots for the random method:
#   x-axis: radius
#   y-axis: mmd_imq
#   one line per max_samples value

df_random = df[df["method"] == "random"]
df_random = df_random.sort_values(by="max_samples")

# print the row(s) with the smallest mmd_imq, transposed for readability
print(df_random[df_random["mmd_imq"] == df_random["mmd_imq"].min()].T)

# Split the sweep so the small and large sample budgets get separate panels.
df_random_first_5 = df_random[df_random["max_samples"] <= 1000]
df_random_rest = df_random[df_random["max_samples"] > 1000]

# plot the first 5 max_samples as a sub-plot and the rest as a sub-plot
fig, axes = plt.subplots(2, 1, figsize=(10, 12))
sns.lineplot(data=df_random_first_5, x="radius", y="mmd_imq", hue="max_samples", marker="o", ax=axes[0])
sns.lineplot(data=df_random_rest, x="radius", y="mmd_imq", hue="max_samples", marker="o", ax=axes[1])
# plt.xticks / plt.title only act on the current (last) axes, which left the
# top panel without ticks and title; configure each panel explicitly.
axes[0].set_title("MMD_IMQ with different radius (max_samples <= 1000)")
axes[1].set_title("MMD_IMQ with different radius (max_samples > 1000)")
for ax in axes:
    ax.set_xticks(radius_list)
plt.show()

# Observation: the radius has a sweet spot near 0.5.