In [None]:
import numpy as np
import pandas as pd
import json
import scipy.stats
import time

from selfoptforest import *

from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import cm

In [None]:
# Short keys of the three forest variants (they appear in the column names of the
# result frame) mapped to the labels shown in figures. Both derived lists keep
# the insertion order of this mapping.
_ALGO_LABELS = {"rf": "RF", "lda": "LDA RF", "pca": "PCA RF"}
ALGOS = list(_ALGO_LABELS.keys())
ALGO_NAMES = list(_ALGO_LABELS.values())

In [None]:
# Read the benchmark table from benchmark.csv (path relative to the working
# directory) and display it as the cell output.
df_benchmark = pd.read_csv("benchmark.csv")
df_benchmark

In [None]:
# Show the benchmark rows with openmlid == 60 and size == 100.
is_dataset_60 = df_benchmark["openmlid"] == 60
is_size_100 = df_benchmark["size"] == 100
df_benchmark[is_dataset_60 & is_size_100]

In [None]:
# Replay the self-optimizing forest on the pre-computed benchmark for every
# (dataset, seed) pair and collect one result row per pair in `rows`.
# Each row is laid out as
#   [openmlid, seed, <oob, test per algo>, choice, best_choice_oob,
#    best_choice_test, gap, improvement_over_rf,
#    <actual train time per algo>, <max train time per algo>, <trees per algo>]
bm = Benchmark(df_benchmark)


seeds = list(range(20))
datasets = sorted(pd.unique(df_benchmark["openmlid"]))

rows = []

MAX_TREES = 100

# Forest sizes (relative to MAX_TREES) over which reported performances are averaged.
SIZE_OFFSETS = range(-2, 3)

for openmlid in tqdm(datasets):
    for seed in seeds:

        print(openmlid, " (seed " + str(seed) + ")\n-------------------------------")

        has_data = ((df_benchmark["openmlid"] == openmlid) & (df_benchmark["seed"] == seed)).any()
        if not has_data:
            print("Skipping, no data for this seed.")
            continue

        bm.reset(openmlid, seed)
        rf = SelfOptRF()
        rf.simulate_training_with_benchmark(bm, max_forest_size = MAX_TREES)

        try:
            choice = rf.choice
            print(f"CHOICE: {choice}")
            print("TRAIN TIME SELF OPT:", bm.get_current_train_time())
            print("TRAIN TIMES PURE RFS: ", [bm.get_train_time(name, MAX_TREES) for name in bm.algos])
            print("WORST CASE TIME: ", sum([bm.get_train_time(name, MAX_TREES) for name in bm.algos]))
            try:
                test_performance = np.round(np.mean([bm.get_test_performance_at_size(choice, MAX_TREES + offset) for offset in SIZE_OFFSETS]), 4)
            except Exception as err:
                # Only ordinary errors are skipped; KeyboardInterrupt/SystemExit still propagate.
                # Note: `continue` moves on to the next seed of the same dataset.
                print("error in retrieving performance, going to next dataset!")
                print("  cause:", repr(err))
                continue
            print(f"Test performance of choice: {test_performance}")

            # Start below any real score so that a best choice is always recorded,
            # even if every performance equals 0.
            best_performance_test = -np.inf
            best_performance_oob = -np.inf
            best_choice_test = None
            best_choice_oob = None
            rf_performance_test = 0
            rf_performance_oob = 0

            row = [openmlid, seed]
            for algo in bm.algos:
                performance_test = np.round(np.mean([bm.get_test_performance_at_size(algo, MAX_TREES + offset) for offset in SIZE_OFFSETS]), 4)
                performance_oob = np.round(np.mean([bm.get_oob_performance_at_size(algo, MAX_TREES + offset) for offset in SIZE_OFFSETS]), 4)
                row.append(performance_oob)
                row.append(performance_test)
                if algo == "rf":
                    rf_performance_test = performance_test
                    rf_performance_oob = performance_oob
                if performance_test > best_performance_test:
                    best_performance_test = performance_test
                    best_choice_test = algo
                if performance_oob > best_performance_oob:
                    best_performance_oob = performance_oob
                    best_choice_oob = algo

                print(algo + "\t" + str(performance_oob), performance_test, np.round(performance_test-  performance_oob, 3))

            gap = best_performance_test - test_performance
            improvement_over_rf = np.round(test_performance - rf_performance_test, 3)
            train_times = [bm.get_current_train_time(a) for a in bm.algos]
            trained_trees = [bm.indices[k] for k in range(len(bm.algos))]
            max_train_times = [bm.get_train_time(a, MAX_TREES) for a in bm.algos]
            time_compression = np.round(sum(train_times) / sum(max_train_times), 2)

            print(f"Number of trained trees: {bm.indices}")
            print(f"Actual  Train Times: {train_times}")
            print(f"Maximum Train Times: {max_train_times}")
            print("Time compression:", np.round(time_compression, 2))

            # sanity check: no algorithm may have consumed more time than its full forest needs
            for algo, actual_time, max_time in zip(bm.algos, train_times, max_train_times):
                if actual_time > max_time:
                    raise RuntimeError(
                        f"Train time of {algo} ({actual_time}) exceeds its train time for {MAX_TREES} trees ({max_time})"
                    )

            rows.append(row + [choice, best_choice_oob, best_choice_test, gap, improvement_over_rf] + train_times + max_train_times + trained_trees)
            print("best choice found:", choice == best_choice_oob, choice == best_choice_test, f"Test gap to best: {np.round(gap, 4)}. Test improvement over RF: {improvement_over_rf}")
        except Exception:
            # Announce the failing (dataset, seed) pair in the log, then propagate the error.
            print("!!!!!!!!1 DA IST WAS SCHIEFGEGANGEN!!!!!!!!!!")
            raise

In [None]:
# Turn the collected result rows into a frame, derive the per-row gap,
# improvement and compression columns, and store everything in comparison.csv.

# Column names follow the layout in which the rows were assembled:
# (oob, test) per algorithm, the choices, then times and tree counts per algorithm.
result_columns = (
    ["openmlid", "seed"]
    + [f"{a}_{kind}" for a in ALGOS for kind in ("oob", "test")]
    + ["choice", "best_choice_oob", "best_choice_test", "gap", "imp"]
    + [f"traintime_{a}_act" for a in ALGOS]
    + [f"traintime_{a}_max" for a in ALGOS]
    + [f"trees_{a}" for a in ALGOS]
)
df_comparison = pd.DataFrame(rows, columns=result_columns)

# Per row, look up the score of the algorithm SORF chose and of the best
# algorithm; the column to read depends on the value of another column.
performance_selfopt = {"oob": [], "test": []}
performance_best = {"oob": [], "test": []}
for _, row in df_comparison.iterrows():
    for kind in ("oob", "test"):
        performance_selfopt[kind].append(row[row["choice"] + "_" + kind])
        performance_best[kind].append(row[row["best_choice_" + kind] + "_" + kind])

for kind in ("oob", "test"):
    df_comparison["selfopt_" + kind] = performance_selfopt[kind]
for kind in ("oob", "test"):
    # gap: how far the chosen algorithm is behind the best one
    df_comparison["gap_" + kind] = np.array(performance_best[kind], dtype=float) - df_comparison["selfopt_" + kind]
for kind in ("oob", "test"):
    # imp: advantage of the chosen algorithm over the standard RF
    df_comparison["imp_" + kind] = df_comparison["selfopt_" + kind] - df_comparison["rf_" + kind]

df_comparison["traintime_total_act"] = sum(df_comparison[f"traintime_{a}_act"] for a in ALGOS)
df_comparison["traintime_total_max"] = sum(df_comparison[f"traintime_{a}_max"] for a in ALGOS)
df_comparison["time_compression"] = df_comparison["traintime_total_act"] / df_comparison["traintime_total_max"]
# Fraction of the tree budget that was grown; the budget is MAX_TREES per algorithm
# (this used to be the literal 300 = 3 * 100).
df_comparison["tree_compression"] = sum(df_comparison[f"trees_{a}"] for a in ALGOS) / (len(ALGOS) * MAX_TREES)
df_comparison.to_csv("comparison.csv", index=False)
df_comparison

# Total CPU Time

In [None]:
# Convert the summed "traintime_total_max" column into hours and days
# (the division by 3600 treats the stored values as seconds).
SECONDS_PER_HOUR = 3600
HOURS_PER_DAY = 24

total_max_traintime = sum(df_comparison["traintime_total_max"])
cpu_hours = total_max_traintime / SECONDS_PER_HOUR
cpu_days = cpu_hours / HOURS_PER_DAY
for amount, unit in ((cpu_hours, "hours"), (cpu_days, "days")):
    print(amount, unit)

In [None]:
# Distinct dataset ids present in the comparison frame.
df_comparison["openmlid"].unique()

In [None]:
# Per-dataset mean of the three *_test columns, then the datasets on which
# standard RF and PCA RF differ by more than 0.2.
# The columns are selected with a list: selecting several columns with a bare
# tuple (["a", "b", "c"] without the inner brackets) raises in pandas >= 2.0.
df_rfcomparison = df_comparison.groupby("openmlid")[["rf_test", "lda_test", "pca_test"]].mean()
df_rfcomparison[np.abs(df_rfcomparison["rf_test"] - df_rfcomparison["pca_test"]) > 0.2]

In [None]:
%matplotlib inline
fig, ax = plt.subplots(1, 3, figsize=(10,3),  gridspec_kw={'width_ratios': [1.2, 1, 1]})
ct = pd.crosstab(df_comparison["best_choice_test"], df_comparison.rename(columns={"choice": "SORF Choice"})["SORF Choice"])

permutation = [1, 2, 0]

# plot comparison of RF against LDA RF and PCA RF
df_rfcomparison = df_comparison.groupby("openmlid")["rf_test", "lda_test", "pca_test"].mean()
ax[0].scatter(df_rfcomparison["rf_test"], df_rfcomparison["lda_test"], color="C0", s=5)
ax[0].scatter(df_rfcomparison["rf_test"], df_rfcomparison["pca_test"], color="C1", s=5)
ax[0].plot([0.2,1], [0.2,1], linewidth=1, linestyle="--", color="black")
ax[0].grid()
ax[0].set_xlabel("Accuracy of standard RF")
ax[0].set_ylabel("Accuracy of\nLDA RF (blue) and PCA RF(orange)")

ct = ct.rename(columns={a: n for a, n in zip(ALGOS, ALGO_NAMES)})
ct.plot(kind='bar', stacked=True, rot=0, ax=ax[1])
ct_normalized = ct.values / np.sum(ct.values)
sns.heatmap(ct_normalized, annot=True, ax = ax[2], vmax=0.33, cmap="Greens")
for a in ax[1:]:
    a.set_xlabel("Best Choice")
ax[1].set_xticklabels([ALGO_NAMES[i] for i in permutation])
ax[1].set_title("SORF and Best Choices\n(in absolute numbers)")

ax[2].set_ylabel("SORF Choice")
ax[2].set_xticklabels([ALGO_NAMES[i] for i in permutation])
ax[2].set_yticklabels([ALGO_NAMES[i] for i in permutation])
ax[2].set_title("Confusion Matrix of SORF")
fig.tight_layout()
fig.savefig("plots/confusion.pdf", bbox_inches='tight')

In [None]:
# Four panels: relative training time and number of trees per tree type
# (grouped by the forest type SORF finally chose), followed by box plots of the
# per-dataset time and tree compression.
fig, axes = plt.subplots(1, 4, figsize=(10, 2.5),  gridspec_kw={'width_ratios': [1.5, 1.5, 1, 1]})

# one sub-frame per forest type chosen by SORF, in ALGOS order
runs_by_choice = [df_comparison[df_comparison["choice"] == a_choice] for a_choice in ALGOS]
labels_with_total = ALGO_NAMES + ["Total"]

# Heatmap 1: mean ratio of actual to maximum training time per tree type.
Z = np.array([
    [np.mean(df_selection["traintime_" + a_tree + "_act"] / df_selection["traintime_" + a_tree + "_max"]) for a_tree in ALGOS]
    for df_selection in runs_by_choice
])
# the "Total" column is the average of the per-type ratios
Z = np.column_stack([Z, np.sum(Z, axis=1) / len(ALGOS)])

ax = axes[0]
sns.heatmap(Z, annot=True, ax = ax, cmap="Reds")
ax.set_xticklabels(labels_with_total)
ax.set_yticklabels(ALGO_NAMES)
ax.set_ylabel("Chosen Forest Type")
ax.set_xlabel("Training time spent per tree type\n(relative to maximum possible)")

# Heatmap 2: rounded mean number of trees grown per tree type.
# Kept as float so that a forest type that was never chosen (empty selection,
# mean is NaN) no longer raises in int(); fmt='g' still prints whole numbers.
Z = np.array([
    [np.round(np.mean(df_selection["trees_" + a_tree])) for a_tree in ALGOS]
    for df_selection in runs_by_choice
], dtype=float)
Z = np.column_stack([Z, np.sum(Z, axis=1)])
ax = axes[1]
sns.heatmap(Z, annot=True, ax = ax, cmap="Reds", fmt='g')
ax.set_xticklabels(labels_with_total)
ax.set_yticklabels(ALGO_NAMES)
ax.set_ylabel("Chosen forest type")
ax.set_xlabel("Numbers of trees grown per tree type")


def _mean_per_dataset(df, column):
    """Return the mean of `column` for every openmlid group of `df`."""
    return [np.mean(g[column]) for _, g in df.groupby("openmlid")]


# one list per chosen forest type plus one over all runs ("Total")
compressions_in_time = [_mean_per_dataset(d, "time_compression") for d in runs_by_choice] + [_mean_per_dataset(df_comparison, "time_compression")]
compressions_in_num_trees = [_mean_per_dataset(d, "tree_compression") for d in runs_by_choice] + [_mean_per_dataset(df_comparison, "tree_compression")]

for a, values in zip(axes[2:], [compressions_in_time, compressions_in_num_trees]):
    a.boxplot(values)
    a.set_ylim([0, 1])
    a.set_xticklabels(labels_with_total, rotation=90)
    a.axhline(0.5, linestyle="dotted", color="black", linewidth=1)
    a.axhline(0.25, linestyle="dotted", color="black", linewidth=1)
    # Boxes sit at x = 1..len(ALGOS)+1; this line separates the per-type boxes
    # from the "Total" box. (It used to be drawn at 4.5, i.e. on the right edge
    # of the axes. NOTE(review): confirm the separator is what was intended.)
    a.axvline(len(ALGOS) + 0.5, color="black", linewidth=1)
axes[2].set_ylabel("Time Compression")
axes[3].set_ylabel("Tree Compression")

fig.tight_layout()
fig.savefig("plots/computations.pdf", bbox_inches='tight')
plt.show()

In [None]:
# Fraction of runs in which SORF chose something other than the standard RF.
runs_not_rf = df_comparison[df_comparison["choice"] != "rf"]
len(runs_not_rf) / len(df_comparison)

In [None]:
def get_figure_and_table_for_paper(df_comparison):
    """Compare the self-optimized forest with the three fixed forest types.

    For every dataset (rows grouped by ``openmlid``) the ``rf_test``,
    ``lda_test``, ``pca_test`` and ``selfopt_test`` columns are averaged, and
    each fixed forest type is compared with ``selfopt_test`` using a Wilcoxon
    signed-rank test (significant if p < 0.05).

    The figure has two panels: a scatter plot of the per-dataset scores
    (filled markers = significant difference, hollow = not significant) and
    box plots of the score difference "self-optimized minus fixed type", first
    over all datasets and then only over datasets with a significant difference.

    Parameters
    ----------
    df_comparison : pandas.DataFrame
        Must contain the columns ``openmlid``, ``rf_test``, ``lda_test``,
        ``pca_test`` and ``selfopt_test``.

    Returns
    -------
    tuple
        ``((fig, (ax1, ax2)), latex)``. ``latex`` is the LaTeX source of the
        result table; no rows are generated for it, so it only holds the header.
    """
    algo_names = ["rf", "lda", "pca"]

    scores = []
    significances = {a : [] for a in algo_names}
    imps = []

    # Rows of the LaTeX table. The row formatting of the original was
    # unreachable (guarded by `if False`) and has been removed, so this stays empty.
    rows = []

    for _, df_dataset in df_comparison.groupby("openmlid"):
        perf_selfopt = df_dataset["selfopt_test"]

        data_base = []
        for comp in algo_names:
            perf_comp = df_dataset[comp + "_test"]
            data_base.append(perf_comp)
            # Identical samples are treated as "not significant" without running
            # the test (presumably because wilcoxon cannot handle all-zero differences).
            if np.linalg.norm(perf_comp - perf_selfopt) != 0:
                significant = scipy.stats.wilcoxon(perf_comp, perf_selfopt).pvalue < 0.05
            else:
                significant = False
            significances[comp].append(significant)

        data_base.append(perf_selfopt)

        # last entry is the self-optimized forest; imps = its advantage over each entry
        scores_on_dataset = [np.mean(v) for v in data_base]
        imps_on_dataset = [scores_on_dataset[-1] - v for v in scores_on_dataset]
        scores.append(scores_on_dataset)
        imps.append(imps_on_dataset)

    scores = np.array(scores)
    imps = np.array(imps)

    for a in algo_names:
        # bool dtype so the arrays can be used (and negated) as index masks
        significances[a] = np.array(significances[a], dtype=bool)

    colors = {
        "rf": "C0",
        "lda": "C1",
        "pca": "C2"
    }

    # create figure
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 8))#,  gridspec_kw={'width_ratios': [3, 1]})

    for i, a in enumerate(algo_names):
        ax1.scatter(scores[significances[a],i], scores[significances[a],-1], s=20, color=colors[a])
        ax1.scatter(scores[~significances[a],i], scores[~significances[a],-1], s=20, facecolors="None", color=colors[a])

    ax1.plot([0.2,1], [0.2,1], linestyle="dotted", color="black")
    ax1.grid()
    ax1.set_xlabel("Performance of Standard RF (blue), LDA RF (orange), and PCA RF (green)")
    ax1.set_ylabel("Performance of Self-Optimized RF")

    # first all datasets per algorithm, then only the datasets on which that
    # algorithm differs significantly (the mask used to be the RF one for all three)
    imp_concated = []
    for i, algo in enumerate(algo_names):
        imp_concated.append(imps[:,i])
    for i, algo in enumerate(algo_names):
        imp_concated.append(imps[significances[algo],i])

    ax2.boxplot(imp_concated, vert=False)
    ax2.axvline(-0.01, linestyle="dotted", color="red", linewidth=1)
    ax2.axvline(0, linestyle="--", color="black", linewidth=1)
    ax2.axvline(0.01, linestyle="dotted", color="black", linewidth=1)
    ax2.axvline(0.03, linestyle="dotted", color="black", linewidth=1)
    fig.tight_layout()

    # return the axes created here instead of an undefined/global `ax`
    return (fig, (ax1, ax2)), pd.DataFrame(rows, columns=["openmlid", "rf_test", "selfopt_test"]).to_latex(index = False, escape = False)
    
(fig, ax), df_latex = get_figure_and_table_for_paper(df_comparison)
plt.show()
print(df_latex)

In [None]:
def get_figure_and_table_for_paper(df_comparison):
    """Build the performance figure saved as plots/performance.pdf.

    For every dataset (rows grouped by ``openmlid``) the ``rf_test``,
    ``lda_test``, ``pca_test`` and ``selfopt_test`` columns are averaged, and
    each fixed forest type is compared with ``selfopt_test`` using a Wilcoxon
    signed-rank test (significant if p < 0.05).

    Left panel: per-dataset scores of the fixed types against the
    self-optimized forest (filled = significant difference, hollow = not).
    Right panel: box plots of "self-optimized minus fixed type" over all
    datasets, over the significantly different datasets only, and against the
    best of all entries per dataset ("Oracle"). The mean of the oracle
    differences is printed.

    Parameters
    ----------
    df_comparison : pandas.DataFrame
        Must contain the columns ``openmlid``, ``rf_test``, ``lda_test``,
        ``pca_test`` and ``selfopt_test``.

    Returns
    -------
    tuple
        ``((fig, (ax1, ax2)), latex)``. ``latex`` is the LaTeX source of the
        result table; no rows are generated for it, so it only holds the header.
    """
    algo_names = ["rf", "lda", "pca"]

    scores = []
    significances = {a : [] for a in algo_names}
    imps = []

    # Rows of the LaTeX table. The row formatting of the original was
    # unreachable (guarded by `if False`) and has been removed, so this stays empty.
    rows = []

    for _, df_dataset in df_comparison.groupby("openmlid"):
        perf_selfopt = df_dataset["selfopt_test"]

        data_base = []
        for comp in algo_names:
            perf_comp = df_dataset[comp + "_test"]
            data_base.append(perf_comp)
            # Identical samples are treated as "not significant" without running
            # the test (presumably because wilcoxon cannot handle all-zero differences).
            if np.linalg.norm(perf_comp - perf_selfopt) != 0:
                significant = scipy.stats.wilcoxon(perf_comp, perf_selfopt).pvalue < 0.05
            else:
                significant = False
            significances[comp].append(significant)

        data_base.append(perf_selfopt)

        # last entry is the self-optimized forest; imps = its advantage over each entry
        scores_on_dataset = [np.mean(v) for v in data_base]
        imps_on_dataset = [scores_on_dataset[-1] - v for v in scores_on_dataset]
        scores.append(scores_on_dataset)
        imps.append(imps_on_dataset)

    scores = np.array(scores)
    imps = np.array(imps)

    for a in algo_names:
        # bool dtype so the arrays can be used (and negated) as index masks
        significances[a] = np.array(significances[a], dtype=bool)

    # With the default colour cycle: C0 = blue, C1 = orange, C2 = green.
    colors = {
        "rf": "C2",
        "lda": "C0",
        "pca": "C1"
    }

    # create figure
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 3),  gridspec_kw={'width_ratios': [1, 1]})

    for i, a in enumerate(algo_names):
        ax1.scatter(scores[significances[a],i], scores[significances[a],-1], s=20, color=colors[a])
        ax1.scatter(scores[~significances[a],i], scores[~significances[a],-1], s=20, facecolors="None", color=colors[a])

    ax1.plot([0.2,1], [0.2,1], linestyle="dotted", color="black")
    ax1.grid()
    # colour names corrected to agree with the `colors` mapping above
    ax1.set_xlabel("Accuracy of Standard RF (green), LDA RF (blue), and PCA RF (orange).\nBullets show significant and circle statistically not significant differences.")
    ax1.set_ylabel("Accuracy of Self-Optimized RF")

    # first all datasets per algorithm, then only the significantly different ones
    imp_concated = []
    for i, algo in enumerate(algo_names):
        imp_concated.append(imps[:,i])
    for i, algo in enumerate(algo_names):
        imp_concated.append(imps[significances[algo],i])

    # smallest advantage per dataset = difference to the best entry ("Oracle")
    imp_concated.append(np.min(imps, axis=1))

    ax2.boxplot(imp_concated, vert=False)
    ax2.axvline(-0.01, linestyle="dotted", color="red", linewidth=1)
    ax2.axvline(0, linestyle="--", color="black", linewidth=1)
    ax2.axvline(0.01, linestyle="dotted", color="black", linewidth=1)
    ax2.axvline(0.03, linestyle="dotted", color="black", linewidth=1)
    ax2.set_xlabel("Accuracy improvement achieved by SORF compared to ...")
    ax2.set_yticklabels(ALGO_NAMES + [name + " (sign.)" for name in ALGO_NAMES] + ["Oracle"])
    fig.tight_layout()

    print(np.mean(imp_concated[-1]))

    # return the axes created here instead of an undefined/global `ax`
    return (fig, (ax1, ax2)), pd.DataFrame(rows, columns=["openmlid", "rf_test", "selfopt_test"]).to_latex(index = False, escape = False)
    
(fig, ax), df_latex = get_figure_and_table_for_paper(df_comparison)
fig.savefig("plots/performance.pdf", bbox_inches='tight')
plt.show()
print(df_latex)

In [None]:
# NOTE(review): in the cells visible here `imps` is only assigned inside
# get_figure_and_table_for_paper, never at notebook level, so this cell
# presumably relies on a variable left over from an earlier session and
# would raise NameError on a fresh kernel — confirm, or remove the cell.
imps

In [None]:
def get_figure_and_table_for_paper(df_comparison):
    """Compare the self-optimized forest with the three fixed forest types.

    For every dataset (rows grouped by ``openmlid``) the ``rf_test``,
    ``lda_test``, ``pca_test`` and ``selfopt_test`` columns are averaged, and
    each fixed forest type is compared with ``selfopt_test`` using a Wilcoxon
    signed-rank test (significant if p < 0.05).

    The figure has two panels: a scatter plot of the per-dataset scores
    (filled markers = significant difference, hollow = not significant) and
    box plots of the score difference "self-optimized minus fixed type", first
    over all datasets and then only over datasets with a significant difference.

    NOTE(review): this definition repeats an earlier cell of the notebook and
    replaces whatever definition was executed before it.

    Parameters
    ----------
    df_comparison : pandas.DataFrame
        Must contain the columns ``openmlid``, ``rf_test``, ``lda_test``,
        ``pca_test`` and ``selfopt_test``.

    Returns
    -------
    tuple
        ``((fig, (ax1, ax2)), latex)``. ``latex`` is the LaTeX source of the
        result table; no rows are generated for it, so it only holds the header.
    """
    algo_names = ["rf", "lda", "pca"]

    scores = []
    significances = {a : [] for a in algo_names}
    imps = []

    # Rows of the LaTeX table. The row formatting of the original was
    # unreachable (guarded by `if False`) and has been removed, so this stays empty.
    rows = []

    for _, df_dataset in df_comparison.groupby("openmlid"):
        perf_selfopt = df_dataset["selfopt_test"]

        data_base = []
        for comp in algo_names:
            perf_comp = df_dataset[comp + "_test"]
            data_base.append(perf_comp)
            # Identical samples are treated as "not significant" without running
            # the test (presumably because wilcoxon cannot handle all-zero differences).
            if np.linalg.norm(perf_comp - perf_selfopt) != 0:
                significant = scipy.stats.wilcoxon(perf_comp, perf_selfopt).pvalue < 0.05
            else:
                significant = False
            significances[comp].append(significant)

        data_base.append(perf_selfopt)

        # last entry is the self-optimized forest; imps = its advantage over each entry
        scores_on_dataset = [np.mean(v) for v in data_base]
        imps_on_dataset = [scores_on_dataset[-1] - v for v in scores_on_dataset]
        scores.append(scores_on_dataset)
        imps.append(imps_on_dataset)

    scores = np.array(scores)
    imps = np.array(imps)

    for a in algo_names:
        # bool dtype so the arrays can be used (and negated) as index masks
        significances[a] = np.array(significances[a], dtype=bool)

    colors = {
        "rf": "C0",
        "lda": "C1",
        "pca": "C2"
    }

    # create figure
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 8))#,  gridspec_kw={'width_ratios': [3, 1]})

    for i, a in enumerate(algo_names):
        ax1.scatter(scores[significances[a],i], scores[significances[a],-1], s=20, color=colors[a])
        ax1.scatter(scores[~significances[a],i], scores[~significances[a],-1], s=20, facecolors="None", color=colors[a])

    ax1.plot([0.2,1], [0.2,1], linestyle="dotted", color="black")
    ax1.grid()
    ax1.set_xlabel("Performance of Standard RF (blue), LDA RF (orange), and PCA RF (green)")
    ax1.set_ylabel("Performance of Self-Optimized RF")

    # first all datasets per algorithm, then only the datasets on which that
    # algorithm differs significantly (the mask used to be the RF one for all three)
    imp_concated = []
    for i, algo in enumerate(algo_names):
        imp_concated.append(imps[:,i])
    for i, algo in enumerate(algo_names):
        imp_concated.append(imps[significances[algo],i])

    ax2.boxplot(imp_concated, vert=False)
    ax2.axvline(-0.01, linestyle="dotted", color="red", linewidth=1)
    ax2.axvline(0, linestyle="--", color="black", linewidth=1)
    ax2.axvline(0.01, linestyle="dotted", color="black", linewidth=1)
    ax2.axvline(0.03, linestyle="dotted", color="black", linewidth=1)
    fig.tight_layout()

    # return the axes created here instead of an undefined/global `ax`
    return (fig, (ax1, ax2)), pd.DataFrame(rows, columns=["openmlid", "rf_test", "selfopt_test"]).to_latex(index = False, escape = False)
    
(fig, ax), df_latex = get_figure_and_table_for_paper(df_comparison)
plt.show()
print(df_latex)

In [None]:
# Per-dataset mean of rf_test and selfopt_test, rounded to four decimals and
# scaled by 100.
mean_scores_per_dataset = df_comparison[["openmlid", "rf_test", "selfopt_test"]].groupby("openmlid").mean()
100 * np.round(mean_scores_per_dataset, 4)

In [None]:
# Box plots of the test gap to the best forest type and of the improvement over
# standard RF, followed by histograms of SORF's choices and of the best choices.
# These four names were not assigned anywhere in the notebook; they are taken
# from the matching columns of df_comparison.
# NOTE(review): "best_choice_test" is assumed for best_choices (the gap is a
# test-data gap); switch to "best_choice_oob" if that was intended.
gaps = df_comparison["gap"]
improvements_over_rf = df_comparison["imp"]
choices = df_comparison["choice"]
best_choices = df_comparison["best_choice_test"]

print(len(gaps))
fig, ax = plt.subplots(1,2)
ax[0].boxplot(gaps)
ax[1].boxplot(improvements_over_rf)
plt.show()

fig, ax = plt.subplots(1,2)
ax[0].hist(choices)
ax[1].hist(best_choices)

In [None]:
# One figure per dataset in the benchmark cache: for each forest type the test
# curve (solid) and the out-of-bag curve (dashed), drawn as the mean over the
# collected seeds with a 25%-75% percentile band.
CURVE_STYLES = ((True, "solid"), (False, "--"))  # (is test curve, line style)

for openmlid in sorted(pd.unique(bm.cache["openmlid"])):
    fig, ax = plt.subplots()
    sizes = list(range(1,201))
    max_val = 0
    for i, algo in enumerate(["rf", "lda", "pca"]):
        algo_color = f"C{i}"
        for test, linestyle in CURVE_STYLES:
            # only the second element of what get_curve returns is plotted
            curves = [
                list(bm.get_curve(openmlid, algo, seed, oob = not test)[1])
                for seed in range(1)
            ]
            if len(curves) == 0:
                continue
            curves = np.array(curves)
            means = np.mean(curves, axis=0)
            lower_band = np.percentile(curves, 25, axis=0)
            upper_band = np.percentile(curves, 75, axis=0)
            max_val = max(max_val, max(means))
            ax.plot(sizes, means, color=algo_color, linestyle=linestyle)
            ax.fill_between(sizes, lower_band, upper_band, color=algo_color, alpha=0.2)
    #ax.set_ylim([max_val - 0.1, max_val + 0.01])
    ax.set_title(openmlid)
    plt.show()