In [None]:
from init_notebook import *
from src.util.binarydb import BinaryDB

In [None]:
# Open the trial database that lives in the project's cache directory.
db_path = config.PROJECT_PATH / "cache" / "random_pca" / "db2.sqlite"
db = BinaryDB(db_path)

In [None]:
# Group the metadata of all stored trials by parameter set.
# `db.to_id(meta["params"])` is used as the grouping key, so every trial whose
# "params" map to the same id ends up in the same list.
# (presumably `to_id` is a deterministic hash of the params -- confirm in BinaryDB)
param_map = {}
# The entry id yielded by `iter_meta()` is not needed here; it is bound to a
# throw-away name so the builtin `id()` is not shadowed for the rest of the
# notebook session.
for _entry_id, meta in db.iter_meta():
    param_id = db.to_id(meta["params"])
    param_map.setdefault(param_id, []).append(meta)

In [None]:
# Build one summary row per parameter set and rank the sets by a combined
# accuracy/throughput score.

MIN_TRIALS = 4           # parameter sets with fewer trials than this are skipped
THROUGHPUT_WEIGHT = 0.3  # weight of the normalised throughput in "fitness"


def _norm(x):
    """Min-max scale ``x`` so that its minimum maps to 0 and its maximum to 1.

    A constant input has a zero value range; it is mapped to all zeros
    instead of the all-NaN result that dividing 0 by 0 would produce.
    """
    lo = x.min()
    span = x.max() - lo
    return (x - lo) / (span if span != 0 else 1)


rows = []
for trials in param_map.values():
    if len(trials) < MIN_TRIALS:
        continue
    # All trials in a group share the same parameters, so the first one is
    # used as the representative for the parameter columns.
    params = trials[0]["params"]
    results = [t["result"] for t in trials]
    val_accs = [r["val_accuracy"] for r in results]
    row = {
        # One column per element of every sequence-valued parameter
        # ("<name>-<index>"). The exclusion test comes before the inner loop
        # so the values of excluded parameters are never iterated.
        **{
            f"{key}-{i}": v
            for key, value in params.items()
            if key not in ("activation", "channels")
            for i, v in enumerate(value)
        },
        # Mean of every result metric over the trials of this parameter set.
        **{
            key: np.array([r[key] for r in results]).mean()
            for key in results[0].keys()
        },
        "min_val_acc": min(val_accs),
        "max_val_acc": max(val_accs),
        # Number of trials aggregated into this row (key was misspelled "trails").
        "trials": len(trials),
        # Human-readable parameter summary, used as hover label in the plots.
        "config": ", ".join(
            f"{key}=" + ",".join(str(v) for v in value)
            for key, value in params.items()
            if key not in ("activation",)
        ),
    }
    rows.append(row)

if not rows:
    # Without this check the sort below fails with an opaque KeyError.
    raise ValueError(f"no parameter set has at least {MIN_TRIALS} trials")

df = pd.DataFrame(rows).sort_values("val_accuracy")
df["fitness"] = _norm(df["val_accuracy"]) + THROUGHPUT_WEIGHT * _norm(df["throughput"])
df = df.sort_values("fitness")
# Keep the text column out of `df` so that the frame only holds the numeric
# summary columns; the labels stay index-aligned in `df_configs`.
df_configs = df.pop("config")
df.tail(50)

In [None]:
# Throughput (x) against validation accuracy (y), coloured by "ratio".
# Each point is one row of `df`; the hover title is its config string.
hover_columns = ("fitness", "throughput", "min_val_acc", "max_val_acc")
px.scatter(
    df,
    x="throughput",
    y="val_accuracy",
    color="ratio",
    hover_name=df_configs,
    hover_data=hover_columns,
)

In [None]:
# "ratio" (x) against validation accuracy (y), coloured by throughput.
# Alternative x axis: "throughput".
hover_columns = ("fitness", "throughput", "min_val_acc", "max_val_acc")
px.scatter(
    df,
    x="ratio",
    y="val_accuracy",
    color="throughput",
    hover_name=df_configs,
    hover_data=hover_columns,
)

In [None]:
# Throughput (x) against "ratio" (y), coloured by validation accuracy.
hover_columns = ("val_accuracy", "fitness", "throughput", "min_val_acc", "max_val_acc")
px.scatter(
    df,
    x="throughput",
    y="ratio",
    color="val_accuracy",
    hover_name=df_configs,
    hover_data=hover_columns,
)

In [None]:
# Show only the rows whose first three kernel sizes are 3 and whose first
# stride is 2.
all_kernels_are_3 = (
    (df["kernel_size-0"] == 3)
    & (df["kernel_size-1"] == 3)
    & (df["kernel_size-2"] == 3)
)
first_stride_is_2 = df["stride-0"] == 2
df[all_kernels_are_3 & first_stride_is_2]

In [None]:
import graphviz

def plot_conv(
    kernel_size: List[int],
    stride: List[int],
    dilation: List[int],
):
    """Draw the cell connectivity of a stack of layers as a ``graphviz.Graph``.

    For every layer index ``n`` the graph gets a row of ``L{n}/{i}`` nodes and
    ``kernel_size[n]`` nodes named ``C{n}/{k}``. Each ``C`` node is connected
    to the ``L`` nodes at positions ``i * stride[n] + k``; from the second
    layer on, every ``L{n}/{i}`` is also connected to ``C{n-1}/{i}``.

    The first row has ``kernel_size[0] + 2`` cells and each layer shrinks the
    row width by ``kernel_size[n] // 2``.

    Args:
        kernel_size: kernel size per layer; its length sets the layer count.
        stride: stride per layer, indexed in parallel with ``kernel_size``.
        dilation: accepted but not used by the current implementation.
            NOTE(review): presumably meant to space the taps -- confirm.

    Returns:
        The populated ``graphviz.Graph``.
    """
    graph = graphviz.Graph()
    width = kernel_size[0] + 2
    for depth, kernel in enumerate(kernel_size):
        step = stride[depth]
        # Row width after this layer; also the number of positions each
        # kernel tap is connected to.
        next_width = width - kernel // 2

        # Row of cells of this layer, linked to the previous layer's C nodes.
        for pos in range(width):
            cell = f"L{depth}/{pos}"
            graph.node(cell)
            if depth > 0:
                graph.edge(f"C{depth-1}/{pos}", cell)

        # One C node per kernel tap, linked to the strided cell positions.
        # Edges may name L nodes beyond `width`; graphviz creates those
        # implicitly.
        for tap in range(kernel):
            tap_node = f"C{depth}/{tap}"
            graph.node(tap_node)
            for offset in range(next_width):
                graph.edge(f"L{depth}/{offset*step+tap}", tap_node)

        width = next_width
    return graph

# Render the connectivity graph for an example three-layer configuration.
plot_conv(
    kernel_size=(9, 3, 3),
    stride=(3, 1, 2),
    dilation=(2, 3, 1),
)

In [None]:
from scripts.test_random_conv import ConvModel
# Build an example model, show its structure and check the output shape for a
# single all-ones input of shape (1, 3, 96, 96).
# NOTE(review): the 2nd-4th arguments are the same tuples passed to
# plot_conv as kernel_size / stride / dilation; the 1st is presumably the
# channel count per layer and the 5th the activation per layer -- confirm
# against ConvModel's signature.
m = ConvModel(
    (64, 64, 64),
    (9, 3, 3),
    (3, 1, 2),
    (2, 3, 1),
    ("relu", "relu", "relu"),
)
display(m)
dummy_input = torch.ones(1, 3, 96, 96)
m(dummy_input).shape

In [None]:
# Pairwise correlation between all columns of the summary frame, shown as a
# heatmap. Alternative methods: "pearson", "spearman".
corr = df.corr(method="kendall")
px.imshow(corr, height=1000)

In [None]:
from sklearn.decomposition import PCA
# Fit a PCA on the summary frame.
# `df_one` is currently just an alias of `df`; the per-column scaling
# (`df / df.max()`) is disabled.
df_one = df
# PCA accepts at most min(n_samples, n_features) components. Asking for one
# component per column therefore fails when the frame has fewer rows than
# columns, so the count is capped by the smaller dimension. The result is
# unchanged whenever there are at least as many rows as columns.
pca = PCA(n_components=min(df_one.shape))
pca.fit(df_one)

In [None]:
# Correlation between the PCA-projected components, shown as a heatmap.
# Entries with an absolute value below 0.01 are blanked out (set to NaN).
projected = pd.DataFrame(pca.transform(df_one))
corr = projected.corr()
corr = corr.mask(corr.abs() < 0.01)
px.imshow(corr, height=1000)