In [None]:
from init_notebook import *
from src.util.binarydb import BinaryDB

In [None]:
# SQLite file under <project>/cache/random_pca that backs the BinaryDB used below.
db_file = config.PROJECT_PATH / "cache" / "random_pca" / "db-ch32.sqlite"
db = BinaryDB(db_file)

In [None]:
# Group the metadata records by parameter set: every record whose "params"
# map to the same id (via db.to_id) is appended to the same list.
param_map = {}
# The entry id yielded by iter_meta() is not needed here. It is bound to a
# private name so the builtin `id` is not shadowed for the rest of the notebook.
for _entry_id, meta in db.iter_meta():
    param_id = db.to_id(meta["params"])
    param_map.setdefault(param_id, []).append(meta)
print(len(param_map))

In [None]:
# Build one summary row per parameter set and collect the rows in a DataFrame.
rows = []
for trials in param_map.values():
    # Parameter sets with fewer than 5 recorded trials are skipped.
    if len(trials) < 5:
        continue
    first = trials[0]
    if "val_accuracy" not in first["result"]:
        # Report the unexpected result record and move on.
        print("?", first["result"])
        continue
    val_accs = [t["result"]["val_accuracy"] for t in trials]
    row = {
        # One column per element of each parameter value ("<key>-0", "<key>-1", ...).
        # The key filter runs before the inner loop, so excluded parameters are
        # never enumerated at all.
        **{
            f"{key}-{i}": v
            for key, value in first["params"].items()
            if key not in ("activation", "channels")
            for i, v in enumerate(value)
        },
        # Mean over all trials for every result key present in the first trial.
        **{
            key: np.mean([t["result"][key] for t in trials])
            for key in first["result"]
        },
        "min_val_acc": min(val_accs),
        "max_val_acc": max(val_accs),
        "trials": len(trials),
        # Human-readable "key=v1,v2, key=..." description of the parameter set.
        "config": ", ".join(
            f"{key}=" + ",".join(str(v) for v in value)
            for key, value in first["params"].items()
            if key not in ("activation",)
        ),
    }
    rows.append(row)
print(len(rows))
df = pd.DataFrame(rows).sort_values("val_accuracy")
def _norm(x):
    return (x - x.min()) / (x.max() - x.min())
# Combined score: normalized accuracy plus normalized throughput weighted by 0.3;
# the frame is ordered by that score, best rows last.
df = (
    df
    .assign(fitness=_norm(df["val_accuracy"]) + .3 * _norm(df["throughput"]))
    .sort_values("fitness")
)

# Optional throughput filter, currently disabled:
# df = df[df["throughput"] >= 1000]
# Keep only rows whose "ratio" is at most 1.
df = df.loc[df["ratio"] <= 1]

print(len(df))
# Take the "config" strings out of the frame; the plots use them as hover labels.
df_configs = df.pop("config")
df.tail(30)

In [None]:
# Pairwise correlation of every column except the trial count, drawn as a
# heatmap with the color range pinned to [-1, 1].
corr = df.drop(columns=["trials"]).corr()
px.imshow(corr, range_color=[-1, 1], width=600, height=600)

In [None]:
# Probe a three-layer convolution stack: print the output/input element-count
# ratio and the number of samples processed per second for one forward pass.
model = nn.Sequential(
    nn.Conv2d(3, 32, 7, stride=2, dilation=1),
    nn.Conv2d(32, 32, 3, stride=1, dilation=2),
    nn.Conv2d(32, 32, 5, stride=2, dilation=1),
)
inp = torch.ones(128, 3, 96, 96)
# perf_counter() is the monotonic, high-resolution clock intended for timing
# short intervals; time.time() follows the system clock and may jump.
start_time = time.perf_counter()
outp = model(inp)
took = time.perf_counter() - start_time
# inp.shape[0] is the batch size, i.e. the number of samples in the pass.
print(f"ratio: {math.prod(outp.shape) / math.prod(inp.shape)}, throughput: {inp.shape[0] / took:,}")

'aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose', 'balance',
'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg',
'brwnyl', 'bugn', 'bupu', 'burg', 'burgyl', 'cividis', 'curl',
'darkmint', 'deep', 'delta', 'dense', 'earth', 'edge', 'electric',
'emrld', 'fall', 'geyser', 'gnbu', 'gray', 'greens', 'greys',
'haline', 'hot', 'hsv', 'ice', 'icefire', 'inferno', 'jet',
'magenta', 'magma', 'matter', 'mint', 'mrybm', 'mygbm', 'oranges',
'orrd', 'oryel', 'oxy', 'peach', 'phase', 'picnic', 'pinkyl',
'piyg', 'plasma', 'plotly3', 'portland', 'prgn', 'pubu', 'pubugn',
'puor', 'purd', 'purp', 'purples', 'purpor', 'rainbow', 'rdbu',
'rdgy', 'rdpu', 'rdylbu', 'rdylgn', 'redor', 'reds', 'solar',
'spectral', 'speed', 'sunset', 'sunsetdark', 'teal', 'tealgrn',
'tealrose', 'tempo', 'temps', 'thermal', 'tropic', 'turbid',
'turbo', 'twilight', 'viridis', 'ylgn', 'ylgnbu', 'ylorbr',
'ylorrd'

In [None]:
# Validation accuracy against throughput, colored by ratio; hovering a point
# shows its config string plus the listed columns.
px.scatter(
    data_frame=df,
    x="throughput",
    y="val_accuracy",
    color="ratio",
    color_continuous_scale="gnbu",
    opacity=0.5,
    hover_name=df_configs,
    hover_data=("fitness", "throughput", "min_val_acc", "max_val_acc", "trials"),
    width=1000,
    height=1000,
)

In [None]:
# Complement of each listed value to 100
# (presumably error percentages turned into accuracies — TODO confirm).
[
    100 - value
    for value in (36.64, 43.38, 41.76, 40.08, 37.86, 40.08, 37.33, 40.03, 36.09, 42.65)
]

In [None]:
# Validation accuracy against ratio, colored by throughput
# (x="throughput" is the alternative axis that was tried here).
px.scatter(
    data_frame=df,
    x="ratio",
    y="val_accuracy",
    color="throughput",
    color_continuous_scale="spectral",
    opacity=0.5,
    hover_name=df_configs,
    hover_data=("fitness", "throughput", "val_accuracy", "min_val_acc", "max_val_acc"),
)

In [None]:
# Ratio against throughput, colored by validation accuracy.
px.scatter(
    data_frame=df,
    x="throughput",
    y="ratio",
    color="val_accuracy",
    hover_name=df_configs,
    hover_data=("val_accuracy", "fitness", "throughput", "min_val_acc", "max_val_acc", "trials"),
)

In [None]:
# Median of every column per distinct "ratio" value; the bars show the
# median validation accuracy of each group.
px.bar(data_frame=df.groupby("ratio").median(), y="val_accuracy")

In [None]:
from sklearn.manifold import TSNE

# t-SNE is stochastic; a fixed seed keeps the 2-D embedding identical
# across kernel restarts and re-runs of this cell.
reducer = TSNE(n_components=2, random_state=0)
# Divide each of the three columns by its own maximum before embedding.
df2 = df[["val_accuracy", "ratio", "throughput"]]
df2 = df2 / df2.max()
xy = reducer.fit_transform(df2)

In [None]:
from sklearn.cluster import KMeans

# k-means starts from randomly chosen centroids; a fixed seed keeps the
# cluster labels (and everything derived from them) stable between runs.
clusterer = KMeans(n_clusters=60, random_state=0)
labels = clusterer.fit_predict(df2)

In [None]:
# The 2-D embedding, one point per row of df, colored by cluster label.
# Alternatives that were tried: color=df["val_accuracy"], and
# size=df["val_accuracy"] - df["val_accuracy"].min().
px.scatter(
    x=xy[:, 0],
    y=xy[:, 1],
    color=labels,
    hover_data={
        "val_accuracy": df["val_accuracy"],
        "ratio": df["ratio"],
        "throughput": df["throughput"],
    },
    size_max=10,
    height=800,
)

In [None]:
# Attach the cluster labels to a copy of df, then correlate the per-cluster
# column maxima and show the result as a heatmap.
df3 = df.assign(label=labels)
px.imshow(df3.groupby("label").max().corr())

In [None]:
# Kendall rank correlation between all columns
# (other methods that can be passed instead: "pearson", "spearman").
corr = df.corr(method="kendall")
px.imshow(corr, height=700)

In [None]:
from sklearn.decomposition import PCA

# The frame is used unscaled (the `df / df.max()` variant is disabled).
df_one = df
# Request as many components as the frame has columns.
pca = PCA(n_components=len(df.columns))
pca.fit(df_one)

In [None]:
# Correlation between the PCA-projected components; entries with an absolute
# value below 0.01 are replaced by NaN before plotting.
corr = pd.DataFrame(pca.transform(df_one)).corr()
corr = corr.mask(corr.abs() < 0.01)
px.imshow(corr, height=1000)