In [5]:
%load_ext autoreload
%autoreload 2

from shared import Db, Fn

from main.ai import ai_setups
from main.data import Mol, data
from main.tree import JsonTree, Tree, TreeTypes
from main.types import AiInput, Setup

from IPython.display import display, HTML

import numpy as np
import pandas
from main.utils import fn_txt

def get_stats(mol: Mol, setup: Setup):
    """Return ``(tree type, tree stats)`` for one molecule under one AI setup.

    The stored tree is looked up under the ``"ai_postprocess"`` key together
    with the molecule's SMILES string and the setup, then wrapped in ``Tree``.

    Relies on the module-level ``db`` handle, which is only bound inside the
    ``with Db(...) as db`` block, so this must be called from within it.

    Raises:
        AssertionError: if the database read yields a falsy value.
    """
    lookup_key: AiInput = {"smiles": mol.smiles, "setup": setup}
    stored = db.read(["ai_postprocess", lookup_key], JsonTree)
    # Every (molecule, setup) pair is expected to have been post-processed already.
    assert stored
    parsed = Tree(stored)
    return parsed.type, parsed.stats()

# Predicates over a tree's type (the first element of get_stats' result).
# Each one selects which trees contribute to a group of statistics tables:
# every tree, only those typed "internal", and only those typed "not_solved".
# NOTE(review): each predicate is passed to fn_txt() and the result is displayed
# as the table heading; if fn_txt renders the lambda's source text, editing the
# lambda lines below changes the displayed label — confirm before restyling.
filters: list[Fn[TreeTypes, bool]] = [
    lambda _type: True,
    lambda type: type == "internal",
    lambda type: type == "not_solved"
]

# Report, for every tree-type filter and every tree characteristic, the
# standard deviation / average / median of that characteristic per AI setup.
# `db` bound here is the module-level handle that get_stats() reads.
with Db("db", True) as db:
    mols = data()
    # One entry per setup: (setup, [(tree_type, stats), ...]) across all molecules.
    # Loaded once up front so the nested loops below only aggregate in memory.
    setup_and_stats = [(setup, [get_stats(mol, setup) for mol in mols]) for setup in ai_setups]
    # Loop names deliberately avoid `filter` and `type`: at notebook top level
    # they would keep shadowing the builtins in every later cell.
    for type_filter in filters:
        display(HTML("============================================="))
        display(HTML("filter: " + fn_txt(type_filter)))
        for characteristic in ("max_depth", "max_width", "node_count", "not_solved_count"):
            display(HTML(characteristic))
            rows: list[tuple[str, float, float, float]] = []
            for s, trees in setup_and_stats:
                # Values of this characteristic for the trees accepted by the filter.
                values: list[int] = [
                    stats[characteristic]
                    for (tree_type, stats) in trees
                    if type_filter(tree_type)
                ]
                # Human-readable identifier of the setup, used as the row label.
                label = f"{s['score']}-{s['agg']}-{s['uw_multiplier']}-{s['normalize']}"
                # .item() converts NumPy scalars to plain Python floats.
                rows.append((label, np.std(values).item(), np.average(values).item(), np.median(values).item()))
            display(pandas.DataFrame(rows, columns=["setup", "std", "avg", "median"]))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",3.15806,8.836735,9.0
1,"sc-max-0.15-(2.5, 4.5, False)",3.0817,8.816327,9.0
2,"sc-max-0.15-(3.0, 4.5, False)",3.028258,8.816327,9.0
3,"sc-max-0.15-(3.5, 4.5, False)",2.906713,8.714286,9.0
4,"sc-max-0.4-(2.5, 4.5, False)",3.063944,8.857143,9.0
5,"sc-max-0.4-(3.0, 4.5, False)",3.048409,8.816327,9.0
6,"sc-max-0.4-(3.5, 4.5, False)",3.04376,8.795918,9.0
7,"sc-max-0.6-(2.5, 4.5, False)",3.026056,8.836735,9.0
8,"sc-max-0.6-(3.0, 4.5, False)",3.119046,8.836735,9.0
9,"sc-max-0.6-(3.5, 4.5, False)",3.011985,8.77551,9.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",78.871872,114.591837,101.0
1,"sc-max-0.15-(2.5, 4.5, False)",77.302421,114.265306,101.0
2,"sc-max-0.15-(3.0, 4.5, False)",71.402281,113.571429,103.0
3,"sc-max-0.15-(3.5, 4.5, False)",80.725985,117.734694,104.0
4,"sc-max-0.4-(2.5, 4.5, False)",72.661941,114.55102,103.0
5,"sc-max-0.4-(3.0, 4.5, False)",78.814889,116.734694,103.0
6,"sc-max-0.4-(3.5, 4.5, False)",68.788111,113.306122,103.0
7,"sc-max-0.6-(2.5, 4.5, False)",91.918707,120.367347,105.0
8,"sc-max-0.6-(3.0, 4.5, False)",93.702041,120.734694,105.0
9,"sc-max-0.6-(3.5, 4.5, False)",79.533076,117.428571,104.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",373.255247,614.102041,622.0
1,"sc-max-0.15-(2.5, 4.5, False)",365.726489,614.367347,629.0
2,"sc-max-0.15-(3.0, 4.5, False)",365.461167,622.816327,632.0
3,"sc-max-0.15-(3.5, 4.5, False)",369.438513,626.959184,634.0
4,"sc-max-0.4-(2.5, 4.5, False)",357.939257,624.755102,632.0
5,"sc-max-0.4-(3.0, 4.5, False)",363.3189,627.77551,633.0
6,"sc-max-0.4-(3.5, 4.5, False)",358.565557,627.081633,629.0
7,"sc-max-0.6-(2.5, 4.5, False)",375.355114,638.387755,633.0
8,"sc-max-0.6-(3.0, 4.5, False)",366.142283,635.693878,646.0
9,"sc-max-0.6-(3.5, 4.5, False)",361.020515,630.938776,642.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",23.872108,15.857143,1.0
1,"sc-max-0.15-(2.5, 4.5, False)",24.029951,15.897959,1.0
2,"sc-max-0.15-(3.0, 4.5, False)",23.925906,15.857143,1.0
3,"sc-max-0.15-(3.5, 4.5, False)",23.623928,15.693878,1.0
4,"sc-max-0.4-(2.5, 4.5, False)",23.540673,15.285714,1.0
5,"sc-max-0.4-(3.0, 4.5, False)",23.910548,15.571429,1.0
6,"sc-max-0.4-(3.5, 4.5, False)",23.156315,15.22449,1.0
7,"sc-max-0.6-(2.5, 4.5, False)",22.902597,14.959184,1.0
8,"sc-max-0.6-(3.0, 4.5, False)",22.597358,14.632653,1.0
9,"sc-max-0.6-(3.5, 4.5, False)",23.008392,14.959184,1.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",2.796101,8.0,8.0
1,"sc-max-0.15-(2.5, 4.5, False)",2.787591,8.045455,8.0
2,"sc-max-0.15-(3.0, 4.5, False)",2.787591,8.045455,8.0
3,"sc-max-0.15-(3.5, 4.5, False)",2.661324,7.909091,8.0
4,"sc-max-0.4-(2.5, 4.5, False)",2.794623,8.090909,8.0
5,"sc-max-0.4-(3.0, 4.5, False)",2.794623,8.090909,8.0
6,"sc-max-0.4-(3.5, 4.5, False)",2.794623,8.090909,8.0
7,"sc-max-0.6-(2.5, 4.5, False)",2.794623,8.090909,8.0
8,"sc-max-0.6-(3.0, 4.5, False)",2.794623,8.090909,8.0
9,"sc-max-0.6-(3.5, 4.5, False)",2.803849,8.045455,8.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",50.811595,84.0,96.0
1,"sc-max-0.15-(2.5, 4.5, False)",50.628692,86.090909,95.0
2,"sc-max-0.15-(3.0, 4.5, False)",51.061723,87.136364,99.0
3,"sc-max-0.15-(3.5, 4.5, False)",50.479048,86.954545,99.0
4,"sc-max-0.4-(2.5, 4.5, False)",50.507916,87.636364,97.0
5,"sc-max-0.4-(3.0, 4.5, False)",50.372825,87.818182,98.0
6,"sc-max-0.4-(3.5, 4.5, False)",50.129502,87.181818,97.0
7,"sc-max-0.6-(2.5, 4.5, False)",50.421222,88.136364,99.0
8,"sc-max-0.6-(3.0, 4.5, False)",49.499583,87.863636,97.0
9,"sc-max-0.6-(3.5, 4.5, False)",49.513668,87.818182,97.5


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",303.985578,447.363636,470.5
1,"sc-max-0.15-(2.5, 4.5, False)",304.850285,460.409091,543.5
2,"sc-max-0.15-(3.0, 4.5, False)",305.245874,462.954545,542.5
3,"sc-max-0.15-(3.5, 4.5, False)",305.205472,464.272727,551.0
4,"sc-max-0.4-(2.5, 4.5, False)",303.620223,473.181818,558.5
5,"sc-max-0.4-(3.0, 4.5, False)",298.906399,471.318182,551.0
6,"sc-max-0.4-(3.5, 4.5, False)",301.79213,473.681818,558.0
7,"sc-max-0.6-(2.5, 4.5, False)",301.592951,478.318182,567.5
8,"sc-max-0.6-(3.0, 4.5, False)",299.826868,481.409091,565.0
9,"sc-max-0.6-(3.5, 4.5, False)",296.632602,478.909091,569.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",25.805054,34.090909,29.5
1,"sc-max-0.15-(2.5, 4.5, False)",26.065715,34.181818,29.0
2,"sc-max-0.15-(3.0, 4.5, False)",25.915789,34.090909,28.5
3,"sc-max-0.15-(3.5, 4.5, False)",25.55063,33.727273,28.5
4,"sc-max-0.4-(2.5, 4.5, False)",26.008104,32.818182,27.0
5,"sc-max-0.4-(3.0, 4.5, False)",26.324422,33.454545,27.5
6,"sc-max-0.4-(3.5, 4.5, False)",25.322262,32.681818,26.0
7,"sc-max-0.6-(2.5, 4.5, False)",25.211666,32.090909,28.0
8,"sc-max-0.6-(3.0, 4.5, False)",25.086297,31.363636,27.5
9,"sc-max-0.6-(3.5, 4.5, False)",25.425308,32.090909,28.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",3.270184,9.518519,9.0
1,"sc-max-0.15-(2.5, 4.5, False)",3.166179,9.444444,9.0
2,"sc-max-0.15-(3.0, 4.5, False)",3.071172,9.444444,9.0
3,"sc-max-0.15-(3.5, 4.5, False)",2.933184,9.37037,9.0
4,"sc-max-0.4-(2.5, 4.5, False)",3.131328,9.481481,9.0
5,"sc-max-0.4-(3.0, 4.5, False)",3.118158,9.407407,9.0
6,"sc-max-0.4-(3.5, 4.5, False)",3.116838,9.37037,9.0
7,"sc-max-0.6-(2.5, 4.5, False)",3.071172,9.444444,9.0
8,"sc-max-0.6-(3.0, 4.5, False)",3.235604,9.444444,9.0
9,"sc-max-0.6-(3.5, 4.5, False)",3.044706,9.37037,9.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",88.328667,139.518519,110.0
1,"sc-max-0.15-(2.5, 4.5, False)",87.076524,137.222222,108.0
2,"sc-max-0.15-(3.0, 4.5, False)",78.06803,135.111111,110.0
3,"sc-max-0.15-(3.5, 4.5, False)",91.374702,142.814815,110.0
4,"sc-max-0.4-(2.5, 4.5, False)",80.19946,136.481481,111.0
5,"sc-max-0.4-(3.0, 4.5, False)",89.270631,140.296296,110.0
6,"sc-max-0.4-(3.5, 4.5, False)",74.367585,134.592593,112.0
7,"sc-max-0.6-(2.5, 4.5, False)",108.285681,146.62963,110.0
8,"sc-max-0.6-(3.0, 4.5, False)",111.085343,147.518519,110.0
9,"sc-max-0.6-(3.5, 4.5, False)",90.473787,141.555556,110.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",369.368177,749.962963,674.0
1,"sc-max-0.15-(2.5, 4.5, False)",363.273495,739.814815,670.0
2,"sc-max-0.15-(3.0, 4.5, False)",358.71901,753.074074,679.0
3,"sc-max-0.15-(3.5, 4.5, False)",364.220768,759.518519,676.0
4,"sc-max-0.4-(2.5, 4.5, False)",351.323065,748.259259,677.0
5,"sc-max-0.4-(3.0, 4.5, False)",361.329704,755.259259,672.0
6,"sc-max-0.4-(3.5, 4.5, False)",352.590931,752.074074,674.0
7,"sc-max-0.6-(2.5, 4.5, False)",379.063469,768.814815,678.0
8,"sc-max-0.6-(3.0, 4.5, False)",367.213961,761.407407,673.0
9,"sc-max-0.6-(3.5, 4.5, False)",361.47056,754.814815,678.0


Unnamed: 0,setup,std,avg,median
0,"sc-max-0.0-(2.5, 4.5, False)",0.0,1.0,1.0
1,"sc-max-0.15-(2.5, 4.5, False)",0.0,1.0,1.0
2,"sc-max-0.15-(3.0, 4.5, False)",0.0,1.0,1.0
3,"sc-max-0.15-(3.5, 4.5, False)",0.0,1.0,1.0
4,"sc-max-0.4-(2.5, 4.5, False)",0.0,1.0,1.0
5,"sc-max-0.4-(3.0, 4.5, False)",0.0,1.0,1.0
6,"sc-max-0.4-(3.5, 4.5, False)",0.0,1.0,1.0
7,"sc-max-0.6-(2.5, 4.5, False)",0.0,1.0,1.0
8,"sc-max-0.6-(3.0, 4.5, False)",0.0,1.0,1.0
9,"sc-max-0.6-(3.5, 4.5, False)",0.0,1.0,1.0
