In [None]:
import numpy as np
import pandas as pd
from ConfigSpace import ConfigurationSpace, Configuration
from smac import MultiFidelityFacade, Scenario

from datasets import get_hits
from hopfield import iterate
from metrics.tracks import track_metrics
from segment.track import gen_seg_track_layered

## 1. Learning rate is *critical*
- The dataset here is small and easy.
- Without `learning_rate`, many trials are needed to find a workable config.
- A high `t_max` helps.
- Results with `learning_rate` are still better and are found much faster.

In [None]:
# Settings for section 1 (the small, easy dataset).
# Trial count handed to Scenario as n_trials.
N_TRIALS = 1000
# Number of events requested from get_hits; also used as the Scenario's max_budget.
N_EVENTS = 50
# Forwarded to get_hits as event_size.
EVENT_SIZE = 10
# Forwarded to get_hits as n_noise_hits.
N_NOISE_HITS = 10
# Forwarded to Scenario as n_workers.
# NOTE(review): confirm how SMAC interprets -1 here (all cores vs. a single worker).
N_WORKERS = -1

In [None]:
# Load the simulated hits and split them into one frame per event, each with a
# fresh 0..n-1 index, keyed by its event_id.
events = dict(
    (event_id, frame.reset_index(drop=True))
    for event_id, frame in get_hits(
        'spdsim',
        n_events=N_EVENTS,
        n_noise_hits=N_NOISE_HITS,
        event_size=EVENT_SIZE,
    ).groupby('event_id')
)
# Event ids as a tuple so `evaluate` can sample from them.
eids = tuple(events)

def evaluate(config: Configuration, seed: int, budget: float = 10) -> float:
    """Mean TrackML loss of ``config`` over a seeded random subset of events.

    Args:
        config: Hyperparameters; unpacked as keyword arguments into
            ``iterate.run``.
        seed: Seeds the generator that picks the events, so the same seed
            always selects the same subset.
        budget: Number of events to score. Truncated to an int and capped at
            the number of loaded events.

    Returns:
        The average of ``1 - score['trackml score']`` over the sampled events.
        NaN losses are skipped by the mean; the result is NaN when no event
        is sampled.
    """
    rng = np.random.default_rng(seed=seed)
    # Sampling without replacement raises when asked for more items than
    # exist, so cap the request at the number of available events.
    n_sampled = min(int(budget), len(eids))
    losses = []
    for eid in rng.choice(eids, n_sampled, replace=False):
        event = events[eid]
        seg, acts, positives = iterate.run(event, **config)
        tseg = gen_seg_track_layered(event)
        # Only the last entries of the activation / positives histories are scored.
        score = track_metrics(event, seg, tseg, acts[-1], positives[-1])
        losses.append(1. - score['trackml score'])
    # Average the loss alone: the other metrics never reach the return value,
    # and averaging them would break on any non-numeric field.
    return float(pd.Series(losses, dtype=float).mean())


In [None]:
# 'easy-classic' run: same search space as the 'easy-rate' run further down,
# except that `learning_rate` is not part of the search.
scenario = Scenario(
    configspace=ConfigurationSpace({
        'alpha': (0., 1000.),
        'gamma': (0., 2000.),
        'bias': (-40.0, 40.0),
        'cosine_power': (0.0, 50.0),
        'cosine_min_rewarded': (0., 1.),
        't_max': (1., 500.),
        'cooling_steps': (1, 50),
        'rest_steps': (1, 10),
        'initial_act': (0., 1.),
        # Not searched in this run (previously listed as commented-out entries):
        #   'threshold': 0.5, 'cosine_min_allowed': -1., 'distance_op': 'sum',
        #   'distance_power': (0., 3.), 't_min': 1., 'learning_rate': (0., 1.)
    }),
    name='easy-classic',
    # Budget = number of events scored per evaluation (see `evaluate`).
    min_budget=2,
    max_budget=N_EVENTS,
    n_trials=N_TRIALS,
    n_workers=N_WORKERS,
)

optimizer = MultiFidelityFacade(
    scenario=scenario,
    target_function=evaluate,
    overwrite=True,
)
best_config = optimizer.optimize()
# Last expression of the cell, so the validation result is displayed.
optimizer.validate(best_config)

In [None]:
# Show the configuration returned by optimizer.optimize() for the 'easy-classic' run.
best_config

In [None]:
# Tabulate the intensifier's recorded trajectory for the 'easy-classic' run
# (presumably the history of incumbent changes — verify against the SMAC docs).
pd.DataFrame(optimizer.intensifier.trajectory)

In [None]:
# 'easy-rate' run: the 'easy-classic' search space plus `learning_rate`,
# given only a quarter of the trial count.
scenario = Scenario(
    ConfigurationSpace({
        'alpha': (0., 1000.),
        'gamma': (0., 2000.),
        'bias': (-40.0, 40.0),
        # 'threshold': 0.5,
        'cosine_power': (0.0, 50.0),
        # 'cosine_min_allowed': -1.,
        'cosine_min_rewarded': (0., 1.),
        # 'distance_op': 'sum',
        # 'distance_power': (0., 3.),
        # 't_min': 1.,
        't_max': (1., 500.),
        'cooling_steps': (1, 50),
        'rest_steps': (1, 10),
        'initial_act': (0., 1.),
        'learning_rate': (0., 1.)
    }),
    'easy-rate',
    # Integer division: n_trials is a count, and `N_TRIALS / 4` would pass a float.
    n_trials=N_TRIALS // 4,
    n_workers=N_WORKERS,
    # Budget = number of events scored per evaluation (see `evaluate`).
    min_budget=2,
    max_budget=N_EVENTS
)

optimizer = MultiFidelityFacade(scenario, evaluate, overwrite=True)
best_config = optimizer.optimize()
# Last expression of the cell, so the validation result is displayed.
optimizer.validate(best_config)

In [None]:
# Show the configuration returned by optimizer.optimize() for the 'easy-rate' run.
best_config

In [None]:
# Tabulate the intensifier's recorded trajectory for the 'easy-rate' run
# (presumably the history of incumbent changes — verify against the SMAC docs).
pd.DataFrame(optimizer.intensifier.trajectory)

## 2. Harder problem (many more noise hits)

**TODO:** summarize the result of this run.

In [None]:
# Settings for section 2 (the harder problem); these rebind the section-1 constants.
# Trial count handed to Scenario as n_trials.
N_TRIALS = 200
# Number of events requested from get_hits; also used as the Scenario's max_budget.
N_EVENTS = 20
# Forwarded to get_hits as event_size.
EVENT_SIZE = 10
# Forwarded to get_hits as n_noise_hits: 1750, versus 10 in section 1.
# NOTE(review): the meaning of the 35 and 50 factors is not stated — document them.
N_NOISE_HITS = 35*50
# Forwarded to Scenario as n_workers.
# NOTE(review): confirm how SMAC interprets -1 here (all cores vs. a single worker).
N_WORKERS = -1

In [None]:
# Reload the hits with the section-2 settings and split them into one frame per
# event, each with a fresh 0..n-1 index, keyed by its event_id.
events = dict(
    (event_id, frame.reset_index(drop=True))
    for event_id, frame in get_hits(
        'spdsim',
        n_events=N_EVENTS,
        n_noise_hits=N_NOISE_HITS,
        event_size=EVENT_SIZE,
    ).groupby('event_id')
)
# Event ids as a tuple so `evaluate` can sample from them.
eids = tuple(events)

# NOTE(review): this redefines `evaluate` with the same logic as the section-1
# definition. The function reads the module-level `events` / `eids` when it is
# called, so it always scores whichever dataset was loaded most recently.
def evaluate(config: Configuration, seed: int, budget: float = 10) -> float:
    """Mean TrackML loss of ``config`` over a seeded random subset of events.

    Args:
        config: Hyperparameters; unpacked as keyword arguments into
            ``iterate.run``.
        seed: Seeds the generator that picks the events, so the same seed
            always selects the same subset.
        budget: Number of events to score. Truncated to an int and capped at
            the number of loaded events.

    Returns:
        The average of ``1 - score['trackml score']`` over the sampled events.
        NaN losses are skipped by the mean; the result is NaN when no event
        is sampled.
    """
    rng = np.random.default_rng(seed=seed)
    # Sampling without replacement raises when asked for more items than
    # exist, so cap the request at the number of available events.
    n_sampled = min(int(budget), len(eids))
    losses = []
    for eid in rng.choice(eids, n_sampled, replace=False):
        event = events[eid]
        seg, acts, positives = iterate.run(event, **config)
        tseg = gen_seg_track_layered(event)
        # Only the last entries of the activation / positives histories are scored.
        score = track_metrics(event, seg, tseg, acts[-1], positives[-1])
        losses.append(1. - score['trackml score'])
    # Average the loss alone: the other metrics never reach the return value,
    # and averaging them would break on any non-numeric field.
    return float(pd.Series(losses, dtype=float).mean())


In [None]:
# 'basic' run on the harder dataset: `learning_rate` is searched, and the
# `alpha` / `gamma` ranges are narrower than in the section-1 runs.
scenario = Scenario(
    configspace=ConfigurationSpace({
        'alpha': (0., 100.),
        'gamma': (0., 1000.),
        'bias': (-40.0, 40.0),
        'cosine_power': (0.0, 50.0),
        'cosine_min_rewarded': (0., 1.),
        't_max': (1., 500.),
        'cooling_steps': (1, 50),
        'rest_steps': (1, 10),
        'initial_act': (0., 1.),
        'learning_rate': (0., 1.),
        # Not searched in this run (previously listed as commented-out entries):
        #   'threshold': 0.5, 'cosine_min_allowed': -1., 'distance_op': 'sum',
        #   'distance_power': (0., 3.), 't_min': 1.
    }),
    name='basic',
    # Budget = number of events scored per evaluation (see `evaluate`).
    min_budget=2,
    max_budget=N_EVENTS,
    n_trials=N_TRIALS,
    n_workers=N_WORKERS,
)

optimizer = MultiFidelityFacade(
    scenario=scenario,
    target_function=evaluate,
    overwrite=True,
)
best_config = optimizer.optimize()
# Last expression of the cell, so the validation result is displayed.
optimizer.validate(best_config)

In [None]:
# Show the configuration returned by optimizer.optimize() for the 'basic' run.
best_config

In [None]:
# Tabulate the intensifier's recorded trajectory for the 'basic' run
# (presumably the history of incumbent changes — verify against the SMAC docs).
pd.DataFrame(optimizer.intensifier.trajectory)