In [13]:
import sys
import os
# Put the parent directory first on the import path so that the
# `experiments` package imported below resolves from there.
sys.path = [os.path.abspath('../'), *sys.path]

# Limit OpenMP and OpenBLAS to a single thread each.
# NOTE(review): presumably to avoid oversubscribing cores when the notebook
# fans work out over several processes (see `n_cpu`) - confirm.
os.environ.update(OMP_NUM_THREADS='1', OPENBLAS_NUM_THREADS='1')

from experiments.plot_hawkes import plot_coeffs, plot_ticks_hist
from experiments.hawkes_coeffs import get_coeffs_dim_30, get_coeffs_dim_100, retrieve_coeffs
from experiments.simulation import simulate_and_save_hawkes, simulate_hawkes_in_parallel
from experiments.weights_computation import pre_compute_hawkes
from experiments.learning import find_best_metrics

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
from multiprocessing import cpu_count

# Problem size handed to the simulation, pre-computation and learning helpers.
dim = 30
# Values iterated over as `run_time` by the cells below.
run_times = [5000, 7000, 10000, 15000, 20000]

n_simulations = 10
# Leave one core free for the notebook itself, but never ask for fewer than
# one worker: on a single-core machine `cpu_count() - 1` would be 0.
n_cpu = max(1, cpu_count() - 1)

# Data directory passed to every helper. The commented-out lines are
# alternative data sets kept for quick switching.
directory_path = '/home/m.bompaire/experiments/hawkes_data/'
#directory_path = '/home/m.bompaire/experiments/hawkes_data_v2_2nd/'
#directory_path = '/home/m.bompaire/experiments/hawkes_data_v3/'

# NOTE(review): appears as `u=3` in the data paths used further down -
# presumably the number of kernel decays per model; confirm.
n_decays = 3

In [20]:
# simulate_hawkes_in_parallel(dim, run_times, n_decays, n_simulations, directory_path, n_cpu=n_cpu)

In [21]:
# max_pre_computed_hawkes = 100
# 
# for run_time in run_times:
#     pre_compute_hawkes(dim, run_time, n_decays, max_pre_computed_hawkes, 
#                        directory_path, n_cpu=n_cpu)

In [18]:
from collections import OrderedDict

from experiments.report_utils import read_lambdas_csv

from experiments.tested_prox import (
    create_prox_l1_no_mu, create_prox_l1w_no_mu,
    create_prox_l1_no_mu_nuclear, create_prox_l1w_no_mu_nuclear,
    create_prox_nuclear
)

# Search settings for the penalties that are tuned on their own.
# Each entry names the prox factory (`create_prox`) together with the
# settings that are later forwarded to `find_best_metrics`.
# Insertion order is kept: l1, l1w, nuclear.
prox_infos = OrderedDict([
    ('l1', {
        'n_initial_points': 10,
        'max_relative_step': 1.4,
        'create_prox': create_prox_l1_no_mu,
        'tol': 1e-10,
        'dim': 1,
    }),
    ('l1w', {
        'n_initial_points': 10,
        'max_relative_step': 1.4,
        'create_prox': create_prox_l1w_no_mu,
        'tol': 1e-10,
        'dim': 1,
    }),
    ('nuclear', {
        'n_initial_points': 3,
        'max_relative_step': 2,
        'create_prox': create_prox_nuclear,
        'tol': 1e-7,
        'dim': 1,
    }),
])

# prox_infos['l1_nuclear'] = {
#     'n_initial_points': 3,
#     'max_relative_step': 5,
#     'create_prox': create_prox_l1_no_mu_nuclear,
#     'tol': 1e-7,
#     'dim': 2,
# }
# 
# prox_infos['l1w_nuclear'] = {
#     'n_initial_points': 3,
#     'max_relative_step': 5,
#     'create_prox': create_prox_l1w_no_mu_nuclear,
#     'tol': 1e-7,
#     'dim': 2,
# }

def get_nuclear_strength_as_initial_points(suffix, model_file_paths):
    """Return the sorted, de-duplicated 'nuclear' lambdas for one end time.

    The table returned by ``read_lambdas_csv(suffix, [])`` is restricted to
    the rows whose ``'prox'`` column equals ``'nuclear'``. For each such row
    the values of the six metric columns are converted to ``float``,
    de-duplicated and sorted, and stored under the row's integer
    ``'end_time'``. The end time to look up is parsed from the first entry of
    ``model_file_paths``: the integer between ``'T_'`` and the next ``'/'``.

    Parameters
    ----------
    suffix : forwarded unchanged to ``read_lambdas_csv``.
    model_file_paths : sequence of str; only the first path is inspected and
        it must contain a ``T_<int>/`` segment.

    Returns
    -------
    list of float, in ascending order.

    Raises
    ------
    IndexError
        If ``model_file_paths`` is empty or its first path has no ``'T_'``.
    KeyError
        If no 'nuclear' row exists for the parsed end time.
    """
    # NOTE(review): 'alpha_auc' vs 'alphas_auc_no_diag' looks inconsistent;
    # left untouched because these must match the CSV header - confirm.
    metric_columns = [
        'alpha_auc', 'alphas_auc_no_diag',
        'estimation_error', 'estimation_error_no_diag',
        'kendall', 'kendall_no_diag',
    ]

    all_lambdas = read_lambdas_csv(suffix, [])
    nuclear_rows = all_lambdas[all_lambdas['prox'] == 'nuclear']

    # If an end time appears on several rows, the last one wins.
    strengths_by_end_time = {
        int(record['end_time']): sorted(
            {float(value) for value in record[metric_columns].values})
        for _, record in nuclear_rows.iterrows()
    }

    first_path = model_file_paths[0]
    after_marker = first_path.split('T_')[1]
    requested_end_time = int(after_marker.split('/')[0])
    return strengths_by_end_time[requested_end_time]

# get_nuclear_strength_as_initial_points('v3_10_models-Copy1', ['/home/m.bompaire/experiments/hawkes_data_v3/train_hawkes/dim_30/u=3/T_10000/precomputed/precomputed_000.pkl'])

# 'dedicated' two-parameter searches. Their initial points are supplied by
# `get_nuclear_strength_as_initial_points`, i.e. they reuse the lambdas
# previously recorded for the plain 'nuclear' prox.
# The two entries differ only in the prox factory they use.
prox_infos['dedicated_l1w_nuclear_1d'] = dict(
    initial_points=get_nuclear_strength_as_initial_points,
    n_initial_points=5,
    max_relative_step=2,
    create_prox=create_prox_l1w_no_mu_nuclear,
    tol=1e-8,
    dim=2,
    mode='dedicated',
    dim_1_n_extra_initial_points=2,
)

prox_infos['dedicated_l1_nuclear_1d'] = dict(
    initial_points=get_nuclear_strength_as_initial_points,
    n_initial_points=5,
    max_relative_step=2,
    create_prox=create_prox_l1_no_mu_nuclear,
    tol=1e-8,
    dim=2,
    mode='dedicated',
    dim_1_n_extra_initial_points=2,
)

# Rebuild the mapping so that every info dict also carries its own key under
# 'name' (placed first), then show which prox configurations are registered.
prox_infos = OrderedDict(
    (key, dict(name=key, **info))
    for key, info in prox_infos.items()
)
list(prox_infos.keys())

['l1', 'l1w', 'nuclear', 'dedicated_l1w_nuclear_1d', 'dedicated_l1_nuclear_1d']

In [19]:
# Echo the configured data directory that is passed to the simulation,
# pre-computation and learning helpers.
directory_path

'/home/m.bompaire/experiments/hawkes_data/'

In [None]:
# Number of pre-computed models handed to `find_best_metrics` for every
# (run_time, prox) pair.
n_models = 30

solver_kwargs = dict(tol=1e-6, max_iter=25000)


# Data (re)generation is switched off. Change the condition to True to
# simulate `n_models` realizations and pre-compute them for every run time
# before the search below.
if False:
    n_simulations = n_models
    simulate_hawkes_in_parallel(
        dim, run_times, n_decays, n_simulations, directory_path, n_cpu=n_cpu)

    max_pre_computed_hawkes = n_models
    for run_time in run_times:
        pre_compute_hawkes(
            dim, run_time, n_decays, max_pre_computed_hawkes,
            directory_path, n_cpu=n_cpu)


# Only prox configurations named here are run; the first name is forwarded
# to `find_best_metrics` as `first_prox`.
keep_prox = ['l1', 'l1w', 'nuclear', 'dedicated_l1w_nuclear_1d', 'dedicated_l1_nuclear_1d']
for run_time in run_times:
    for prox_info in prox_infos.values():
        if prox_info['name'] in keep_prox:
            # `infos` is overwritten on each call, so only the result of the
            # last (run_time, prox) pair remains after the loops.
            infos = find_best_metrics(
                dim, run_time, n_decays, n_models, prox_info, solver_kwargs,
                directory_path,
                max_run_count=5,
                n_cpu=n_cpu,
                suffix='v2_30_models',
                first_prox=keep_prox[0],
            )


## For time 5000
### For prox l1
coeffs file existed already and was the same
Retrieved 100 precomputed models
We keep 30 precomputed models
2020-01-26 23:14 Run 0 - With 11 new points
