In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
# Make the parent of the current working directory importable so the `lib`
# package imports in this cell can resolve (assumes `lib` lives in that
# parent directory). The membership check keeps re-running this cell from
# appending the same path to sys.path more than once.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from lib.generate.generate_ar import simulate_ar
import threading
from queue import Queue
import numpy as np

from lib.dataprocessor.ArDataProcessor import ArDataProcessor
from lib.loss.Mse import Mse
from lib.generate.generate_ar import generate_stationary_ar_coefficients
from lib.models.ArModale import ArModel


In [2]:

def run_simulation_thread(models, oracle_coefficients, sigma, length, data_processor, train_ratio, loss, x_theo_risk, y_theo_risk, results_queue, sim_range, seed_start):
    """Run a batch of simulations and put the batch results on ``results_queue``.

    For every index ``i`` in ``sim_range`` a series is generated with
    ``simulate_ar(oracle_coefficients, sigma, length, seed=seed_start + i)``,
    converted to ``(x, y)`` by ``data_processor.process`` and split at row
    ``int(train_ratio * length)``. Each model is fitted on the leading rows and
    scored with ``loss.compute`` on the trailing rows and on the
    ``(x_theo_risk, y_theo_risk)`` sample.

    Every model is deep-copied before it is fitted. The same ``models`` list
    is handed to all worker threads, so fitting the shared objects in place
    would let one thread predict with parameters that another thread has just
    fitted on a different series. The objects in ``models`` are therefore left
    unmodified by this function.

    Args:
        models: Iterable of objects exposing ``name``, ``fit(x, y)`` (whose
            return value is used as the fitted model) and ``predict(x)``.
        oracle_coefficients: Forwarded to ``simulate_ar``.
        sigma: Forwarded to ``simulate_ar``.
        length: Length requested from ``simulate_ar`` for each series.
        data_processor: Object whose ``process(series)`` returns ``(x, y)``.
        train_ratio: Fraction of ``length`` used as the train/test split row.
        loss: Object whose ``compute(prediction, target)`` returns per-sample
            losses (the result must support ``.mean()``).
        x_theo_risk: Inputs of the sample used to estimate each model's risk.
        y_theo_risk: Targets of the sample used to estimate each model's risk.
        results_queue: Queue receiving exactly one result tuple for the batch.
        sim_range: Iterable of simulation indices handled by this call.
        seed_start: Base seed; simulation ``i`` uses ``seed_start + i``.

    Returns:
        None. The tuple ``(simulations, models_theo_risk, test_length)`` is put
        on ``results_queue``, where ``simulations[i][model.name]`` is the
        flattened test loss array of simulation ``i``,
        ``models_theo_risk[model.name]`` is the list of mean losses on the
        theoretical-risk sample (one entry per simulation) and ``test_length``
        is ``length - int(train_ratio * length)``.
    """
    import copy

    train_length = int(train_ratio * length)
    # NOTE(review): test_length is derived from the requested series length,
    # not from the number of rows actually left in x_test; if
    # data_processor.process returns fewer rows than `length` the two differ.
    test_length = length - train_length
    simulations = {}
    models_theo_risk = {model.name: [] for model in models}

    for i in sim_range:
        seed = seed_start + i
        series = simulate_ar(oracle_coefficients, sigma, length, seed=seed)
        x, y = data_processor.process(series)
        x_train, y_train = x[:train_length], y[:train_length]
        x_test, y_test = x[train_length:], y[train_length:]

        simulation = {}
        for model in models:
            # Fit a private copy: the template objects are shared with the
            # other worker threads and must not be mutated concurrently.
            fitted = copy.deepcopy(model).fit(x_train, y_train)
            model_loss_all_test = loss.compute(fitted.predict(x_test), y_test)
            model_theo_risk_loss = loss.compute(fitted.predict(x_theo_risk), y_theo_risk)
            simulation[fitted.name] = np.ravel(model_loss_all_test)
            models_theo_risk[fitted.name].append(model_theo_risk_loss.mean())
        simulations[i] = simulation

    results_queue.put((simulations, models_theo_risk, test_length))


def run_n_simulation_parallel(models, oracle_coefficients, loss, sigma, length, data_processor, n=20, train_ratio=0.6, theo_risk_estimator_length=10**6, seed=1, n_threads=4):
    theo_risk_series = simulate_ar(oracle_coefficients, sigma, theo_risk_estimator_length, seed=seed)
    x_theo_risk, y_theo_risk = data_processor.process(theo_risk_series)
    
    results_queue = Queue()

    threads = []
    simulations_per_thread = n // n_threads
    extra_simulations = n % n_threads

    current_sim_start = 0
    for i in range(n_threads):
        sim_range_length = simulations_per_thread + (1 if i < extra_simulations else 0)
        sim_range = range(current_sim_start, current_sim_start + sim_range_length)
        current_sim_start += sim_range_length

        thread = threading.Thread(target=run_simulation_thread, args=(
            models, oracle_coefficients, sigma, length, data_processor, train_ratio, loss, x_theo_risk, y_theo_risk, results_queue, sim_range, seed))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    simulations = {}
    models_theo_risk = {model.name: [] for model in models}
    test_length = None

    while not results_queue.empty():
        thread_simulations, thread_models_theo_risk, thread_test_length = results_queue.get()
        simulations.update(thread_simulations)
        test_length = thread_test_length  
        for model_name, model_theo_risks in thread_models_theo_risk.items():
            models_theo_risk[model_name].extend(model_theo_risks)
    
    for model_name, model_theo_risks in models_theo_risk.items():
        models_theo_risk[model_name] = np.array(model_theo_risks).mean()

    return simulations, models_theo_risk, test_length

In [3]:


# Experiment configuration read by the simulation cells below.
# Base series length; the simulation cells pass a multiple of it (length*10).
length = 10**4
# Passed as `sigma` to the simulation helpers (presumably the noise scale of
# the simulated AR process - confirm against simulate_ar).
sigma = 0.1
# Degree requested for the generated oracle coefficients.
degree = 20
# Upper bound used both for the candidate models and for the data processor.
max_degree = 50

# One candidate ArModel per integer in 1 .. max_degree - 1.
# NOTE(review): range(1, max_degree) excludes max_degree itself - confirm that
# leaving out ArModel(max_degree) is intended.
models = [ArModel(i) for i in range(1, max_degree)]
# Oracle coefficients generated with a fixed seed.
oracle_coefficients = generate_stationary_ar_coefficients(degree=degree, seed=1)
loss = Mse()
# Configured with max_degree, independently of each candidate model's own
# argument.
data_processor = ArDataProcessor('ar', max_degree)


In [4]:
# Run n=10 simulations of series length length*10 (10**5) on 8 worker threads;
# the theoretical risk is estimated on a separately simulated series of 10**7
# points generated with seed=1.
# NOTE(review): the `_1000` suffix matches neither n=10 nor the series length
# used here - confirm the intended naming.
simulations_1000, models_theo_risk_1000, test_length_1000 = run_n_simulation_parallel(models, oracle_coefficients, loss, sigma, length*10, data_processor, n=10, train_ratio=0.6, theo_risk_estimator_length=10**7, seed=1, n_threads=8)

In [5]:
# Same call as the previous cell except for n_threads=16; it rebinds the same
# three result names, so the previous cell's results are replaced when this
# cell completes.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, i.e. the
# run was interrupted before finishing - consider removing this cell or
# storing its results under different names.
simulations_1000, models_theo_risk_1000, test_length_1000 = run_n_simulation_parallel(models, oracle_coefficients, loss, sigma, length*10, data_processor, n=10, train_ratio=0.6, theo_risk_estimator_length=10**7, seed=1, n_threads=16)


KeyboardInterrupt

