In [None]:
# Standard imports
from time import perf_counter
from time import time

# Third-party imports
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Project imports
import twinlab as tl

In [None]:
# Identifiers on the twinLab cloud (the campaign reuses the dataset name)
dataset_id = "timings"
campaign_id = dataset_id

# Execution settings
use_cloud = True
random_seed = 42

# Synthetic-data settings
n = 100         # Not referenced by the cells visible in this notebook
n_cycle = 1     # Number of sine periods across the unit interval of X
err_sig = 0.25  # Standard deviation of the Gaussian noise added to y
n_warm = 10     # Size of the (currently disabled) warm-up dataset

# Problem sizes to benchmark: 10, 20, 40, ..., 5120 (doubling ten times).
# Two separate lists so that one can be edited without affecting the other.
ns_train = list(10 * np.power(2, np.arange(10)))
ns_eval = list(10 * np.power(2, np.arange(10)))

# Training parameters passed to tl.train_campaign
params = dict(
    dataset_id=dataset_id,
    inputs=["X"],
    outputs=["y"],
    test_train_ratio=1.0,
)

In [None]:
# Fix the state of NumPy's global random-number generator so that the
# synthetic data drawn later in the notebook is reproducible
np.random.seed(seed=random_seed)

# Disabled: one-off warm-up of the cloud lambdas to ensure fair tests
# (upload, train, predict and clean up on a small random dataset).
# The timing loop makes its own untimed warm-up call before every timed call.
# if use_cloud:
#     df_warm = pd.DataFrame({'X': np.random.rand(n_warm), 'y': np.random.rand(n_warm)})
#     tl.upload_dataset(df_warm, dataset_id)
#     tl.train_campaign(params, campaign_id)
#     tl.predict_campaign(df_warm, campaign_id)
#     tl.delete_campaign(campaign_id)
#     tl.delete_dataset(dataset_id)

In [None]:
# Benchmark twinLab cloud calls: time dataset upload, campaign training and
# prediction as a function of the number of data points.
#
# Results (converted to DataFrames by the plotting cells that follow):
#   dict_upload - upload time for each training-set size
#   dict_train  - training time for each training-set size
#   dict_eval   - prediction time for each (training size, evaluation size) pair
dict_upload = {"n": [], "t [s]": []}
dict_train = {"n": [], "t [s]": []}
dict_eval = {"n_train": [], "n_eval": [], "t [s]": []}

# Only the cloud back end is implemented: fail before generating any data
# rather than part-way through the first iteration
if not use_cloud:
    raise NotImplementedError("twinLab local not implemented yet")


def _time_after_warm_up(func, *args):
    """Return the duration in seconds of a warmed-up call to ``func(*args)``.

    ``func`` is called twice with the same arguments. The first call is an
    untimed warm-up of the cloud lambda so that the second, timed, call gives
    a fair measurement. Return values of ``func`` are discarded and any
    exception it raises propagates to the caller.
    """
    func(*args)  # Warm-up lambda; neither result nor duration is kept
    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system-clock adjustments (which can affect time())
    t_start = perf_counter()
    func(*args)
    return perf_counter() - t_start


# The flags record that a remote resource may exist, so that the `finally`
# clauses remove it even if a later call raises or the run is interrupted.
# They are set *before* the first attempt so a partially completed call is
# still cleaned up.
dataset_attempted = False
try:
    # Loop over numbers of training data
    for n_train in ns_train:

        # Create training data: X uniform on [0, 1), y a sine wave with
        # n_cycle periods plus Gaussian noise of standard deviation err_sig
        X = np.random.rand(n_train)
        y = np.sin(X*2.*np.pi*n_cycle)+np.random.normal(0., err_sig, n_train)
        df_train = pd.DataFrame({'X': X, 'y': y})

        campaign_attempted = False
        try:
            # Upload data
            dataset_attempted = True
            t_upload = _time_after_warm_up(tl.upload_dataset, df_train, dataset_id)
            dict_upload["n"].append(n_train)
            dict_upload["t [s]"].append(t_upload)
            print(f"Uploading {n_train} data points took {t_upload:.2f} seconds")

            # Train
            campaign_attempted = True
            t_train = _time_after_warm_up(tl.train_campaign, params, campaign_id)
            dict_train["n"].append(n_train)
            dict_train["t [s]"].append(t_train)
            print(f"Training on {n_train} data points took {t_train:.2f} seconds")

            # Loop over number of test data
            for n_eval in ns_eval:

                # Create evaluation data (inputs only)
                X = np.random.rand(n_eval)
                df_eval = pd.DataFrame({"X": X})

                # Predict
                t_predict = _time_after_warm_up(tl.predict_campaign, df_eval, campaign_id)
                dict_eval["n_train"].append(n_train)
                dict_eval["n_eval"].append(n_eval)
                dict_eval["t [s]"].append(t_predict)
                print(f"Predicting {n_eval} data points took {t_predict:.2f} seconds")
            print()
        finally:
            # Delete campaign (a fresh one is trained for every n_train)
            if campaign_attempted:
                tl.delete_campaign(campaign_id)
finally:
    # Delete dataset
    if dataset_attempted:
        tl.delete_dataset(dataset_id)


In [None]:
# Upload time versus number of uploaded data points (cloud runs only)
if use_cloud:
    logx, logy = True, True  # Toggle logarithmic scaling for each axis
    df_upload = pd.DataFrame(dict_upload)

    fig, ax = plt.subplots()
    ax.scatter(df_upload["n"], df_upload["t [s]"])
    ax.set_xlabel("number of data points")
    ax.set_ylabel("upload time [s]")

    # A linear axis is anchored at zero; a logarithmic one is left to autoscale
    if not logx:
        ax.set_xlim(left=0.)
    else:
        ax.set_xscale("log")
    if not logy:
        ax.set_ylim(bottom=0.)
    else:
        ax.set_yscale("log")
    plt.show()

In [None]:
# Training time versus number of training data points
logx, logy = True, True  # Toggle logarithmic scaling for each axis
df_train = pd.DataFrame(dict_train)

fig, ax = plt.subplots()
ax.scatter(df_train["n"], df_train["t [s]"])
ax.set_xlabel("number of data points")
ax.set_ylabel("training time [s]")

# A linear axis is anchored at zero; a logarithmic one is left to autoscale
if not logx:
    ax.set_xlim(left=0.)
else:
    ax.set_xscale("log")
if not logy:
    ax.set_ylim(bottom=0.)
else:
    ax.set_yscale("log")
plt.show()

In [None]:
# Prediction time versus number of evaluation points, with each marker
# coloured (on a logarithmic colour scale) by the size of the training set
logx, logy = True, True  # Toggle logarithmic scaling for each axis
df_eval = pd.DataFrame(dict_eval)

fig, ax = plt.subplots()
points = ax.scatter(
    df_eval["n_eval"],
    df_eval["t [s]"],
    c=df_eval["n_train"],
    cmap="inferno_r",
    norm=matplotlib.colors.LogNorm(),
)
fig.colorbar(points, ax=ax, label="number of training points")
ax.set_xlabel("number of evaluation data points")
ax.set_ylabel("prediction time [s]")

# A linear axis is anchored at zero; a logarithmic one is left to autoscale
if not logx:
    ax.set_xlim(left=0.)
else:
    ax.set_xscale("log")
if not logy:
    ax.set_ylim(bottom=0.)
else:
    ax.set_yscale("log")
plt.show()