In [None]:
# Third-party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Project imports
import twinlab as tl

# Establishing parameters and training data for an example

In this section, we want to establish data and model parameters, and also generate trial data in order to showcase the functionality of active learning (Bayesian optimisation for design of experiments). 

In [None]:
# Cloud identifiers: the campaign deliberately reuses the dataset's name
dataset_id = "active-learning"
campaign_id = dataset_id

# Reproducibility
random_seed = 42  # seed handed to NumPy's global random-number generator

# Synthetic "truth" and noise
n_cycle = 2     # multiplies the sine argument in f(x) = sin(2*pi*x*n_cycle)
err_sig = 0.25  # standard deviation of the Gaussian noise added by model()

# Dataset sizes
n_train = 100  # NOTE(review): not referenced by any visible cell -- confirm it is still needed
n_eval = 101   # number of evenly spaced evaluation points on [0, 1]

In [None]:
# Seed NumPy's global random-number generator so that the noisy samples drawn
# through np.random.normal are the same on every fresh run of the notebook
np.random.seed(random_seed)

# Noise-free sine wave used as the underlying "truth" for the training data
def f(x):
    """Return sin(2*pi*x*n_cycle) evaluated element-wise at `x`.

    `n_cycle` is read from the notebook-level configuration.
    """
    phase = 2*np.pi*x
    return np.sin(phase*n_cycle)

def model(X):
    """Return noisy observations of `f` at the inputs `X`.

    Each value is drawn from a normal distribution centred on f(X) with
    standard deviation `err_sig`, using NumPy's global random state.
    """
    noise_free = f(X)
    return np.random.normal(loc=noise_free, scale=err_sig)

# Training inputs: ten unevenly spaced locations on [0, 1]
X = np.array([
    0.05, 0.1, 0.15,
    0.4, 0.45, 0.48, 0.53,
    0.85, 0.9, 0.95,
])

# Noisy observations of the sine wave at those inputs
y = model(X)

# Collect inputs and outputs in a single dataframe and show it
df_train = pd.DataFrame(dict(X=X, y=y))
display(df_train)

# Send the training dataset to the cloud under `dataset_id`
tl.upload_dataset(df_train, dataset_id, verbose=True)

In [None]:
# Evaluation data for testing the model after it has been trained:
# `n_eval` evenly spaced inputs covering [0, 1].
# The dataframe is built directly so that no variable named `eval` is created,
# which would shadow Python's built-in `eval` for the rest of the session.
df_test = pd.DataFrame({"X": np.linspace(0, 1, n_eval)})
display(df_test)

In [None]:
# Plotting routine
def plot_data(df, grid, mean, stdv, xs=None):
    """Plot the model prediction, its uncertainty bands and the datapoints.

    Parameters
    ----------
    df : dataframe with "X" and "y" columns; drawn as black dots.
    grid : x-values at which `mean` and `stdv` are given.
    mean : model mean evaluated on `grid`.
    stdv : model standard deviation evaluated on `grid`.
    xs : optional sequence of recommended points. Each entry is unpacked to
        supply the x-position of a grey arrow, so each entry is expected to
        hold a single value (one input dimension).

    Returns
    -------
    None. The figure is shown with `plt.show()`.
    """
    _, ax = plt.subplots()
    ax.plot(grid, mean, "-", label="Pre-trained model")

    if xs is not None:
        # Blended transform: x in data coordinates, y in axes-fraction
        # coordinates, so the arrows sit at a fixed height on the figure.
        transform = ax.get_xaxis_transform()
        last = len(xs) - 1
        for i, x in enumerate(xs):
            # Label exactly one arrow (chosen by position, not by value) so the
            # legend gets a single "Recommended points" entry.
            label = "Recommended points" if i == last else None
            ax.arrow(
                *x, 0.2, 0., -0.13,
                color='grey', head_width=0.02, head_length=0.05, alpha=0.8,
                transform=transform, label=label,
            )

    # One- and two-sigma bands; only the first gets a legend entry
    for nsig in [1, 2]:
        label = "Model uncertainty" if nsig == 1 else None
        ax.fill_between(grid, mean-nsig*stdv, mean+nsig*stdv, lw=0, alpha=0.25, color="C0", label=label)

    ax.plot(df["X"], df["y"], ".", color="black", label="Datapoints")
    ax.set_xlim(0, 1)
    ax.set_xlabel("X")
    ax.set_ylabel("y")
    ax.legend()
    plt.show()

## Training the model

In this section, we'll be establishing the model training parameters, training our model in the Cloud, and producing the output of that training and prediction. 

First we need to specify how the model should be trained. We do this with a Python dictionary of parameters, which names the dataset to train on, specifies which columns are the model inputs and outputs, and sets modelling options such as the test_train_ratio.

In [None]:
# Training parameters passed to the cloud when the campaign is trained
params = dict(
    dataset_id=dataset_id,  # which uploaded dataset to train on
    inputs=["X"],           # input column(s)
    outputs=["y"],          # output column(s)
    # NOTE(review): presumably 1.0 means all rows are used for training -- confirm against the twinLab docs
    test_train_ratio=1.0,
)

Next, we can train our model in the Cloud and then call predict_campaign, which uses the trained model to make predictions at the test inputs we set up earlier. This function returns the mean predictions together with the predicted standard deviations, which quantify the uncertainty in our trained model.

In [None]:
# Train the campaign in the cloud using the parameters defined above
tl.train_campaign(params, campaign_id, verbose=True)

# Query the trained campaign on the evaluation grid; two dataframes come back,
# one with the predicted means and one with the predicted standard deviations
df_mean, df_stdv = tl.predict_campaign(df_test, campaign_id)

# Pull out the "y" column of each as a plain array for plotting
mean = df_mean["y"].values
stdv = df_stdv["y"].values

In [None]:
# x-values of the evaluation grid, as a plain array for plotting
grid = df_test["X"].values

# Plot the trained model: mean, uncertainty bands and the training datapoints
plot_data(df_train, grid, mean, stdv)

In the graph above we can see our data points in black, the model that has been generated from those points (the blue line), and the predicted deviation from that model (shaded at 1 sigma and 2 sigma around the blue line). Our next question might then be: to improve this model, where should we take more data? As you can see, there is a wide range of x-values to choose from, but which ones should we sample to get the best possible improvement in our model and reduce those sigmas?

## Active learning

To answer those questions, we need to use active learning. This is part of a wider research area called design of experiments, and in twinLab we particularly implement this as a form of Bayesian optimisation which we call active learning. Currently on the API, we have implemented active learning using a specific acquisition function, a Monte Carlo instance of Negative Integrated Posterior Variance.

In [None]:
# Use active learning: ask the trained campaign for 5 new points to sample;
# the result is a dataframe of recommended input values
df_active = tl.active_learn_campaign(campaign_id, 5)

In [None]:
# Plot the trained model and training data, with the recommended points
# passed in as `xs` so that they are marked on the plot
plot_data(df_train, grid, mean, stdv, xs=df_active.values)

This plot shows where active learning recommends we sample next, with the grey arrows marking the recommended x-values. If this were a practical experiment, you would now know your best bets for improving your model!

In [None]:
# Active learning when varying the number of requested points:
# one recommendation dataframe per entry of `numpoints`, in the same order
numpoints = [2, 3, 5]
dfs_active = [
    tl.active_learn_campaign(campaign_id, numpoint)
    for numpoint in numpoints
]

In [None]:
# Plot the results: one figure per set of recommendations, in the order requested.
# The requested count is not needed for plotting, so iterate over the dataframes
# directly, using a specific loop name instead of rebinding the generic `df`.
for df_points in dfs_active:
    plot_data(df_train, grid, mean, stdv, xs=df_points.values)

## Active learning loop

In [None]:
# Active learning loop: each pass asks the campaign for one recommended point,
# "measures" it with the noisy model, and appends it to the dataset so that the
# next pass retrains on the enlarged data
n_loop = 4  # number of active-learning iterations
df = df_train.copy()
for i in range(n_loop):
    if i != 0:
        # On the first pass the campaign trained earlier is reused as-is;
        # afterwards the enlarged dataset is re-uploaded and the model retrained
        tl.upload_dataset(df, dataset_id)
        tl.train_campaign(params, campaign_id)

    # Current prediction on the evaluation grid
    df_mean, df_stdv = tl.predict_campaign(df_test, campaign_id)
    mean, stdv = df_mean["y"].values, df_stdv["y"].values

    # Single recommended point, shown on the plot of the current model
    df_new = tl.active_learn_campaign(campaign_id, 1)
    plot_data(df, grid, mean, stdv, xs=df_new.values)

    # Sample the noisy model at the recommended input. Separate names are used
    # so the training arrays `X` and `y` are not rebound to other types.
    X_new = df_new.values[0]
    y_new = model(X_new)

    # ignore_index gives the appended row a fresh index label rather than a
    # duplicate 0
    df = pd.concat([df, pd.DataFrame({"X": X_new, "y": y_new})], ignore_index=True)

## Error messaging for active learning

In [None]:
# Each case pairs the heading to print with the positional arguments passed to
# tl.active_learn_campaign; every call is expected to fail, and the resulting
# error message is printed rather than raised
error_cases = [
    ("Failing to specify any arguments:", ()),
    ("Failing to specify the number of points:", (campaign_id,)),
    ("Requesting a negative number of points:", (campaign_id, -1)),
    ("Requesting zero points:", (campaign_id, 0)),
]

for heading, call_args in error_cases:
    print(heading)
    try:
        df_active = tl.active_learn_campaign(*call_args)
    except Exception as e:
        print(e)
    print()

## Finishing up 

This section covers how to delete your trained model and dataset from the cloud. Note that you don't need to delete your model and data in order to rerun a campaign or dataset of the same name: if, say, you rerun the functions above as-is, with no name changes, they will simply overwrite your existing model and dataset on the Cloud.

In [None]:
# Delete the campaign (trained model) and then the dataset from the cloud.
# Optional: skip this cell if you want to keep them.
tl.delete_campaign(campaign_id, verbose=True)
tl.delete_dataset(dataset_id, verbose=True)