# Advanced Quickstart Guide

This tutorial follows the same format as the `Quickstart Guide` but explores further functionality provided by twinLab.

In [None]:
# Third-party imports
import numpy as np
import pandas as pd

# Project imports
import twinlab as tl

### API setup

When you first use twinLab you will have to set your API key and server URL.

In [None]:
# Set the API key ("my_api_key" is a placeholder; substitute your own key)
tl.set_api_key("my_api_key")

# Set the server url
# NOTE(review): this URL uses plain http — confirm whether an https endpoint
# is available so the API key is not sent unencrypted
tl.set_server_url("http://twinlab.digilab.co.uk/prod")

# Check which url is being used
tl.get_server_url()

### Your twinLab information

Confirm your twinLab version

In [None]:
# Show the twinLab version information
tl.get_versions()

Or view your user information, including how many credits you have.

In [None]:
# Show your twinLab user information
tl.get_user_information()

### Upload a dataset

twinLab requires datasets to be uploaded to the cloud with a `dataset_id`. This is the name the data is saved under in the cloud, and it is how models access the data for training. Data can be uploaded in the form of a `pandas` dataframe directly from your code.

> 📝 **Note:** Your dataset must have column headers.

In [None]:
# Example data: ten input values and the ten matching output values
x = [
    0.6964691855978616,
    0.28613933495037946,
    0.2268514535642031,
    0.5513147690828912,
    0.7194689697855631,
    0.42310646012446096,
    0.9807641983846155,
    0.6848297385848633,
    0.48093190148436094,
    0.3921175181941505,
]

y = [
    -0.8173739564129022,
    0.8876561174050408,
    0.921552660721474,
    -0.3263338765412979,
    -0.8325176123242133,
    0.4006686354731812,
    -0.16496626502368078,
    -0.9607643657025954,
    0.3401149876855609,
    0.8457949914442409,
]

# Combine the two lists into a dataframe with column headers "x" and "y"
df = pd.DataFrame({"x": x, "y": y})
display(df)

# Id the dataset is stored under in the cloud
dataset_id = "example_data"

# Upload dataset using a local dataframe
tl.upload_dataset(df, dataset_id, verbose=True)

Alternatively, data can be uploaded directly from a csv file by using a filepath. The filepath string is passed to `tl.upload_dataset` in exactly the same place the dataframe was.

In [None]:
# Path to a local csv file holding the data
df_filepath = "example_data_folder/example_data.csv"

# Id the dataset is stored under in the cloud
dataset_id = "example_data"

# Upload the dataset to the cloud, passing the filepath in place of a dataframe
tl.upload_dataset(df_filepath, dataset_id, verbose=True)

### View datasets

Once a dataset has been uploaded it can be easily accessed using built-in twinLab functions. A list of all uploaded datasets can be produced, individual datasets can be printed, and you can even query a dataset to get a statistical summary of it.

In [None]:
# List every dataset that has been uploaded
tl.list_datasets()

In [None]:
# View the data stored under this dataset_id
tl.view_dataset(dataset_id)

In [None]:
# Query the dataset to get a statistical summary
tl.query_dataset(dataset_id)

### Train a campaign

The `campaign` class is used to train and implement your surrogate models. As with the dataset, an id is defined; this is what the model will be saved as in the cloud. When training a model the arguments are passed using a dictionary; here that dictionary is called `campaign_params`.

In [None]:
# Id the trained model is saved under in the cloud
campaign_id = "example_campaign"

campaign_params = {
    "dataset_id": "example_data",   # This points the campaign to the uploaded dataset
    "inputs": ["x"],                # Using the dataset's column headers, define the input and output data
    "outputs": ["y"],
    "test_train_ratio": 0.8         # Determines how much data is used for training; here 80% is used to train the model
}                                   # and 20% is used to test it.

# Start a new campaign and train a surrogate model
tl.train_campaign(campaign_params, campaign_id, verbose=True)

### View campaigns

Just as with datasets all saved campaigns can be listed and queried.

In [None]:
# List all saved campaigns
tl.list_campaigns()

In [None]:
# View a campaign's parameters
tl.view_campaign(campaign_id)

In [None]:
# Query the campaign to view its status
tl.query_campaign(campaign_id)

### Using a predict campaign

The surrogate model is now trained and saved to the cloud under the campaign_id. It can now be used to make predictions. First an evaluation dataset containing only inputs is defined, as the campaign will provide the outputs. This can be done using a local pandas dataframe.

In [None]:
# Evaluation inputs: 128 evenly spaced points between 0 and 1
x_eval = np.linspace(0, 1, 128)
df_eval = pd.DataFrame({"x": x_eval})
display(df_eval)

# Ask the trained campaign for predictions at the evaluation inputs; the two
# returned values are unpacked as the mean and the standard deviation
df_mean, df_std = tl.predict_campaign(df_eval, campaign_id)

Alternatively, the evaluation dataset can be uploaded directly from a csv file by using a filepath.

In [None]:
# Path to a local csv file holding the evaluation inputs
df_eval_filepath = "example_data_folder/example_eval_data.csv"

# Pass the filepath in place of the dataframe so the csv file is the data
# that is actually evaluated
df_mean, df_std = tl.predict_campaign(df_eval_filepath, campaign_id)

### Viewing the results
`tl.predict_campaign` outputs mean values for each input and their standard deviation; this gives the ability to nicely visualise the uncertainty in results.


In [None]:
import matplotlib.pyplot as plt

# Plot parameters
nsigs = [1, 2]  # Half-widths of the uncertainty bands, in standard deviations
# Alternative band widths: nsigs = [0.674, 1.960, 2.576]
color = "blue"
alpha = 0.5
plot_training_data = True
plot_model_mean = True
plot_model_bands = True

# Quantities to plot: evaluation inputs, predicted mean and standard deviation
grid = df_eval["x"]
mean = df_mean["y"]
err = df_std["y"]

if plot_model_bands:
    # An all-NaN band draws nothing; it only supplies a single legend entry
    plt.fill_between(
        grid, np.nan, np.nan, lw=0, color=color, alpha=alpha, label=r"Model prediction"
    )
    # One band per entry in nsigs, each more transparent than the one before
    for rank, nsig in enumerate(nsigs, start=1):
        half_width = nsig * err
        plt.fill_between(
            grid,
            mean - half_width,
            mean + half_width,
            lw=0,
            color=color,
            alpha=alpha / rank,
        )

if plot_model_mean:
    # Label the mean line only when the bands have not already added the entry
    mean_label = None if plot_model_bands else r"Model prediction"
    plt.plot(grid, mean, color=color, alpha=alpha, label=mean_label)

if plot_training_data:
    plt.plot(df["x"], df["y"], ".", color="black", label="Training data")

plt.xlim((0.0, 1.0))
plt.xlabel(r"$X$")
plt.ylabel(r"$y$")
plt.legend()
plt.show()

### Using a sample campaign

The `tl.sample_campaign` function can be used to retrieve any number of results from your model. It requires the inputs for which you want the values and how many to calculate.

In [None]:
# Number of samples to calculate for each input
num_samples = 3

# Inputs to sample at: 20 evenly spaced points between 0 and 1
sample_inputs = pd.DataFrame({"x": np.linspace(0, 1, 20)})

sample_result = tl.sample_campaign(sample_inputs, campaign_id, num_samples)

# View the results in the form of a dataframe
display(sample_result)

### Viewing the results

The results can be plotted over the top of the previous graph, giving a nice visualisation of the sampled data together with the model's uncertainty.

In [None]:
# Plot parameters
nsigs = [1, 2]  # Half-widths of the uncertainty bands, in standard deviations
# Alternative band widths: nsigs = [0.674, 1.960, 2.576]
color = "blue"
alpha = 0.5
plot_training_data = True
plot_model_mean = True
plot_model_bands = True

# Plot results
grid = df_eval["x"]
mean = df_mean["y"]
err = df_std["y"]
if plot_model_bands:
    # An all-NaN band draws nothing; it only supplies a single legend entry
    label = r"Model prediction"
    plt.fill_between(grid, np.nan, np.nan, lw=0, color=color, alpha=alpha, label=label)
    # One band per entry in nsigs, each more transparent than the one before
    for isig, nsig in enumerate(nsigs):
        plt.fill_between(grid, mean - nsig * err, mean + nsig * err, lw=0, color=color, alpha=alpha / (isig + 1))
if plot_model_mean:
    # Label the mean line only when the bands have not already added the entry
    label = r"Model prediction" if not plot_model_bands else None
    plt.plot(grid, mean, color=color, alpha=alpha, label=label)
if plot_training_data:
    plt.plot(df["x"], df["y"], ".", color="#1d1d1b", label="Training data")

# Overlay the sampled points. The samples are held in `sample_result`; the
# original code read them from an undefined name `data`.
# NOTE(review): assumes `sample_result` has one column per sample — confirm
sample_values = sample_result.to_numpy()
for i in range(num_samples):
    plt.scatter(sample_inputs["x"], sample_values[:, i], marker="x", s=9, c="#ffb500")
# Empty scatter that only supplies a single legend entry for all the samples
plt.scatter([], [], marker="x", s=9, c="#ffb500", label="Sampled Points")
plt.xlim((0.0, 1.0))
plt.xlabel(r"$X$")
plt.ylabel(r"$y$")
plt.legend()
plt.show()