# Quickstart Training

Download the Pokémon training dataset. Credit goes to Vishal Subbiah for making this dataset available on Kaggle: https://www.kaggle.com/datasets/vishalsubbiah/pokemon-images-and-types

In [None]:
!curl https://storage.googleapis.com/ubiops/data/training-data.zip -o training-data.zip

Fill in your project name. You can also adapt the environment name, experiment name and run name, or leave the default values. Afterwards we import the client library; you will be prompted for your API token further down, when the connection with UbiOps is set up.

In [None]:
# The UbiOps project to work in: replace the placeholder with your own project.
PROJECT_NAME = "<YOUR_PROJECT_NAME>"

# Names for the resources used in this notebook; the defaults can be kept.
ENVIRONMENT_NAME = "python3-8-tf-2-4-4"
EXPERIMENT_NAME = "pokemon-experiment"
RUN_NAME = "pokemon-run"

In [None]:
import logging
import shutil
import ubiops
import uuid

logger = logging.getLogger("Training")

from ubiops.utils import upload_file, wait_for_environment, wait_for_experiment_run

Make a connection with UbiOps

In [None]:
from getpass import getpass

import ubiops

# Ask for the token without echoing it, and add the "Token " prefix when the
# user pasted only the bare token value.
api_token = getpass("Enter your UbiOps API token: ")
api_token = api_token if api_token.startswith("Token ") else f"Token {api_token}"

# Client configuration: API host plus the Authorization key holding the token.
configuration = ubiops.Configuration(host="https://api.ubiops.com/v2.1")
configuration.api_key['Authorization'] = api_token

Here we open the connection with the UbiOps API Client.

In [None]:
# Build the API client from the configuration (host and token) set up earlier.
api_client = ubiops.ApiClient(configuration)
# The core and training API handles both share that same client.
core_instance = ubiops.CoreApi(api_client=api_client)
training_instance = ubiops.Training(api_client=api_client)
# Last expression of the cell, so the notebook displays the returned value.
# NOTE(review): presumably serves as a quick connectivity check — confirm.
core_instance.service_status()

# Enable training

In [None]:
# Initialize the training feature for the project.
try:
    training_instance.initialize(project_name=PROJECT_NAME)
except ubiops.exceptions.ApiException as e:
    # Any API error is only logged as a warning (not re-raised), so the
    # notebook continues when the feature was already initialized before.
    logger.warning(f"The training feature may already have been initialized in your project: {e}")

# Create environment

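Create an environment for the Python packages we need and wait for it to be ready. The dependencies are taken from the local `training_environment` directory, which is zipped and uploaded to the new environment.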

In [None]:
# Zip the directory with the training environment dependencies first. Doing this
# before any API call means a missing or unreadable local directory fails the
# cell before an environment without dependencies is created in UbiOps.
training_environment_archive = shutil.make_archive('training_environment', 'zip', 'training_environment')

# Only the API calls live inside the try block, since ApiException is the only
# error handled here.
try:
    # Create the (still empty) environment on top of the Python 3.8 base.
    core_instance.environments_create(
        project_name=PROJECT_NAME,
        data=ubiops.EnvironmentCreate(
            name=ENVIRONMENT_NAME,
            display_name=ENVIRONMENT_NAME,
            base_environment='python3-8',
            description='Test training environment with tensorflow 2.4.4',
        )
    )

    # Upload the zipped dependencies to the new environment.
    core_instance.environment_revisions_file_upload(
        project_name=PROJECT_NAME,
        environment_name=ENVIRONMENT_NAME,
        file=training_environment_archive
    )
except ubiops.exceptions.ApiException as e:
    # API errors are logged instead of raised, so the cell still continues to
    # the wait below (e.g. presumably when the environment already exists).
    logger.error(e)

# Wait (up to 600 seconds) for the environment, streaming its logs meanwhile.
wait_for_environment(
    client=api_client,
    project_name=PROJECT_NAME,
    environment_name=ENVIRONMENT_NAME,
    timeout=600,
    stream_logs=True
)

# Create experiment

Prepare the training run by creating an experiment that uses the environment created above.

In [None]:
# Describe the experiment first, then submit it in a single API call.
experiment_spec = ubiops.ExperimentCreate(
    name=EXPERIMENT_NAME,
    description='Train a pokemon classification algorithm',
    environment=ENVIRONMENT_NAME,
    instance_type_group_name='4096 MB + 1 vCPU',
    default_bucket='default',
    labels={},
)

try:
    experiment = training_instance.experiments_create(
        project_name=PROJECT_NAME,
        data=experiment_spec,
    )
except ubiops.exceptions.ApiException as e:
    # API errors are logged and not re-raised.
    logger.error(e)

# Run a training job

Run a training job by uploading the training code and dataset and passing the training parameters. Use the `wait_for_experiment_run` function from utils to stream the logs of the run while waiting.

In [None]:
try:
    # Upload the dataset to the default bucket, under a folder named after the
    # experiment; the returned URI is handed to the run as its training data.
    file_uri = upload_file(
        client=api_client,
        project_name=PROJECT_NAME,
        file_path='training-data.zip',
        bucket_name='default',
        file_name=f"{EXPERIMENT_NAME}/training-data.zip"
    )

    # Start the run. The name is derived from the configurable RUN_NAME, with a
    # random UUID suffix so that every execution of this cell gets its own name.
    new_run = training_instance.experiment_runs_create(
        project_name=PROJECT_NAME,
        experiment_name=EXPERIMENT_NAME,
        data=ubiops.ExperimentRunCreate(
            name=f"{RUN_NAME}-{uuid.uuid4()}",
            description='Trying out a run with 15 epochs and batch size 32',
            training_code='./training_code/train.py',
            training_data=file_uri,
            parameters={
                'nr_epochs': 15,
                'batch_size': 32
            },
            timeout=14400
        )
    )

    # Block until the run is done, streaming its logs into the notebook.
    wait_for_experiment_run(
        client=api_client,
        project_name=PROJECT_NAME,
        experiment_name=EXPERIMENT_NAME,
        run_id=new_run.id,
        stream_logs=True
    )
except ubiops.exceptions.ApiException as e:
    # API errors from the upload, the run creation or the wait are logged and
    # not re-raised.
    logger.error(e)