# DeepRacer Complete Pipeline Example

## 1. Imports

In [1]:
from drfc_manager.pipelines import (
    train_pipeline, stop_training_pipeline, clone_pipeline,
    start_viewer_pipeline, stop_viewer_pipeline,
    start_metrics_pipeline, stop_metrics_pipeline
)

from drfc_manager.types.hyperparameters import HyperParameters
from drfc_manager.types.model_metadata import ModelMetadata, DiscreteActionSpace, NeuralNetwork
from drfc_manager.types.env_vars import EnvVars

2025-07-04 22:27:56 - drfc - INFO - Using existing MinIO bucket: tcc-experiments
2025-07-04 22:27:56 - drfc - INFO - Using existing MinIO bucket: tcc-experiments
2025-07-04 22:27:56 - drfc - INFO - Using existing MinIO bucket: tcc-experiments
2025-07-04 22:27:56 - drfc - INFO - Using existing MinIO bucket: tcc-experiments
2025-07-04 22:27:56 - drfc - INFO - Using existing MinIO bucket: tcc-experiments


Initializing EnvVars for the first time


## 2. Environment Configuration (Optional)
Pass a parameter only if you want to override the corresponding default configuration value.

In [3]:
# Build the environment configuration with no overrides, i.e. every setting keeps its default.
# NOTE(review): `envs` is only referenced by the commented-out `env_vars=envs` argument of the
# training call further down, so this cell is optional unless that argument is enabled.
envs = EnvVars()
# Uncomment the next line to display the resolved configuration:
#envs

## 3. Model Configuration

In [5]:
# Define a unique model name; it is reused by the training and cloning cells below.
model_name = 'rl-deepracer-sagemaker-v2'

# No arguments are passed, so every hyperparameter keeps its default value
# (the defaults are visible in this cell's output).
hyperparameters = HyperParameters()
hyperparameters

HyperParameters(batch_size=64, beta_entropy=0.01, discount_factor=0.999, e_greedy_value=0.05, epsilon_steps=10000, exploration_type=<ExplorationType.CATEGORICAL: 'categorical'>, loss_type=<LossType.HUBER: 'huber'>, lr=0.0003, num_episodes_between_training=40, num_epochs=3, stack_size=1, term_cond_avg_score=100000, term_cond_max_episodes=100000)

In [4]:
# Example discrete action space: five steering angles from -30 to 30 in steps of 15,
# all at the same speed of 0.6. Built in one pass instead of five copy-pasted constructors.
STEERING_ANGLES = (-30, -15, 0, 15, 30)
action_spaces = [
    DiscreteActionSpace(steering_angle=angle, speed=0.6)
    for angle in STEERING_ANGLES
]

# `action_spaces` is NOT used by the default metadata below, which is continuous (see the cell output).
# To train with the discrete space instead, swap in the commented line.
# NOTE(review): ActionSpaceType is not imported in this notebook; it presumably lives in
# drfc_manager.types.model_metadata alongside ModelMetadata — confirm before enabling.
# model_metadata = ModelMetadata(action_space_type=ActionSpaceType.DISCRETE, action_space=action_spaces)
model_metadata = ModelMetadata()
model_metadata

ModelMetadata(action_space_type=<ActionSpaceType.CONTINUOUS: 'continuous'>, action_space=ContinuousActionSpace(steering_angle=SteeringAngle(high=30.0, low=-30.0), speed=Speed(high=4.0, low=1.0)), version=5, training_algorithm=<TrainingAlgorithm.PPO: 'clipped_ppo'>, neural_network=<NeuralNetwork.DEEP_CONVOLUTIONAL_NETWORK_SHALLOW: 'DEEP_CONVOLUTIONAL_NETWORK_SHALLOW'>, sensor=[<Sensor.FRONT_FACING_CAMERA: 'FRONT_FACING_CAMERA'>])

In [7]:
def reward_function(params):
    '''
    Reward driving close to the center line and penalize heavy steering,
    which helps mitigate zig-zag behaviors.

    Args:
        params (dict): Simulator inputs. This function reads
            'distance_from_center', 'track_width' and 'steering_angle'.

    Returns:
        float: The reward for the current step.
    '''
    # Read input parameters
    center_offset = params['distance_from_center']
    width = params['track_width']
    abs_steering = abs(params['steering_angle'])  # Only the magnitude of the steering angle matters

    # Bands that lie farther and farther away from the center line, given as
    # (band limit as a fraction of the track width, reward inside that band).
    # They are checked from the innermost band outwards.
    reward_bands = (
        (0.1, 1),
        (0.25, 0.5),
        (0.5, 0.1),
    )

    # Fallback when no band matches: likely crashed / close to off track
    reward = 1e-3
    for width_fraction, band_reward in reward_bands:
        if center_offset <= width_fraction * width:
            reward = band_reward
            break

    # Steering penalty threshold, change the number based on your action space setting
    ABS_STEERING_THRESHOLD = 15

    # Penalize the reward if the car is steering too much
    if abs_steering > ABS_STEERING_THRESHOLD:
        reward *= 0.8

    return float(reward)

## 3. Pipeline Operations

### 3.1 Training Pipeline

In [8]:
# Start training with our model configuration.
# Per the cell output, this uploads the custom files, copies the reward function into the
# model's folder, uploads the RoboMaker training configuration and starts the Docker stack.
train_pipeline(
    model_name=model_name,
    hyperparameters=hyperparameters,
    model_metadata=model_metadata,
    reward_function=reward_function,
    # NOTE(review): presumably replaces an existing model with the same name — confirm.
    overwrite=True,
    # env_vars=envs, # You can pass it or not, if you don't, the default values will be used
    # NOTE(review): presumably suppresses log following (output shows "Skipping log check.") — confirm.
    quiet=True
)

Data uploaded successfully to custom files
The reward function copied successfully to models folder at rl-deepracer-sagemaker-v2/reward_function.py
Upload successfully the RoboMaker training configurations
Starting model training
Docker stack started.
Skipping log check.


In [15]:
# Stop the training run started by the previous cell before moving on.
# NOTE(review): presumably tears down the training Docker stack — confirm.
stop_training_pipeline()

### 3.1.2 - Cloning your model

In [16]:
def reward_function_base_reward(params):
    """
    Minimal DeepRacer reward function used as a starting template.

    Args:
        params (dict): Input parameters from the simulator (not read by this template).

    Returns:
        float: The reward value, currently always the base reward.
    """
    # Start from a high base reward
    base_reward = 1.0

    # ... (add custom reward shaping here)

    return float(base_reward)

In [17]:
# Overrides applied to the cloned model: a batch size of 128 (the default shown earlier is 64)
# and the DEEP variant of the convolutional network (the default is SHALLOW).
# Fields that are not passed keep their default values.
# NeuralNetwork is imported in the notebook's import cell together with ModelMetadata.
hyperparameters_base_reward = HyperParameters(batch_size=128)
model_metadata_base_reward = ModelMetadata(neural_network=NeuralNetwork.DEEP_CONVOLUTIONAL_NETWORK_DEEP)

In [18]:
# Display the hyperparameters that will be used for the clone, to verify the override.
hyperparameters_base_reward

HyperParameters(batch_size=128, beta_entropy=0.01, discount_factor=0.999, e_greedy_value=0.05, epsilon_steps=10000, exploration_type=<ExplorationType.CATEGORICAL: 'categorical'>, loss_type=<LossType.HUBER: 'huber'>, lr=0.0003, num_episodes_between_training=40, num_epochs=3, stack_size=1, term_cond_avg_score=100000, term_cond_max_episodes=100000)

In [19]:
# Display the model metadata that will be used for the clone, to verify the override.
model_metadata_base_reward

ModelMetadata(action_space_type=<ActionSpaceType.CONTINUOUS: 'continuous'>, action_space=ContinuousActionSpace(steering_angle=SteeringAngle(high=30.0, low=-30.0), speed=Speed(high=4.0, low=1.0)), version=5, training_algorithm=<TrainingAlgorithm.PPO: 'clipped_ppo'>, neural_network=<NeuralNetwork.DEEP_CONVOLUTIONAL_NETWORK_DEEP: 'DEEP_CONVOLUTIONAL_NETWORK_DEEP'>, sensor=[<Sensor.FRONT_FACING_CAMERA: 'FRONT_FACING_CAMERA'>])

In [20]:
# Clone the model trained above, replacing its hyperparameters, metadata and reward function.
# Per the cell output, the call starts training for the clone and returns the new model's
# name (here the source name with a "-1" suffix).
clone_pipeline(
    model_name,
    # NOTE(review): presumably clears any existing data at the clone's destination — confirm.
    wipe_target=True,
    custom_hyperparameters=hyperparameters_base_reward,
    custom_model_metadata=model_metadata_base_reward,
    custom_reward_function=reward_function_base_reward
)

Data uploaded successfully to custom files
The reward function copied successfully to models folder at rl-deepracer-sagemaker-v2-1/reward_function.py
Upload successfully the RoboMaker training configurations
Starting model training
Docker stack started.
Skipping log check.


'rl-deepracer-sagemaker-v2-1'

In [24]:
# Stop the training run that the clone step started.
# NOTE(review): presumably tears down the training Docker stack — confirm.
stop_training_pipeline()

### 3.2 Viewer Pipeline

In [21]:
# Start the viewer and report where to reach it. The returned mapping exposes the
# 'viewer_url' and 'proxy_url' keys used below.
# NOTE(review): delay=0 presumably skips a startup wait — confirm against the pipeline docs.
result = start_viewer_pipeline(delay=0)
# Single quotes inside the double-quoted f-string keep this cell valid on Python < 3.12,
# where reusing the enclosing quote character inside a replacement field is a SyntaxError.
print(f"View the training process at: {result['viewer_url']}. The proxy is {result['proxy_url']}")

View the training process at: http://localhost:8103. The proxy is http://localhost:8090


In [12]:
# Stop the viewer and its proxy; the call returns a status dict (see the cell output).
stop_viewer_pipeline()

{'status': 'success', 'message': 'Viewer and proxy processes stopped.'}

### 3.3 Grafana Pipeline

In [22]:
# Start the metrics stack. The returned MetricsResult carries the Grafana URL, the login
# credentials and the path of the log file (see the cell output).
# NOTE(review): the output shows admin/admin credentials — change them if the host is reachable by others.
start_metrics_pipeline()

MetricsResult(status='success', error=None, error_type=None, grafana_url='http://localhost:3000', credentials={'username': 'admin', 'password': 'admin'}, log_file='/tmp/drfc_logs/drfc_20250704_223838.log', message=None)

In [14]:
# Stop the metrics stack; the returned MetricsResult reports the outcome in `status` and `message`.
stop_metrics_pipeline()

MetricsResult(status='success', error=None, error_type=None, grafana_url=None, credentials=None, log_file=None, message='Metrics stack stopped successfully')