# DeepRacer Complete Pipeline Example

## 1. Imports

In [None]:
%reload_ext dotenv
%dotenv

import warnings
warnings.filterwarnings(action='ignore', module='.*paramiko.*')

from src.training_pipeline import train_pipeline, stop_training_pipeline
from src.viewer_pipeline import start_viewer_pipeline, stop_viewer_pipeline

from src.types.hyperparameters import HyperParameters
from src.types.model_metadata import ModelMetadata
from src.config import settings

Using existing MinIO bucket: tcc-experiments
Using existing MinIO bucket: tcc-experiments
Using existing MinIO bucket: tcc-experiments
Using existing MinIO bucket: tcc-experiments
Using existing MinIO bucket: tcc-experiments
Viewer pipeline logging configured to file: /tmp/viewer_pipeline.log




## 2. Model Configuration

In [2]:
# Define a unique model name; it is handed to train_pipeline() as model_name
# in the training cell further down.
model_name = 'rl-deepracer-demo'

# Create hyperparameters and model metadata with their default values
# (both constructors are called without arguments); the same objects are
# later passed to train_pipeline().
hyperparameters = HyperParameters()
model_metadata = ModelMetadata()

In [None]:
# Display the hyperparameters (a bare expression on the last line of a
# notebook cell is rendered as the cell's output)
hyperparameters

In [None]:
# Display the model metadata (a bare expression on the last line of a
# notebook cell is rendered as the cell's output)
model_metadata

In [6]:
def reward_function(params):
    """
    Reward the car for driving close to the center line of the track.

    The car's distance from the center line is checked against three bands
    that scale with the track width (10%, 25% and 50% of it). The narrowest
    band containing the car decides the reward. A car outside every band, or
    with any wheel off the track, receives a near-zero reward.

    Args:
        params (dict): Input parameters from the simulator. The keys read are
            'all_wheels_on_track' (defaults to True), 'distance_from_center'
            (defaults to 0) and 'track_width' (defaults to 1).

    Returns:
        float: One of 1.0, 0.5, 0.1 or 1e-3
    """
    # Near-zero reward used for "likely crashed / close to off track"
    minimal_reward = 1e-3

    on_track = params.get('all_wheels_on_track', True)
    offset = params.get('distance_from_center', 0)
    width = params.get('track_width', 1)

    # (band limit, reward) pairs, ordered from the narrowest band outwards
    bands = (
        (0.1 * width, 1.0),
        (0.25 * width, 0.5),
        (0.5 * width, 0.1),
    )

    # The first band that contains the car wins; outside all of them the
    # car only earns the minimal reward
    reward = next(
        (value for limit, value in bands if offset <= limit),
        minimal_reward,
    )

    # Leaving the track overrides whatever the bands awarded
    if not on_track:
        reward = minimal_reward

    return float(reward)

## 3. Pipeline Operations

### 3.1 Training Pipeline

In [None]:
# First stop any existing training before a new run is launched in the
# next cell.
# NOTE(review): presumably safe to call when nothing is running — confirm
# against stop_training_pipeline.
stop_training_pipeline()

In [None]:
# Start training with our model configuration: the model name,
# hyperparameters, model metadata and reward function defined in the cells
# above are all passed through unchanged.
# NOTE(review): overwrite=True presumably replaces any model already stored
# under model_name — confirm before reusing a name you want to keep.
# NOTE(review): check_logs_after_start=False presumably skips inspecting the
# logs right after launch — TODO confirm against train_pipeline.
train_pipeline(
    model_name=model_name,
    hyperparameters=hyperparameters,
    model_metadata=model_metadata,
    reward_function=reward_function,
    overwrite=True,
    check_logs_after_start=False
)

### 3.2 Viewer Pipeline

In [None]:
# Start the viewer and print the address it reports; the return value is
# indexed with the 'viewer_url' key.
# NOTE(review): delay=0 presumably means "do not wait before starting" —
# confirm against start_viewer_pipeline.
result = start_viewer_pipeline(delay=0)
print(f"View the training process at: {result['viewer_url']}")

In [None]:
# Stop the viewer pipeline (presumably the counterpart of the
# start_viewer_pipeline() call in the previous cell — confirm).
stop_viewer_pipeline()