In [2]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler
import torch.nn as nn
import torch.optim as optim
import wandb
from jumping_task import JumpTaskEnv
from simplemodel_v1 import SimpleModelV1
from simplemodel_v2 import SimpleModelV2
from simplemodel_v3 import SimpleModelV3
from data_helpers import generate_imitation_data
from data_helpers import generate_training_positions
from data_helpers import prepare_observation_target_data
from data_helpers import generate_augmented_data_horiz
from data_helpers import generate_augmented_data_vert
from data_helpers import generate_validation_positions_adjacent
from data_helpers import generate_validation_positions_random
from data_helpers import calculate_sampler_weights
from training_helpers import hyperparameter_grid_search
from training_helpers import train_model
from evaluation_helpers import test_agent
from misc_helpers import get_device
from misc_helpers import setup_seed
from misc_helpers import print_positions

# Authenticate against Weights & Biases once per kernel session; every
# training cell below opens its own run via wandb.init().
wandb.login()
# Device that all tensors and models in this notebook are moved to
# (the recorded output of this cell shows "cuda:0").
device = get_device()

wandb: Currently logged in as: k12123854 (davidkla). Use `wandb login --relogin` to force relogin


cuda:0


In [3]:
# Imitation data used as the lookup source for
# prepare_observation_target_data() in the cells below.
# NOTE(review): presumably expert demonstrations for every task configuration;
# confirm in data_helpers.
imitation_data = generate_imitation_data()

In [41]:
# Choose the set of training task positions. Exactly ONE of the three variants
# below should be active; every later cell reads `training_positions` from the
# kernel state, so re-run the downstream cells after switching variants.

# Variant 1 (active): wide grid, using the helper's default arguments.
training_positions = generate_training_positions()

# Variant 2: tight grid, restricted obstacle-position / floor-height ranges.
# training_positions = generate_training_positions(min_obstacle_position=28,
#                                 max_obstacle_position=38,
#                                 min_floor_height=13,
#                                 max_floor_height=17,
#                                 positions_train_diff=2,
#                                 heights_train_diff=2)

# Variant 3: random train split.
#training_positions = generate_training_positions(random_tasks=True)

In [43]:
# Choose the validation task positions (one active variant, alternatives kept
# as comments). An empty list disables validation in the dataset cells below.
# NOTE(review): the four positional arguments (20, 10, 45, 20) are defined by
# generate_validation_positions_adjacent in data_helpers; confirm their meaning
# there before changing them.
validation_positions = generate_validation_positions_adjacent(training_positions, 20, 10, 45, 20)
#validation_positions = generate_validation_positions_random(training_positions, n_positions=20)
#validation_positions = []
# Number of validation positions (the recorded output of this cell is 54).
print(len(validation_positions))

# Print an overview grid of the split. The recorded output uses the letters
# t / v / o (presumably train / validation / other; verify in misc_helpers).
print_positions(training_positions, validation_positions)

54
 t v o o v t v o o v t v o o v t v o o v t v o o v t
 v o o o o v o o o o v o o o o v o o o o v o o o o v
 o o o o o o o o o o o o o o o o o o o o o o o o o o
 o o o o o o o o o o o o o o o o o o o o o o o o o o
 v o o o o v o o o o v o o o o v o o o o v o o o o v
 t v o o v t v o o v t v o o v t v o o v t v o o v t
 v o o o o v o o o o v o o o o v o o o o v o o o o v
 o o o o o o o o o o o o o o o o o o o o o o o o o o
 o o o o o o o o o o o o o o o o o o o o o o o o o o
 v o o o o v o o o o v o o o o v o o o o v o o o o v
 t v o o v t v o o v t v o o v t v o o v t v o o v t


In [36]:
# Build (observation, target) arrays for the training and validation positions
# from the imitation data.
x_train, y_train = prepare_observation_target_data(training_positions, imitation_data)
x_val, y_val = prepare_observation_target_data(validation_positions, imitation_data)

# Horizontally augmented copy of the training data.
# NOTE(review): the meaning of the two trailing arguments (3, 3) is defined in
# data_helpers.generate_augmented_data_horiz; confirm there.
# The training cells visible in this notebook pass `train_dataloader` (the
# non-augmented loader) to train_model, so the augmented data is prepared
# here but not consumed by them.
x_train_augmented, y_train_augmented = generate_augmented_data_horiz(x_train, y_train, 3, 3)
# Optional vertical augmentation stacked on top of the horizontal one.
#x_train_augmented, y_train_augmented = generate_augmented_data_vert(x_train_augmented, y_train_augmented, 2, 2)

In [37]:
# Weighted samplers: each one draws as many indices per epoch as there are
# samples, with per-sample weights computed from the targets.

# Plain training set
samples_weights_train = calculate_sampler_weights(y_train).to(device)
sampler_train = WeightedRandomSampler(
    weights=samples_weights_train,
    num_samples=len(samples_weights_train),
)

# Augmented training set
samples_weights_train_augmented = calculate_sampler_weights(y_train_augmented).to(device)
sampler_train_augmented = WeightedRandomSampler(
    weights=samples_weights_train_augmented,
    num_samples=len(samples_weights_train_augmented),
)

# Validation set: only built when validation positions exist. The weights are
# intentionally left where calculate_sampler_weights put them (no .to(device)),
# exactly as before.
if len(validation_positions) > 0:
    samples_weights_val = calculate_sampler_weights(y_val)
    sampler_val = WeightedRandomSampler(
        weights=samples_weights_val,
        num_samples=len(samples_weights_val),
    )

In [38]:
setup_seed(42)

# Training data: observations get a channel axis inserted at dim 1 and both
# tensors are moved to the target device before being wrapped in a dataset.
train_dataset = TensorDataset(
    torch.Tensor(x_train).unsqueeze(1).to(device),
    torch.LongTensor(y_train).to(device),
)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, sampler=sampler_train)

# Same construction for the augmented training data.
train_dataset_augmented = TensorDataset(
    torch.Tensor(x_train_augmented).unsqueeze(1).to(device),
    torch.LongTensor(y_train_augmented).to(device),
)
train_dataloader_augmented = DataLoader(
    dataset=train_dataset_augmented,
    batch_size=64,
    sampler=sampler_train_augmented,
)

# Validation data is optional; without validation positions both handles are None.
if len(validation_positions) == 0:
    val_dataset = None
    val_dataloader = None
else:
    val_dataset = TensorDataset(
        torch.Tensor(x_val).unsqueeze(1).to(device),
        torch.LongTensor(y_val).to(device),
    )
    val_dataloader = DataLoader(dataset=val_dataset, batch_size=64, sampler=sampler_val)

**Train single model (1 run)**

In [None]:
# Train and evaluate a single model and log the run to Weights & Biases.
#
# The hyperparameters are bound once and reused for both the actual
# model/optimizer/training calls and the wandb config. Previously the config
# logged "epochs": 400 while train_model was called with 300, so the recorded
# metadata did not describe the run that was executed.
learning_rate = 0.0005
dropout_rate_conv = 0.0  # alternative tried: 0.3
dropout_rate_fc = 0.0    # alternative tried: 0.1
n_epochs = 300           # number of epochs actually passed to train_model

model = SimpleModelV3(dropout_rate_conv=dropout_rate_conv, dropout_rate_fc=dropout_rate_fc)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

run = wandb.init(
    project="JumpingTask_BC",
    config={
        "learning_rate": learning_rate,
        "architecture": "SimpleModelV3",
        "dropout_rate_conv": dropout_rate_conv,
        "dropout_rate_fc": dropout_rate_fc,
        "weight_decay": 0.0,  # Adam is created without weight decay above
        "dataset": "Wide-Grid",
        "batch_size": 64,  # must match the batch_size of train_dataloader
        "validation_positions": "No validation",
        "epochs": n_epochs,
    },
)

# validate=False: the validation loader is passed through but training does
# not use it for validation.
train_model(model, optimizer, criterion, n_epochs, train_dataloader, val_dataloader, validate=False, print_out=True)
solved_total, solved_train, solved_val, solved_test = test_agent(model, device, training_positions, validation_positions, True)

# Record the evaluation results on the run and upload the saved checkpoint.
run.summary["solved_envs"] = solved_total
run.summary["solved_envs_train"] = solved_train
run.summary["solved_envs_val"] = solved_val
run.summary["solved_envs_test"] = solved_test
run.summary["validation_size"] = len(validation_positions)
# NOTE(review): assumes train_model wrote "best_model.pt" to the working
# directory; confirm in training_helpers.
run.log_model(path="best_model.pt", name = "model")
run.finish()

print(f"Solved {solved_total} out of 286 Environments")

**Train/Test multiple models (50 runs)**

In [None]:
# Repeat training + evaluation 50 times with the regularised configuration and
# log every repetition as its own wandb run.
# NOTE(review): the run name and "dataset" label say "Tight", but this cell
# trains on whatever `training_positions` / `train_dataloader` currently hold
# in the kernel; make sure the tight-grid variant was the one executed.
setup_seed(42)

for i_run in range(50):
    # Unregularised alternative: dropout 0.0 / 0.0 and Adam without weight_decay.
    model = SimpleModelV3(dropout_rate_conv=0.3, dropout_rate_fc=0.1).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.0001)

    run_config = {
        "learning_rate": 0.0005,
        "architecture": "SimpleModelV3",
        "dropout_rate_conv": 0.3,
        "dropout_rate_fc": 0.1,
        "weight_decay": 0.0001,
        "dataset": "Tight-Grid",
        "batch_size": 64,
        "validation_positions": "No validation",
        "epochs": 400,
    }
    run = wandb.init(
        project="JumpingTask_BC",
        name=f"TestTight_{i_run+1}",
        config=run_config,
    )

    train_model(
        model, optimizer, criterion, 400, train_dataloader, val_dataloader,
        validate=False, print_out=False,
    )
    solved_total, solved_train, solved_val, solved_test = test_agent(
        model, device, training_positions, validation_positions
    )

    # Evaluation results and checkpoint for this repetition.
    run.summary["solved_envs"] = solved_total
    run.summary["solved_envs_train"] = solved_train
    run.summary["solved_envs_val"] = solved_val
    run.summary["solved_envs_test"] = solved_test
    run.summary["validation_size"] = len(validation_positions)
    run.log_model(path="best_model.pt", name="model")
    run.finish()
   

**Random-Grid Experiment Setup**

In [None]:
# Random-grid experiment: each of the 50 repetitions draws a fresh random set
# of training positions, rebuilds the data pipeline, trains a model and logs
# the result as its own wandb run. The cell overwrites the notebook-level
# training/validation variables (positions, arrays, samplers, loaders).
setup_seed(42)

for i_run in range(50):
    training_positions = generate_training_positions(random_tasks=True)
    # Validation is disabled here; alternatives:
    #validation_positions = generate_validation_positions_adjacent(training_positions, 20, 10, 45, 20)
    #validation_positions = generate_validation_positions_random(training_positions, n_positions=20)
    validation_positions = []

    x_train, y_train = prepare_observation_target_data(training_positions, imitation_data)
    x_val, y_val = prepare_observation_target_data(validation_positions, imitation_data)

    # Augmentation is switched off for this experiment:
    #x_train_augmented, y_train_augmented = generate_augmented_data_horiz(x_train, y_train, 3, 3)
    # samples_weights_train_augmented = calculate_sampler_weights(y_train_augmented).to(device)
    # sampler_train_augmented = WeightedRandomSampler(samples_weights_train_augmented, len(samples_weights_train_augmented))
    # train_dataset_augmented = TensorDataset(torch.Tensor(x_train_augmented).unsqueeze(1).to(device), torch.LongTensor(y_train_augmented).to(device))
    # train_dataloader_augmented = DataLoader(train_dataset_augmented, batch_size=64, sampler=sampler_train_augmented)

    # Training sampler, dataset and loader.
    samples_weights_train = calculate_sampler_weights(y_train).to(device)
    sampler_train = WeightedRandomSampler(
        weights=samples_weights_train,
        num_samples=len(samples_weights_train),
    )
    train_dataset = TensorDataset(
        torch.Tensor(x_train).unsqueeze(1).to(device),
        torch.LongTensor(y_train).to(device),
    )
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, sampler=sampler_train)

    # Validation sampler, dataset and loader (only when positions exist).
    if len(validation_positions) > 0:
        samples_weights_val = calculate_sampler_weights(y_val)
        sampler_val = WeightedRandomSampler(
            weights=samples_weights_val,
            num_samples=len(samples_weights_val),
        )
        val_dataset = TensorDataset(
            torch.Tensor(x_val).unsqueeze(1).to(device),
            torch.LongTensor(y_val).to(device),
        )
        val_dataloader = DataLoader(dataset=val_dataset, batch_size=64, sampler=sampler_val)
    else:
        val_dataloader = None
        val_dataset = None

    # Unregularised alternative: dropout 0.0 / 0.0 and Adam without weight_decay.
    model = SimpleModelV3(dropout_rate_conv=0.3, dropout_rate_fc=0.1).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.0001)

    run_config = {
        "learning_rate": 0.0005,
        "architecture": "SimpleModelV3",
        "dropout_rate_conv": 0.3,
        "dropout_rate_fc": 0.1,
        "weight_decay": 0.0001,
        "dataset": "Random-Grid",
        "batch_size": 64,
        "validation_positions": "No validation",
        "epochs": 400,
    }
    run = wandb.init(
        project="JumpingTask_BC",
        name=f"TestRandom_{i_run+1}",
        config=run_config,
    )

    train_model(
        model, optimizer, criterion, 400, train_dataloader, val_dataloader,
        validate=False, print_out=False,
    )
    solved_total, solved_train, solved_val, solved_test = test_agent(
        model, device, training_positions, validation_positions
    )

    # Evaluation results and checkpoint for this repetition.
    run.summary["solved_envs"] = solved_total
    run.summary["solved_envs_train"] = solved_train
    run.summary["solved_envs_val"] = solved_val
    run.summary["solved_envs_test"] = solved_test
    run.summary["validation_size"] = len(validation_positions)
    run.log_model(path="best_model.pt", name="model")
    run.finish()

**Search for dropout rates and weight decay**

In [None]:
# Grid search over conv dropout, fully-connected dropout and Adam weight decay.
# Every combination is trained 5 times; the seed is reset before each
# combination so all combinations see the same sequence of random states.
#
# Fix: the original config dict contained a stray "." in front of the
# "architecture" key, which is a SyntaxError and prevented the cell from
# running at all. The nesting is also normalised to 4 spaces per level.
dropout_rates_conv = [0.1, 0.2, 0.3, 0.4, 0.5]
dropout_rates_fc = [0.1, 0.2, 0.3, 0.4, 0.5]
weight_decay_factor = [0.0001, 0.001, 0.01]

for rate_conv in dropout_rates_conv:
    for rate_fc in dropout_rates_fc:
        for weight_decay in weight_decay_factor:
            setup_seed(42)
            for i_run in range(0, 5):
                model = SimpleModelV3(dropout_rate_conv=rate_conv, dropout_rate_fc=rate_fc)
                model = model.to(device)
                criterion = nn.CrossEntropyLoss()
                optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=weight_decay)

                # NOTE(review): the run name only encodes the repetition index,
                # so runs from different hyperparameter combinations share
                # names; they are distinguishable through the logged config.
                run = wandb.init(
                    project="JumpingTask_BC",
                    name=f"SearchDropoutRateWide_{i_run+1}",
                    config={
                        "learning_rate": 0.0005,
                        "architecture": "SimpleModelV3",
                        "dropout_rate_conv": rate_conv,
                        "dropout_rate_fc": rate_fc,
                        "weight_decay": weight_decay,
                        "dataset": "Wide-Grid",
                        "batch_size": 64,
                        "validation_positions": "No validation",
                        "epochs": 400,
                    }
                )

                train_model(model, optimizer, criterion, 400, train_dataloader, val_dataloader, validate=False, print_out=False)
                solved_total, solved_train, solved_val, solved_test = test_agent(model, device, training_positions, validation_positions)

                # Evaluation results and checkpoint for this run.
                run.summary["solved_envs"] = solved_total
                run.summary["solved_envs_train"] = solved_train
                run.summary["solved_envs_val"] = solved_val
                run.summary["solved_envs_test"] = solved_test
                run.summary["validation_size"] = len(validation_positions)
                run.log_model(path="best_model.pt", name = "model")
                run.finish()
   