In [17]:
import os
import sys
import torch
import copy
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

# navigates to the main project folder
project_root = os.path.abspath(os.path.join(os.path.dirname("__file__"), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)
    
from src.behavior_cloning import *
from src.utils.data_loading import *

<h2> Loading Data </h2>

In [18]:
rb_observations, rb_next_observations, rb_actions, rb_rewards, rb_dones = load_data()
fp_observations, fp_next_observations, fp_actions, fp_rewards, fp_dones = load_data('../data/final_policy.npz')
rb_df = load_data_as_df(rb_observations, rb_next_observations, rb_actions, rb_rewards, rb_dones)
fp_df = load_data_as_df(fp_observations, fp_next_observations, fp_actions, fp_rewards, fp_dones)

<h2> Implementations </h2>

In [19]:
def get_BC_data_loaders(observations: np.array, actions: np.array,
                        train: float = 0.70,
                        test: float = 0.15,
                        validation: float = 0.15,
                        batch_size: int = 32,
                        seed: int = 16) -> Tuple[DataLoader, DataLoader, DataLoader]:
    assert abs(train + test + validation - 1.0) < 1e-5, 'Data splits must add up to 1.'

    np.random.seed(seed)
    test_to_valid_ratio = test / (test + validation)

    observations_train, observations_test_valid, actions_train, actions_test_valid = train_test_split(
        observations, actions, test_size=(1 - train), random_state=seed)
    observations_test, observations_valid, actions_test, actions_valid = train_test_split(
        observations, actions, test_size=test_to_valid_ratio, random_state=seed)

    train_dataset = BCDataset(observations_train, actions_train)
    test_dataset = BCDataset(observations_test, actions_test)
    valid_dataset = BCDataset(observations_valid, actions_valid)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader, valid_loader

In [None]:
def train_and_evaluate(train_loader: DataLoader, val_loader: DataLoader, optimizer: torch.optim.Optimizer, model: torch.nn.Module,
        early_stop_epoch_without_improvement: int = 3, loss_function: callable = torch.nn.CrossEntropyLoss(), epochs=6, log_subfolder: str = 'logs'):
    tensorboard_log_subfolder = os.path.join(log_subfolder, 'tensorboard')
    if not os.path.exists(log_subfolder):  
        os.makedirs(log_subfolder)
    if not os.path.exists(tensorboard_log_subfolder):  
        os.makedirs(tensorboard_log_subfolder)
    log_writer = SummaryWriter(log_dir=tensorboard_log_subfolder)
    
    # add the model architecture as a graph
    sample_state_batch, _ = next(iter(train_loader))
    log_writer.add_graph(model, sample_state_batch)
    
    
    best_model_path, best_val_loss, best_model_valid_accuracy, epochs_without_improvement = None, float('inf'), -1.0, 0
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    train_losses, valid_losses = [], []
    
    
    for epoch in tqdm(range(epochs), desc='Epochs'):
        model.train()
        correct_train, total_train, train_loss = 0, 0, 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_accuracy = correct_train / total_train
        avg_train_loss = train_loss / len(train_loader)
        
        log_writer.add_scalar("Loss/Train", avg_train_loss, epoch)
        log_writer.add_scalar("Accuracy/Train", train_accuracy, epoch)
        
        model.eval()
        correct_val, total_val, valid_loss = 0, 0, 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = loss_function(outputs, labels)
                valid_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_accuracy = correct_val / total_val
        avg_valid_loss = valid_loss/len(val_loader)
        
        log_writer.add_scalar("Loss/Valid", avg_valid_loss, epoch)
        log_writer.add_scalar("Accuracy/Valid", val_accuracy, epoch)
    
        # average loss per batch
        train_losses.append(train_loss / len(train_loader))
        valid_losses.append(valid_loss / len(val_loader))
        
        # early stopping:
        if valid_loss < best_val_loss:
            #best performing model here, save it:
            best_model_path = os.path.join(log_subfolder, f"best_model.pt")
            torch.save(model.state_dict(), best_model_path)
            print(f"Best model saved at epoch {epoch+1} with validation loss: {valid_loss/len(val_loader):.4f}")
            epochs_without_improvement = 0
            best_val_loss = valid_loss
            best_model_valid_accuracy = val_accuracy
        else:
            epochs_without_improvement += 1
        
        if epochs_without_improvement >= early_stop_epoch_without_improvement:
            break
    
    log_writer.flush()
    log_writer.close()
    
    best_model = copy.deepcopy(model)
    best_model.load_state_dict(torch.load(best_model_path, map_location=device))
    return best_model, best_model_valid_accuracy

In [21]:
class BC(torch.nn.Module):
    def __init__(self, input_neurons: int,
                 hidden_neurons: int,
                 num_hidden_layers: int,
                 out_neurons: int,
                 activation_function: torch.nn.Module = torch.nn.ReLU()):
        super().__init__()

        # Add the first (input) layer + activation function
        layers = [torch.nn.Linear(input_neurons, hidden_neurons),
                  activation_function]

        # Add the hidden layers
        for _ in range(num_hidden_layers):
            layers.append(torch.nn.Linear(hidden_neurons, hidden_neurons))
            layers.append(activation_function)

        layers.append(torch.nn.Linear(hidden_neurons, out_neurons))

        # Combine the layers into a container
        self.network = torch.nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.network(x)


<h2> Testing </h2>

In [22]:
bc_model = BC(input_neurons=8, num_hidden_layers=128, hidden_neurons=256, out_neurons=4)
bc_model(torch.Tensor(rb_observations[0]))

tensor([ 0.0249, -0.0718,  0.0541,  0.0162], grad_fn=<AddBackward0>)

In [23]:
train_loader, test_loader, valid_loader = get_BC_data_loaders(observations=rb_observations[0:1000], actions=rb_actions.flatten()[0:1000])

In [24]:
train_and_evaluate(train_loader=train_loader, val_loader=valid_loader,  model=bc_model,
                optimizer = torch.optim.Adam(bc_model.parameters(), lr=0.01), epochs=12)

Epochs:   0%|          | 0/12 [00:00<?, ?it/s]

Epoch 1/12, Training Loss: 1.3928, Training Accuracy: 0.236052%


Epochs:   8%|▊         | 1/12 [00:02<00:28,  2.61s/it]

Epoch 1/12, Validation Loss: 1.3852, Validation Accuracy: 0.264000%
Best model saved at epoch 1 with validation loss: 1.3852
Epoch 2/12, Training Loss: 1.3858, Training Accuracy: 0.278970%


Epochs:  17%|█▋        | 2/12 [00:05<00:25,  2.55s/it]

Epoch 2/12, Validation Loss: 1.3848, Validation Accuracy: 0.264000%
Best model saved at epoch 2 with validation loss: 1.3848
Epoch 3/12, Training Loss: 1.3860, Training Accuracy: 0.278970%


Epochs:  25%|██▌       | 3/12 [00:07<00:22,  2.50s/it]

Epoch 3/12, Validation Loss: 1.3862, Validation Accuracy: 0.264000%
Epoch 4/12, Training Loss: 1.3851, Training Accuracy: 0.278970%


Epochs:  33%|███▎      | 4/12 [00:10<00:19,  2.48s/it]

Epoch 4/12, Validation Loss: 1.3860, Validation Accuracy: 0.264000%
Epoch 5/12, Training Loss: 1.3846, Training Accuracy: 0.278970%


Epochs:  33%|███▎      | 4/12 [00:12<00:24,  3.11s/it]

Epoch 5/12, Validation Loss: 1.3865, Validation Accuracy: 0.264000%





(BC(
   (network): Sequential(
     (0): Linear(in_features=8, out_features=256, bias=True)
     (1): ReLU()
     (2): Linear(in_features=256, out_features=256, bias=True)
     (3): ReLU()
     (4): Linear(in_features=256, out_features=256, bias=True)
     (5): ReLU()
     (6): Linear(in_features=256, out_features=256, bias=True)
     (7): ReLU()
     (8): Linear(in_features=256, out_features=256, bias=True)
     (9): ReLU()
     (10): Linear(in_features=256, out_features=256, bias=True)
     (11): ReLU()
     (12): Linear(in_features=256, out_features=256, bias=True)
     (13): ReLU()
     (14): Linear(in_features=256, out_features=256, bias=True)
     (15): ReLU()
     (16): Linear(in_features=256, out_features=256, bias=True)
     (17): ReLU()
     (18): Linear(in_features=256, out_features=256, bias=True)
     (19): ReLU()
     (20): Linear(in_features=256, out_features=256, bias=True)
     (21): ReLU()
     (22): Linear(in_features=256, out_features=256, bias=True)
     (23): ReLU