In [1]:
import os
import numpy as np
import sys
import torch
import copy
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

# Make the project root importable so that the `src` package resolves.
# A notebook kernel defines no `__file__`, so the root is resolved relative to the
# current working directory: the notebook has to be run from a direct sub-folder
# of the project for the imports below to work.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.behavior_cloning import *
from src.utils.data_loading import *
print(project_root)

d:\University\5th_Semester\Practical_Work_in_AI\Offline_RL_PW


<h2> Loading Data </h2>

In [2]:
# `rb_*`: dataset returned by load_data() with its default source
# (presumably the replay buffer -- TODO confirm against load_data's default path).
(rb_observations, rb_next_observations,
 rb_actions, rb_rewards, rb_dones) = load_data()

# `fp_*`: transitions stored in the final-policy file.
(fp_observations, fp_next_observations,
 fp_actions, fp_rewards, fp_dones) = load_data('../data/final_policy.npz')

# DataFrame views of both datasets.
rb_df = load_data_as_df(
    rb_observations, rb_next_observations, rb_actions, rb_rewards, rb_dones
)
fp_df = load_data_as_df(
    fp_observations, fp_next_observations, fp_actions, fp_rewards, fp_dones
)

<h2> Implementations </h2>

<h2> Testing </h2>

In [3]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [4]:
# Build the behaviour-cloning loaders from the `rb_*` dataset: 70 % / 15 % / 15 % split,
# fixed seed. Actions are flattened to a 1-D label vector before being handed over.
(train_loader,
 test_loader,
 valid_loader) = get_BC_data_loaders(
    observations=rb_observations,
    actions=rb_actions.flatten(),
    train=0.7,
    test=0.15,
    validation=0.15,
    batch_size=32,
    seed=16,
)


In [5]:
def get_data(data_loader) -> tuple:
    """Materialise every batch of a data loader into two NumPy arrays.

    Args:
        data_loader: Iterable yielding ``(data, labels)`` pairs of torch tensors,
            e.g. a ``torch.utils.data.DataLoader``.

    Returns:
        tuple: ``(data, labels)`` as NumPy arrays, with all batches concatenated
        along dimension 0 in iteration order.

    Note:
        An empty loader is not supported: ``torch.cat`` rejects an empty list.
    """
    data_batches = []
    label_batches = []

    for data, labels in data_loader:
        data_batches.append(data)
        label_batches.append(labels)

    # Stitch the per-batch tensors back into one tensor each.
    all_data = torch.cat(data_batches, dim=0)
    all_labels = torch.cat(label_batches, dim=0)

    # detach() + cpu() make the conversion work for tensors that track gradients
    # or live on an accelerator; for plain CPU tensors both calls are no-ops.
    return all_data.detach().cpu().numpy(), all_labels.detach().cpu().numpy()

In [None]:
# SVMs
# Grid search over SVM hyperparameters (the grid itself is defined in the cell below)
def grid_search_SVMs(X_train: np.ndarray, y_train: np.ndarray, X_valid: np.ndarray, y_valid: np.ndarray,
                     param_grid: dict, gamma_free_kernels: tuple = ('linear', 'poly')) -> tuple: # (dict, model, score, logs)
    """Exhaustively fit SVCs over a hyperparameter grid and keep the best one.

    Every combination is fitted on the training split and scored by accuracy on
    the validation split.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_valid: Validation features.
        y_valid: Validation labels.
        param_grid: Dict with the keys ``'C'``, ``'kernel'`` and ``'gamma'``, each
            mapping to an iterable of candidate values.
        gamma_free_kernels: Kernels for which only the first ``gamma`` candidate
            is tried. NOTE(review): scikit-learn's ``'poly'`` kernel does use
            ``gamma``; it is part of the default only to keep the existing
            (cheaper) search. Pass ``('linear',)`` to search ``gamma`` for
            ``'poly'`` as well.

    Returns:
        tuple: ``(best_params, best_model, best_score, grid_search_results)``
        where ``grid_search_results`` is a list of ``(hyperparams, score)`` pairs
        in evaluation order. For an empty grid this is ``(None, None, 0, [])``.

    Raises:
        KeyError: if ``param_grid`` lacks one of the three expected keys.
    """
    grid_search_results = []

    best_score = 0
    best_params = None
    best_model = None

    for C in tqdm(param_grid['C']):
        for kernel in param_grid['kernel']:
            for gamma in param_grid['gamma']:
                hyperparams = {'C': C, 'kernel': kernel, 'gamma': gamma}

                model = SVC(C=C, kernel=kernel, gamma=gamma)
                model.fit(X_train, y_train)

                y_val_pred = model.predict(X_valid)
                score = accuracy_score(y_valid, y_val_pred)
                # Log a copy so the log entry never aliases `best_params`.
                grid_search_results.append((dict(hyperparams), score))

                # `best_model is None` guarantees a model is returned even when
                # every configuration scores 0 on the validation split.
                if best_model is None or score > best_score:
                    best_score = score
                    best_params = hyperparams
                    best_model = model

                # One fit per (C, kernel) is enough for these kernels; the logged
                # 'gamma' is then simply the first candidate of the grid.
                if kernel in gamma_free_kernels:
                    break
                print(f'Curr Valid Accuracy: {score}')

    print('='*100)
    print("Best hyperparameters:", best_params)
    print("Best validation accuracy:", best_score)
    return best_params, best_model, best_score, grid_search_results

In [None]:
# Candidate values explored by the SVM grid search.
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'kernel': ['poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 0.001, 0.01, 0.1],
}

print('Classifier type: SVM')

# Collapse each loader into plain (features, labels) NumPy arrays.
X_train, y_train = get_data(train_loader)
X_test, y_test = get_data(test_loader)
X_valid, y_valid = get_data(valid_loader)

# Model selection uses the validation split only.
(svm_best_hyperparams,
 svm_best_model,
 svm_best_valid_score,
 svm_grid_search_logs_SVM) = grid_search_SVMs(
    X_train=X_train,
    y_train=y_train,
    X_valid=X_valid,
    y_valid=y_valid,
    param_grid=param_grid,
)

# The test split is evaluated once, with the selected model.
y_pred = svm_best_model.predict(X_test)
score = accuracy_score(y_test, y_pred)
print('test accuracy:', score)

Classifier type: SVM


  0%|          | 0/5 [00:00<?, ?it/s]