#### Testing the data loading for the transition data and fitting MLP approximators of the transition function

In [1]:
from GAME.utils.data_loaders import TransitionDataLoader
from GAME.utils.nn import LinearNeuralNet
from GAME.utils.config import config
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
import pickle
import os
import json

In [2]:
# Project configuration; every path and column name below is read from it.
config_data = config()

# Input CSV handed to TransitionDataLoader in the training cell.
file_path = os.path.join(
    config_data["output_path"],
    "11012022 3DMC Sample Collection 50 Episodes Full Explore",
    "3DMC_50_episodes_sample_data.csv",
)

# Column names of the transition dataframe: current-state features,
# next-state targets, and the column holding the action.
current_state_cols = config_data['3DMC_current_state_transition_df_col_names']
next_state_cols = config_data['3DMC_next_state_transition_df_col_names']
action_col_name = config_data['action_transition_df_col_name']

# Folder that receives the per-(action, target) params, results and pickles.
nn_folder_path = os.path.join(
    config_data["pickle_path"],
    '01072023 3DMC Transition Approx MSE',
)

In [3]:
## nn training parameters
# Hyper-parameter grid searched by GridSearchCV for every (action, target) pair.
# An earlier grid that also listed the 'sgd' solver and the 'adaptive'
# learning-rate schedule used to be assigned here and was immediately
# overwritten by this one; it never took effect and has been removed.
parameters = {
    'hidden_layer_sizes': [(8,), (10,), (20,), (8, 8), (10, 10)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['adam'],
    'learning_rate': ['constant'],
    'learning_rate_init': [0.001, 0.01, 0.1, 0.2],
    'max_iter': [2500]
}

actions = [0, 1, 2, 3, 4]
targets = next_state_cols

# Make sure the output folder exists before the (slow) grid search starts;
# otherwise the first open(..., 'w') below fails only after training is done.
os.makedirs(nn_folder_path, exist_ok=True)

for action in actions:
    # One loader per action; presumably it restricts the CSV rows to this
    # action via action_col_name -- confirm against TransitionDataLoader.
    data = TransitionDataLoader(file_path, current_state_cols, next_state_cols, action, action_col_name)
    for target in targets:
        print("Evaluating action: {}, target: {}".format(action, target))
        df_with_one_target = data.split_features_targets(target).copy(deep=True)

        # Scaling is currently disabled: the scalers are created but never
        # fitted or applied, so the networks train on the raw values.
        feature_scaler = MinMaxScaler()
        target_scaler = MinMaxScaler()

        X = df_with_one_target[data.current_state_cols]
        y = df_with_one_target[target]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=420)

        # Cross-validated search over `parameters`, scored by negative MSE.
        # NOTE(review): this estimator has no random_state, so the selected
        # parameters can differ between runs.
        mlp = MLPRegressor()
        clf = GridSearchCV(mlp, parameters, scoring='neg_mean_squared_error')
        clf.fit(X_train, y_train)

        network_params = clf.best_params_

        # Re-train a fresh model with the winning parameters and a fixed seed.
        # max_iter is taken from the selected parameters so it cannot drift
        # away from the value used during the search.
        best_mlp = MLPRegressor(
            hidden_layer_sizes=network_params['hidden_layer_sizes'],
            activation=network_params['activation'],
            learning_rate=network_params['learning_rate'],
            learning_rate_init=network_params['learning_rate_init'],
            solver=network_params['solver'],
            random_state=609,
            max_iter=network_params['max_iter'],
        )
        final_mlp = best_mlp.fit(X_train, y_train)

        # Held-out score, computed once and reused for the file and the log.
        # The reported number is 1 - MSE (not R^2), on the unscaled target.
        test_score = 1 - mean_squared_error(y_test, final_mlp.predict(X_test))

        # save crossval results and model
        nn_cv_params_filename = 'a{}--s{}--params.txt'.format(action, target)
        nn_test_results_filename = 'a{}--s{}--results.txt'.format(action, target)
        nn_model_filename = 'a{}--s{}.pickle'.format(action, target)
        with open(os.path.join(nn_folder_path, nn_cv_params_filename), 'w') as f:
            f.write(json.dumps(network_params))
        with open(os.path.join(nn_folder_path, nn_model_filename), 'wb') as f:
            pickle.dump(final_mlp, f)
        with open(os.path.join(nn_folder_path, nn_test_results_filename), 'w') as f:
            f.write('Test results: {}'.format(test_score))

        print(network_params)
        print('Test results: {}'.format(test_score))

Evaluating action: 0, target: Next-x_position
{'activation': 'relu', 'hidden_layer_sizes': (8, 8), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 2500, 'solver': 'adam'}
Test results: 0.999950980536314
Evaluating action: 0, target: Next-x_velocity
{'activation': 'relu', 'hidden_layer_sizes': (10,), 'learning_rate': 'constant', 'learning_rate_init': 0.1, 'max_iter': 2500, 'solver': 'adam'}
Test results: 0.9999969438934888
Evaluating action: 0, target: Next-y_position
{'activation': 'relu', 'hidden_layer_sizes': (8, 8), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 2500, 'solver': 'adam'}
Test results: 0.9999964282275815
Evaluating action: 0, target: Next-y_velocity
{'activation': 'relu', 'hidden_layer_sizes': (10,), 'learning_rate': 'constant', 'learning_rate_init': 0.1, 'max_iter': 2500, 'solver': 'adam'}
Test results: 0.999580084089596
Evaluating action: 1, target: Next-x_position
{'activation': 'relu', 'hidden_layer_sizes': (20,), 'learnin