#### Testing the data loading for the transition data

In [1]:
from GAME.utils.data_loaders import TransitionDataLoader
from GAME.utils.nn import LinearNeuralNet
from GAME.utils.config import config
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPRegressor
import torch
import torch.nn as nn 
import pickle
import os
import json

In [7]:
# Load the project configuration and derive every path / column list used below.
config_data = config()

# Paths are assembled with os.path.join rather than hard-coded "\\" separators,
# so the notebook also runs on non-Windows machines and does not depend on the
# configured directories ending with a separator.
file_path = os.path.join(
    config_data["output_path"],
    "11012022 3DMC Sample Collection 50 Episodes Full Explore",
    "3DMC_50_episodes_sample_data.csv",
)

# Column names of the transition table, as declared in the config.
current_state_cols = config_data['3DMC_current_state_transition_df_col_names']
next_state_cols = config_data['3DMC_next_state_transition_df_col_names']
action_col_name = config_data['action_transition_df_col_name']

# The trailing "" component keeps a trailing path separator, so code that builds
# file names with `nn_folder_path + filename` keeps working unchanged.
nn_folder_path = os.path.join(config_data["pickle_path"], "11012022 3DMC Neural Nets", "")

In [10]:
## nn training parameters
# Hyper-parameter grid, kept for reference only: no grid search is run in this
# cell. The parameters actually used are read from the per-(action, target)
# result files below.
parameters = {
    'hidden_layer_sizes': [(8,), (10,), (20,)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'learning_rate': ['constant', 'adaptive'],
    'learning_rate_init': [0.001, 0.01, 0.1, 0.2],
    'max_iter': [2500]
}

# Train and persist one single-output regressor per (action, next-state column).
actions = [0, 1, 2, 3, 4]
targets = next_state_cols
for action in actions:
    # One loader per action; the action id and its column name are handed to the
    # loader (presumably to select that action's transitions — confirm in
    # GAME.utils.data_loaders).
    data = TransitionDataLoader(file_path, current_state_cols, next_state_cols, action, action_col_name)
    for target in targets:
        print("Evaluating action: {}, target: {}".format(action, target))
        df_with_one_target = data.split_features_targets(target)
        X = df_with_one_target[data.current_state_cols]
        y = df_with_one_target[target]
        # Fixed seed so the same 80/20 split is produced on every run.
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

        # The stored file name uses underscores where the target uses dashes:
        # everything after the first '-' is re-joined with '_' and prefixed
        # with 'Next_'. The first line of that file is the JSON parameter set.
        # os.path.join is used here as well, consistent with the output paths below.
        param_file = os.path.join(
            nn_folder_path,
            'a{}_s{}_3DMC_cv_results.txt'.format(action, 'Next_' + '_'.join(target.split('-')[1:])),
        )
        with open(param_file, 'r') as f:
            network_params = json.loads(f.readline())

        # Note: the stored 'max_iter' is deliberately not used; the final fit
        # gets a larger iteration budget (5000).
        best_mlp = MLPRegressor(
            hidden_layer_sizes=network_params['hidden_layer_sizes'],
            activation=network_params['activation'],
            learning_rate=network_params['learning_rate'],
            learning_rate_init=network_params['learning_rate_init'],
            solver=network_params['solver'],
            random_state=42,
            max_iter=5000,
        )

        final_mlp = best_mlp.fit(X_train, y_train)

        # save crossval results and model
        nn_cv_results_filename = 'a{}--s{}.txt'.format(action, target)
        nn_model_filename = 'a{}--s{}.pickle'.format(action, target)
        with open(os.path.join(nn_folder_path, nn_cv_results_filename), 'w') as f:
            f.write(json.dumps(network_params))
        with open(os.path.join(nn_folder_path, nn_model_filename), 'wb') as f:
            pickle.dump(final_mlp, f)

        print(network_params)

Evaluating action: 0, target: Next-x_position
{'activation': 'relu', 'hidden_layer_sizes': [10], 'learning_rate': 'adaptive', 'learning_rate_init': 0.1, 'max_iter': 2500, 'solver': 'adam'}
Evaluating action: 0, target: Next-x_velocity
{'activation': 'logistic', 'hidden_layer_sizes': [20], 'learning_rate': 'adaptive', 'learning_rate_init': 0.01, 'max_iter': 2500, 'solver': 'adam'}
Evaluating action: 0, target: Next-y_position
{'activation': 'relu', 'hidden_layer_sizes': [10], 'learning_rate': 'constant', 'learning_rate_init': 0.2, 'max_iter': 2500, 'solver': 'adam'}
Evaluating action: 0, target: Next-y_velocity
{'activation': 'logistic', 'hidden_layer_sizes': [10], 'learning_rate': 'adaptive', 'learning_rate_init': 0.01, 'max_iter': 2500, 'solver': 'adam'}
Evaluating action: 1, target: Next-x_position
{'activation': 'relu', 'hidden_layer_sizes': [10], 'learning_rate': 'adaptive', 'learning_rate_init': 0.01, 'max_iter': 2500, 'solver': 'adam'}
Evaluating action: 1, target: Next-x_velocit

#### Splitting data into train and validation sets

In [5]:
# NOTE: `df_with_one_target`, `data` and `target` are only defined inside the
# training-loop cell in the visible notebook, so this cell works on whatever
# (action, target) pair that loop processed last. Run that cell first.
X = df_with_one_target[data.current_state_cols]
y = df_with_one_target[target]
# 80/20 train/validation split; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

#### Grid Search CV

In [6]:
# Candidate hyper-parameters for the exhaustive search:
# 3 layer sizes * 3 activations * 2 solvers * 2 schedules * 4 initial rates
# = 144 combinations, each cross-validated by GridSearchCV.
parameters = {
    'hidden_layer_sizes': [(8,), (10,), (20,)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'learning_rate': ['constant', 'adaptive'],
    'learning_rate_init': [0.001, 0.01, 0.1, 0.2],
}
# Seed the estimator (same seed as every other model in this notebook) so the
# weight initialisation, and therefore the ranking of the candidates, is
# reproducible across kernel restarts.
mlp = MLPRegressor(random_state=42)
clf = GridSearchCV(mlp, parameters)

In [None]:
# Run the grid search.
# NOTE(review): the search is fitted on the 20% validation split (X_val, y_val)
# rather than on X_train — presumably to keep the exhaustive search cheap;
# confirm this is intended.
clf.fit(X_val, y_val)

In [8]:
# List which metrics the search recorded for every parameter combination.
sorted(clf.cv_results_.keys())

['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'param_activation',
 'param_hidden_layer_sizes',
 'param_learning_rate',
 'param_learning_rate_init',
 'param_solver',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split1_test_score',
 'split2_test_score',
 'split3_test_score',
 'split4_test_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score']

In [13]:
# Parameter combination selected by the grid search.
clf.best_params_

{'activation': 'relu',
 'hidden_layer_sizes': (8,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.1,
 'solver': 'adam'}

In [15]:
# Rebuild the selected configuration by hand, one keyword per line so it can
# be compared against the grid-search result at a glance. The seed is fixed
# for repeatable weights and the iteration budget is 500.
best_mlp = MLPRegressor(
    hidden_layer_sizes=(8,),
    activation='relu',
    solver='adam',
    learning_rate='constant',
    learning_rate_init=0.1,
    max_iter=500,
    random_state=42,
)

In [16]:
# Fit the selected configuration on the training split. The result is assigned
# back to the same name, which also keeps the cell from echoing the estimator.
best_mlp = best_mlp.fit(X_train, y_train)

In [17]:
# Score on the *training* data (for a regressor, `score` is R^2). This measures
# goodness of fit only; it says nothing about performance on unseen data.
best_mlp.score(X_train, y_train)

0.9981000749879521

In [18]:
# Keep the PyTorch model on the CPU.
device = 'cpu'
# NOTE(review): the arguments are presumably (input_size, hidden_size,
# output_size) — 4 state features in, 8 hidden units (the width used for the
# sklearn model above), 1 predicted value out. Confirm against
# GAME.utils.nn.LinearNeuralNet.
model = LinearNeuralNet(4, 8, 1).to(device)

In [21]:
# Loss and optimizer: mean-squared error between prediction and target, and
# Adam over all model parameters with lr=0.1 (the same initial learning rate
# as the sklearn model above).
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

In [None]:
# Train the PyTorch model on the same training split as the sklearn MLP.
#
# Compared with the first draft of this cell:
#   * features/targets are converted to float32 tensors once, up front; the
#     model and MSELoss operate on tensors, not NumPy arrays or Python floats;
#   * the builtin name `input` is no longer shadowed;
#   * the step shown in the log restarts every epoch, so it never exceeds
#     `n_total_steps`;
#   * rows are sliced from a tensor instead of iterating `DataFrame.iterrows()`.
num_epochs = 500
# One sample per optimizer step, which matches the original per-row loop.
# Raise this for (much) faster epochs.
batch_size = 1

feature_tensor = torch.as_tensor(X_train.to_numpy(), dtype=torch.float32).to(device)
# Targets are stored as a column so each batch has the same shape as the model
# output (assumes the model maps (batch, 4) -> (batch, 1) — TODO confirm
# against LinearNeuralNet).
target_tensor = torch.as_tensor(y_train.to_numpy(), dtype=torch.float32).reshape(-1, 1).to(device)

n_samples = len(feature_tensor)
n_total_steps = -(-n_samples // batch_size)  # ceil(n_samples / batch_size)
for epoch in range(num_epochs):
    for step, start in enumerate(range(0, n_samples, batch_size)):
        inputs = feature_tensor[start:start + batch_size]
        labels = target_tensor[start:start + batch_size]
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (step + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step[{step+1}/{n_total_steps}], Loss: {loss.item():.4f}')

In [34]:
# Inspect the training features (the index keeps the original row labels).
X_train

Unnamed: 0,Current_x_position,Current_x_velocity,Current_y_position,Current_y_velocity
13534,-0.446652,0.005859,-0.345691,0.011334
15767,-0.760550,-0.007376,-0.361183,-0.023958
15189,-1.083762,0.022334,-0.333265,0.007612
4810,-0.485702,0.021343,-0.497159,-0.025896
7475,-0.434994,-0.015725,-0.273362,-0.005274
...,...,...,...,...
6265,-0.136792,0.005597,-0.415978,0.021700
11284,-0.920653,-0.026118,-0.804433,0.020909
38158,-0.202016,-0.020679,-0.503067,0.041462
860,-0.885014,-0.012493,-0.765107,0.008429


In [35]:
# Predictions of the fitted sklearn model on the training features, to compare
# by eye with the true targets in y_train.
best_mlp.predict(X_train)

array([-0.42785933, -0.75584796, -1.05087529, ..., -0.20736411,
       -0.88447452, -0.37080822])

In [36]:
# True training targets, for comparison with the predictions.
y_train

13534   -0.442364
15767   -0.767295
15189   -1.059943
4810    -0.465643
7475    -0.452376
           ...   
6265    -0.134488
11284   -0.945450
38158   -0.225750
860     -0.896298
15795   -0.387359
Name: Next_x_position, Length: 35457, dtype: float64

In [37]:
import pickle

In [38]:
# Round-trip check, step 1: serialize the fitted model to disk. The context
# manager closes (and thereby flushes) the file as soon as the dump finishes,
# instead of leaving an open handle behind.
with open('test.pickle', 'wb') as f:
    pickle.dump(best_mlp, f)

In [39]:
# Round-trip check, step 2: read the model back. The context manager closes
# the file handle once loading is done.
# Only unpickle files you created yourself: pickle.load can execute arbitrary
# code embedded in the file.
with open('test.pickle', 'rb') as f:
    best_mlp_2 = pickle.load(f)

In [40]:
# The reloaded model should give exactly the same training score as the
# in-memory `best_mlp`, confirming that pickling preserved the fitted weights.
best_mlp_2.score(X_train, y_train)

0.9981000749879521