#### Testing the data loading for the transition data

In [1]:
from GAME.utils.data_loaders import TransitionDataLoader
from GAME.utils.nn import LinearNeuralNet
from GAME.utils.config import config
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn 
import pickle
import os
import json

In [6]:
config_data = config()
file_path = config_data["output_path"] + "\\11012022 3DMC Sample Collection 50 Episodes Full Explore\\3DMC_50_episodes_sample_data.csv"
current_state_cols = config_data['3DMC_current_state_transition_df_col_names']
next_state_cols = config_data['3DMC_next_state_transition_df_col_names']
action_col_name = config_data['action_transition_df_col_name']
nn_folder_path = os.path.join(config_data["pickle_path"], 'neural_nets', 'mountain_car', '11142022 MC3D Neural Nets')

In [8]:
## nn training parameters
parameters = {
    'hidden_layer_sizes': [(8,), (10,), (20,)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'learning_rate': ['constant', 'adaptive'],
    'learning_rate_init': [0.001, 0.01, 0.1, 0.2],
    'max_iter': [2500]
}

parameters = {
    'hidden_layer_sizes': [(8,), (10,), (20,)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['adam'],
    'learning_rate': ['constant'],
    'learning_rate_init': [0.001, 0.01, 0.1, 0.2],
    'max_iter': [2500]
}

actions = [0, 1, 2, 3, 4]
targets = next_state_cols
for action in actions:
    data = TransitionDataLoader(file_path, current_state_cols, next_state_cols, action, action_col_name)
    for target in targets:
        print("Evaluating action: {}, target: {}".format(action, target))
        df_with_one_target = data.split_features_targets(target).copy(deep=True)
        feature_scaler = MinMaxScaler()
        target_scaler = MinMaxScaler()
        X = df_with_one_target[data.current_state_cols]
        y = df_with_one_target[target]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # scale
        feature_scaler.fit(X_train)
        # target_scaler.fit(y_train)
        X_train = feature_scaler.transform(X_train)
        # y_train = target_scaler.transform(y_train)
        
        mlp = MLPRegressor()
        clf = GridSearchCV(mlp, parameters)

        clf.fit(X_train, y_train)

        # param_file = nn_folder_path + 'a{}_s{}_3DMC_cv_results.txt'.format(action, 'Next_' + '_'.join(target.split('-')[1:]))
        # with open(param_file, 'r') as f:
        #     network_params = json.loads(f.readline())

        network_params = clf.best_params_

        best_mlp = MLPRegressor(hidden_layer_sizes=network_params['hidden_layer_sizes'], 
            activation=network_params['activation'], 
            learning_rate=network_params['learning_rate'], 
            learning_rate_init=network_params['learning_rate_init'], 
            solver=network_params['solver'], 
            random_state=420, 
        max_iter=2500)

        X_test = feature_scaler.transform(X_test)
        # y_test = target_scaler.transform(y_test)        

        final_mlp = best_mlp.fit(X_train, y_train)

        # save crossval results and model
        nn_cv_params_filename = 'a{}--s{}--params.txt'.format(action, target)
        nn_test_results_filename = 'a{}--s{}--results.txt'.format(action, target)
        nn_model_filename = 'a{}--s{}.pickle'.format(action, target)
        with open(os.path.join(nn_folder_path, nn_cv_params_filename), 'w') as f:
            f.write(json.dumps(network_params))
        with open(os.path.join(nn_folder_path, nn_model_filename), 'wb') as f:
            pickle.dump(final_mlp, f)
        with open(os.path.join(nn_folder_path, nn_test_results_filename), 'w') as f:
            f.write('Test results: {}'.format(final_mlp.score(X_test, y_test)))            

        print(network_params)
        print('Test results: {}'.format(final_mlp.score(X_test, y_test)))

Evaluating action: 0, target: Next-x_position
{'activation': 'relu', 'hidden_layer_sizes': (10,), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 2500, 'solver': 'adam'}
Test results: 0.9999068011651299
Evaluating action: 0, target: Next-x_velocity
{'activation': 'logistic', 'hidden_layer_sizes': (10,), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 2500, 'solver': 'adam'}
Test results: 0.9896835378278072
Evaluating action: 0, target: Next-y_position
{'activation': 'relu', 'hidden_layer_sizes': (8,), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 2500, 'solver': 'adam'}
Test results: 0.999993599878603
Evaluating action: 0, target: Next-y_velocity
{'activation': 'relu', 'hidden_layer_sizes': (20,), 'learning_rate': 'constant', 'learning_rate_init': 0.1, 'max_iter': 2500, 'solver': 'adam'}
Test results: 0.999172266798462
Evaluating action: 1, target: Next-x_position
{'activation': 'relu', 'hidden_layer_sizes': (10,), 'learn

#### Splitting data into train and validation sets

In [5]:
X = df_with_one_target[data.current_state_cols]
y = df_with_one_target[target]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

#### Grid Search CV

In [6]:
parameters = {
    'hidden_layer_sizes': [(8,), (10,), (20,)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'learning_rate': ['constant', 'adaptive'],
    'learning_rate_init': [0.001, 0.01, 0.1, 0.2],
}
mlp = MLPRegressor()
clf = GridSearchCV(mlp, parameters)

In [None]:
clf.fit(X_val, y_val)

In [8]:
sorted(clf.cv_results_.keys())

['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'param_activation',
 'param_hidden_layer_sizes',
 'param_learning_rate',
 'param_learning_rate_init',
 'param_solver',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split1_test_score',
 'split2_test_score',
 'split3_test_score',
 'split4_test_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score']

In [13]:
clf.best_params_

{'activation': 'relu',
 'hidden_layer_sizes': (8,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.1,
 'solver': 'adam'}

In [15]:
best_mlp = MLPRegressor(hidden_layer_sizes=(8,), activation='relu', learning_rate='constant', learning_rate_init=0.1, solver='adam', random_state=42, max_iter=500)

In [16]:
best_mlp = best_mlp.fit(X_train, y_train)

In [17]:
best_mlp.score(X_train, y_train)

0.9981000749879521

In [18]:
device = 'cpu'
model = LinearNeuralNet(4, 8, 1).to(device)

In [21]:
# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

In [None]:
n_total_steps = len(X_train)
num_epochs = 500
i = 0
for epoch in range(num_epochs):
    for index, row in X_train.iterrows():  
        # origin shape: [100, 1, 28, 28]
        # resized: [100, 784]
        input = np.array(row)
        labels = y_train[index]
        # Forward pass
        outputs = model(input)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step[{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
        i += 1

In [34]:
X_train

Unnamed: 0,Current_x_position,Current_x_velocity,Current_y_position,Current_y_velocity
13534,-0.446652,0.005859,-0.345691,0.011334
15767,-0.760550,-0.007376,-0.361183,-0.023958
15189,-1.083762,0.022334,-0.333265,0.007612
4810,-0.485702,0.021343,-0.497159,-0.025896
7475,-0.434994,-0.015725,-0.273362,-0.005274
...,...,...,...,...
6265,-0.136792,0.005597,-0.415978,0.021700
11284,-0.920653,-0.026118,-0.804433,0.020909
38158,-0.202016,-0.020679,-0.503067,0.041462
860,-0.885014,-0.012493,-0.765107,0.008429


In [35]:
best_mlp.predict(X_train)

array([-0.42785933, -0.75584796, -1.05087529, ..., -0.20736411,
       -0.88447452, -0.37080822])

In [36]:
y_train

13534   -0.442364
15767   -0.767295
15189   -1.059943
4810    -0.465643
7475    -0.452376
           ...   
6265    -0.134488
11284   -0.945450
38158   -0.225750
860     -0.896298
15795   -0.387359
Name: Next_x_position, Length: 35457, dtype: float64

In [37]:
import pickle

In [38]:
pickle.dump(best_mlp, open('test.pickle', 'wb'))

In [39]:
best_mlp_2 = pickle.load(open('test.pickle', 'rb'))

In [40]:
best_mlp_2.score(X_train, y_train)

0.9981000749879521

#### EDA on 4v3 transition data

In [12]:
import pandas as pd
from GAME.utils.config import config
from GAME.utils.data_loaders import TransitionDataLoader
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [3]:
config_data = config()

In [13]:
file_path = os.path.join(config_data['output_path'], '11102022 4v3 6x350 eps random', 'keepaway_4v3_transitions_v3.csv')
current_state_cols = config_data['4v3_current_state_transition_df_col_names']
next_state_cols = config_data['4v3_next_state_transition_df_col_names']
action_col_name = config_data['action_transition_df_col_name']
data = TransitionDataLoader(file_path, current_state_cols, next_state_cols, 0, action_col_name)
target = next_state_cols[0]
df_with_one_target = data.split_features_targets(target)

In [None]:
df_with_one_target.isna().sum()

In [15]:
df_with_one_target.head()

Unnamed: 0,"Current-dist(K1,C)","Current-dist(K1,K2)","Current-dist(K1,K3)","Current-dist(K1,K4)","Current-dist(K1,T1)","Current-dist(K1,T2)","Current-dist(K1,T3)","Current-dist(K2,C)","Current-dist(K3,C)","Current-dist(K4,C)","Current-dist(T1,C)","Current-dist(T2,C)","Current-dist(T3,C)","Current-Min(dist(K2,T1),dist(K2,T2),dist(K2,T3))","Current-Min(dist(K3,T1),dist(K3,T2),dist(K3,T3))","Current-Min(dist(K4,T1),dist(K4,T2),dist(K4,T3))","Current-Min(ang(K2,K1,T1),ang(K2,K1,T2),ang(K2,K1,T3))","Current-Min(ang(K3,K1,T1),ang(K3,K1,T2),ang(K3,K1,T3))","Current-Min(ang(K4,K1,T1),ang(K4,K1,T2),ang(K4,K1,T3))","Next-dist(K1,C)"
0,40.499188,47.057832,78.062421,188.980056,26.100533,55.191169,59.433482,32.844181,50.380351,55.452001,2.100599,12.423837,17.746259,5.946075,40.761488,74.65587,18.08358,45.388051,1.341811,40.513728
1,134.349283,74.720919,274.320161,549.648801,138.73627,196.306423,307.626997,58.903482,145.255928,140.566367,34.065994,39.360529,36.62551,145.062454,217.586078,36.542561,45.942785,51.160952,3.652071,133.319148
2,66.49505,42.181856,100.067254,332.308526,103.295725,112.512884,146.042278,132.697771,145.341098,122.583639,25.868321,6.774272,32.598323,107.689767,165.417905,73.716001,73.367512,77.159517,16.339526,66.404321
3,107.601919,105.659774,204.917707,405.426306,94.034927,108.740057,187.78209,108.312375,62.127871,102.645594,27.260141,29.576257,13.967598,121.40039,35.331293,41.938324,52.360545,24.370342,2.603832,106.06262
4,76.964096,89.29393,97.929444,274.869856,37.507035,98.745222,99.68137,13.290491,3.750926,63.289137,9.974706,7.438985,1.95872,1.848765,0.891221,50.239185,7.510336,5.419946,6.188784,77.225185


In [20]:
trans = StandardScaler()
X = df_with_one_target[data.current_state_cols]
y = df_with_one_target[target]
X = trans.fit_transform(X)
X

array([[-0.8477702 , -0.74911748, -1.32032927, ..., -0.88962398,
        -0.28091109, -1.33480264],
       [ 1.29114884, -0.2132291 ,  1.28670531, ..., -0.17906845,
        -0.12828506, -1.23482449],
       [-0.25530363, -0.84357472, -1.02802304, ...,  0.52040557,
         0.55907436, -0.68576604],
       ...,
       [ 0.52279744,  0.05748036, -1.07813201, ..., -1.31841241,
         0.09436401,  0.80781465],
       [-0.97749439, -1.02435688, -0.55410279, ...,  0.23811576,
         2.01010957,  1.54877008],
       [-1.01498041, -1.2177336 , -0.45738086, ...,  0.14892727,
         2.16858255,  1.80897057]])