In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import os
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# 1. Define Neural Network

In [3]:
# Define the neural network model
class NeuralNetwork(nn.Module):
    def __init__(self, input_size=3, output_size=1):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)  # Input size: 3 (3D float input), Output size: 64
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 32)
        self.fc6 = nn.Linear(32, output_size)  # Output size: 1 (1D float output)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = torch.relu(self.fc5(x))
        x = self.fc6(x)
        return x


# 2. Prepare Training Dataset

In [4]:
# Import dataset
file_path = 'state_action_data1.csv'

if os.path.isfile(file_path):
    df = pd.read_csv(file_path)
else:
    print(f"File '{file_path}' not found.")

original_State = np.array([np.array(state.strip("[]").split(), dtype=float) for state in df.iloc[:,0]])
original_Action = np.array([float(action.strip("[]")) for action in df.iloc[:,1]])
original_NextState = np.array([np.array(state.strip("[]").split(), dtype=float) for state in df.iloc[:,2]])
X = torch.tensor(original_State, dtype=torch.float32)
y = torch.tensor(original_Action, dtype=torch.float32)
df.head()

Unnamed: 0,State,Action,NextState
0,[-0.91937816 0.39337495 0.6090173 ],[2.],[-0.9413804 0.3373469 1.2040485]
1,[-0.9413804 0.3373469 1.2040485],[2.],[-0.9673487 0.25344923 1.7570587 ]
2,[-0.9673487 0.25344923 1.7570587 ],[2.],[-0.9896661 0.14339098 2.2471457 ]
3,[-0.9896661 0.14339098 2.2471457 ],[2.],[-0.99993783 0.01115229 2.6546888 ]
4,[-0.99993783 0.01115229 2.6546888 ],[2.],[-0.99063015 -0.13657197 2.963053 ]


# 3. Train and save Policy model 
action = model(state)

In [5]:
# Create an instance of the model
model = NeuralNetwork(3,1)
model.load_state_dict(torch.load('model_1_NN.pth'))
# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop 10000 times
for epoch in range(10):
    # Zero the gradients
    optimizer.zero_grad()

    # Forward pass
    y_ = model(X)

    # Calculate loss
    loss = criterion(y_.squeeze(), y)

    # Backward pass and update weights
    loss.backward()
    optimizer.step()

    # Print the loss at every 100 epochs
    if (epoch + 1) % 50 == 0:
        print(f'Epoch [{epoch+1}/10000], Loss: {loss.item():.4f}')

torch.save(model.state_dict(), 'model_1_NN.pth')
print("Model saved successfully!")

Model saved successfully!


# 4. Reload and test model

In [6]:
# Load the saved model
loaded_model = NeuralNetwork()
loaded_model.load_state_dict(torch.load('model_1_NN.pth'))
criterion = nn.MSELoss()
print("Model loaded successfully!")

test_file_path = 'state_action_data2.csv'

if os.path.isfile(test_file_path):
    df = pd.read_csv(test_file_path)
else:
    print(f"File '{test_file_path}' not found.")
X_test = torch.tensor(np.array([np.array(state.strip("[]").split(), dtype=float) for state in df.iloc[:,0]]), dtype=torch.float32)
y_test = np.array([float(action.strip("[]")) for action in df.iloc[:,1]])

# Use the loaded model to make predictions
y_pred = loaded_model(X_test)
test_loss = criterion(y_pred.squeeze(), torch.tensor(y_test, dtype=torch.float32))
print(f"test loss = {test_loss.item():.4f}")
df_pred = pd.DataFrame(y_pred.detach().numpy(), columns=["Predicted"])
df_true = pd.DataFrame(y_test, columns=['True'])
pd.concat([df_pred, df_true], axis=1)

Model loaded successfully!
test loss = 0.0454


Unnamed: 0,Predicted,True
0,1.832116,1.932680
1,1.880634,1.959677
2,1.846952,1.939406
3,1.478491,1.673984
4,-0.161963,0.037763
...,...,...
3251,0.486162,0.564635
3252,-0.576355,-0.529998
3253,0.484327,0.559431
3254,-0.572707,-0.525331


# 5. Test with gym 

In [5]:
import gymnasium as gym
env = gym.make("Pendulum-v1", render_mode="human")

# Load the saved model
model = NeuralNetwork()
model.load_state_dict(torch.load('model_1_NN.pth'))
print("Model loaded successfully!")

for episode in range(5):
    obs = env.reset()[0]
    done = False
    count = 10
    steps = 0
    while not done:
        steps +=1
        action = model(torch.tensor(obs)).detach().numpy()
        env.render()
        obs, reward, done, info, _ = env.step(action)
        if 1 - abs(obs[0]) < 0.001 and abs(action[0]) < 0.6:
            count -=1
            if count <0:
                done = True
                print(episode+1, " steps: ", steps)
env.close()

Model loaded successfully!
1  steps:  12
2  steps:  58
3  steps:  73
4  steps:  75
5  steps:  64


# 6. Dynamic Model
next_state = dynamic_model(state, action)

train with which action, predicted or true label?

### Train with action from dataset
(state[0], state[1], state[2], action)

In [19]:
S_A

tensor([[ 2.7373e+00,  6.0902e-01,  2.0000e+00],
        [ 2.7975e+00,  1.2040e+00,  2.0000e+00],
        [ 2.8853e+00,  1.7571e+00,  2.0000e+00],
        ...,
        [ 4.2568e-03, -4.0410e-02,  5.6147e-01],
        [ 2.2361e-03,  4.0619e-02, -5.2716e-01],
        [ 4.2285e-03, -4.0122e-02,  5.5691e-01]])

In [22]:
# Create an instance of the model
dynamic_model = NeuralNetwork(3,2)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(dynamic_model.parameters(), lr=0.01)

# Prepare dataset
S_A = torch.tensor(np.hstack((np.arccos(original_State[:,0]).reshape(-1, 1), original_State[:,2].reshape(-1, 1), original_Action.reshape(-1, 1))), dtype=torch.float32)
NextS = torch.tensor(np.hstack((np.arccos(original_NextState[:,0]).reshape(-1, 1), original_NextState[:,2].reshape(-1, 1))), dtype=torch.float32)

# dynamic_model.load_state_dict(torch.load('model_2_NN_32.pth'))
# Training loop 4000 times
for epoch in range(2000):
    # Zero the gradients
    optimizer.zero_grad()

    # Forward pass
    NextS_pred = dynamic_model(S_A)

    # Calculate loss
    loss = criterion(NextS_pred.squeeze(), NextS)

    # Backward pass and update weights
    loss.backward()
    optimizer.step()

    # Print the loss at every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/1000], Loss: {loss.item():.4f}')

torch.save(dynamic_model.state_dict(), 'model_2_NN_32.pth')
print("Model saved successfully!")

Epoch [100/1000], Loss: 1.9450
Epoch [200/1000], Loss: 0.5385
Epoch [300/1000], Loss: 0.2117
Epoch [400/1000], Loss: 0.1757
Epoch [500/1000], Loss: 0.1466
Epoch [600/1000], Loss: 0.1170
Epoch [700/1000], Loss: 0.0977
Epoch [800/1000], Loss: 0.0816
Epoch [900/1000], Loss: 0.0694
Epoch [1000/1000], Loss: 0.0614
Epoch [1100/1000], Loss: 0.0568
Epoch [1200/1000], Loss: 0.0545
Epoch [1300/1000], Loss: 0.0531
Epoch [1400/1000], Loss: 0.0522
Epoch [1500/1000], Loss: 0.0515
Epoch [1600/1000], Loss: 0.0510
Epoch [1700/1000], Loss: 0.0504
Epoch [1800/1000], Loss: 0.0500
Epoch [1900/1000], Loss: 0.0496
Epoch [2000/1000], Loss: 0.0491
Model saved successfully!


In [24]:
# # Create an instance of the model
# dynamic_model = NeuralNetwork(4,3)

# # Define the loss function and optimizer
# criterion = nn.MSELoss()
# optimizer = optim.SGD(dynamic_model.parameters(), lr=0.01)

# # Prepare dataset
# S_A = torch.tensor(np.hstack((original_State, original_Action.reshape(-1, 1))), dtype=torch.float32)
# NextS = torch.tensor(original_NextState, dtype=torch.float32)

# dynamic_model.load_state_dict(torch.load('model_2_NN.pth'))
# # Training loop 4000 times
# for epoch in range(2000):
#     # Zero the gradients
#     optimizer.zero_grad()

#     # Forward pass
#     NextS_pred = dynamic_model(S_A)

#     # Calculate loss
#     loss = criterion(NextS_pred.squeeze(), NextS)

#     # Backward pass and update weights
#     loss.backward()
#     optimizer.step()

#     # Print the loss at every 100 epochs
#     if (epoch + 1) % 100 == 0:
#         print(f'Epoch [{epoch+1}/1000], Loss: {loss.item():.4f}')

# torch.save(dynamic_model.state_dict(), 'model_2_NN.pth')
# print("Model saved successfully!")

# 7. True Dynamic Test

In [27]:
test_file_path = 'state_action_data2.csv'

if os.path.isfile(test_file_path):
    df_test = pd.read_csv(test_file_path)
else:
    print(f"File '{test_file_path}' not found.")

test_State = np.array([np.array(state.strip("[]").split(), dtype=float) for state in df_test.iloc[:,0]])
test_Action = np.array([float(action.strip("[]")) for action in df_test.iloc[:,1]])
test_NextState = np.array([np.array(state.strip("[]").split(), dtype=float) for state in df_test.iloc[:,2]])

test_S_A = torch.tensor(np.hstack((test_State, test_Action.reshape(-1, 1))), dtype=torch.float32)
test_NextS = torch.tensor(test_NextState, dtype=torch.float32)

# Use the loaded model to make predictions
NextS_pred = dynamic_model(test_S_A)
test_loss = criterion(NextS_pred.squeeze(), test_NextS)
print(f"test loss = {test_loss.item():.4f}")
df_pred = pd.DataFrame(NextS_pred.detach().numpy(), columns=["Pred_theta", "Pred_angle", "Pred_velocity"])
df_true = pd.DataFrame(test_NextState, columns=['True_theta', 'True_angle', "True_velocity"])
pd.concat([df_pred[40:60], df_true[40:60]], axis=1)

test loss = 0.0016


Unnamed: 0,Pred_theta,Pred_angle,Pred_velocity,True_theta,True_angle,True_velocity
40,1.023956,0.034546,-0.118506,0.999911,0.013311,-0.12625
41,1.010205,0.025549,-0.059558,0.999938,0.011093,-0.044367
42,1.022882,0.03327,-0.074066,0.999976,0.006876,-0.084346
43,1.012388,0.028628,-0.032532,0.999983,0.005903,-0.019444
44,1.020508,0.030184,-0.048751,0.999996,0.002874,-0.06059
45,1.012158,0.029413,-0.013555,0.999996,0.002721,-0.003049
46,1.018574,0.027358,-0.035625,1.0,0.000365,-0.047123
47,1.011081,0.027888,-0.002292,1.0,0.000773,0.008146
48,1.016251,0.024556,-0.028893,0.999999,-0.001217,-0.039786
49,1.009668,0.025391,0.004304,1.0,-0.000425,0.015844


# 8. True Dynamic Test with gym

In [48]:
import gymnasium as gym
env = gym.make("Pendulum-v1", render_mode="human")

# Load the saved model
model = NeuralNetwork(3,1)
model.load_state_dict(torch.load('model_1_NN.pth'))
dynamic_model = NeuralNetwork(3,2)
dynamic_model.load_state_dict(torch.load('model_2_NN_32.pth'))
print("Model loaded successfully!")


for episode in range(5):
    obs = env.reset()[0]
    done = False
    count = 10
    steps = 0
    while not done:
        steps +=1
        state = torch.tensor(obs)
        action = model(state)
        print(torch.cat((np.arccos(state[0]), state[2], action), dim=0))
        NextS_pred = dynamic_model(torch.cat((np.arccos(state[0]), state[2], action)))
        env.render()
        obs, reward, done, info, _ = env.step(action.detach().numpy())
        print(NextS_pred.detach().numpy(), obs)
        if 1 - abs(obs[0]) < 0.001 and abs(action[0]) < 0.6:
            count -=1
            if count <0:
                done = True
                print(episode+1, " steps: ", steps)
env.close()


Model loaded successfully!


RuntimeError: zero-dimensional tensor (at position 0) cannot be concatenated

In [8]:
# import gymnasium as gym
# env = gym.make("Pendulum-v1", render_mode="human")

# # Load the saved model
# model = NeuralNetwork(3,1)
# model.load_state_dict(torch.load('model_1_NN.pth'))
# dynamic_model = NeuralNetwork(4,3)
# dynamic_model.load_state_dict(torch.load('model_2_NN.pth'))
# print("Model loaded successfully!")


# for episode in range(5):
#     obs = env.reset()[0]
#     done = False
#     count = 10
#     steps = 0
#     while not done:
#         steps +=1
#         state = torch.tensor(obs)
#         action = model(state)
#         NextS_pred = dynamic_model(torch.cat((state, action)))
#         env.render()
#         obs, reward, done, info, _ = env.step(action.detach().numpy())
#         print(NextS_pred.detach().numpy(), obs)
#         if 1 - abs(obs[0]) < 0.001 and abs(action[0]) < 0.6:
#             count -=1
#             if count <0:
#                 done = True
#                 print(episode+1, " steps: ", steps)
# env.close()


Model loaded successfully!
[ 0.78451496 -0.11818154  0.6339282 ] [ 0.8967053 -0.442628   0.5845185]
[ 0.77197534 -0.10196419  0.5594414 ] [ 0.9078484  -0.41929856  0.5170948 ]
[ 0.76991975 -0.09283645  0.5091233 ] [ 0.9174498  -0.39785165  0.4699707 ]
[ 0.7731351  -0.08817834  0.47723177] [ 0.92596585 -0.3776073   0.43926203]
[ 0.7800803  -0.08699945  0.4612484 ] [ 0.9337337  -0.35796842  0.42239374]
[ 0.79085517 -0.08908924  0.45902675] [ 0.94099706 -0.33841473  0.41719025]
[ 0.8063433  -0.09444392  0.4681111 ] [ 0.9479194  -0.3185102   0.42148545]
[ 0.8304466  -0.10396992  0.48635596] [ 0.95458    -0.29795465  0.43216348]
[ 0.8691266  -0.11863224  0.5083708 ] [ 0.9609325  -0.27678275  0.4420962 ]
[ 0.9071475  -0.12979606  0.50780606] [ 0.9668895  -0.25519532  0.44789502]
[ 0.9303831 -0.1363222  0.4942011] [ 0.9723887  -0.23336731  0.45021048]
[ 0.9528147  -0.1420021   0.47359073] [ 0.97734207 -0.21166609  0.44519624]
[ 0.9693756  -0.14471777  0.4553075 ] [ 0.98171145 -0.19037499  0.4