In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import os
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# 1. Define Neural Network

In [2]:
# Define the neural network model
class NeuralNetwork(nn.Module):
    """Fully connected network with five ReLU hidden layers and a linear output.

    Layer widths are ``input_size -> 64 -> 128 -> 64 -> 32 -> 32 -> output_size``.
    The layers are registered as attributes ``fc1`` .. ``fc6``, which fixes the
    key names used in the module's ``state_dict``.

    Args:
        input_size: number of features in each input vector (default 3).
        output_size: number of values produced per input vector (default 1).
    """

    def __init__(self, input_size=3, output_size=1):
        super().__init__()
        widths = (input_size, 64, 128, 64, 32, 32, output_size)
        # Register fc1..fc6 in order; each layer maps one width to the next.
        for index, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fc{index}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Apply the five hidden layers with ReLU, then the linear output layer."""
        hidden_layers = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5)
        activation = x
        for layer in hidden_layers:
            activation = torch.relu(layer(activation))
        return self.fc6(activation)


# 2. Prepare Training Dataset

In [4]:
# Import dataset
file_path = 's_a_random.csv'

# Stop with a clear error when the dataset is missing. Only printing a message
# would let the parsing below fail with an unrelated NameError on `df`.
if not os.path.isfile(file_path):
    raise FileNotFoundError(f"File '{file_path}' not found.")
df = pd.read_csv(file_path)

# Every cell holds a vector serialised as text, e.g. "[ 0.95 0.28 -0.69 ]":
# strip the brackets and split on whitespace to recover the floats.
# Columns are read by position: 0 = state, 1 = action, 2 = next state.
original_State = np.array([np.array(state.strip("[]").split(), dtype=float) for state in df.iloc[:, 0]])
original_Action = np.array([float(action.strip("[]")) for action in df.iloc[:, 1]])
original_NextState = np.array([np.array(state.strip("[]").split(), dtype=float) for state in df.iloc[:, 2]])

# Policy-training tensors: states are the inputs, actions are the targets.
X = torch.tensor(original_State, dtype=torch.float32)
y = torch.tensor(original_Action, dtype=torch.float32)
df.head()

Unnamed: 0,State,Action,NextState
0,[ 0.95710313 0.2897474 -0.6954342 ],[1.7224684],[ 0.960229 0.2792138 -0.21975343]
1,[-0.73346454 0.67972773 -0.45208222],[-0.01455253],[-0.735349 0.6776886 0.05553071]
2,[-0.16427568 -0.9864145 -0.22078486],[0.7562059],[-0.20589861 -0.9785733 -0.8471648 ]
3,[0.9596829 0.28108495 0.37179363],[1.3267918],[0.9479677 0.31836644 0.7816261 ]
4,[-0.9994136 -0.03424074 0.7779218 ],[1.867581],[-0.9963158 -0.08576088 1.0323783 ]


# 3. Train and save Policy model 
The policy network maps a state to an action: `action = model(state)`.

In [None]:
# Create an instance of the model
model = NeuralNetwork(3, 1)

# Resume from an earlier checkpoint when one exists. On a first run the file is
# not there yet, so training starts from the freshly initialised weights.
checkpoint_path = 'model_1_NN.pth'
if os.path.isfile(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path))

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Full-batch training. `num_epochs` drives both the loop and the progress
# message so the reported total can never drift from the real one.
num_epochs = 10
log_every = 50
for epoch in range(num_epochs):
    # Zero the gradients
    optimizer.zero_grad()

    # Forward pass
    y_ = model(X)

    # Calculate loss; squeeze(-1) drops only the trailing output dimension so
    # the prediction lines up with the 1-D target even for a single-row batch.
    loss = criterion(y_.squeeze(-1), y)

    # Backward pass and update weights
    loss.backward()
    optimizer.step()

    # Report every `log_every` epochs and always after the final epoch, so
    # short runs still show the loss they ended on.
    if (epoch + 1) % log_every == 0 or epoch + 1 == num_epochs:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

torch.save(model.state_dict(), checkpoint_path)
print("Model saved successfully!")

# 4. Reload and test model

In [4]:
# Load the saved model
loaded_model = NeuralNetwork()
loaded_model.load_state_dict(torch.load('model_1_NN.pth'))
loaded_model.eval()  # this cell only runs inference
criterion = nn.MSELoss()
print("Model loaded successfully!")

test_file_path = 'state_action_data2.csv'

# Raise instead of printing: without the file there is no frame to parse, and
# continuing would either crash on an undefined `df` or silently reuse one left
# over from an earlier cell.
if not os.path.isfile(test_file_path):
    raise FileNotFoundError(f"File '{test_file_path}' not found.")
# NOTE: this rebinds `df`, replacing any frame an earlier cell stored there.
df = pd.read_csv(test_file_path)

# Vectors are stored as bracketed, whitespace-separated text; column 0 holds
# the states and column 1 the actions.
X_test = torch.tensor(np.array([np.array(state.strip("[]").split(), dtype=float) for state in df.iloc[:, 0]]), dtype=torch.float32)
y_test = np.array([float(action.strip("[]")) for action in df.iloc[:, 1]])

# Use the loaded model to make predictions; no gradients are needed for
# evaluation, so skip building the autograd graph.
with torch.no_grad():
    y_pred = loaded_model(X_test)
    # squeeze(-1) removes only the trailing output dimension to match y_test.
    test_loss = criterion(y_pred.squeeze(-1), torch.tensor(y_test, dtype=torch.float32))
print(f"test loss = {test_loss.item():.4f}")
df_pred = pd.DataFrame(y_pred.numpy(), columns=["Predicted"])
df_true = pd.DataFrame(y_test, columns=['True'])
pd.concat([df_pred, df_true], axis=1)

Model loaded successfully!
test loss = 0.1623


Unnamed: 0,Predicted,True
0,1.800475,1.932680
1,1.849026,1.959677
2,1.813180,1.939406
3,1.346297,1.673984
4,-0.508259,0.037763
...,...,...
3251,0.111452,0.564635
3252,-0.889309,-0.529998
3253,0.106900,0.559431
3254,-0.885603,-0.525331


# 5. Test with gym 

In [5]:
import gymnasium as gym
env = gym.make("Pendulum-v1", render_mode="human")

# Load the saved model
model = NeuralNetwork()
model.load_state_dict(torch.load('model_1_NN.pth'))
print("Model loaded successfully!")

# try/finally guarantees the render window is closed even if a step fails.
try:
    for episode in range(5):
        obs = env.reset()[0]
        done = False
        count = 10  # the hold condition below must be met 11 times to stop early
        steps = 0
        while not done:
            steps += 1
            # Inference only: no autograd graph is needed for the rollout.
            with torch.no_grad():
                action = model(torch.tensor(obs)).numpy()
            # With render_mode="human" the environment draws itself on every
            # step, so no explicit env.render() call is made here.
            # Gymnasium's step returns (obs, reward, terminated, truncated, info);
            # either flag ends the episode, which also bounds the loop when the
            # hold condition is never reached.
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            # NOTE(review): abs(obs[0]) is close to 1 for obs[0] near +1 and
            # near -1 alike -- confirm both should count as "holding".
            if 1 - abs(obs[0]) < 0.001 and abs(action[0]) < 0.6:
                count -= 1
                if count < 0:
                    done = True
                    print(episode+1, " steps: ", steps)
finally:
    env.close()

Model loaded successfully!
1  steps:  12
2  steps:  58
3  steps:  73
4  steps:  75
5  steps:  64


# 6. Dynamic Model
The dynamics model predicts the next state from the current state and action: `next_state = dynamic_model(state, action)`.

Open question: should the dynamics model be trained with the action predicted by the policy, or with the true action label from the dataset?

### Train with action from dataset
The model input is the 4-vector `(state[0], state[1], state[2], action)`.

In [7]:
# Create an instance of the model: 4 inputs (3 state values + 1 action),
# 3 outputs (the next state)
dynamic_model = NeuralNetwork(4, 3)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(dynamic_model.parameters(), lr=0.01)

# Prepare dataset: append the action as a fourth column next to each state
S_A = torch.tensor(np.hstack((original_State, original_Action.reshape(-1, 1))), dtype=torch.float32)
NextS = torch.tensor(original_NextState, dtype=torch.float32)

# Resume from an earlier checkpoint when one exists. On a first run the file is
# not there yet, so training starts from the freshly initialised weights.
dynamic_checkpoint_path = 'model_2_NN.pth'
if os.path.isfile(dynamic_checkpoint_path):
    dynamic_model.load_state_dict(torch.load(dynamic_checkpoint_path))

# Full-batch training. `num_epochs` drives both the loop and the progress
# message so the reported total always matches the real epoch count.
num_epochs = 2000
log_every = 100
for epoch in range(num_epochs):
    # Zero the gradients
    optimizer.zero_grad()

    # Forward pass
    NextS_pred = dynamic_model(S_A)

    # Calculate loss; prediction and target are both built with three columns
    # per row, so no reshaping is needed.
    loss = criterion(NextS_pred, NextS)

    # Backward pass and update weights
    loss.backward()
    optimizer.step()

    # Report every `log_every` epochs and always after the final epoch
    if (epoch + 1) % log_every == 0 or epoch + 1 == num_epochs:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

torch.save(dynamic_model.state_dict(), dynamic_checkpoint_path)
print("Model saved successfully!")

Epoch [100/1000], Loss: 0.0036
Epoch [200/1000], Loss: 0.0033
Epoch [300/1000], Loss: 0.0031
Epoch [400/1000], Loss: 0.0029
Epoch [500/1000], Loss: 0.0028
Epoch [600/1000], Loss: 0.0026
Epoch [700/1000], Loss: 0.0025
Epoch [800/1000], Loss: 0.0024
Epoch [900/1000], Loss: 0.0023
Epoch [1000/1000], Loss: 0.0022
Epoch [1100/1000], Loss: 0.0021
Epoch [1200/1000], Loss: 0.0020
Epoch [1300/1000], Loss: 0.0019
Epoch [1400/1000], Loss: 0.0018
Epoch [1500/1000], Loss: 0.0018
Epoch [1600/1000], Loss: 0.0017
Epoch [1700/1000], Loss: 0.0016
Epoch [1800/1000], Loss: 0.0016
Epoch [1900/1000], Loss: 0.0015
Epoch [2000/1000], Loss: 0.0015
Model saved successfully!


# 7. True Dynamic Test

In [8]:
test_file_path = 'state_action_data2.csv'

# Raise instead of printing: without the file there is no `df_test` to parse,
# and the cell would otherwise fail later with an unrelated NameError.
if not os.path.isfile(test_file_path):
    raise FileNotFoundError(f"File '{test_file_path}' not found.")
df_test = pd.read_csv(test_file_path)

# Vectors are stored as bracketed, whitespace-separated text; columns are read
# by position: 0 = state, 1 = action, 2 = next state.
test_State = np.array([np.array(state.strip("[]").split(), dtype=float) for state in df_test.iloc[:, 0]])
test_Action = np.array([float(action.strip("[]")) for action in df_test.iloc[:, 1]])
test_NextState = np.array([np.array(state.strip("[]").split(), dtype=float) for state in df_test.iloc[:, 2]])

# Model input is the state with the action appended as a fourth column.
test_S_A = torch.tensor(np.hstack((test_State, test_Action.reshape(-1, 1))), dtype=torch.float32)
test_NextS = torch.tensor(test_NextState, dtype=torch.float32)

# Use the in-memory dynamics model to make predictions. `dynamic_model` and
# `criterion` are not created in this cell and must already be defined.
# Evaluation needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    NextS_pred = dynamic_model(test_S_A)
    test_loss = criterion(NextS_pred, test_NextS)
print(f"test loss = {test_loss.item():.4f}")
# NOTE(review): the first two columns look like cos(theta) and sin(theta)
# rather than "theta" and "angle" -- confirm before relabelling.
df_pred = pd.DataFrame(NextS_pred.numpy(), columns=["Pred_theta", "Pred_angle", "Pred_velocity"])
df_true = pd.DataFrame(test_NextState, columns=['True_theta', 'True_angle', "True_velocity"])
pd.concat([df_pred[40:60], df_true[40:60]], axis=1)

test loss = 0.0019


Unnamed: 0,Pred_theta,Pred_angle,Pred_velocity,True_theta,True_angle,True_velocity
40,1.027967,0.031741,-0.125057,0.999911,0.013311,-0.12625
41,1.014144,0.025954,-0.065739,0.999938,0.011093,-0.044367
42,1.026677,0.030941,-0.080394,0.999976,0.006876,-0.084346
43,1.016484,0.028903,-0.039143,0.999983,0.005903,-0.019444
44,1.024203,0.027891,-0.055147,0.999996,0.002874,-0.06059
45,1.016186,0.029528,-0.020115,0.999996,0.002721,-0.003049
46,1.022206,0.025058,-0.041962,1.0,0.000365,-0.047123
47,1.015077,0.027996,-0.008795,1.0,0.000773,0.008146
48,1.019843,0.022222,-0.035367,0.999999,-0.001217,-0.039786
49,1.013664,0.025561,-0.002163,1.0,-0.000425,0.015844


# 8. True Dynamic Test with gym

In [9]:
import gymnasium as gym
env = gym.make("Pendulum-v1", render_mode="human")

# Load the saved models: the policy (state -> action) and the dynamics model
# ((state, action) -> next state)
model = NeuralNetwork(3, 1)
model.load_state_dict(torch.load('model_1_NN.pth'))
dynamic_model = NeuralNetwork(4, 3)
dynamic_model.load_state_dict(torch.load('model_2_NN.pth'))
print("Model loaded successfully!")


# try/finally guarantees the render window is closed even if a step fails.
try:
    for episode in range(5):
        obs = env.reset()[0]
        done = False
        count = 10  # the hold condition below must be met 11 times to stop early
        steps = 0
        while not done:
            steps += 1
            state = torch.tensor(obs)
            # Inference only: no autograd graph is needed for the rollout.
            with torch.no_grad():
                action = model(state)
                NextS_pred = dynamic_model(torch.cat((state, action)))
            action_np = action.numpy()
            # With render_mode="human" the environment draws itself on every
            # step, so no explicit env.render() call is made here.
            # Gymnasium's step returns (obs, reward, terminated, truncated, info);
            # either flag ends the episode, which also bounds the loop when the
            # hold condition is never reached.
            obs, reward, terminated, truncated, info = env.step(action_np)
            done = terminated or truncated
            # Show the predicted next state beside the one the environment returned.
            print(NextS_pred.numpy(), obs)
            # NOTE(review): abs(obs[0]) is close to 1 for obs[0] near +1 and
            # near -1 alike -- confirm both should count as "holding".
            if 1 - abs(obs[0]) < 0.001 and abs(action_np[0]) < 0.6:
                count -= 1
                if count < 0:
                    done = True
                    print(episode+1, " steps: ", steps)
finally:
    env.close()


Model loaded successfully!
[-0.34826702 -0.941996    0.45156395] [-0.34863672 -0.93725795  0.44323868]
[-0.35065877 -0.95853823 -0.19232383] [-0.35791728 -0.9337533  -0.19840606]
[-0.36563265 -0.9190511  -1.026166  ] [-0.40531734 -0.91417605 -1.0257894 ]
[-0.5161614 -0.9131864 -2.022463 ] [-0.49490035 -0.8689497  -2.0078843 ]
[-0.64481133 -0.80529284 -2.9651868 ] [-0.6176086 -0.7864856 -2.9595659]
[-0.8109655  -0.65812105 -3.7967024 ] [-0.75462574 -0.65615547 -3.7877018 ]
[-0.9258825  -0.46678716 -4.52363   ] [-0.8826382 -0.470053  -4.52724  ]
[-0.98856527 -0.22536701 -5.1278124 ] [-0.9732347 -0.2298134 -5.149297 ]
[-0.982156    0.05401915 -5.581967  ] [-0.99880326  0.04890838 -5.6162767 ]
[-0.90317804  0.3384418  -5.8520746 ] [-0.94178104  0.33622688 -5.8795953 ]
[-0.7548757  0.6015621 -5.908253 ] [-0.8027324   0.59633934 -5.9205093 ]
[-0.5459661   0.82800096 -5.74611   ] [-0.60063255  0.7995252  -5.7514243 ]
[-0.32095915  0.9686722  -5.438033  ] [-0.36408338  0.9313664  -5.432871  ]
