In [1]:
import gym
from gym import spaces
import numpy as np

In [2]:
# Define the equations of motion for the CRTBP environment 
class CRTBPEnv(gym.Env):
    def __init__(self, initial_condition, finial_state, mu):
        self.initial_condition = initial_condition
        self.finial_state = finial_state
        self.state = None
        if mu == None:
            self.mu = 0.012277471
        else:
            self.mu = mu
        # Define the action and observation space
        # They must be gym.spaces objects
        # Example when using discrete actions:
        self.action_space = spaces.Box(low=-.1, high=.1, shape=(2,), dtype=np.float32)
        # Example for using image as input:
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(6,), dtype=np.float32)

    def step(self, action):
        mu = self.mu
        # CRTBP equations in X-Y plane
        # X = x, y, vx, vy
        # U = mu
        # dot_X = f(X, U) = AX + BU
        # y = Cx + DU
        X = self.state
        A = np.array([[0, 0, 1, 0],
                        [0, 0, 0, 1],
                        [1-(1-mu)/((X[0]+mu)**2+X[1]**2)**(3/2)-mu/((X[0]-1+mu)**2+X[1]**2)**(3/2), 0, 0 , 2],
                        [0, 1-(1-mu)/((X[0]+mu)**2+X[1]**2)**(3/2)-mu/((X[0]-1+mu)**2+X[1]**2)**(3/2), -2, 0]])
        B = np.array([[0],
                     [0],
                     [mu*(1-mu)/((X[0]+mu)**2+X[1]**2)**(3/2)+mu*(1-mu)/((X[0]-1+mu)**2+X[1]**2)**(3/2)],
                        [0]])
        C = np.array([[1, 0, 0, 0],
                        [0, 1, 0, 0]])
        D = np.array([0, 0])
        dot_X = np.dot(A, X) + B + np.array([0, 0, action[0], action[1]])

        observation = X + dot_X

        # Calculate the reward
        reward = -np.linalg.norm(observation - self.finial_state)

        # Check if the episode is done
        done = bool(np.linalg.norm(observation - self.finial_state) < 0.001)

        info = {}
        return observation, reward, done, info

    def reset(self):
        # Reset the state of the environment to an initial state
        self.state = self.initial_condition
        return self.initial_condition

    def render(self, mode='human', close=False):
        # Render the environment to the screen
        pass

In [4]:
# create earth moon three body problem #
# initial condition #
initial_condition = np.array([0.994, 0, 0, -2.00158510637908252240537862224])
# finial state #
finial_state = np.array([0.994, 0, 0, -2.00158510637908252240537862224])
# create the environment #
env = CRTBPEnv(initial_condition, finial_state, mu = 0.012277471)
# reset the environment #
env.reset()
# take a random action #
action = env.action_space.sample()
# take a step #
observation, reward, done, info = env.step(action)
# print the observation #
print(observation)
# print the reward #
print(reward)
# print the done #
print(done)
# print the info #
print(info)
# print the action #
print(action)

[[ 9.94000000e-01 -2.00158511e+00 -4.93373545e+04 -1.99813006e+00]
 [ 9.94000000e-01 -2.00158511e+00 -4.93373545e+04 -1.99813006e+00]
 [ 4.90228598e+04  4.90198642e+04 -3.15488676e+02  4.90198677e+04]
 [ 9.94000000e-01 -2.00158511e+00 -4.93373545e+04 -1.99813006e+00]]
-120465.1703843846
False
{}
[0.03054477 0.00345505]


In [None]:
# Define the 3 body problem transfer reinforcment learning agent
class ThreeBodyProblemAgent():
    def __init__(self, env):
        self.is_discrete = \
            type(env.action_space) == gym.spaces.discrete.Discrete
        if self.is_discrete:
            self.action_size = env.action_space.n
            print("Action size:", self.action_size)
        
        else:
            self.action_low = env.action_space.low
            self.action_high = env.action_space.high
            self.action_shape = env.action_space.shape
            print("Action range:", self.action_low, self.action_high)
        
    def get_action(self, state):
        if self.is_discrete:
            action = np.random.choice(range(self.action_size))
        else:
            action = np.random.uniform(self.action_low, self.action_high, self.action_shape)
        return action
    
# create the agent #
agent = ThreeBodyProblemAgent(env)
# get the action #
action = agent.get_action(env.reset())
# print the action #
print(action)

In [None]:
# agent learning #
import numpy as np
import gym
import random
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from collections import deque

class DQNAgent:
    def __init__(self, env):
        # Initialize parameters
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.shape[0]
        self.action_low = env.action_space.low
        self.action_high = env.action_space.high
        
        # Initialize memory
        self.memory = deque(maxlen=100000)

        # Initialize discount and exploration rate
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01

        # Initialize model
        self.model = self.build_model()

    def build_model(self):
        # Build a neural network
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(48, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=0.001))
        return model

    def remember(self, state, action, reward, next_state, done):
        # Store experiences in memory
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # Choose an action based on given state
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            return np.argmax(self.model.predict(state)[0])

    def replay(self, batch_size):
        # Train the neural network with experiences in memory
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            if not done:
                target = reward + \
                         self.gamma * np.amax(self.model.predict(next_state)[0])
            else:
                target = reward
            train_target = self.model.predict(state)
            train_target[0][action] = target
            self.model.fit(state, train_target, verbose=0)
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)

    def load(self, name):
        # Load the neural network weights
        self.model.load_weights(name)

    def save(self, name):
        # Save the neural network weights
        self.model.save_weights(name)

# create the agent #
agent = DQNAgent(env)
# print the state size #
print(agent.state_size)
# print the action size #
print(agent.action_size)
# print the action low #
print(agent.action_low)
# print the action high #
print(agent.action_high)
# print the gamma #
print(agent.gamma)
# print the epsilon #
print(agent.epsilon)
# print the epsilon decay #
print(agent.epsilon_decay)
# print the epsilon min #
print(agent.epsilon_min)
# print the model #
print(agent.model)


In [None]:

# classical gym 
import gym
# instead of gym, import gymnasium 
#import gymnasium as gym

# create environment
# env=gym.make('CartPole-v1')

# select the parameters
gamma=1
# probability parameter for the epsilon-greedy approach
epsilon=0.1
# number of training episodes
# NOTE HERE THAT AFTER CERTAIN NUMBERS OF EPISODES, WHEN THE PARAMTERS ARE LEARNED
# THE EPISODE WILL BE LONG, AT THAT POINT YOU CAN STOP THE TRAINING PROCESS BY PRESSING CTRL+C
# DO NOT WORRY, THE PARAMETERS WILL BE MEMORIZED
numberEpisodes=1000

# create an object
LearningQDeep=DeepQLearning(env,0.99,1,10)

# run the learning process
LearningQDeep.trainingEpisodes()
# get the obtained rewards in every episode
LearningQDeep.sumRewardsEpisode

#  summarize the model
LearningQDeep.mainNetwork.summary()

In [None]:
import gym
import numpy as np

# Define the RL agent
class RLAgent:
    def __init__(self, env):
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.q_table = np.zeros((self.state_size, self.action_size))

    def get_action(self, state):
        return np.argmax(self.q_table[state, :])

    def train(self, num_episodes, learning_rate, discount_rate, epsilon):
        for episode in range(num_episodes):
            state = self.env.reset()
            done = False
            while not done:
                # Exploration vs. Exploitation
                if np.random.rand() <= epsilon:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_action(state)

                next_state, reward, done, _ = self.env.step(action)

                # Q-Learning update
                q_value = self.q_table[state, action]
                max_q_value = np.max(self.q_table[next_state, :])
                new_q_value = q_value + learning_rate * (reward + discount_rate * max_q_value - q_value)
                self.q_table[state, action] = new_q_value

                state = next_state

# Create the environment
# env = gym.make('CartPole-v1')

# Create the RL agent
agent = RLAgent(env)

# Set hyperparameters
num_episodes = 1000
learning_rate = 0.1
discount_rate = 0.99
epsilon = 0.1

# Train the agent
agent.train(num_episodes, learning_rate, discount_rate, epsilon)

# Test the agent
total_rewards = 0
num_test_episodes = 10
for _ in range(num_test_episodes):
    state = env.reset()
    done = False
    while not done:
        action = agent.get_action(state)
        state, reward, done, _ = env.step(action)
        total_rewards += reward

average_reward = total_rewards / num_test_episodes
print("Average reward:", average_reward)


In [5]:
import gym
import numpy as np
import tensorflow as tf

# Define the RL agent using TensorFlow
class RLAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(action_size, activation='softmax')
        ])
        # Define any additional components or parameters for your RL agent

    def get_action(self, state):
        state = np.reshape(state, [1, self.state_size])
        action_probs = self.model.predict(state)[0]
        action = np.random.choice(range(self.action_size), p=action_probs)
        return action

    def train(self, state, action, reward, next_state, done):
        state = np.reshape(state, [1, self.state_size])
        next_state = np.reshape(next_state, [1, self.state_size])
        target = reward
        if not done:
            target += np.amax(self.model.predict(next_state)[0])
        target_f = self.model.predict(state)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)

# Create the three-body problem transfer environment and RL agent
state_size = 4
action_size = 2
agent = RLAgent(state_size, action_size)

# Training loop
num_episodes = 1000
for episode in range(num_episodes):
    state = env.reset()
    done = False
    while not done:
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.train(state, action, reward, next_state, done)
        state = next_state


2023-06-23 21:38:48.641617: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




IndexError: invalid index to scalar variable.