In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard
import datetime

from other import *

# Seed NumPy's global random generator so the np.random draws made by later
# cells are repeatable when the notebook is re-run top to bottom in the same order.
np.random.seed(1024)

In [2]:
class Task:
    """A task placed at a grid position that carries a randomly drawn reward.

    Attributes:
        name: Constant label, always "Task".
        x: x coordinate supplied by the caller.
        y: y coordinate supplied by the caller.
        reward: Integer drawn with np.random.randint(0, 100), i.e. in [0, 100).
    """

    def __init__(self, x, y):
        """Store the position and draw a reward from NumPy's global RNG."""
        self.name = "Task"
        self.x = x
        self.y = y
        self.reward = np.random.randint(0, 100)

    def __str__(self):
        """Return a short description containing only the position."""
        return f"Task at ({self.x}, {self.y})"

    def __repr__(self):
        """Return a description containing the position and the reward."""
        return f"Task at ({self.x}, {self.y}) with reward {self.reward}"

    def __eq__(self, other):
        """Compare by position only; the reward does not take part.

        Any object exposing ``x`` and ``y`` attributes is compared by
        coordinates. For everything else ``NotImplemented`` is returned so
        that ``task == 5`` or ``task == None`` evaluates to False instead of
        raising AttributeError.

        Note: because ``__eq__`` is defined without ``__hash__``, Python makes
        instances unhashable (they cannot be set members or dict keys).
        """
        if not (hasattr(other, "x") and hasattr(other, "y")):
            return NotImplemented
        return self.x == other.x and self.y == other.y
    
# Robot class with random position and speed initialization
class Robot:
    """A robot dropped at a random grid cell with a random speed.

    Attributes:
        x: Integer drawn from [0, grid_size[0]).
        y: Integer drawn from [0, grid_size[1]).
        speed: Float drawn uniformly from [0.5, 1.0).
    """

    def __init__(self, grid_size):
        """Draw position and speed from NumPy's global RNG.

        Args:
            grid_size: Indexable pair (extent along x, extent along y).
        """
        # Draw order (x, then y, then speed) determines which random numbers
        # each attribute receives under a fixed seed.
        x_extent = grid_size[0]
        self.x = np.random.randint(0, x_extent)
        y_extent = grid_size[1]
        self.y = np.random.randint(0, y_extent)
        self.speed = np.random.uniform(low=0.5, high=1.0)

    def __repr__(self):
        """Return the position and the speed rounded to two decimals."""
        description = f"Robot at ({self.x}, {self.y}) with speed {self.speed:.2f}"
        return description

In [10]:
# Scenario dimensions
grid_size = (10, 10)
num_robots = 3
num_tasks = 5

# Robots choose their own random position and speed on construction.
robots = [Robot(grid_size) for _ in range(num_robots)]

# Each task gets a random (x, y) cell on the grid; its reward is drawn
# inside Task.__init__ after the two coordinates.
tasks = [
    Task(np.random.randint(0, grid_size[0]), np.random.randint(0, grid_size[1]))
    for _ in range(num_tasks)
]

rewards = [t.reward for t in tasks]

# Feature tensor of shape (num_robots, num_tasks, 2):
#   matrix[r, t, 0] -> speed of robot r
#   matrix[r, t, 1] -> Euclidean distance between robot r and task t
matrix = np.zeros((num_robots, num_tasks, 2))

for r_idx, bot in enumerate(robots):
    for t_idx, job in enumerate(tasks):
        dx = bot.x - job.x
        dy = bot.y - job.y
        matrix[r_idx, t_idx, 0] = bot.speed
        matrix[r_idx, t_idx, 1] = np.sqrt(dx**2 + dy**2)

# Spot-check: (speed, distance) for the second robot and the second task.
print(matrix[1, 1, :])

[0.58529937 7.07106781]


In [22]:
import tensorflow as tf
import datetime
import numpy as np
from tensorflow.keras.callbacks import TensorBoard
import os

# Load TensorBoard extension
%load_ext tensorboard

# Define hyperparameters
target_val = 1000  # Target value to be guessed
num_epochs = 100
steps_per_epoch = 10  # Number of guesses per epoch
learning_rate = 0.01

# Define the input space dimension A
A = 1

# Initialize the model
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(A,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), 
    loss='mean_squared_error', 
    metrics=['accuracy']
)

# Create the log directory for TensorBoard
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

# Create a file writer for logging custom scalars
file_writer = tf.summary.create_file_writer(logdir)

# Main training loop
for epoch in range(num_epochs):
    epoch_reward = 0  # Track the cumulative reward for each epoch
    
    for step in range(steps_per_epoch):
        # Generate a fixed input
        input_val = np.array([[1]], dtype=np.float32)
        
        # Get the model's guess
        guess = model(input_val, training=False)
        
        # Calculate reward as the negative of the absolute difference between guess and target_val
        reward = -abs(guess - target_val)
        
        # Accumulate the reward for logging later
        epoch_reward += reward
        
        # Create a "target" to minimize the loss
        target = np.array([[target_val]], dtype=np.float32)
        
        # Train the model to minimize the difference to the target
        model.fit(input_val, target, epochs=1, verbose=0, callbacks=[tensorboard_callback])
    
    # Calculate the average reward for the epoch
    avg_reward = epoch_reward / steps_per_epoch
    
    # Log the reward to TensorBoard
    with file_writer.as_default():
        tf.summary.scalar('Average Reward', avg_reward[0][0], step=epoch)

    # Print progress at the end of each epoch
    print(f"Epoch {epoch+1}/{num_epochs}: Last guess = {guess[0][0]:.2f}, Target = {target_val}, Avg Reward = {avg_reward[0][0]:.2f}")

# Close the file writer when training is complete
file_writer.close()

# Launch TensorBoard within the notebook
%tensorboard --logdir logs


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
Epoch 1/100: Last guess = 1.88, Target = 1000, Avg Reward = -999.13
Epoch 2/100: Last guess = 6.81, Target = 1000, Avg Reward = -995.79
Epoch 3/100: Last guess = 18.28, Target = 1000, Avg Reward = -987.60
Epoch 4/100: Last guess = 41.23, Target = 1000, Avg Reward = -970.29
Epoch 5/100: Last guess = 82.13, Target = 1000, Avg Reward = -938.06
Epoch 6/100: Last guess = 148.57, Target = 1000, Avg Reward = -883.76
Epoch 7/100: Last guess = 249.41, Target = 1000, Avg Reward = -799.23
Epoch 8/100: Last guess = 392.28, Target = 1000, Avg Reward = -675.63
Epoch 9/100: Last guess = 576.97, Target = 1000, Avg Reward = -509.23
Epoch 10/100: Last guess = 785.46, Target = 1000, Avg Reward = -308.82
Epoch 11/100: Last guess = 967.52, Target = 1000, Avg Reward = -109.28
Epoch 12/100: Last guess = 1057.57, Target = 1000, Avg Reward = -31.11
Epoch 13/100: Last guess = 1052.30, Target = 1000, Avg Reward = -60.26
Epo

KeyboardInterrupt: 