In [10]:
import torch
from torch.nn import Sequential, Linear, ReLU, Softmax
import numpy as np
import json

In [151]:
"""
Simple Neural Net
Initially, input is features, output is action
"""
class SimpleNet:
    def __init__(self):                 
        hidden_size = 64

        self.device = torch.device("cuda")

        self.neural_net = Sequential(
            # feature size by hidden size
            Linear(11, hidden_size),
            ReLU(),
            Linear(hidden_size, hidden_size),
            ReLU(),
            # hidden size by action size
            Linear(hidden_size, 8),
            Softmax(dim=-1)
        ).to(self.device)
        self.optimizer = torch.optim.Adam(self.neural_net.parameters(), lr=1e-3)
        self.gamma = 0.999

        self.reset()


    def reset(self):
        self.ep_feats = []
        self.ep_rewards = []
        self.ep_actions = []

    def forward(self, input_feats):
        # Take as input a featurized state, output action probs
        probs = self.neural_net(input_feats.to(self.device))
        # Randomly select action according to probs
        return probs
    
    def get_action(self, input_feats):
        probs = self.forward(input_feats)
        action = np.random.choice(probs.shape[0], p=probs.cpu().detach().numpy())
        self.ep_feats.append(input_feats)
        self.ep_actions.append(action)
        return action

    def backward(self):
        print("Computing backward pass")
        log_probs = []
        
        # Get log probs for features
        for feats in self.ep_feats[1:]:
            log_probs.append(torch.log(self.forward(feats)))

        # convert log probs and actions to tensors
        log_probs = torch.stack(log_probs).to(self.device)
        actions = torch.IntTensor(self.ep_actions).to(self.device)

        # Compute discounted returns
        disc_rets = torch.zeros(actions.shape[0])
        for i in range(1, len(self.ep_rewards)):
            disc_rets[i-1] = self.discounted_return(self.ep_rewards[i:])
        disc_rets = disc_rets.to(self.device)

        # Compute loss
        # Page 328: http://www.incompleteideas.net/book/RLbook2020.pdf
        sel_log_probs = disc_rets * log_probs[np.arange(len(self.ep_actions)), self.ep_actions]
        loss = -sel_log_probs.mean()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.reset()

    def discounted_return(self, rewards):
        return np.dot(self.gamma ** np.arange(len(rewards)), rewards)

In [152]:
# Handle json

def convert_to_feats(response_dict):
    """Takes a response dictionary and returns tensor of features"""
    return torch.Tensor(
        (
            response_dict['feat1'],
            response_dict['feat2'],
            response_dict['feat3'],
            response_dict['feat4'],
            response_dict['feat5'],
            response_dict['feat6'],
            response_dict['feat7'],
            response_dict['feat8'],
            response_dict['feat9'],
            response_dict['feat10'],
            response_dict['feat11']
        )
    )

def handle_response(response, net: SimpleNet):
    """Take as input a websocket response from C#, return serialized data to send back"""
    # Initialize response
    instruction = ''
    action = 0
    if response is None:
        print("Response none")
        return {}
    response_dict = json.loads(response)

    # Store the reward for the previous action. 
    # Don't worry about the first one in the array, it will be ignored
    net.ep_rewards.append(response_dict['reward'])

    # Handle forward pass
    if response_dict['instruction'] == 'forward':
        features = convert_to_feats(response_dict)
        action = net.get_action(features)
        instruction = 'step'
    elif response_dict['instruction'] == 'update_weights':
        instruction = 'reset'
        last_feats = convert_to_feats(response_dict)
        net.ep_feats.append(last_feats)
        net.backward()

    json_return_dict = {
        'instruction': instruction,
        'action': action,
    }
    
    return json_return_dict

In [154]:
import socket
from time import time


host, port = "127.0.0.1", 25001

# SOCK_STREAM means TCP socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

neural_net = SimpleNet()
request = {
    'instruction': 'init',
    'action': 0,
}

def on_receive(message):
    print(message)

try:
    # Connect to the server and send the data
    sock.connect((host, port))

    start = 0

    while(True):
        #print(request)
        request = json.dumps(request)
        sock.sendall(request.encode("utf-8"))
        response = sock.recv(1024).decode("utf-8")
        print(f"Took {time() - start:.4f} between, that's {1/(time() - start):.2f} Hz")
        start = time()
        #print(response)
        request = handle_response(response, neural_net)
finally:
    sock.close()

Took 1714165817.0446 between, that's 0.00 Hz
Took 0.0200 between, that's 50.00 Hz
Took 0.0030 between, that's 333.38 Hz
Took 0.0110 between, that's 90.91 Hz
Took 0.0020 between, that's 499.86 Hz
Took 0.0010 between, that's 997.69 Hz
Took 0.0020 between, that's 500.69 Hz
Took 0.0010 between, that's 999.83 Hz
Took 0.0010 between, that's 999.83 Hz
Took 0.0010 between, that's 1000.07 Hz
Took 0.0020 between, that's 500.04 Hz
Took 0.0010 between, that's 997.93 Hz
Took 0.0010 between, that's 1000.55 Hz
Took 0.0010 between, that's 997.93 Hz
Took 0.0010 between, that's 1001.03 Hz
Took 0.0010 between, that's 1000.07 Hz
Took 0.0010 between, that's 1000.31 Hz
Took 0.0020 between, that's 499.92 Hz
Took 0.0010 between, that's 1001.74 Hz
Took 0.0010 between, that's 999.83 Hz
Took 0.0110 between, that's 90.91 Hz
Took 0.0020 between, that's 499.62 Hz
Took 0.0020 between, that's 500.16 Hz
Took 0.0010 between, that's 1001.74 Hz
Took 0.0010 between, that's 999.12 Hz
Took 0.0010 between, that's 999.60 Hz
T