# Version Info (V5)


Additional features / Modifications

- Split the policy into separate Insul / Vent networks, each fed its own slice of the measurement vector (separation of information)

---
V4

- Start from scratch
- Modified actions and states to exactly match the rules



---
V3

- Switch to class for NN (pretraining)
- Add further training


___

V2
- Add further training
- Check computation accuracy

___


V1
Pretraining code
- Load and train NN

- Deploy for validation

In [None]:
import numpy as np
from policygradient_tf3_dualV import PolicyGradientAgentVent as PGAV# from BP_NN import FCNN
from policygradient_tf3_dualI import PolicyGradientAgentInsul as PGAI# from BP_NN import FCNN

import matplotlib.pyplot as plt
import tensorflow as tf

# Echo server program
import socket
import random
import os
import shutil

import scipy.io as sio
from scipy import io

In [3]:
# Identifiers of the two pretrained networks; each one is also used as the
# name of its checkpoint directory, located relative to the working directory.
ID1_ = 'NN_PaperPretrain_v_V101_TE3'
ID2_ = 'NN_PaperPretrain_i_V101_TE3'

path1 = './' + ID1_
path2 = './' + ID2_

In [4]:
def action_combine(a_i, a_v):
    """Merge the two per-agent action indices into one flat action index.

    The flat index is ``4 * a_v + a_i``, i.e. ``a_v`` selects a group of four
    consecutive indices and ``a_i`` selects the position inside that group.

    Args:
        a_i: action index chosen by the "Insul" agent.
        a_v: action index chosen by the "Vent" agent.

    Returns:
        The combined index. A value above 27 is only reported on stdout;
        it is still returned unchanged.
    """
    flat_index = 4 * a_v + a_i

    # Diagnostic only: the caller still receives the out-of-range value.
    if flat_index > 27:
        print("Error: Large index")

    return flat_index

In [5]:
def byteConv_scratch_V1(rawBA, n_byte):
    """Decode a raw byte message into a scaled feature vector and a reward.

    The message is read as ``n_byte`` consecutive two-byte words, high byte
    first. Despite its name, ``n_byte`` is the number of words, so the first
    ``2 * n_byte`` entries of ``rawBA`` are consumed.

    Args:
        rawBA: indexable sequence of byte values (e.g. ``bytes``).
        n_byte: number of two-byte words to decode.

    Returns:
        (fd, rw): ``fd`` is a 1-D array holding the first ``n_byte - 1``
        decoded words divided by 400; ``rw`` is the last decoded word,
        left unscaled.
    """
    words = np.zeros([1, n_byte])

    for k in range(n_byte):
        hi_pos = 2 * k
        # NOTE(review): a high byte of 200 or more marks a negative word; a
        # plain signed 16-bit decode would switch at 128 -- confirm that the
        # sender relies on this wider positive range.
        if rawBA[hi_pos] < 200:
            words[0, k] = (rawBA[hi_pos] * 256) + rawBA[hi_pos + 1]
        else:
            words[0, k] = -256 * (256 - rawBA[hi_pos]) + rawBA[hi_pos + 1]

    # NOTE(review): the previous comment said "factor of 40", but the code
    # divides by 400 -- confirm which one is intended.
    fd = words[0, :-1] / 400
    rw = words[0, n_byte - 1]

    return fd, rw

In [None]:
def measure_splitter(measure_vector):
    """Split one measurement vector into the two per-agent state vectors.

    Args:
        measure_vector: 1-D sequence; the slices taken below require
            20 entries.

    Returns:
        (state_ins, state_vent):
            state_ins  -- shape (1, 15), entries 5 onward of the input.
            state_vent -- shape (1, 10), entries 0-4 followed by entries
                          15 onward of the input.
    """
    head = measure_vector[:5]
    tail = measure_vector[15:]

    # Entries 15 onward are shared: they appear in both state vectors.
    state_vent = np.zeros([1, 10])
    state_vent[0, :5] = head
    state_vent[0, 5:] = tail

    state_ins = np.zeros([1, 15])
    state_ins[0, :] = measure_vector[5:]

    return state_ins, state_vent

In [None]:
# Run first and then run MATLAB script
#
# TCP server side of the training loop. The client sends one message per
# step (decoded by byteConv_scratch_V1 into a measurement vector plus a
# reward); the server answers each one with a single action byte built from
# the two agents' choices. Training runs until the client closes the
# connection.

HOST = 'localhost'
PORT = 50000

md_size = 21  # two-byte words decoded per message; the last one is the reward

GLR = 0.00005  # learning rate, passed as ALPHA to both agents

score_history = []
score = 0
num_episodes = 1001
n_save = 100  # save a checkpoint every n_save episodes

# for debugging purposes
action_hist = []
ai_hist = []
av_hist = []

state_hist = []
si_hist = []
sv_hist = []

aip_hist = []
avp_hist = []

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
conn = None
try:
    s.bind((HOST, PORT))
    s.listen(1)
    print("waiting for response from client at port ",PORT)
    conn, addr = s.accept()
    print('Connected by', addr)
    print('Welcome')

    agent_v = PGAV(ALPHA=GLR, input_dims=10, GAMMA=0.99,
                   n_actions=7, layer1_size=100, layer2_size=100,
                   chkpt_dir=path1, ID=ID1_)
    agent_i = PGAI(ALPHA=GLR, input_dims=15, GAMMA=0.99,
                   n_actions=4, layer1_size=100, layer2_size=100,
                   chkpt_dir=path2, ID=ID2_)

    agent_i.load_checkpoint(20000)
    agent_v.load_checkpoint(20000)

    # Cleared as soon as recv() returns no data (peer closed the socket).
    # The old guard `i == num_episodes` could never be true inside
    # range(num_episodes), so a disconnect used to fall through into the
    # decoder with an empty buffer and crash with IndexError.
    connected = True

    # The episode counter restarts after num_episodes, so the run (and the
    # checkpoint numbering 0..num_episodes-1) repeats until the client
    # disconnects.
    while connected:
        for i in range(num_episodes):
            # `score` still holds the previous episode's total at this point.
            print('episode: ', i,'score: ', score)
            done = False
            score = 0

            # NOTE(review): assumes every message arrives in a single
            # recv() call -- confirm against the sender.
            data = conn.recv(1024)

            if not data:
                print('Connection terminated (out)')
                connected = False
                break

            # The reward attached to the initial observation is not used.
            rdt, rw = byteConv_scratch_V1(data, md_size)

            observation = rdt
            ts = 0

            while not done:

                obs_i, obs_v = measure_splitter(observation)

                av, tavp = agent_v.choose_action(obs_v)
                ai, taip = agent_i.choose_action(obs_i)

                action = action_combine(ai, av)

                sa = int(action)  # conversion to int to use .to_bytes() method
                send_action = sa.to_bytes(1, 'big')  # conversion to bytes
                conn.sendall(send_action)

                data = conn.recv(1024)

                action_hist.append(action)
                ai_hist.append(ai)
                av_hist.append(av)
                state_hist.append(observation)
                si_hist.append(obs_i)
                sv_hist.append(obs_v)

                if not data:
                    print('Connection terminated (in)')
                    connected = False
                    break

                observation_, reward = byteConv_scratch_V1(data, md_size)

                # Both agents are credited with the same shared reward.
                agent_i.store_transition(obs_i, ai, reward)
                agent_v.store_transition(obs_v, av, reward)
                observation = observation_
                score += reward
                ts += 1

                # An episode ends after a fixed 2881 steps.
                if ts == 2881:
                    done = True

            if not connected:
                # The episode was cut short by the disconnect: do not learn
                # from or checkpoint the truncated trajectory.
                break

            score_history.append(score)
            agent_i.learn()
            agent_v.learn()
            if i % n_save == 0:
                agent_i.save_checkpoint(i)
                agent_v.save_checkpoint(i)
finally:
    # Release the port even when training stops on an error, so the cell
    # can be re-run without restarting the kernel.
    if conn is not None:
        conn.close()
    s.close()

waiting for response from client at port  50000
Connected by ('127.0.0.1', 50756)
Welcome
...Loading checkpoint...
INFO:tensorflow:Restoring parameters from ././NN_PaperPretrain_i_V101_TE3/checkpoint/NN_PaperPretrain_i_V101_TE3_20000
...Loading checkpoint...
INFO:tensorflow:Restoring parameters from ././NN_PaperPretrain_v_V101_TE3/checkpoint/NN_PaperPretrain_v_V101_TE3_20000
episode:  0 score:  0
Model saved in file: 0
Model saved in file: 0
episode:  1 score:  12860.0
episode:  2 score:  12976.0
episode:  3 score:  13034.0
episode:  4 score:  13181.0
episode:  5 score:  13089.0
episode:  6 score:  13005.0
episode:  7 score:  13067.0
episode:  8 score:  13091.0
episode:  9 score:  12980.0
episode:  10 score:  13038.0
episode:  11 score:  12954.0
episode:  12 score:  13036.0
episode:  13 score:  12847.0
episode:  14 score:  12997.0
episode:  15 score:  13095.0
episode:  16 score:  12998.0
episode:  17 score:  13176.0
episode:  18 score:  13132.0
episode:  19 score:  13178.0
episode:  20