In [None]:
import shutil
import os
import numpy as np
import matplotlib.pylab as plt

import sys
sys.path.append(os.path.abspath("../framework"))
sys.path.append(os.path.abspath("../concrete"))

In [None]:
from ConcAgent import ConcAgent
from ConcEnvironment import ConcEnvironment
from ConcAgentFactory import ConcAgentFactory
from ConcAgentMemento import ConcAgentMemento
from ConcBuildOrder import ConcBuildOrder
from ConcStore import ConcStore
from ConcObservation import ConcObservation
from framework import  ObservationSequence 
from framework import TrainId, StoreField
from workUtils import *

# 10. Misc

## 12. Load the train log into a database

In [None]:
# Folder the train log is read from. loadTrainLog is not imported by name in
# this notebook (presumably it comes from `from workUtils import *` -- TODO confirm).
# Its return value is used by the cells below as a DB cursor (execute / fetchone / fetchall).
trainLogFolderPath = "./trainLog"
cur = loadTrainLog(trainLogFolderPath)

# Table TrainLog(
#     buildOrderId Text, /* corresponds to a build order */
#     trainId   Text, /* corresponds to a trained agent saved at a given simulation time */
#     timeSimulation Integer,
#     timestamp timestamp
# )

## 14. A function to load the gain and bias of each saved train over a training run:

In [None]:
def getGainAndBiasOverTrain(buildOrderIdGiven):
    """Collect the gain and bias of every agent saved for one build order.

    Uses the notebook-global cursor ``cur`` to read the rows of table TrainLog
    whose buildOrderId equals ``buildOrderIdGiven``, restores the agent stored
    under each trainId and reads ``agent.gainP.weights``.

    Args:
        buildOrderIdGiven: buildOrderId value used to filter TrainLog.

    Returns:
        Tuple ``(TimeSimulation, Gain, Bias)``:
            TimeSimulation: array of the timeSimulation values, in ascending order.
            Gain: the per-agent ``gainP.weights[0]`` arrays concatenated along axis 0.
            Bias: the per-agent ``gainP.weights[1]`` arrays stacked along axis 0;
                NaN is used for an agent whose gainP holds a single weight.

    Raises:
        AssertionError: if TrainLog has no row for ``buildOrderIdGiven``.
    """

    concStore = ConcStore()
    agentFactory = ConcAgentFactory()

    # Dummy one-row observation fed to each restored agent below. Its random
    # values are never read back by this function.
    y = np.random.randn(1, ConcEnvironment.nPv).astype(np.float32)
    anObservationSequence = ObservationSequence()
    anObservationSequence.add(ConcObservation(y))

    # A single query replaces the former count(*) + select pair: an empty
    # result set is exactly the "no data" condition.
    cur.execute('''
    Select
        trainId
        , timeSimulation
        From TrainLog
        Where buildOrderId = ?
        Order By timeSimulation
        ''', (buildOrderIdGiven,))
    rows = cur.fetchall()
    assert len(rows) > 0, "NO DATA WITH THE GIVEN BUILD ID"

    TimeSimulation = []
    Gain = []
    Bias = []

    for (trainIdStr, timeSimulation) in rows:

        # storeField contains: agentMemento, buildOrder
        storeField = concStore.load(TrainId(trainIdStr))

        # restore agent
        agent = agentFactory.create(storeField.getBuildOrder())
        agent.loadFromMemento(storeField.getAgentMemento())
        # NOTE(review): presumably this call is what makes gainP.weights
        # available (deferred weight creation/restore) -- confirm against ConcAgent.
        agent(anObservationSequence)

        TimeSimulation.append(timeSimulation)

        gain = agent.gainP.weights[0].numpy()
        if len(agent.gainP.weights) > 1:
            bias = agent.gainP.weights[1].numpy()
        else:
            # gainP has no second weight: record a NaN placeholder instead.
            bias = np.nan

        Gain.append(gain)
        Bias.append(bias)

    TimeSimulation = np.array(TimeSimulation) # (*)
    Gain = np.concatenate(Gain, axis=0) # expected (*, nMv) -- holds only if each gain has one row; TODO confirm
    Bias = np.stack(Bias, axis=0) # expected (*, nMv); (*,) of NaN when there is no bias weight

    return TimeSimulation, Gain, Bias

# 20. Observe the trace of gain and bias over a training

## 22. Choose a build order

In [None]:
# One summary row per build order, most recently logged first.
# The timestamp is aggregated with max() so that both the displayed value and
# the sort key are the latest entry of each group; a bare `Timestamp` under
# Group By would come from an arbitrary row of the group.
# NOTE(review): assuming `cur` is an SQLite cursor, the bare TrainId column
# then comes from the same row that holds max(timestamp) -- confirm the backend.
cur.execute('''
Select
    max(timestamp) As lastTimestamp
    , buildOrderId
    , TrainId
    , count(*)
    From TrainLog
    Group By buildOrderId
    Order by max(timestamp) desc
    ''')

for row in cur.fetchall():
    print(row)

buildOrderId = lMyKekovolegOLpS

{"trainId": "0vpVNToGu8MXQ7UP", "body": {"timeSimulation": 40959, "agentMemento": {"saveFilePath": "./checkpoint/QfAUj7GQYfdlpj9m"}, "buildOrder": {"nIteration": 65536, "nSeq": 1, "nHorizonValueOptimization": 8, "nIntervalPolicyOptimization": 16, "nBatchPolicyOptimization": 32, "nSaveInterval": 1024, "description": "lMyKekovolegOLpS", "tConstant": 10, "nHiddenValueApproximator": 8, "sdPolicy": 0.1, "nActionsSampledFromPolicy": 8, "amplitudeDv": 0.0, "amplitudePeriodicDv": 1.0, "cyclePeriodicDv": 30, "agentUseBias": false, "learningRatePolicyOptimizer": 0.001, "learningRateValueFunctionOptimizer": 0.01, "weightOnError": 0.1}, "timeStamp": "2020-05-19 15:26:12"}}

buildOrderId = JPe2afqJFmAAnowh

{"trainId": "1kP1RV3pq2ERFMNd", "body": {"timeSimulation": 46079, "agentMemento": {"saveFilePath": "./checkpoint/T1IwYbdrSuiahqBs"}, "buildOrder": {"nIteration": 65536, "nSeq": 1, "nHorizonValueOptimization": 8, "nIntervalPolicyOptimization": 16, "nBatchPolicyOptimization": 32, "nSaveInterval": 1024, "description": "JPe2afqJFmAAnowh", "tConstant": 10, "nHiddenValueApproximator": 8, "sdPolicy": 0.1, "nActionsSampledFromPolicy": 8, "amplitudeDv": 0.0, "amplitudePeriodicDv": 1.0, "cyclePeriodicDv": 30, "agentUseBias": false, "learningRatePolicyOptimizer": 0.001, "learningRateValueFunctionOptimizer": 0.01, "weightOnError": 0.9}, "timeStamp": "2020-05-19 14:45:20"}}

In [None]:
# Pick the buildOrderId of the most recently logged TrainLog row.
# Only one row is consumed, so only one row is requested.
cur.execute('''
Select
    buildOrderId
    From TrainLog
    Order By timestamp desc
    Limit 1
    ''')

latestRow = cur.fetchone()
if latestRow is None:
    # fetchone() yields None on an empty result; fail with a readable message
    # instead of "'NoneType' object is not subscriptable".
    raise RuntimeError("TrainLog is empty: no build order to select")
buildOrderIdGiven = latestRow[0]

In [None]:
# Manual selection of the build order to analyse. This assignment replaces the
# value picked by the "latest TrainLog row" query in the previous cell.
# Keep exactly one of the lines below active; the trailing note records the
# weightOnError setting of that build order.
#buildOrderIdGiven = "lMyKekovolegOLpS" # weightOnError = 0.1
#buildOrderIdGiven = "JPe2afqJFmAAnowh" # weightOnError = 0.9
buildOrderIdGiven =  "u0oBOHOk2fPO3bgq" # weightOnError = 0.5

## 24. Load the parameters

In [None]:
# Gain and bias of every saved agent of the selected build order, ordered by simulation time.
TimeSimulation, Gain, Bias = getGainAndBiasOverTrain(buildOrderIdGiven)

## 26. Plot the trace of Gain and Bias along with the simulation time

In [None]:
# Two stacked panels sharing one x axis, so the Gain and Bias traces line up
# in simulation time (previously only the Gain panel had its x-range set).
fig, (axGain, axBias) = plt.subplots(2, 1, sharex=True)
#
axGain.plot(TimeSimulation, Gain, 'o-')
axGain.set_title('Gain')
axGain.set_ylabel('gain')
axGain.axhline(y = 0, color = "k", linestyle = "--")  # zero reference line
# 2**16 = 65536, the nIteration value of the build orders listed above;
# with sharex this range applies to both panels.
axGain.set_xlim(0, 2**16)
#
axBias.plot(TimeSimulation, Bias, 'o-')
axBias.set_title('Bias')
axBias.set_ylabel('bias')
axBias.set_xlabel('simulation time')
#
plt.tight_layout()