In [None]:
# Notebook-wide settings.
# gitDir:   name of the Github folder you want to create/use in MyDrive.
# initFlag: stays 0 unless the (disabled) first-time setup cell sets it to 1.
gitDir = 'Github'
initFlag = 0

# Attach the Google Drive storage to the notebook under /content/gdrive/.
from google.colab import drive
drive.mount('/content/gdrive/')

In [None]:
# Initial setup. Execute this cell only when you want to freshly set up the project.

# Setup cloned repository in google drive for the first time.

#initFlag = 1
#%cd 'gdrive/MyDrive'
#%mkdir $gitDir
#%cd $gitDir
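# SECURITY: never hardcode a GitHub access token in the notebook. The token that was previously committed on this line must be revoked; supply a token only at runtime.
#!git clone 'https://<GITHUB_TOKEN>@github.com/gohmmagn/HJDQN.git'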

# Install hjdqn and gym_lqr for the first time.

#%cd 'HJDQN'
#!pip install -e .

In [None]:
# Import dolfinx for Google Colab.
# If the import fails, the FEniCSx installer script hosted at
# fem-on-colab.github.io is downloaded to /tmp, executed with bash, and the
# import is attempted a second time.
try:
    import dolfinx
except ImportError:
    # `&&` ensures the installer only runs when the download succeeded.
    !wget "https://fem-on-colab.github.io/releases/fenicsx-install-real.sh" -O "/tmp/fenicsx-install.sh" && bash "/tmp/fenicsx-install.sh"
    import dolfinx

In [None]:
# Import PyTorch and check version.
!pip install torch
import torch
print('torch: '+torch.__version__)

# Import the OpenAI Gym and check version.
!pip install gymnasium
import gymnasium as gym
print('gym: '+gym.__version__)

# Install control package to calulate the solution of the riccati equation.
!pip install control
import control
print('control: '+control.__version__)

In [None]:
# Setup gdrive file handling and goto main project directory.
if initFlag == 0:
  subDir = 'gdrive/MyDrive/' + gitDir + '/HJDQN/fileHandling'
else:
  subDir = 'fileHandling'

%cd $subDir
from gdrive_File_Handler import gdriveFileHandler
%cd ..

In [None]:
# To train new gym environments we first need to register them.
%cd 'gym_lqr'
!pip install -e .
%cd ..

In [None]:
# Set the focus of the file handler to the environment specified with the (envId).
# Exactly one envId assignment should be active; the commented-out lines are
# the alternative environment ids.
#envId = 'NonLinearPDEEnv-v0'
envId = 'Linear1dPDEEnv-v0'
#envId = 'Linear2dPDEEnv-v0'
# envId is also passed to plotEvaluation in the plotting cell, where it is part
# of the log file path.
gdriveFH = gdriveFileHandler(envId)

In [None]:
# Show models of given environment.
# The result is kept in df_modelNames because later cells select models from
# its 'Model name' column; the bare last expression displays it.
df_modelNames = gdriveFH.getModelsOfEnvironment()
df_modelNames

In [None]:
# Merge training and evaluation logs into single files.
gdriveFH.mergeAllLogsOfEnvironment()
# Disabled alternative: merge the eval and train logs of a single model only.
# NOTE(review): `k` is not defined in any visible cell; it has to be set to the
# desired row of df_modelNames before these lines are enabled.
#parFileId = 0
#gdriveFH.mergeLogs(df_modelNames['Model name'][k], 'eval_log', parFileId)
#gdriveFH.mergeLogs(df_modelNames['Model name'][k], 'train_log', parFileId)

In [None]:
# Check out the checkpoints which were saved during model training.
# Queries the first entry (label 0) of the 'Model name' column; change the
# index to inspect another model.
df_checkpoints = gdriveFH.getCheckpointFiles(df_modelNames['Model name'][0])
df_checkpoints

In [None]:
# Check out the parameters of the given model with the specified parameter file id (parFileId).
# Uses the first entry (label 0) of the 'Model name' column.
parFileId = 0
parameter_list = gdriveFH.getModelParameters(df_modelNames['Model name'][0],parFileId)
parameter_list

In [None]:
# Get an overview of the saved Riccati solutions.
# NOTE(review): presumably these are the files a training run refers to via
# its --Kpath argument - confirm against the file handler.
df_ricatti_solutions = gdriveFH.getRicattiSolutionFiles()
df_ricatti_solutions

In [None]:
# Train hjdqn model.
# Runs main.py as a shell command from the current working directory, which is
# expected to be the HJDQN project root after the cells above.
# NOTE(review): the environment is hard-coded through --env and does not follow
# the envId variable selected earlier; keep both in sync manually.

!python main.py --env='Linear1dPDEEnv-v0' --L=10 --tau=1e-6 --lr=1e-2 --sigma=0.1 --verboseLoopTraining --useExactSolution --useMatCARE --Kpath='K_2023-12-20T151527' --model='Critic_NN1' --algo='hjdqn' --T=2.0 --time_steps=40 --num_checkpoints=10 --noise='gaussian' --max_iter=5e4 --eval_interval=100 --fill_buffer=0 --start_train=400 --batch_size=512 --gamma=0.99999 --smooth

In [None]:
# Calculate state solution.
# Runs calculateStateSolution.py as a shell command from the current working
# directory for the model and saved checkpoint file named on the command line.
# NOTE(review): --envId is hard-coded to 'NonLinearPDEEnv-v0' here, which
# differs from the envId variable active in the selection cell
# ('Linear1dPDEEnv-v0'); confirm that this is intended.

!python calculateStateSolution.py --algo='hjdqn' --device='cpu' --envId='NonLinearPDEEnv-v0' --T_nl=8 --num_steps_nl=800 --modelName='HJDQN_2024-01-04T120244' --savedModel='HJDQN_2024-01-04T120244_0_17776.pth.tar'

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Plot configuration.
# modelNames: models to process. Alternatives to the single selection below:
#             df_modelNames['Model name'] or [df_modelNames['Model name'][0]].
# modelId:    one id per entry of modelNames, used in the log file name
#             '<modelName>_<modelId>.csv'. For several models:
#             ["0" for i in range(0,len(modelNames))].
# switch:     mode handed to plotEvaluation (0, 1 or 2).
modelNameColumn = df_modelNames['Model name']
modelNames = [modelNameColumn[65]]
modelId = ['0']
switch = 1

def plotEvaluation(envId, modelName, modelId, i, switch, ax=None):
  """Plot or summarise the evaluation/training log of one model.

  The log is read from
  /content/gdrive/MyDrive/<gitDir>/HJDQN/outputs/<envId>/<modelName>/<log kind>/<modelName>_<modelId>.csv
  where gitDir is the notebook-level variable and <log kind> is 'eval_log'
  for switch 0 and 'train_log' for switch 1 and 2. Columns are addressed by
  position only.

  Args:
    envId: environment id; a directory below the outputs folder.
    modelName: model name; a directory below envId and the file name prefix.
    modelId: id appended to the model name in the log file name.
    i: index of the model in the caller's list; only used in the printed line.
    switch: selects what is done with the log:
      0 - plot log(column 1 / column 2) of the eval log over column 0,
      1 - plot -log(abs(column 1)) of the train log over column 0,
      2 - no plot; return sum(log(abs(column 1))) of the train log.
    ax: optional matplotlib axes to draw on (switch 0 and 1). Defaults to the
      current axes, i.e. the axes the caller created last.

  Returns:
    The value returned by ax.plot for switch 0 and 1, the summed value for
    switch 2.

  Raises:
    ValueError: if switch is not 0, 1 or 2 (previously this silently returned
      None and left an empty figure behind).
  """
  if switch not in (0, 1, 2):
    raise ValueError("switch must be 0, 1 or 2, got {!r}".format(switch))

  # Only the log kind differs between the modes; the file name is the same.
  logKind = 'eval_log' if switch == 0 else 'train_log'
  logFileName = "{}_{}.csv".format(modelName, modelId)
  outputDirectory = '/content/gdrive/MyDrive/'+ gitDir +'/HJDQN/outputs'
  logValues = pd.read_csv('{}/{}/{}/{}/{}'.format(outputDirectory, envId, modelName, logKind, logFileName)).values

  if switch == 2:
    return np.sum(np.log(np.abs(logValues[:,1])))

  if ax is None:
    # Fall back to the current axes instead of relying on a global `ax1`.
    ax = plt.gca()

  if switch == 0:
    minimalDifference = np.min(np.abs(logValues[:,1]-logValues[:,2]))
    # Arguments now match their labels (name and index used to be swapped).
    print("Minimal difference: {}, modelName: {}, modelId: {}, index: {}, rows: {}".format(minimalDifference, modelName, modelId, i, logValues.shape[0]))
    return ax.plot(logValues[:,0], np.log(logValues[:,1]/logValues[:,2]))

  # switch == 1
  minimalValue = np.min(np.abs(logValues[:,1]))
  print("Minimal value: {}, modelName: {}, modelId: {}, index: {}".format(minimalValue, modelName, modelId, i))
  return ax.plot(logValues[:,0], -np.log(np.abs(logValues[:,1])))

# Driver for plotEvaluation.
# switch 0/1: draw one figure per model.
# switch 2:   no figures; report the model with the largest value returned by
#             plotEvaluation.
if switch!=2:
  # Label the y-axis with the quantity plotEvaluation actually draws for the
  # selected mode (the former label described an absolute difference that is
  # not plotted in either mode).
  if switch==0:
    yLabel = 'log(Average Return / Average Exact Return)'
  else:
    yLabel = '-log(abs(train log value))'
  for i in range(0,len(modelNames)):
    fig = plt.figure()
    # Keep the module-level name `ax1`: plotEvaluation draws on these axes.
    ax1 = fig.add_subplot()
    ax1.set_ylabel(yLabel)
    ax1.set_xlabel('steps')
    plotEvaluation(envId, modelNames[i], modelId[i], i, switch)
    plt.show()
else:
  # Start below every possible value: the sums are sums of logarithms and can
  # all be negative, in which case a start value of 0 never selected any model.
  bestSum = -np.inf
  bestIndex = None
  for i in range(0,len(modelNames)):
    sumOfRewards = plotEvaluation(envId, modelNames[i], modelId[i], i, switch)
    # `>=` keeps the original tie-breaking: the last of several equal models wins.
    if sumOfRewards >= bestSum:
      bestSum = sumOfRewards
      bestIndex = i
  if bestIndex is None:
    # Empty model list, or every sum was NaN and therefore not comparable.
    print("No model with a comparable sumOfRewards found.")
  else:
    print("modelName: {}, modelId: {}, index: {}, sumOfRewards: {}".format(modelNames[bestIndex], modelId[bestIndex], bestIndex, bestSum))