# Install needed deps

#### Don't forget to run ```apt-get update --fix-missing && sudo apt-get install build-essential``` and ```apt-get install zlib1g-dev``` in case you are running on an Ubuntu image

In [1]:
%pip install pandas-ta==0.3.14b --pre
%pip install gym==0.21.0
%pip install ipywidgets
%pip install ray
%pip install -U "ray[tune]"
%pip install -U "ray[rllib]"
%pip install -U "ray[serve]"
%pip install ta
%pip install quantstats
%pip install sklearn
%pip install feature_engine
%pip install --upgrade mplfinance
%pip install optuna

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Requirement already up-to-date: mplfinance in ./.venv/lib/python3.8/site-packages (0.12.8b9)
Note: you may need to restart the kernel to use updated packages.


# Prepare and fetch the data

In [3]:
from tensortrade.data.cdd import CryptoDataDownload

import numpy as np
import pandas as pd
pd.options.mode.use_inf_as_na = True

def prepare_data(df):
    df['volume'] = np.int64(df['volume'])
    df['date'] = pd.to_datetime(df['date'])
    df.sort_values(by='date', ascending=True, inplace=True)
    df.reset_index(drop=True, inplace=True)
    df['date'] = df['date'].dt.strftime('%Y-%m-%d %I:%M %p')
    return df

def fetch_data():
    cdd = CryptoDataDownload()
    bitfinex_data = cdd.fetch("Bitfinex", "USD", "BTC", "1h")
    bitfinex_data = bitfinex_data[['date', 'open', 'high', 'low', 'close', 'volume']]
    bitfinex_data = prepare_data(bitfinex_data)
    return bitfinex_data

def load_csv(filename):
    df = pd.read_csv('data/' + filename, skiprows=1)
    #df.drop(columns=['symbol', 'volume_btc'], inplace=True)

    # Fix timestamp from "2019-10-17 09-AM" to "2019-10-17 09-00-00 AM"
    df['date'] = df['date'].str[:14] + '00-00 ' + df['date'].str[-2:]

    return prepare_data(df)

In [5]:
data = fetch_data()
data

Unnamed: 0,date,open,high,low,close,volume
0,2018-05-15 06:00 AM,8723.800000,8793.000000,8714.9,8739.000000,8988053
1,2018-05-15 07:00 AM,8739.000000,8754.800000,8719.3,8743.000000,2288904
2,2018-05-15 08:00 AM,8743.000000,8743.100000,8653.2,8723.700000,8891773
3,2018-05-15 09:00 AM,8723.700000,8737.800000,8701.2,8708.100000,2054868
4,2018-05-15 10:00 AM,8708.100000,8855.700000,8695.8,8784.400000,17309722
...,...,...,...,...,...,...
34450,2022-04-21 09:00 PM,40605.699426,40861.193731,40573.0,40751.000000,5542905
34451,2022-04-21 10:00 PM,40750.000000,40826.000000,39789.0,40436.000000,22782604
34452,2022-04-21 11:00 PM,40425.000000,40569.000000,40363.0,40502.000000,2919318
34453,2022-04-22 12:00 AM,40502.000000,40682.000000,40245.0,40442.000000,6851412


## Create features for the feed module

In [6]:
import os
import numpy as np
import ta as ta1
import pandas_ta as ta

import quantstats as qs
qs.extend_pandas()

def fix_dataset_inconsistencies_without_backfilling(dataframe, fill_value=None):
    dataframe = dataframe.replace([-np.inf, np.inf], np.nan)

    return dataframe.fillna(axis='index', method='pad').dropna(axis='columns')

def fix_dataset_inconsistencies(dataframe, fill_value=None):
    dataframe = dataframe.replace([-np.inf, np.inf], np.nan)

    #This is done to avoid filling middle holes with backfilling.
    if fill_value is None:
        dataframe.iloc[0,:] = \
            dataframe.apply(lambda column: column.iloc[column.first_valid_index()], axis='index')
    else:
        dataframe.iloc[0,:] = \
            dataframe.iloc[0,:].fillna(fill_value)

    return dataframe.fillna(axis='index', method='pad').dropna(axis='columns')

def rsi(price: 'pd.Series[pd.Float64Dtype]', period: float) -> 'pd.Series[pd.Float64Dtype]':
    r = price.diff()
    upside = np.minimum(r, 0).abs()
    downside = np.maximum(r, 0).abs()
    rs = upside.ewm(alpha=1 / period).mean() / downside.ewm(alpha=1 / period).mean()
    return 100*(1 - (1 + rs) ** -1)

def macd(price: 'pd.Series[pd.Float64Dtype]', fast: float, slow: float, signal: float) -> 'pd.Series[pd.Float64Dtype]':
    fm = price.ewm(span=fast, adjust=False).mean()
    sm = price.ewm(span=slow, adjust=False).mean()
    md = fm - sm
    signal = md - md.ewm(span=signal, adjust=False).mean()
    return signal

def generate_all_default_quantstats_features(data):
    excluded_indicators = [
        'compare',
        'greeks',
        'information_ratio',
        'omega',
        'r2',
        'r_squared',
        'rolling_greeks',
        'warn',
    ]
    
    indicators_list = [f for f in dir(qs.stats) if f[0] != '_' and f not in excluded_indicators]
    
    df = data.copy()
    df = df.set_index('date')
    df.index = pd.DatetimeIndex(df.index)

    for indicator_name in indicators_list:
        try:
            #print(indicator_name)
            indicator = qs.stats.__dict__[indicator_name](df['close'])
            if isinstance(indicator, pd.Series):
                indicator = indicator.to_frame(name=indicator_name)
                df = pd.concat([df, indicator], axis='columns')
        except (pd.errors.InvalidIndexError, ValueError):
            pass

    df = df.reset_index()
    return df

def generate_features(data):

    # Generate all default indicators from ta library
    ta1.add_all_ta_features(data, 
                            'open', 
                            'high', 
                            'low', 
                            'close', 
                            'volume', 
                            fillna=True)

    # Naming convention across most technical indicator libraries
    data = data.rename(columns={'open': 'Open', 
                                'high': 'High', 
                                'low': 'Low', 
                                'close': 'Close', 
                                'volume': 'Volume'})
    data = data.set_index('date')

    # Custom indicators
    features = pd.DataFrame.from_dict({
        'prev_open': data['Open'].shift(1),
        'prev_high': data['High'].shift(1),
        'prev_low': data['Low'].shift(1),
        'prev_close': data['Close'].shift(1),
        'prev_volume': data['Volume'].shift(1),
        'vol_5': data['Close'].rolling(window=5).std().abs(),
        'vol_10': data['Close'].rolling(window=10).std().abs(),
        'vol_20': data['Close'].rolling(window=20).std().abs(),
        'vol_30': data['Close'].rolling(window=30).std().abs(),
        'vol_50': data['Close'].rolling(window=50).std().abs(),
        'vol_60': data['Close'].rolling(window=60).std().abs(),
        'vol_100': data['Close'].rolling(window=100).std().abs(),
        'vol_200': data['Close'].rolling(window=200).std().abs(),
        'ma_5': data['Close'].rolling(window=5).mean(),
        'ma_10': data['Close'].rolling(window=10).mean(),
        'ma_20': data['Close'].rolling(window=20).mean(),
        'ma_30': data['Close'].rolling(window=30).mean(),
        'ma_50': data['Close'].rolling(window=50).mean(),
        'ma_60': data['Close'].rolling(window=60).mean(),
        'ma_100': data['Close'].rolling(window=100).mean(),
        'ma_200': data['Close'].rolling(window=200).mean(),
        'ema_5': ta1.trend.ema_indicator(data['Close'], window=5, fillna=True),
        'ema_9': ta1.trend.ema_indicator(data['Close'], window=9, fillna=True),
        'ema_21': ta1.trend.ema_indicator(data['Close'], window=21, fillna=True),
        'ema_60': ta1.trend.ema_indicator(data['Close'], window=60, fillna=True),
        'ema_64': ta1.trend.ema_indicator(data['Close'], window=64, fillna=True),
        'ema_120': ta1.trend.ema_indicator(data['Close'], window=120, fillna=True),
        'lr_open': np.log(data['Open']).diff().fillna(0),
        'lr_high': np.log(data['High']).diff().fillna(0),
        'lr_low': np.log(data['Low']).diff().fillna(0),
        'lr_close': np.log(data['Close']).diff().fillna(0),
        'r_volume': data['Close'].diff().fillna(0),
        'rsi_5': rsi(data['Close'], period=5),
        'rsi_10': rsi(data['Close'], period=10),
        'rsi_100': rsi(data['Close'], period=100),
        'rsi_7': rsi(data['Close'], period=7),
        'rsi_28': rsi(data['Close'], period=28),
        'rsi_6': rsi(data['Close'], period=6),
        'rsi_14': rsi(data['Close'], period=14),
        'rsi_26': rsi(data['Close'], period=24),
        'macd_normal': macd(data['Close'], fast=12, slow=26, signal=9),
        'macd_short': macd(data['Close'], fast=10, slow=50, signal=5),
        'macd_long': macd(data['Close'], fast=200, slow=100, signal=50),
        'macd_wolfpack': macd(data['Close'], fast=3, slow=8, signal=9),
    })

    # Concatenate both manually and automatically generated features
    data = pd.concat([data, features], axis='columns').fillna(method='pad')

    # Remove potential column duplicates
    data = data.loc[:,~data.columns.duplicated()]

    # Revert naming convention
    data = data.rename(columns={'Open': 'open', 
                                'High': 'high', 
                                'Low': 'low', 
                                'Close': 'close', 
                                'Volume': 'volume'})

    data = data.reset_index()

    # Generate all default quantstats features
    df_quantstats = generate_all_default_quantstats_features(data)

    # Concatenate both manually and automatically generated features
    data = pd.concat([data, df_quantstats], axis='columns').fillna(method='pad')

    # Remove potential column duplicates
    data = data.loc[:,~data.columns.duplicated()]

    # A lot of indicators generate NaNs at the beginning of DataFrames, so remove them
    data = data.iloc[200:]
    data = data.reset_index(drop=True)

    data = fix_dataset_inconsistencies_without_backfilling(data, fill_value=None)
    return data

In [7]:
data = generate_features(data)
# remove not needed features
to_drop = ['others_dlr', 'compsum']
data = data.drop(columns=to_drop)
data.shape

  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)
  self._psar_up = pd.Series(index=self._psar.index)
  self._psar_down = pd.Series(index=self._psar.index)
  return bound(*args, **kwds)
  return _pd.concat(dfs, 1, sort=True)


(34255, 140)

## Remove features with low variance before splitting the dataset

In [8]:
from sklearn.feature_selection import VarianceThreshold
sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
date = data[['date']].copy()
data = data.drop(columns=['date'])
sel.fit(data)
data[data.columns[sel.get_support(indices=True)]]
data = pd.concat([date, data], axis='columns')
data

Unnamed: 0,date,open,high,low,close,volume,volume_adi,volume_obv,volume_cmf,volume_fi,...,rsi_26,macd_normal,macd_short,macd_long,macd_wolfpack,pct_rank,rolling_sharpe,rolling_sortino,rolling_volatility,to_drawdown_series
0,2018-05-23 02:00 PM,7897.300000,7898.800000,7849.8,7877.400000,9341499,-1.219515e+08,-153103304,-0.175983,-1.548039e+08,...,65.542059,11.190548,10.871904,31.873058,19.596642,10.000000,-0.811487,-1.144302,0.072620,-0.103251
1,2018-05-23 03:00 PM,7877.400000,7889.700000,7661.0,7700.000000,23679375,-1.375548e+08,-176782679,-0.228723,-7.327921e+08,...,72.698849,1.333779,-5.426751,34.355233,-24.639480,1.666667,-1.248391,-1.633909,0.079103,-0.123446
2,2018-05-23 04:00 PM,7700.000000,7700.100000,7548.1,7605.400000,42144843,-1.479246e+08,-218927522,-0.216859,-1.197665e+09,...,75.527202,-10.060459,-21.497215,37.504922,-51.837145,1.666667,-1.612964,-2.069373,0.080681,-0.134215
3,2018-05-23 05:00 PM,7605.400000,7623.600000,7441.8,7511.100000,38711817,-1.571235e+08,-257639339,-0.221424,-1.548073e+09,...,77.907846,-21.778972,-36.146245,41.269618,-66.773623,1.666667,-1.797159,-2.272346,0.082309,-0.144950
4,2018-05-23 06:00 PM,7511.100000,7551.600000,7403.0,7489.100000,23046091,-1.534634e+08,-280685430,-0.149460,-1.399351e+09,...,78.418914,-28.422775,-41.976877,44.917996,-57.191729,1.666667,-1.879146,-2.372706,0.082361,-0.147455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34250,2022-04-21 09:00 PM,40605.699426,40861.193731,40573.0,40751.000000,5542905,1.027003e+10,-4027954971,-0.411267,-2.855949e+09,...,59.192397,-192.743017,-234.756296,-204.219091,-241.127344,3.333333,0.452479,0.619429,0.081587,-0.405971
34251,2022-04-21 10:00 PM,40750.000000,40826.000000,39789.0,40436.000000,22782604,1.027568e+10,-4050737575,-0.330010,-3.473173e+09,...,61.997754,-215.485628,-250.477048,-189.287005,-211.886326,1.666667,0.165956,0.225038,0.082163,-0.410563
34252,2022-04-21 11:00 PM,40425.000000,40569.000000,40363.0,40502.000000,2919318,1.027670e+10,-4047818257,-0.326317,-2.949480e+09,...,61.079716,-214.354627,-233.575998,-175.784013,-119.538792,3.333333,0.246678,0.334725,0.082156,-0.409600
34253,2022-04-22 12:00 AM,40502.000000,40682.000000,40245.0,40442.000000,6851412,1.027602e+10,-4054669669,-0.318578,-2.586853e+09,...,61.618846,-205.871866,-212.287901,-162.437701,-52.397292,3.333333,0.177256,0.240407,0.082164,-0.410475


# Setup which data to use for training and which data to use for evaluation of RL Model

In [9]:
from sklearn.model_selection import train_test_split

def split_data(data):
    X = data.copy()
    y = X['close'].pct_change()

    X_train_test, X_valid, y_train_test, y_valid = \
        train_test_split(data, data['close'].pct_change(), train_size=0.67, test_size=0.33, shuffle=False)

    X_train, X_test, y_train, y_test = \
        train_test_split(X_train_test, y_train_test, train_size=0.50, test_size=0.50, shuffle=False)

    return X_train, X_test, X_valid, y_train, y_test, y_valid

In [10]:
X_train, X_test, X_valid, y_train, y_test, y_valid = \
    split_data(data)

## Implement basic feature engineering

In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

from feature_engine.selection import SelectBySingleFeaturePerformance

from scipy.stats import iqr


def estimate_outliers(data):
    return iqr(data) * 1.5

def estimate_percent_gains(data, column='close'):
    returns = get_returns(data, column=column)
    gains = estimate_outliers(returns)
    return gains

def get_returns(data, column='close'):
    return fix_dataset_inconsistencies(data[[column]].pct_change(), fill_value=0)

def precalculate_ground_truths(data, column='close', threshold=None):
    returns = get_returns(data, column=column)
    gains = estimate_outliers(returns) if threshold is None else threshold
    binary_gains = (returns[column] > gains).astype(int)
    return binary_gains

def is_null(data):
    return data.isnull().sum().sum() > 0



rf = RandomForestClassifier(n_estimators=100, 
                            random_state=1990, 
                            n_jobs=-1)

sel = SelectBySingleFeaturePerformance(variables=None, 
                                       estimator=rf, 
                                       scoring="roc_auc", 
                                       cv=5, 
                                       threshold=0.65)

sel.fit(X_train, precalculate_ground_truths(X_train, column='close'))

SelectBySingleFeaturePerformance(cv=5,
                                 estimator=RandomForestClassifier(n_jobs=-1,
                                                                  random_state=1990),
                                 threshold=0.65)

In [11]:
# import matplotlib.pyplot as plt

feature_performance = pd.Series(sel.feature_performance_).sort_values(ascending=False)
# feature_performance.plot.bar(figsize=(40, 10))
# plt.title('Performance of ML models trained with individual features')
# plt.ylabel('roc-auc')

In [13]:
features_to_drop = sel.features_to_drop_
to_drop = list(set(features_to_drop) - set(['open', 'high', 'low', 'close', 'volume']))
len(to_drop)
# features_to_drop

124

In [14]:
X_train = X_train.drop(columns=to_drop)
X_test = X_test.drop(columns=to_drop)
X_valid = X_valid.drop(columns=to_drop)

X_train.shape, X_test.shape, X_valid.shape

((11475, 16), (11475, 16), (11305, 16))

In [15]:
X_train.columns.tolist()

['date',
 'open',
 'high',
 'low',
 'close',
 'volume',
 'volume_em',
 'volume_vpt',
 'volatility_kchi',
 'trend_aroon_up',
 'momentum_stoch_rsi',
 'others_dr',
 'lr_high',
 'lr_close',
 'r_volume',
 'macd_wolfpack']

## Normalize the dataset subsets to make the model converge faster

In [16]:
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

scaler_type = MinMaxScaler

def get_feature_scalers(X, scaler_type=scaler_type):
    scalers = []
    for name in list(X.columns[X.columns != 'date']):
        scalers.append(scaler_type().fit(X[name].values.reshape(-1, 1)))
    return scalers

def get_scaler_transforms(X, scalers):
    X_scaled = []
    for name, scaler in zip(list(X.columns[X.columns != 'date']), scalers):
        X_scaled.append(scaler.transform(X[name].values.reshape(-1, 1)))
    X_scaled = pd.concat([pd.DataFrame(column, columns=[name]) for name, column in \
                          zip(list(X.columns[X.columns != 'date']), X_scaled)], axis='columns')
    return X_scaled

def scale_numpy_array(np_arr, scaler_type = scaler_type):
    return scaler_type().fit_transform(np_arr, (-1,1))

def normalize_data(X_train, X_test, X_valid):
    X_train_test = pd.concat([X_train, X_test], axis='index')
    X_train_test_valid = pd.concat([X_train_test, X_valid], axis='index')

    X_train_test_dates = X_train_test[['date']]
    X_train_test_valid_dates = X_train_test_valid[['date']]

    X_train_test = X_train_test.drop(columns=['date'])
    X_train_test_valid = X_train_test_valid.drop(columns=['date'])

    train_test_scalers = \
        get_feature_scalers(X_train_test, 
                            scaler_type=scaler_type)
    train_test_valid_scalers = \
        get_feature_scalers(X_train_test_valid, 
                            scaler_type=scaler_type)

    X_train_test_scaled = \
        get_scaler_transforms(X_train_test, 
                              train_test_scalers)
    X_train_test_valid_scaled = \
        get_scaler_transforms(X_train_test_valid, 
                              train_test_scalers)
    X_train_test_valid_scaled_leaking = \
        get_scaler_transforms(X_train_test_valid, 
                              train_test_valid_scalers)

    X_train_test_scaled = \
        pd.concat([X_train_test_dates, 
                   X_train_test_scaled], 
                  axis='columns')
    X_train_test_valid_scaled = \
        pd.concat([X_train_test_valid_dates, 
                   X_train_test_valid_scaled], 
                  axis='columns')
    X_train_test_valid_scaled_leaking = \
        pd.concat([X_train_test_valid_dates, 
                   X_train_test_valid_scaled_leaking], 
                  axis='columns')

    X_train_scaled = X_train_test_scaled.iloc[:X_train.shape[0]]
    X_test_scaled = X_train_test_scaled.iloc[X_train.shape[0]:]
    X_valid_scaled = X_train_test_valid_scaled.iloc[X_train_test.shape[0]:]
    X_valid_scaled_leaking = X_train_test_valid_scaled_leaking.iloc[X_train_test.shape[0]:]

    return (train_test_scalers, 
            train_test_valid_scalers, 
            X_train_scaled, 
            X_test_scaled, 
            X_valid_scaled, 
            X_valid_scaled_leaking)

train_test_scalers, train_test_valid_scalers, X_train_scaled, X_test_scaled, X_valid_scaled, X_valid_scaled_leaking = \
    normalize_data(X_train, X_test, X_valid)

In [2]:
import os
cwd = os.getcwd()

train_csv = os.path.join(cwd, 'train.csv')
test_csv = os.path.join(cwd, 'test.csv')
valid_csv = os.path.join(cwd, 'valid.csv')
train_scaled_csv = os.path.join(cwd, 'train_scaled.csv')
test_scaled_csv = os.path.join(cwd, 'test_scaled.csv')
valid_scaled_csv = os.path.join(cwd, 'valid_scaled.csv')
valid_scaled_leaking_csv = os.path.join(cwd, 'valid_scaled_leaking.csv')




### Save to CSV

In [18]:
# X_train.to_csv(train_csv, index=False)
# X_test.to_csv(test_csv, index=False)
# X_valid.to_csv(valid_csv, index=False)
# X_train_scaled.to_csv(train_scaled_csv, index=False)
# X_test_scaled.to_csv(test_scaled_csv, index=False)
# X_valid_scaled.to_csv(valid_scaled_csv, index=False)
# X_valid_scaled_leaking.to_csv(valid_scaled_leaking_csv, index=False)

### Load from CSV if data previously saved

In [3]:
import pandas as pd

X_train = pd.read_csv(train_csv)
X_test = pd.read_csv(test_csv)
X_valid = pd.read_csv(valid_csv)
X_train_scaled = pd.read_csv(train_scaled_csv)
X_test_scaled = pd.read_csv(test_scaled_csv)
X_valid_scaled = pd.read_csv(valid_scaled_csv)
X_valid_scaled_leaking = pd.read_csv(valid_scaled_leaking_csv)

In [4]:
X_train.shape

(11475, 16)

# Defining the environment

In [4]:
import random
import gym
from gym import spaces
from sklearn import preprocessing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# infinite number in python
MAX_NET_WORTH = 2147483647
MAX_NUM_QUOTE_OR_BASE_ASSET = 2147483647

INITIAL_QUOTE_ASSET = 10000
INITIAL_BASE_ASSET = 0
OBSERVATION_WINDOW_SIZE = 24 # Probably we should put it as param ?

class SimpleTradingEnv(gym.Env):
    
    metadata = {'render.modes': ['live', 'human', 'none']}
    visualization = None

    def __init__(self, df_scaled, df_normal, trading_fee):
        
        self.df_scaled = df_scaled.reset_index(drop=True)
        self.df_normal = df_normal.reset_index(drop=True)
        self.window_size = OBSERVATION_WINDOW_SIZE
        self.prices, self.features = self._process_data(df_scaled)
        # The shape of the observation is (window_size * features + environment_features) the environment_features are: quote_asset, base_asset, net_worth. The entire observation is flattened in a 1D np array. 
        # NOT USED ANYMORE, KEPT FOR REFERENCE
        # self.obs_shape = ((OBSERVATION_WINDOW_SIZE * self.features.shape[1] + 3),) 

        # The shape of the observation is number of candles to look back, and the number of features (candle_features) + 3 (quote_asset, base_asset, net_worth)
        self.obs_shape = (OBSERVATION_WINDOW_SIZE, self.features.shape[1] + 3)

        # Action space
        #self.action_space = spaces.Box(low=np.array([0, 0]), high=np.array([3.0, 1.0]), dtype=np.float32)
        self.action_space = spaces.MultiDiscrete([3, 100])
        # Observation space
        self.observation_space = spaces.Box(low=-1, high=1, shape=self.obs_shape, dtype=np.float32)

        # Initialize the episode environment

        self._start_candle = OBSERVATION_WINDOW_SIZE # We assume that the first observation is not the first row of the dataframe, in order to avoid the case where there are no calculated indicators.
        self._end_candle = len(self.features) - 1
        self._trading_fee = trading_fee

        self._quote_asset = None
        self._base_asset = None
        self._done = None
        self._current_candle = None
        self._net_worth = None
        self._previous_net_worth = None

        # Array that will contain observation history needed for appending it to the observation space
        # It will contain observations consisting of the net_worth, base_asset and quote_asset as list of floats
        # Other features (OHLC + Indicators) will be appended to the current observation in the _get_observation method that takes the data directly from the available dataframe
        self._obs_env_history = None

        # Render and analysis data
        self._total_reward_accumulated = None
        self.trade_history = None
        self._first_rendering = None
        

    def reset(self):
        self._done = False
        self._current_candle = self._start_candle
        self._quote_asset = INITIAL_QUOTE_ASSET
        self._base_asset = INITIAL_BASE_ASSET 
        self._net_worth = INITIAL_QUOTE_ASSET # at the begining our net worth is the initial quote asset
        self._previous_net_worth = INITIAL_QUOTE_ASSET # at the begining our previous net worth is the initial quote asset
        self._total_reward_accumulated = 0.
        self._first_rendering = True
        self.trade_history = []
        self._obs_env_history = []
        
        self._initial_obs_data()

        return self._get_observation()

    def _take_action(self, action):
        self._done = False
        current_price = random.uniform(
            self.df_normal.loc[self._current_candle, "low"], self.df_normal.loc[self._current_candle, "high"])


        action_type = action[0]
        amount = action[1] / 100
        
        if action_type == 0: # Buy
            # Buy % assets
            # Determine the maximum amount of quote asset that can be bought
            available_amount_to_buy_with = self._quote_asset / current_price
            # Buy only the amount that agent chose
            assets_bought = available_amount_to_buy_with * amount
            # Update the quote asset balance
            self._quote_asset -= assets_bought * current_price
            # Update the base asset
            self._base_asset += assets_bought
            # substract trading fee from base asset based on the amount bought
            self._base_asset -= self._trading_fee * assets_bought

            # Add to trade history the amount bought if greater than 0
            if assets_bought > 0:
                self.trade_history.append({'step': self._current_candle, 'type': 'Buy', 'amount': assets_bought, 'price': current_price, 'total' : assets_bought * current_price, 'percent_amount': action[1]})
        

        elif action_type == 1: # Sell
            # Sell % assets
            # Determine the amount of base asset that can be sold
            amount_to_sell = self._base_asset * amount
            received_quote_asset = amount_to_sell * current_price
            # Update the quote asset
            self._quote_asset += received_quote_asset
            # Update the base asset
            self._base_asset -= amount_to_sell
            
            # substract trading fee from quote asset based on the amount sold
            self._quote_asset -= self._trading_fee * received_quote_asset

            # Add to trade history the amount sold if greater than 0
            if amount_to_sell > 0:
                self.trade_history.append({'step': self._current_candle, 'type': 'Sell', 'amount': amount_to_sell, 'price': current_price, 'total' : received_quote_asset, 'percent_amount': action[1]})

        else:
            # Hold
            self.trade_history.append({'step': self._current_candle, 'type': 'Hold', 'amount': '0', 'price': current_price, 'total' : 0, 'percent_amount': action[1]})


        # Update the current net worth
        self._net_worth = self._base_asset * current_price + self._quote_asset


    def step(self, action):
        """
        Returns the next observation, reward, done and info.
        """
        
        self._take_action(action)

        # Calculate reward comparing the current net worth with the previous net worth
        reward = self._net_worth - self._previous_net_worth

        self._total_reward_accumulated += reward

        # Update the previous net worth to be the current net worth after the reward has been applied
        self._previous_net_worth = self._net_worth

        obs = self._get_observation()
        # Update the info and add it to history data
        info = dict (
            total_reward_accumulated = self._total_reward_accumulated,
            net_worth = self._net_worth,
            last_action_type = self.trade_history[-1]['type'] if len(self.trade_history) > 0 else None,
            last_action_amount = self.trade_history[-1]['amount'] if len(self.trade_history) > 0 else None,
            current_step = self._current_candle
        )

        self._current_candle += 1

        # Update observation history
        self._obs_env_history.append([self._net_worth, self._base_asset, self._quote_asset])

        self._done = self._net_worth <= 0 or self._current_candle >= (len(
            self.df_normal.loc[:, 'open'].values) - 30)# We assume that the last observation is not the last row of the dataframe, in order to avoid the case where there are no calculated indicators.

        if self._done:
            print('I have finished the episode')
        
        return obs, reward, self._done, info


    def _get_observation(self):
        """
        Returns the current observation.
        """
        data_frame = self.features[(self._current_candle - self.window_size):self._current_candle]

        obs_env_history = np.array(self._obs_env_history).astype(np.float32)

        #TODO We definetely need to scale the observation history in a better way, this might influence training results
        # Doing it ad-hoc might change the scale of the min and max, thus changing the results
        obs_env_history = preprocessing.minmax_scale(obs_env_history, (-0.9,0.9)) 

        obs = np.hstack((data_frame, obs_env_history[(self._current_candle - self.window_size):self._current_candle]))

        return obs


    def render(self, mode='human', **kwargs):
        """
        Renders a plot with trades made by the agent.
        """
        
        if mode == 'human':
            print(f'Accumulated Reward: {self._total_reward_accumulated} ---- Current Net Worth: {self._net_worth}')
            print(f'Current Quote asset: {self._quote_asset} ---- Current Base asset: {self._base_asset}')
            print(f'Number of trades: {len(self.trade_history)}')
        
            if(len(self.trade_history) > 0):
                print(f'Last Action: {self.trade_history[-1]["type"]} {self.trade_history[-1]["amount"]} assets ({self.trade_history[-1]["percent_amount"]} %) at price {self.trade_history[-1]["price"]}, total: {self.trade_history[-1]["total"]}')
            print(f'--------------------------------------------------------------------------------------')
        elif mode == 'live':
            pass
            # if self.visualization == None:
            #     self.visualization = LiveTradingGraph(self.df_normal, kwargs.get('title', None))

            # if self._current_candle > OBSERVATION_WINDOW_SIZE:
            #     self.visualization.render(self._current_candle, self._net_worth, self.trade_history, window_size=OBSERVATION_WINDOW_SIZE)

    def close(self):
        if self.visualization != None:
            self.visualization.close()
            self.visualization = None
         

    def _process_data(self, df_scaled):
        """
        Processes the dataframe into features.
        """
        
        prices = self.df_scaled.loc[:, 'close'].to_numpy(dtype=np.float32)

        data_frame = df_scaled.iloc[:, 1:] # drop first column which is date TODO: Should be probably fixed outside of this class
        # Convert df to numpy array
        return prices, data_frame.to_numpy(dtype=np.float32)

    def _initial_obs_data(self):
        for i in range(self.window_size - len(self._obs_env_history)):
            self._obs_env_history.append([self._net_worth, self._base_asset, self._quote_asset])


### Create a vectorized ENV as wrapper for parallelization

### Init ray and trainer config

### How to setup a cluster fast:

1. You need to decide which PC/server will be the head node and just run, this will be the master node
``` ray start --head --port 6379 ```
2. On the worker nodes you need to make sure that
a. You have the same ray and python version
b. You have installed all needed pip deps:
```pip install ray torch torchvision tabulate tensorboard tensorflow sklearn```
c. You have network conectivity to the head node (```telnet head-node-ip 6379```)
3. On all the machines that you want them to act as worker nodes you run :
```ray start --address="192.168.0.206:6379" --node-ip-address="192.168.0.150" --num-cpus=4```
a. address is the head node ip
b. node ip address is the ip of the local node
c. num cpus - how many cpu power you want to alocate from the current worker node

That's all :) Enjoy


Very important notes about parallelism :

1. ```num_worker``` is per trial ! so if you increase this you will eat up resources and won't be able to scale to multiple parallel trials when doing hyperparam tuning, see below
2. a trial uses 2 CPU resources to run (1 for the driver ```num_cpus_for_driver``` -> and 1 for the worker (```num_cpus_per_worker```)). 
3. a trial has only 1 driver, while it can have multiple workers

In [19]:
import os
import time
import ray
import os
from ray import tune
from ray.rllib.env.vector_env import VectorEnv
from ray.tune.registry import register_env



# Get the current working directory and create a folder to store the results
# cwd = os.getcwd()
# local_dir = "~/ray_results/"
# if not os.path.exists(local_dir):
#     os.makedirs(local_dir)


# Let's define some tuning parameters
FC_SIZE = tune.grid_search([[256, 256], [1024], [128, 64, 32]])  
LEARNING_RATE = tune.grid_search([0.001, 0.0005, 0.00001])  
MINIBATCH_SIZE = tune.grid_search([5, 10, 20])  
GAMMA = tune.grid_search([0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])


# Initialize Ray
# ray.init()
# ray.init(num_gpus=1, num_cpus=4, object_store_memory=8e+7, _memory=1e+8)
ray.shutdown()
ray.init(address="localhost:6379")


# Register our environment, specifying which is the environment creation function
trading_fee = 0.0075
env = SimpleTradingEnv(X_train_scaled, X_train, trading_fee)


vectorized_env = VectorEnv.vectorize_gym_envs(lambda _: env, num_envs=2)

tune.register_env("SimpleTradingEnv-v01", lambda env_ctx: vectorized_env)

# Create the ppo trainer configuration
ppo_trainer_config = {
        "env": "SimpleTradingEnv-v01",
        "horizon": len(env.df_scaled) - 30,
        "log_level": "WARNING",
        "framework": "torch",
        "ignore_worker_failures": True,
        "num_workers": 1, # Number of workers is per trial run, so the more we put the less parallelism we have
        "num_envs_per_worker": 5, 
        "rollout_fragment_length": 200, # Size of batches collected from each worker. If num_envs_per_worker is > 1 the rollouts will be multiplied by num_envs_per_worker
        "train_batch_size": 4000, # Number of timesteps collected for each SGD round. This defines the size of each SGD epoch. the batch size is composed of fragments defined above
        "num_gpus": 0,
        "clip_rewards": None,
        "num_cpus_per_worker": 1,
        "num_cpus_for_driver": 1,
        #"vf_clip_param": 100, # Default is 10, but we increase it to 100 to adapt it to rewards scale which can go as in thousands scale 
        #"lr": LEARNING_RATE,  # Hyperparameter grid search defined above
        "gamma": GAMMA,  # This can have a big impact on the result and needs to be properly tuned
        "observation_filter": "MeanStdFilter",
        "model": {
        #    "fcnet_hiddens": FC_SIZE,  # Hyperparameter grid search defined above
            "use_lstm": True,
        },
        #"sgd_minibatch_size": MINIBATCH_SIZE,  # Hyperparameter grid search defined above
        "evaluation_interval": 3,  # Run one evaluation step on every 3rd `Trainer.train()` call.
        "evaluation_config": {
            "explore": False,  # We don't want to explore during evaluation. All actions have to be repeatable.
        },
        "logger_config": {
            "logdir": "/tmp/ray_logging/",
            "type": "ray.tune.logger.UnifiedLogger",
        }
    }

ppo_trainer_config_default = {
        "env": "SimpleTradingEnv-v01",
}


2022-04-26 01:53:59,470	INFO worker.py:946 -- Connecting to existing Ray cluster at address: 172.23.158.93:6379


In [20]:
import time

@ray.remote
def f():
    time.sleep(0.01)
    return ray._private.services.get_node_ip_address()

# Get a list of the IP addresses of the nodes that have joined the cluster.
set(ray.get([f.remote() for _ in range(1000)]))

{'172.23.158.93', '192.168.0.150'}

### Run ray tune 

In [22]:
analysis = tune.run(
    run_or_experiment="PPO",  
    name="Trading_Experiment_Distributed_v01",
    metric='episode_reward_mean',
    mode='max',
    stop={
        "training_iteration": 10 # Condition to stop the experiment
        
    },
    config=ppo_trainer_config,
    num_samples=1,  # Have one sample for each hyperparameter combination. You can have more to average out randomness.
    keep_checkpoints_num=10,  # Keep the last 10 checkpoints
    checkpoint_freq=3,  # Checkpoint every 3 iterations
    local_dir="/tmp/ray_results/",  # Local directory to store checkpoints and results
    
)

2022-04-26 01:56:09,685	INFO trial_runner.py:803 -- starting PPO_SimpleTradingEnv-v01_e5a17_00000
[2m[36m(PPOTrainer pid=29081)[0m 2022-04-26 01:56:12,308	INFO ppo.py:268 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.


Trial name,status,loc,gamma
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9
PPO_SimpleTradingEnv-v01_e5a17_00001,PENDING,,0.95
PPO_SimpleTradingEnv-v01_e5a17_00002,PENDING,,0.98
PPO_SimpleTradingEnv-v01_e5a17_00003,PENDING,,0.99
PPO_SimpleTradingEnv-v01_e5a17_00004,PENDING,,0.995
PPO_SimpleTradingEnv-v01_e5a17_00005,PENDING,,0.999
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999


2022-04-26 01:56:15,220	INFO trial_runner.py:803 -- starting PPO_SimpleTradingEnv-v01_e5a17_00001
[2m[36m(PPOTrainer pid=29175)[0m 2022-04-26 01:56:17,949	INFO ppo.py:268 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.


Trial name,status,loc,gamma
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95
PPO_SimpleTradingEnv-v01_e5a17_00002,PENDING,,0.98
PPO_SimpleTradingEnv-v01_e5a17_00003,PENDING,,0.99
PPO_SimpleTradingEnv-v01_e5a17_00004,PENDING,,0.995
PPO_SimpleTradingEnv-v01_e5a17_00005,PENDING,,0.999
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999


2022-04-26 01:56:20,768	INFO trial_runner.py:803 -- starting PPO_SimpleTradingEnv-v01_e5a17_00002
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m 2022-04-26 01:56:49,823	INFO ppo.py:268 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.


Trial name,status,loc,gamma
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98
PPO_SimpleTradingEnv-v01_e5a17_00003,PENDING,,0.99
PPO_SimpleTradingEnv-v01_e5a17_00004,PENDING,,0.995
PPO_SimpleTradingEnv-v01_e5a17_00005,PENDING,,0.999
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999


2022-04-26 01:57:21,053	INFO trial_runner.py:803 -- starting PPO_SimpleTradingEnv-v01_e5a17_00003
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m 2022-04-26 01:57:21,131	INFO trainable.py:152 -- Trainable.setup took 33.710 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(PPOTrainer pid=29406)[0m 2022-04-26 01:57:23,656	INFO ppo.py:268 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.


Trial name,status,loc,gamma
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99
PPO_SimpleTradingEnv-v01_e5a17_00004,PENDING,,0.995
PPO_SimpleTradingEnv-v01_e5a17_00005,PENDING,,0.999
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999


2022-04-26 01:57:26,488	INFO trial_runner.py:803 -- starting PPO_SimpleTradingEnv-v01_e5a17_00004
[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m 2022-04-26 01:57:52,703	INFO ppo.py:268 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.


Trial name,status,loc,gamma
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995
PPO_SimpleTradingEnv-v01_e5a17_00005,PENDING,,0.999
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999


2022-04-26 01:58:21,459	INFO trial_runner.py:803 -- starting PPO_SimpleTradingEnv-v01_e5a17_00005
[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m 2022-04-26 01:58:21,538	INFO trainable.py:152 -- Trainable.setup took 29.509 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(PPOTrainer pid=29614)[0m 2022-04-26 01:58:24,018	INFO ppo.py:268 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.


Trial name,status,loc,gamma
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999


Result for PPO_SimpleTradingEnv-v01_e5a17_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2022-04-26_01-56-41
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 32eea118660d4a2c9ed46d3b84cf9914
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.20000000000000004
          cur_lr: 5.0000000000000016e-05
          entropy: 5.681170410751014
          entropy_coeff: 0.0
          kl: 0.025381647583137275
          policy_loss: -0.031834741572659184
          total_loss: 6.5573145791276115
          vf_explained_var: -0.12255806903685293
          vf_loss: 6.584072956587038
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 4000
    num_agent_steps_t



Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00002:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2022-04-26_01-57-54
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: dd0d57cba8984fe49e2124ecf89294cb
  hostname: it-omy-as.local
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.20000000000000004
          cur_lr: 5.0000000000000016e-05
          entropy: 5.68106423347227
          entropy_coeff: 0.0
          kl: 0.025004559543023826
          policy_loss: -0.038480183818647935
          total_loss: 9.04142698536637
          vf_explained_var: -0.11779553140363386
          vf_loss: 9.074906243560134
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 4000
    num_agent_steps_trai

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,1.0,26.8764,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,1.0,33.1913,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,1.0,23.755,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,1.0,26.8764,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,1.0,33.1913,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,1.0,23.755,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,1.0,26.8764,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,1.0,33.1913,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,1.0,23.755,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,1.0,26.8764,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,1.0,33.1913,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,1.0,23.755,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,1.0,26.8764,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,1.0,33.1913,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,1.0,23.755,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00004:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2022-04-26_01-58-53
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 6862bef14da44b7480e7539873d2a2e6
  hostname: it-omy-as.local
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.20000000000000004
          cur_lr: 5.0000000000000016e-05
          entropy: 5.685612210407052
          entropy_coeff: 0.0
          kl: 0.022625190785695467
          policy_loss: -0.035615519398663156
          total_loss: 9.207500391621743
          vf_explained_var: -0.06348847323848356
          vf_loss: 9.238590919843285
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 4000
    num_agent_steps_tr

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,1.0,26.8764,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,1.0,33.1913,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,1.0,23.755,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,1.0,31.9298,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,1.0,26.8764,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,1.0,33.1913,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,1.0,23.755,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,1.0,31.9298,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,,,,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00005:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2022-04-26_01-59-03
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 45dd10d3265e4bf99e19ea258fa447b9
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.20000000000000004
          cur_lr: 5.0000000000000016e-05
          entropy: 5.682134931318221
          entropy_coeff: 0.0
          kl: 0.023711955980948544
          policy_loss: -0.040056765109540954
          total_loss: 9.382460492144348
          vf_explained_var: -0.027788645862251198
          vf_loss: 9.417774873138756
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 4000
    num_agent_steps_t

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,1.0,26.8764,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,1.0,33.1913,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,1.0,23.755,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,1.0,31.9298,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,1.0,36.6875,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00002:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2022-04-26_01-59-09
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: dd0d57cba8984fe49e2124ecf89294cb
  hostname: it-omy-as.local
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 5.659064213947583
          entropy_coeff: 0.0
          kl: 0.022753507680990683
          policy_loss: -0.018446277996702922
          total_loss: 0.9244933666441069
          vf_explained_var: -0.6343606709793049
          vf_loss: 0.9361135930423775
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
   

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,1.0,26.5896,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,1.0,31.9298,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,1.0,36.6875,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2022-04-26_01-59-13
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 32eea118660d4a2c9ed46d3b84cf9914
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 5.673946264738678
          entropy_coeff: 0.0
          kl: 0.01952777456652413
          policy_loss: -0.04690189845459435
          total_loss: 0.10494205026026134
          vf_explained_var: -0.5289339612889034
          vf_loss: 0.14598561780607347
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
   

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,1.0,31.9298,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,1.0,36.6875,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,1.0,31.9298,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,1.0,36.6875,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,1.0,31.9298,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,1.0,36.6875,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,1.0,31.9298,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,1.0,36.6875,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00004:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2022-04-26_01-59-34
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 6862bef14da44b7480e7539873d2a2e6
  hostname: it-omy-as.local
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 5.675625635475241
          entropy_coeff: 0.0
          kl: 0.026063532095843105
          policy_loss: -0.025352835046347753
          total_loss: 1.3625130150930316
          vf_explained_var: -0.7387294402045589
          vf_loss: 1.3800467948599529
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
   

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,1.0,36.6875,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,1.0,36.6875,4000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00005:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2022-04-26_01-59-48
  done: false
  episode_len_mean: .nan
  episode_media: {}
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 45dd10d3265e4bf99e19ea258fa447b9
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 5.678498045603434
          entropy_coeff: 0.0
          kl: 0.024487365357657914
          policy_loss: -0.022532328859131822
          total_loss: 1.4954438427856231
          vf_explained_var: -0.5054594349476599
          vf_loss: 1.5106299402192236
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
   

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(RolloutWorker pid=34513, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(RolloutWorker pid=34513, ip=127.0.0.1)[0m I have finished the episode




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(RolloutWorker pid=29674)[0m I have finished the episode
[2m[36m(RolloutWorker pid=29674)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,2.0,73.7513,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,2.0,73.6399,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,2.0,70.2148,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
Result for PPO_SimpleTradingEnv-v01_e5a17_00001:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2022-04-26_02-04-21
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4157.7215076296425
  episode_reward_mean: -4999.990532114716
  episode_reward_min: -5842.259556599789
  episodes_this_iter: 2
  episodes_total: 2
  evaluation:
    custom_metrics: {}
    episode_len_mean: 5711.0
    episode_media: {}
    episode_reward_max: -3133.6689443818486
    episode_reward_mean: -4689.315162959441
    episode_reward_min: -6242.50267558146
    episodes_this_iter: 10
    hist_stats:
      episode_lengths:
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      episode_reward:
      - -5093.169677224441
      - -3133.6689443818486
      - -5168.92566536

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,3.0,382.807,12000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,3.0,381.146,12000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,3.0,380.71,12000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,3.0,382.807,12000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,3.0,381.146,12000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,3.0,380.71,12000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,3.0,382.807,12000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,3.0,381.146,12000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,2.0,75.5658,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,3.0,380.71,12000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


2022-04-26 02:04:41,122	ERROR trial_runner.py:1136 -- Trial PPO_SimpleTradingEnv-v01_e5a17_00002: Error handling checkpoint /tmp/ray_results/Trading_Experiment_Distributed_v01/PPO_SimpleTradingEnv-v01_e5a17_00002_2_gamma=0.98_2022-04-26_01-56-20/checkpoint_000003/checkpoint-3
Traceback (most recent call last):
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/ray/tune/trial_runner.py", line 1126, in _process_trial_save
    self._callbacks.on_checkpoint(
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/ray/tune/callback.py", line 280, in on_checkpoint
    callback.on_checkpoint(**info)
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/ray/tune/syncer.py", line 566, in on_checkpoint
    self._sync_trial_checkpoint(trial, checkpoint)
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/ray/tune/syncer.py", line 516, in _sync_trial_checkpoint
    raise TuneError(
ra

[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode
Result for PPO_SimpleTradingEnv-v01_e5a17_00002:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2022-04-26_02-04-41
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4772.401930310689
  episode_reward_mean: -4999.995257370149
  episode_reward_min: -5227.58858442961
  episodes_this_iter: 2
  episodes_total: 2
  evaluation:
    custom_metrics: {}
    episode_len_mean: 5711.0
    episode_media: {}
    episode_reward_max: -2933.420228181613
    episode_reward_mean: -4998.636144919812
    episode_reward_min: -7063.804642674202
    episodes_this_iter: 10
    hist_stats:
      episode_lengths:
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      episode_reward:
      - -4635.901590160951
      - -5362.0860335634

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,3.0,382.807,12000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,3.0,381.146,12000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,3.0,406.948,12000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,3.0,380.71,12000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,3.0,382.807,12000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,3.0,381.146,12000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,3.0,406.948,12000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,3.0,380.71,12000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00001:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2022-04-26_02-04-55
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4157.7215076296425
  episode_reward_mean: -4999.990532114716
  episode_reward_min: -5842.259556599789
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 2db40fbd67dd43c7a882286a0552d14a
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.682248837460754
          entropy_coeff: 0.0
          kl: 0.01905840966169552
          policy_loss: -0.016983288263161016
          total_loss: 6.642844277423035
          vf_explained_var: -0.10908033943945361
          vf_loss: 6.651251280275725
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agen



Result for PPO_SimpleTradingEnv-v01_e5a17_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2022-04-26_02-04-57
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4042.515057156027
  episode_reward_mean: -4999.99712882373
  episode_reward_min: -5957.479200491434
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 32eea118660d4a2c9ed46d3b84cf9914
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 5.678740779815182
          entropy_coeff: 0.0
          kl: 0.023771057364040164
          policy_loss: -0.03500677759215231
          total_loss: 5.703503365303961
          vf_explained_var: -0.1174770475074809
          vf_loss: 5.731378823743071
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_steps_sampled: 1



Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,4.0,417.654,16000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,4.0,415.489,16000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,3.0,406.948,12000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,3.0,380.71,12000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,2.0,72.8757,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,2.0,82.0171,8000.0,,,,
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00003:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2022-04-26_02-04-59
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4377.291455997336
  episode_reward_mean: -4999.99534000337
  episode_reward_min: -5622.699224009404
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 0270b1c273654a7fa6e5019e3dab01a5
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.676320572822324
          entropy_coeff: 0.0
          kl: 0.02136589528852573
          policy_loss: -0.025520169182169823
          total_loss: 8.107837352123592
          vf_explained_var: -0.14049506879621937
          vf_loss: 8.12374287330976
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_s



[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode
Result for PPO_SimpleTradingEnv-v01_e5a17_00005:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2022-04-26_02-05-00
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4725.853526615661
  episode_reward_mean: -4999.99774917355
  episode_reward_min: -5274.141971731439
  episodes_this_iter: 2
  episodes_total: 2
  evaluation:
    custom_metrics: {}
    episode_len_mean: 5711.0
    episode_media: {}
    episode_reward_max: -3048.380953487484
    episode_reward_mean: -4991.018719392328
    episode_reward_min: -6932.8383707794055
    episodes_this_iter: 10
    hist_stats:
      episode_lengths:
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      episode_reward:
      - -5655.339205791124
      - -4320.315624759771
      - -3048.3809534874

2022-04-26 02:05:01,849	ERROR trial_runner.py:1136 -- Trial PPO_SimpleTradingEnv-v01_e5a17_00004: Error handling checkpoint /tmp/ray_results/Trading_Experiment_Distributed_v01/PPO_SimpleTradingEnv-v01_e5a17_00004_4_gamma=0.995_2022-04-26_01-57-26/checkpoint_000003/checkpoint-3
Traceback (most recent call last):
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/ray/tune/trial_runner.py", line 1126, in _process_trial_save
    self._callbacks.on_checkpoint(
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/ray/tune/callback.py", line 280, in on_checkpoint
    callback.on_checkpoint(**info)
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/ray/tune/syncer.py", line 566, in on_checkpoint
    self._sync_trial_checkpoint(trial, checkpoint)
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/ray/tune/syncer.py", line 516, in _sync_trial_checkpoint
    raise TuneError(
r

[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode
Result for PPO_SimpleTradingEnv-v01_e5a17_00004:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2022-04-26_02-05-01
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4955.408180240646
  episode_reward_mean: -4999.996214069799
  episode_reward_min: -5044.584247898952
  episodes_this_iter: 2
  episodes_total: 2
  evaluation:
    custom_metrics: {}
    episode_len_mean: 5711.0
    episode_media: {}
    episode_reward_max: -3622.849990382024
    episode_reward_mean: -4748.174436794776
    episode_reward_min: -5370.963855548011
    episodes_this_iter: 10
    hist_stats:
      episode_lengths:
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      - 5711
      episode_reward:
      - -3622.849990382024
      - -5173.270665346

Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,4.0,417.654,16000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,4.0,415.489,16000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,3.0,406.948,12000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,4.0,416.055,16000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,3.0,400.233,12000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,3.0,393.906,12000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,4.0,417.654,16000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,4.0,415.489,16000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,3.0,406.948,12000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,4.0,416.055,16000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,3.0,400.233,12000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,3.0,393.906,12000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00002:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2022-04-26_02-05-13
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4772.401930310689
  episode_reward_mean: -4999.995257370149
  episode_reward_min: -5227.58858442961
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: dd0d57cba8984fe49e2124ecf89294cb
  hostname: it-omy-as.local
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.680268366618823
          entropy_coeff: 0.0
          kl: 0.02303272418237181
          policy_loss: -0.021133587156893105
          total_loss: 7.789317800024504
          vf_explained_var: -0.1838640655881615
          vf_loss: 7.800086670216694
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_s



Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,4.0,417.654,16000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,4.0,415.489,16000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,4.0,439.021,16000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,4.0,416.055,16000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,3.0,400.233,12000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,3.0,393.906,12000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,4.0,417.654,16000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,4.0,415.489,16000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,4.0,439.021,16000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,4.0,416.055,16000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,3.0,400.233,12000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,3.0,393.906,12000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,4.0,417.654,16000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,4.0,415.489,16000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,4.0,439.021,16000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,4.0,416.055,16000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,3.0,400.233,12000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,3.0,393.906,12000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,4.0,417.654,16000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,4.0,415.489,16000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,4.0,439.021,16000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,4.0,416.055,16000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,3.0,400.233,12000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,3.0,393.906,12000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00004:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2022-04-26_02-05-35
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4955.408180240646
  episode_reward_mean: -4999.996214069799
  episode_reward_min: -5044.584247898952
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 6862bef14da44b7480e7539873d2a2e6
  hostname: it-omy-as.local
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.684784034503403
          entropy_coeff: 0.0
          kl: 0.019697399403379168
          policy_loss: -0.01634556937722429
          total_loss: 8.903224942248354
          vf_explained_var: -0.07075109026765311
          vf_loss: 8.910706675821736
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent



Result for PPO_SimpleTradingEnv-v01_e5a17_00005:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2022-04-26_02-05-36
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4725.853526615661
  episode_reward_mean: -4999.99774917355
  episode_reward_min: -5274.141971731439
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 45dd10d3265e4bf99e19ea258fa447b9
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.683405365482454
          entropy_coeff: 0.0
          kl: 0.01922646278054804
          policy_loss: -0.030017339810729026
          total_loss: 8.935008511107455
          vf_explained_var: -0.021012871880685128
          vf_loss: 8.95637397048294
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_



Result for PPO_SimpleTradingEnv-v01_e5a17_00001:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2022-04-26_02-05-39
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4157.7215076296425
  episode_reward_mean: -4999.990532114716
  episode_reward_min: -5842.259556599789
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 2db40fbd67dd43c7a882286a0552d14a
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.65735073243418
          entropy_coeff: 0.0
          kl: 0.017071897113916053
          policy_loss: -0.032812143148202196
          total_loss: 0.153512404881598
          vf_explained_var: -0.6446443013606533
          vf_loss: 0.1786421950851437
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agen



Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,4.0,417.654,16000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,4.0,439.021,16000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,4.0,416.055,16000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,4.0,433.437,16000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,4.0,429.11,16000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2022-04-26_02-05-41
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4042.515057156027
  episode_reward_mean: -4999.99712882373
  episode_reward_min: -5957.479200491434
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 32eea118660d4a2c9ed46d3b84cf9914
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.66605870134087
          entropy_coeff: 0.0
          kl: 0.019472370682264206
          policy_loss: -0.052602503420684926
          total_loss: 0.1305563926686763
          vf_explained_var: -0.4921989772268521
          vf_loss: 0.1743963327609323
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent



Result for PPO_SimpleTradingEnv-v01_e5a17_00003:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2022-04-26_02-05-42
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4377.291455997336
  episode_reward_mean: -4999.99534000337
  episode_reward_min: -5622.699224009404
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 0270b1c273654a7fa6e5019e3dab01a5
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 5.663873540201495
          entropy_coeff: 0.0
          kl: 0.01955950681160214
          policy_loss: -0.03866636152789172
          total_loss: 0.36164645090579023
          vf_explained_var: -0.45324370777735146
          vf_loss: 0.3871101453419655
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agen



Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,4.0,439.021,16000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,4.0,433.437,16000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,4.0,429.11,16000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,4.0,439.021,16000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,4.0,433.437,16000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,4.0,429.11,16000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00002:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2022-04-26_02-05-55
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4772.401930310689
  episode_reward_mean: -4999.995257370149
  episode_reward_min: -5227.58858442961
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: dd0d57cba8984fe49e2124ecf89294cb
  hostname: it-omy-as.local
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.6750000000000002
          cur_lr: 5.0000000000000016e-05
          entropy: 5.666619034736387
          entropy_coeff: 0.0
          kl: 0.019616792276902224
          policy_loss: -0.035929911792959256
          total_loss: 0.5317441868936262
          vf_explained_var: -0.3735417292964074
          vf_loss: 0.5544327656988816
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agen



[2m[36m(RolloutWorker pid=29244)[0m I have finished the episode
[2m[36m(RolloutWorker pid=29244)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,4.0,433.437,16000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,4.0,429.11,16000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(RolloutWorker pid=29134)[0m I have finished the episode
[2m[36m(RolloutWorker pid=29134)[0m I have finished the episode
[2m[36m(RolloutWorker pid=29466)[0m I have finished the episode
[2m[36m(RolloutWorker pid=29466)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,4.0,433.437,16000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,4.0,429.11,16000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,4.0,433.437,16000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,4.0,429.11,16000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,4.0,433.437,16000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,4.0,429.11,16000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(RolloutWorker pid=32539, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(RolloutWorker pid=32539, ip=127.0.0.1)[0m I have finished the episode
Result for PPO_SimpleTradingEnv-v01_e5a17_00004:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2022-04-26_02-06-17
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4955.408180240646
  episode_reward_mean: -4999.996214069799
  episode_reward_min: -5044.584247898952
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 6862bef14da44b7480e7539873d2a2e6
  hostname: it-omy-as.local
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.678874909493231
          entropy_coeff: 0.0
          kl: 0.019783113893842505
          policy_loss: -0.034241922685157654
          total_loss: 0.3670887526670491
     



Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,4.0,429.11,16000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Result for PPO_SimpleTradingEnv-v01_e5a17_00005:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2022-04-26_02-06-24
  done: false
  episode_len_mean: 5711.0
  episode_media: {}
  episode_reward_max: -4725.853526615661
  episode_reward_mean: -4999.99774917355
  episode_reward_min: -5274.141971731439
  episodes_this_iter: 0
  episodes_total: 2
  experiment_id: 45dd10d3265e4bf99e19ea258fa447b9
  hostname: DESKTOP-TRKQ95T
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 5.669939839968118
          entropy_coeff: 0.0
          kl: 0.02317972671721251
          policy_loss: -0.03086036063570489
          total_loss: 0.5597495572265958
          vf_explained_var: -0.7459573031753622
          vf_loss: 0.5801790401379587
        model: {}
        num_agent_steps_trained: 127.74193548387096
    num_agent_



Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(RolloutWorker pid=34513, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(RolloutWorker pid=34513, ip=127.0.0.1)[0m I have finished the episode




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(RolloutWorker pid=29674)[0m I have finished the episode
[2m[36m(RolloutWorker pid=29674)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,




Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=33531, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29614)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29081)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29175)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode
[2m[36m(PPOTrainer pid=29406)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode
[2m[36m(PPOTrainer pid=31576, ip=127.0.0.1)[0m I have finished the episode


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


Trial name,status,loc,gamma,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_SimpleTradingEnv-v01_e5a17_00000,RUNNING,172.23.158.93:29081,0.9,5.0,461.377,20000.0,-5000.0,-4042.52,-5957.48,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00001,RUNNING,172.23.158.93:29175,0.95,5.0,459.412,20000.0,-4999.99,-4157.72,-5842.26,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00002,RUNNING,192.168.0.150:31576,0.98,5.0,480.826,20000.0,-5000.0,-4772.4,-5227.59,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00003,RUNNING,172.23.158.93:29406,0.99,5.0,459.234,20000.0,-5000.0,-4377.29,-5622.7,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00004,RUNNING,192.168.0.150:33531,0.995,5.0,475.648,20000.0,-5000.0,-4955.41,-5044.58,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00005,RUNNING,172.23.158.93:29614,0.999,5.0,477.389,20000.0,-5000.0,-4725.85,-5274.14,5711.0
PPO_SimpleTradingEnv-v01_e5a17_00006,PENDING,,0.9999,,,,,,,


### Evaluate trained model from checkpoint

In [None]:
from ray.rllib.agents import ppo
agent = ppo.PPOTrainer(env="SimpleTradingEnv-v01")
agent.restore(analysis.get_last_checkpoint())

In [None]:
from ray.rllib.agents import ppo
from ray.tune.registry import register_env

trading_fee = 0.0075
env = SimpleTradingEnv(X_train_scaled, X_train, trading_fee)
register_env("SimpleTradingEnv-v01", lambda _: env)

ray.init()
trainer = ppo.PPOTrainer(env="SimpleTradingEnv-v01", config=ppo_trainer_config)

while True:
    print(trainer.train())



In [16]:
import ray
ray.shutdown()

In [1]:
import tensorflow as tf
tf.config.list_physical_devices()

2022-04-23 02:30:40.046472: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-23 02:30:40.057895: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-23 02:30:40.058428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]