In [1]:
from typing import Dict, List, Tuple, Union, Any, Optional

import pandas as pd
import numpy as np
import os

import ray
from ray import train, tune

from src.datasets import daocensus

%load_ext autoreload
%autoreload 2

In [2]:
SEED: int = 57

# Dataset splits config
N_SPLITS: int = 5
SKIP_SPLIT: int = 1

# Number of samples to check hyperparamters
N_SAMPLES: int = 10

# Training config
MAX_EPOCHS: int = 15
EPOCHS_PER_ITER: int = 5

# Eval config
TOP_K: int = 5

# Load data

In [3]:
dfv, dfp = daocensus.get("./data/daos-census", 'Decentraland', 'snapshot')
print(dfv.info())
print(dfp.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116560 entries, 0 to 116559
Data columns (total 10 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   platform       116560 non-null  object        
 1   name           116560 non-null  object        
 2   id             116560 non-null  object        
 3   proposal       116560 non-null  category      
 4   deployment     116560 non-null  object        
 5   platform_vote  116560 non-null  object        
 6   voter          116560 non-null  category      
 7   date           116560 non-null  datetime64[ns]
 8   choice         116560 non-null  object        
 9   weight         116560 non-null  float64       
dtypes: category(2), datetime64[ns](1), float64(1), object(6)
memory usage: 7.9+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1962 entries, 0 to 1961
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------    

## Transform data

In [4]:
def to_microsoft(dfv):
    df = dfv[['voter', 'proposal', 'date']].rename(columns={
        'voter': 'userID',
        'proposal': 'itemID',
        'date': 'timestamp',
    })
    df['itemID'] = df['itemID'].astype('str')
    df['rating'] = 1
    return df

df = to_microsoft(dfv)
df.head()

Unnamed: 0,userID,itemID,timestamp,rating
0,0xe7af1c70f8f089c4c3bd71999692c6c5a15d9e2a,b86aa059-3d31-5d41-a472-70962816f779,2021-12-17 12:28:01,1
1,0xc54a6c3778016b06cbd126ccc3b5bc06c5f666fb,b86aa059-3d31-5d41-a472-70962816f779,2021-12-17 02:16:23,1
2,0xd82d005e8f8d5385db40ba23884a5c967bb1e8af,b86aa059-3d31-5d41-a472-70962816f779,2021-12-17 00:38:22,1
3,0xf4c64db66ffb301985f5ecd85c8f3f9c02f2659d,b86aa059-3d31-5d41-a472-70962816f779,2021-12-16 18:47:08,1
4,0xd5e9ef1cedad0d135d543d286a2c190b16cbb89e,b86aa059-3d31-5d41-a472-70962816f779,2021-12-16 18:32:15,1


# Split data

Each proposal remains open for a few days, our environment is different of a movies recommender system. For this reason, we will use a TimeSeriesSplit instead of a K-Fold to cross-validate the model.

![](https://scikit-learn.org/stable/_images/sphx_glr_plot_cv_indices_013.png)

El TimeSeriesSplit de scikit-learn no nos vale porque el número de elementos en cada split es el mismo, pero el tamaño del intervalo, no. Como queremos simular un comportamiento realista, haremos el split dividiendo por intervalos de igual longitud.

In [5]:
def timeIntervalSplit(df: pd.DataFrame, splits: int, timestamp_col: str = 'timestamp', skip: int = 0, remove_not_in_train: str = None):
    total_time_diff = df[timestamp_col].max() - df[timestamp_col].min()
    k_time_diff = total_time_diff / (splits+1)

    acc_time = df[timestamp_col].min() + (1+skip)*k_time_diff
    for i in range(splits - skip):
        end_time = acc_time + k_time_diff
        
        train = df[df[timestamp_col] <= acc_time]
        test = df[ (acc_time < df[timestamp_col]) & (df[timestamp_col] < end_time) ]

        if remove_not_in_train is not None:
            msk = test[remove_not_in_train].isin(set(train[remove_not_in_train]))
            test = test[msk]
        
        acc_time = end_time
        yield train, test

max_train_prev = df['timestamp'].min().date()
folds = list(timeIntervalSplit(df, N_SPLITS, skip=SKIP_SPLIT, remove_not_in_train='userID'))
for i, (dftrain, dftest) in enumerate(folds):
    min_train = dftrain['timestamp'].min().date()
    max_train = dftrain['timestamp'].max().date()
    min_test  = dftest['timestamp'].min().date()
    max_test  = dftest['timestamp'].max().date()
    train_diff = (max_train-max_train_prev).days
    test_diff = (max_test-min_test).days

    train_users = len(set(dftrain['userID']))
    test_users = len(set(dftest['userID']))
    
    print(f"Split {i}, train from: {max_train_prev} to {max_train}, test from: {min_test} to {max_test}")
    print(f"  len(train): {len(dftrain)}, len(test): {len(dftest)}")
    print(f"  users(train): {train_users}, users(test): {test_users}")

    max_train_prev = max_train

Split 0, train from: 2021-05-24 to 2022-02-10, test from: 2022-02-11 to 2022-06-22
  len(train): 21485, len(test): 7268
  users(train): 3040, users(test): 807
Split 1, train from: 2022-02-10 to 2022-06-22, test from: 2022-06-22 to 2022-10-31
  len(train): 32472, len(test): 8780
  users(train): 4031, users(test): 850
Split 2, train from: 2022-06-22 to 2022-10-31, test from: 2022-10-31 to 2023-03-11
  len(train): 49437, len(test): 15135
  users(train): 4870, users(test): 1089
Split 3, train from: 2022-10-31 to 2023-03-11, test from: 2023-03-11 to 2023-07-20
  len(train): 89511, len(test): 20209
  users(train): 6490, users(test): 1179


# Defining training

In [6]:
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN

class LightGCNCustom(LightGCN):
    # Copied from LightGCN.fit but RETURNING the data and deleting unnecessary things
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.epochs_done = 0
    
    def fit_epoch(self):
        """Fit the model on self.data.train. If eval_epoch is not -1, evaluate the model on `self.data.test`
        every `eval_epoch` epoch to observe the training status.

        """
        loss, mf_loss, emb_loss = 0.0, 0.0, 0.0
        n_batch = self.data.train.shape[0] // self.batch_size + 1
        for idx in range(n_batch):
            users, pos_items, neg_items = self.data.train_loader(self.batch_size)
            _, batch_loss, batch_mf_loss, batch_emb_loss = self.sess.run(
                [self.opt, self.loss, self.mf_loss, self.emb_loss],
                feed_dict={
                    self.users: users,
                    self.pos_items: pos_items,
                    self.neg_items: neg_items,
                },
            )
            loss += batch_loss / n_batch
            mf_loss += batch_mf_loss / n_batch
            emb_loss += batch_emb_loss / n_batch

        if np.isnan(loss):
            print("ERROR: loss is nan.")
            sys.exit()

        self.epochs_done += 1

        return loss, mf_loss, emb_loss

2023-10-19 18:09:33.090914: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-19 18:09:33.092418: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-19 18:09:33.114446: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-19 18:09:33.114472: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-19 18:09:33.114493: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

In [7]:
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF

class TrainLightGCN(tune.Trainable):
    def setup(
        self,
        config: Dict[str, Any],
        folds: List[Tuple[pd.DataFrame, pd.DataFrame]],
    ):
        self.config = config
        config['batch_size'] = 2**config['batch_size']
        self.fold = config['__trial_index__']

        self.hparams = prepare_hparams(
            model_type='lightgcn',
            n_layers=config['conv_layers'],
            batch_size=config['batch_size'],
            embed_size=config['embedding_dim'],
            epochs=EPOCHS_PER_ITER,
            learning_rate=config['learning_rate'],
            decay=config['l2'],
            metrics=["recall", "ndcg", "precision", "map"],
            eval_epoch=-1,
            top_k=TOP_K,
            save_model=False,
            MODEL_DIR='./data/model/lightgcn/',
        )

        self.dataloader = ImplicitCF(train=folds[self.fold][0], test=folds[self.fold][1], seed=SEED)
        self.model = LightGCNCustom(self.hparams, self.dataloader, seed=SEED)

    @property
    def iteration(self):
        return self.model.epochs_done

    @property
    def training_iteration(self):
        return self.model.epochs_done

    def step(self):
        """
        As a rule of thumb, the execution time of step should be large enough to avoid overheads 
        (i.e. more than a few seconds), but short enough to report progress periodically 
        (i.e. at most a few minutes).
        """
        assert EPOCHS_PER_ITER > 0
        
        for _ in range(EPOCHS_PER_ITER):
            ret = self.model.fit_epoch()

        return {
            'iteration': self.iteration,
            'loss': ret[0],
            'mf_loss': ret[1],
            'emb_loss': ret[2],
            **{k:v for k,v in zip(self.model.metrics, self.model.run_eval())},
        }

    def save_checkpoint(self, checkpoint_dir):
        checkpoint_path = os.path.join(checkpoint_dir, "model")
        self.model.saver.save(
            sess=self.model.sess,
            save_path=checkpoint_path,
        )
        return checkpoint_dir

    def load_checkpoint(self, checkpoint_path):
        self.model.load(checkpoint_path)

In [8]:
len(folds[-1][0]), len(folds[-1][1])

(89511, 20209)

In [9]:
hparams = prepare_hparams(
    model_type='lightgcn',
    n_layers=3,
    batch_size=512,
    embed_size=64,
    epochs=3,
    learning_rate=0.001,
    decay=0.001,
    metrics=["recall", "ndcg", "precision", "map"],
    eval_epoch=2,
    top_k=TOP_K,
    save_model=False,
    MODEL_DIR='./data/model/lightgcn/',
)
dataloader = ImplicitCF(train=folds[-1][0], test=folds[-1][1], seed=SEED)
print("items:", dataloader.n_items, "user:", dataloader.n_users)
model = LightGCNCustom(
    hparams,
    dataloader,
    seed=SEED,
)

  df = train if test is None else train.append(test)


items: 1942 user: 6490
Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


2023-10-19 18:09:35.839858: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled


In [10]:
model.fit()

Epoch 1 (train)3.2s: train loss = 0.62072 = (mf)0.62018 + (embed)0.00053
Epoch 2 (train)3.0s + (eval)0.2s: train loss = 0.37196 = (mf)0.36881 + (embed)0.00315, recall = 0.00011, ndcg = 0.00052, precision = 0.00068, map = 0.00003
Epoch 3 (train)3.8s: train loss = 0.25453 = (mf)0.24863 + (embed)0.00590


In [None]:
import ray
from ray.tune.search.hyperopt import HyperOptSearch

search_alg = HyperOptSearch()
search_alg = tune.search.Repeater(search_alg, N_SPLITS)

if os.uname().nodename == 'lamarck':
    assert torch.cuda.is_available()
    
    NUM_SAMPLES = 250
    # Every run takes approx half a gig of vram (no optimizations)
    # The RTX 4090 has 24GB so we can run the model about 48 times
    resources_per_trial={
        'cpu': 1,
        'gpu': 1/32,
    }
else:
    NUM_SAMPLES = 10
    resources_per_trial={
        'cpu': 2,
        'memory': 2e9,
    }

tuner = tune.Tuner(
    tune.with_resources(
        tune.with_parameters(TrainLightGCN, folds=folds),
        resources_per_trial,
    ),
    run_config=train.RunConfig(
        stop={'training_iteration': MAX_EPOCHS/EPOCHS_PER_ITER}
    ),
    param_space=dict(
        # batch size between 2**4 (32) and 2**10 (1024)
        batch_size=tune.randint(4,10),
        embedding_dim=tune.qlograndint(10, 500, 5),
        conv_layers=tune.randint(2,6),
        learning_rate=tune.qloguniform(1e-5, 1, 1e-5),
        l2=tune.loguniform(1e-9, 1e-1),
    ),
    tune_config=tune.TuneConfig(
        search_alg=search_alg,
        num_samples=(N_SPLITS-SKIP_SPLIT)*N_SAMPLES,
        metric='loss',
        mode='min',
    )
)
tuner.fit()

0,1
Current time:,2023-10-19 18:31:13
Running for:,00:21:23.53
Memory:,13.5/15.3 GiB

Trial name,# failures,error file
TrainLightGCN_167299ac,1,"/home/davo/ray_results/TrainLightGCN_2023-10-19_18-09-46/TrainLightGCN_167299ac_5_trial_index=4,batch_size=6,conv_layers=2,embedding_dim=50,l2=0.0012,learning_rate=0.0877_2023-10-19_18-11-34/error.txt"

Trial name,status,loc,__trial_index__,batch_size,conv_layers,embedding_dim,l2,learning_rate,iter,total time (s),iteration,loss,mf_loss
TrainLightGCN_d77400b4,RUNNING,147.96.25.138:525162,2,5,3,90,0.000224256,1e-05,2.0,489.057,10.0,0.679903,0.67985
TrainLightGCN_8b340b7a,RUNNING,147.96.25.138:525477,3,5,3,90,0.000224256,1e-05,,,,,
TrainLightGCN_c1c3d069,PENDING,,4,5,3,90,0.000224256,1e-05,,,,,
TrainLightGCN_b887b41c,TERMINATED,147.96.25.138:523873,0,6,2,50,0.00121384,0.08772,3.0,43.6344,15.0,0.428304,0.131344
TrainLightGCN_4da34b60,TERMINATED,147.96.25.138:523935,1,6,2,50,0.00121384,0.08772,3.0,87.4525,15.0,0.513906,0.144161
TrainLightGCN_9f8694fc,TERMINATED,147.96.25.138:524264,2,6,2,50,0.00121384,0.08772,3.0,168.535,15.0,0.578789,0.167185
TrainLightGCN_1631ec7e,TERMINATED,147.96.25.138:524360,3,6,2,50,0.00121384,0.08772,3.0,453.478,15.0,0.615839,0.18426
TrainLightGCN_567e5c01,TERMINATED,147.96.25.138:524660,0,5,3,90,0.000224256,1e-05,3.0,155.308,15.0,0.677105,0.677041
TrainLightGCN_f00ee419,TERMINATED,147.96.25.138:524859,1,5,3,90,0.000224256,1e-05,3.0,354.418,15.0,0.672138,0.67207
TrainLightGCN_167299ac,ERROR,,4,6,2,50,0.00121384,0.08772,,,,,


[2m[36m(pid=523873)[0m 2023-10-19 18:09:51.819780: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[2m[36m(pid=523873)[0m 2023-10-19 18:09:51.822034: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[2m[36m(pid=523873)[0m 2023-10-19 18:09:51.848179: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[2m[36m(pid=523873)[0m 2023-10-19 18:09:51.848212: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[2m[36m(pid=523873)[0m 2023-10-19 18:09:

[2m[36m(TrainLightGCN pid=523873)[0m Already create adjacency matrix.
[2m[36m(TrainLightGCN pid=523873)[0m Already normalize adjacency matrix.
[2m[36m(TrainLightGCN pid=523873)[0m Using xavier initialization.


[2m[36m(TrainLightGCN pid=523873)[0m 2023-10-19 18:09:53.630715: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
[2m[36m(pid=523935)[0m 2023-10-19 18:09:55.918813: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[2m[36m(pid=523935)[0m 2023-10-19 18:09:55.970066: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(pid=523935)[0m 2023-10-19 18:09:55.962725: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc

[2m[36m(TrainLightGCN pid=523935)[0m Already create adjacency matrix.
[2m[36m(TrainLightGCN pid=523935)[0m Already normalize adjacency matrix.
[2m[36m(TrainLightGCN pid=523935)[0m Using xavier initialization.


[2m[36m(TrainLightGCN pid=523935)[0m   df = train if test is None else train.append(test)
[2m[36m(TrainLightGCN pid=523935)[0m 2023-10-19 18:09:59.001593: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
[2m[36m(TrainLightGCN pid=523873)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/davo/ray_results/TrainLightGCN_2023-10-19_18-09-46/TrainLightGCN_b887b41c_1_trial_index=0,batch_size=6,conv_layers=2,embedding_dim=50,l2=0.0012,learning_rate=0.0877_2023-10-19_18-09-50/checkpoint_000000)
[2m[36m(pid=524264)[0m 2023-10-19 18:10:41.167689: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[2m[36m(pid=524264)[0m 2023-10-19 18:10:41.170527: I tensorflow/tsl/cuda/cudart_stub.cc:28]

[2m[36m(TrainLightGCN pid=524264)[0m Already create adjacency matrix.
[2m[36m(TrainLightGCN pid=524264)[0m Already normalize adjacency matrix.
[2m[36m(TrainLightGCN pid=524264)[0m Using xavier initialization.


[2m[36m(TrainLightGCN pid=524264)[0m 2023-10-19 18:10:45.204113: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
[2m[36m(TrainLightGCN pid=523935)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/davo/ray_results/TrainLightGCN_2023-10-19_18-09-46/TrainLightGCN_4da34b60_2_trial_index=1,batch_size=6,conv_layers=2,embedding_dim=50,l2=0.0012,learning_rate=0.0877_2023-10-19_18-09-53/checkpoint_000000)
[2m[36m(pid=524360)[0m 2023-10-19 18:11:30.401243: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[2m[36m(pid=524360)[0m 2023-10-19 18:11:30.404357: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[2m[36m(pid=524360)[0m

[2m[36m(TrainLightGCN pid=524360)[0m Already create adjacency matrix.
[2m[36m(TrainLightGCN pid=524360)[0m Already normalize adjacency matrix.
[2m[36m(TrainLightGCN pid=524360)[0m Using xavier initialization.


[2m[36m(TrainLightGCN pid=524360)[0m 2023-10-19 18:11:34.720742: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
[2m[36m(TrainLightGCN pid=524264)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/davo/ray_results/TrainLightGCN_2023-10-19_18-09-46/TrainLightGCN_9f8694fc_3_trial_index=2,batch_size=6,conv_layers=2,embedding_dim=50,l2=0.0012,learning_rate=0.0877_2023-10-19_18-09-59/checkpoint_000000)
[2m[36m(pid=524620)[0m 2023-10-19 18:13:37.307207: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[2m[36m(pid=524620)[0m 2023-10-19 18:13:37.309500: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[2m[36m(pid=524620)[0m

[2m[36m(TrainLightGCN pid=524660)[0m Already create adjacency matrix.
[2m[36m(TrainLightGCN pid=524660)[0m Already normalize adjacency matrix.
[2m[36m(TrainLightGCN pid=524660)[0m Using xavier initialization.


[2m[36m(TrainLightGCN pid=524660)[0m 2023-10-19 18:13:45.473457: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
[2m[36m(TrainLightGCN pid=524660)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/davo/ray_results/TrainLightGCN_2023-10-19_18-09-46/TrainLightGCN_567e5c01_6_trial_index=0,batch_size=5,conv_layers=3,embedding_dim=90,l2=0.0002,learning_rate=0.0000_2023-10-19_18-13-40/checkpoint_000000)
[2m[36m(pid=524859)[0m 2023-10-19 18:16:23.885322: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[2m[36m(pid=524859)[0m 2023-10-19 18:16:23.887746: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[2m[36m(pid=524859)[0m

[2m[36m(TrainLightGCN pid=524859)[0m Already normalize adjacency matrix.
[2m[36m(TrainLightGCN pid=524859)[0m Already create adjacency matrix.
[2m[36m(TrainLightGCN pid=524859)[0m Using xavier initialization.


[2m[36m(TrainLightGCN pid=524859)[0m 2023-10-19 18:16:26.786180: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
[2m[36m(TrainLightGCN pid=524360)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/davo/ray_results/TrainLightGCN_2023-10-19_18-09-46/TrainLightGCN_1631ec7e_4_trial_index=3,batch_size=6,conv_layers=2,embedding_dim=50,l2=0.0012,learning_rate=0.0877_2023-10-19_18-10-45/checkpoint_000000)
[2m[36m(pid=525162)[0m 2023-10-19 18:19:11.971165: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[2m[36m(pid=525162)[0m 2023-10-19 18:19:11.974118: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[2m[36m(pid=525162)[0m

[2m[36m(TrainLightGCN pid=525162)[0m Already create adjacency matrix.
[2m[36m(TrainLightGCN pid=525162)[0m Already normalize adjacency matrix.
[2m[36m(TrainLightGCN pid=525162)[0m Using xavier initialization.


[2m[36m(TrainLightGCN pid=525162)[0m 2023-10-19 18:19:16.260014: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
[2m[36m(TrainLightGCN pid=524859)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/davo/ray_results/TrainLightGCN_2023-10-19_18-09-46/TrainLightGCN_f00ee419_7_trial_index=1,batch_size=5,conv_layers=3,embedding_dim=90,l2=0.0002,learning_rate=0.0000_2023-10-19_18-13-45/checkpoint_000000)
[2m[36m(pid=525477)[0m 2023-10-19 18:22:24.891528: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[2m[36m(pid=525477)[0m 2023-10-19 18:22:24.914396: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[2m[36m(pid=525477)[0m

[2m[36m(TrainLightGCN pid=525477)[0m Already create adjacency matrix.
[2m[36m(TrainLightGCN pid=525477)[0m Already normalize adjacency matrix.
[2m[36m(TrainLightGCN pid=525477)[0m Using xavier initialization.


[2m[36m(TrainLightGCN pid=525477)[0m 2023-10-19 18:22:30.483738: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
