<a href="https://colab.research.google.com/github/briag1/ParsingEval/blob/main/Hyperparams_tuning_with_reduced_search_space.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
# @title device
def get_device():
  """Pick the torch device to compute on, preferring the GPU.

  Prints a one-line message saying which backend was selected.

  Returns:
    torch.device: ``cuda`` when CUDA is available, ``cpu`` otherwise.
  """
  use_gpu = torch.cuda.is_available()
  if use_gpu:
    print("CUDA is available. Using GPU.")
  else:
    print("CUDA is not available. Using CPU.")
  return torch.device("cuda" if use_gpu else "cpu")

# Device selected once for the whole notebook.
device = get_device()

CUDA is available. Using GPU.


# Preprocessing

In [None]:
# @title code
from os import makedirs
import torch
import math
import os
import string
import shutil
import subprocess
import pandas as pd
import numpy as np
import pandas as pd

def get_x(value):
    """Extract the first coordinate from a raw location cell.

    Args:
        value: either a ``"first/second"`` string or a float.

    Returns:
        The float before the ``/`` for strings, the value itself for floats,
        and ``None`` for any other type.
    """
    if isinstance(value, float):
        return value
    if isinstance(value, str):
        first_part = value.split("/")[0]
        return float(first_part)
    return None

def get_y(value):
    """Extract the second coordinate from a raw location cell.

    Args:
        value: either a ``"first/second"`` string or a float.

    Returns:
        The float after the first ``/`` for strings, the value itself for
        floats, and ``None`` for any other type.
    """
    if isinstance(value, float):
        return value
    if isinstance(value, str):
        second_part = value.split("/")[1]
        return float(second_part)
    return None

def read_dataframe(name):
  """Make sure a pickled copy of the spreadsheet ``name + ".xlsx"`` exists.

  When ``name + ".pkl"`` is missing, the Excel file is read with pandas and
  written back as a pickle next to it; otherwise nothing is read.

  Args:
    name: path of the file without its extension.
  """
  pickle_path = name + ".pkl"
  if os.path.exists(pickle_path):
    print("using already read daframe")
    return
  print("reading dataframe: "+name+".xlsx")
  pd.read_excel(name + ".xlsx").to_pickle(pickle_path)

def get_vocab(poses,vocab):
  """Extend ``vocab`` in place with ids for positions it does not know yet.

  A position is skipped when it is already a key of ``vocab`` or when any of
  its components is a float NaN. A new position receives ``len(vocab) + 1``
  at the time it is inserted.

  Args:
    poses: iterable of hashable coordinate tuples.
    vocab: dict mapping position -> id; mutated and returned.

  Returns:
    The same ``vocab`` dict.
  """
  def has_nan(coords):
    for coord in coords:
      if isinstance(coord, float) and math.isnan(coord):
        return True
    return False

  for position in poses:
    if position in vocab or has_nan(position):
      continue
    vocab[position] = len(vocab) + 1
  return vocab

def get_fix_time_encoding(df):
  """Add cyclic sine/cosine encodings of the ``"start time"`` column to ``df``.

  For each of month, day, hour, minute and second, two columns
  ``<unit>_sin`` and ``<unit>_cos`` are added, computed as
  ``sin(2*pi*value/period)`` and ``cos(2*pi*value/period)`` with periods
  12, 31, 24, 60 and 60 respectively.

  Args:
    df: DataFrame with a datetime-like ``"start time"`` column; mutated in place.

  Returns:
    The same DataFrame. Returning it matters because the caller rebinds its
    frame to the result of this function.
  """
  start = df["start time"].dt
  unit_periods = (
      ("month", start.month, 12),
      ("day", start.day, 31),
      ("hour", start.hour, 24),
      ("minute", start.minute, 60),
      ("second", start.second, 60),
  )
  for unit, values, period in unit_periods:
    angle = 2 * np.pi * values / period
    df[unit + "_sin"] = np.sin(angle)
    df[unit + "_cos"] = np.cos(angle)
  return df
def get_time_data(df):
  """Add raw calendar components of ``"start time"`` as integer columns.

  Adds ``month``, ``day``, ``hour``, ``minute`` and ``second`` columns, in
  that order, to ``df`` (mutated in place).

  Args:
    df: DataFrame with a datetime-like ``"start time"`` column.

  Returns:
    The same DataFrame.
  """
  start = df["start time"].dt
  for unit in ("month", "day", "hour", "minute", "second"):
    df[unit] = getattr(start, unit)
  return df


def tokenize_pos(pos,vocab):
  """Map a coordinate pair to its id in ``vocab``.

  Args:
    pos: pair of numbers (first two components are inspected).
    vocab: dict mapping position -> id.

  Returns:
    ``len(vocab)`` when both components are NaN, otherwise ``vocab[pos]``.

  Raises:
    KeyError: if ``pos`` is not fully-NaN and is missing from ``vocab``.
  """
  # NOTE(review): get_vocab hands out ids 1..len(vocab), so the id returned for
  # a fully-NaN position equals the id of the last position added. Confirm
  # this overlap is intended.
  is_missing = math.isnan(pos[0]) and math.isnan(pos[1])
  return len(vocab) if is_missing else vocab[pos]

def get_coordinates(df,input_position,full_dataset):
  """Add ``x``/``y`` coordinate columns (and optionally standardised copies).

  Args:
    df: DataFrame mutated in place. With ``full_dataset`` it must hold
      ``latitude`` and ``longitude`` columns; otherwise a
      ``location(latitude/lontitude)`` column parsed by ``get_x``/``get_y``.
    input_position: when truthy, also add ``x_normalised``/``y_normalised``,
      each column minus its mean divided by its standard deviation.
    full_dataset: selects which source columns are read.

  Returns:
    The same DataFrame.
  """
  if full_dataset:
    df['x'], df['y'] = df['latitude'], df['longitude']
  else:
    df['x'] = df['location(latitude/lontitude)'].apply(get_x)
    df['y'] = df['location(latitude/lontitude)'].apply(get_y)

  if input_position:
    for axis in ('x', 'y'):
      column = df[axis]
      df[axis + '_normalised'] = (column - column.mean()) / column.std()

  return df

def get_joined_coordinates(df):
  """Pair up ``x`` and ``y`` into a ``pos`` column and list the distinct pairs.

  Args:
    df: DataFrame with ``x`` and ``y`` columns; gains a ``pos`` column of tuples.

  Returns:
    The unique values of the new ``pos`` column, in order of first appearance.
  """
  coordinate_pairs = list(zip(df['x'], df['y']))
  df['pos'] = coordinate_pairs
  return df['pos'].unique()

def get_col_to_keep_and_drop(fixed_time_encoding,input_position,full_dataset):
  """Work out which columns are dropped, fed as input, or kept per user.

  Args:
    fixed_time_encoding: when truthy the sin/cos time columns become model
      input; otherwise the raw month/day/hour/minute/second columns are kept.
    input_position: when truthy the normalised coordinates become model input.
    full_dataset: selects which raw location columns are dropped.

  Returns:
    Tuple ``(col_to_drop_in_df, col_to_drop_in_dict, col_in_input,
    col_to_add_to_dict)`` of column-name lists.
  """
  time_units = ('month', 'day', 'hour', 'minute', 'second')
  cyclic_cols = [unit + suffix for unit in time_units for suffix in ('_sin', '_cos')]
  position_cols = ['x_normalised', 'y_normalised']
  if full_dataset:
    location_cols = ['latitude', 'longitude']
  else:
    location_cols = ['location(latitude/lontitude)']

  col_to_drop_in_df = ['date', 'end time', 'pos'] + location_cols
  col_to_drop_in_dict = ['x', 'y', 'time_to_end', 'time_to_next', 'start time', 'user id']
  col_in_input = []
  col_to_add_to_dict = []

  if fixed_time_encoding:
    col_to_drop_in_dict.extend(cyclic_cols)
    col_in_input.extend(cyclic_cols)
  else:
    col_to_add_to_dict.extend(time_units)

  if input_position:
    col_to_drop_in_dict.extend(position_cols)
    col_in_input.extend(position_cols)

  return col_to_drop_in_df,col_to_drop_in_dict,col_in_input,col_to_add_to_dict

def process_user_data(df_user,vocab,col_in_input,col_to_drop_in_dict,col_to_add_to_dict,with_repeated_connections):
  """Turn one user's rows into a dict of aligned input/target tensors.

  Every per-step tensor drops the last row, so step ``i`` is paired with the
  station id of row ``i + 1`` as ``pos_id_target``.

  Args:
    df_user: one user's DataFrame; gains a ``time_to_next`` column.
    vocab: not used by this function.
    col_in_input: columns stacked into the ``"input"`` tensor (skipped if empty).
    col_to_drop_in_dict: keys removed from the dict once tensors are built.
    col_to_add_to_dict: columns converted to their own per-step tensors.
    with_repeated_connections: when falsy, consecutive identical stations are
      merged and a trailing repeated step is removed.

  Returns:
    dict with ``pos_id``, ``pos_id_target``, ``time_target`` and, depending on
    the arguments, ``input`` and the ``col_to_add_to_dict`` keys.
  """
  # diff(-1) subtracts the following row from the current one, so the last row is NaT/NaN.
  # NOTE(review): for rows sorted by ascending start time this is negative -- confirm the sign is intended.
  df_user["time_to_next"] =  df_user["start time"].diff(-1).dt.total_seconds()
  dict_user=df_user.to_dict('list')
  # Inputs are rows [0, n-1) and targets are rows [1, n); both slices read the
  # original list because the right-hand tuple is evaluated before assignment.
  dict_user["pos_id"],dict_user["pos_id_target"]=torch.tensor(dict_user["pos_id"][:-1]),torch.tensor(dict_user["pos_id"][1:])

  if col_in_input:
    # Stack the selected columns as (n_rows, n_columns), then drop the last row.
    dict_user["input"]=torch.tensor([dict_user[col] for col in col_in_input]).T
    dict_user["input"]=dict_user["input"][:-1]

  if col_to_add_to_dict:
    for col in col_to_add_to_dict:
      dict_user[col]=torch.tensor(dict_user[col])
      dict_user[col]=dict_user[col][:-1]

  # Regression target per step: column 0 is time_to_end, column 1 is time_to_next.
  dict_user["time_target"]=torch.tensor([dict_user["time_to_end"],dict_user["time_to_next"]]).T
  dict_user["time_target"]=dict_user["time_target"][:-1]
  # Must run after the tensors above, which still read these raw list columns.
  for e in col_to_drop_in_dict:
    dict_user.pop(e)

  if not with_repeated_connections:
    dict_user=combine_repeated_connections_in_sequence_user(dict_user)
    dict_user=delete_end_of_sequence_repeated_connections(dict_user)
  return dict_user

def delete_end_of_sequence_repeated_connections(dict_user):
  """Drop the last step when it targets the station it is already on.

  Only the final step is inspected; at most one step is removed.

  Args:
    dict_user: dict of equally long sequences holding at least ``pos_id`` and
      ``pos_id_target``; every value is sliced with ``[:-1]`` when trimming.

  Returns:
    The same dict, trimmed or untouched. An empty sequence is returned as is.
  """
  # An empty sequence has no last step; indexing [-1] on it would raise IndexError.
  if len(dict_user['pos_id'])==0:
    return dict_user
  if dict_user['pos_id'][-1]==dict_user["pos_id_target"][-1]:
    for key in dict_user:
      dict_user[key]=dict_user[key][:-1]
  return dict_user


def combine_repeated_connections_in_sequence_user(dict_user):
  """Merge steps whose next station is the same as the current one.

  Whenever step ``index`` targets its own station, it takes over the station
  target and the time target of step ``index + 1``, and step ``index + 1`` is
  removed from every sequence in the dict. The last step is never examined
  by this loop.

  Args:
    dict_user: dict of equally long tensors holding at least ``pos_id``,
      ``pos_id_target`` and ``time_target``.

  Returns:
    The same dict with merged sequences.
  """
  index=0
  while index < len(dict_user["pos_id"])-1:
    if dict_user["pos_id"][index]==dict_user["pos_id_target"][index]:
      # Written in place before the tensors are rebuilt by the concatenation below.
      dict_user["pos_id_target"][index]=dict_user["pos_id_target"][index+1]
      dict_user["time_target"][index]=dict_user["time_target"][index+1]
      for key in dict_user:
        # Remove element index+1 from every sequence.
        dict_user[key]=torch.cat((dict_user[key][:index+1],dict_user[key][index+2:]))
      # index is deliberately not advanced: the merged step is checked again.
    else:
      index+=1


  return dict_user


def normalize_output(list_users):
  """Standardise every user's ``time_target`` with statistics pooled over all users.

  The mean and standard deviation are computed per target column over the
  concatenation of all users' ``time_target`` rows.

  Args:
    list_users: list of dicts holding a 2-D ``time_target`` tensor; each dict
      is updated in place.

  Returns:
    The same list. An empty list is returned unchanged, because there is
    nothing to pool and ``torch.cat`` rejects an empty list.
  """
  if not list_users:
    return list_users
  # Pool the targets of all users to get one mean/std per target column.
  time_targets=torch.cat([dict_user["time_target"] for dict_user in list_users],dim=0)
  time_targets_mean=time_targets.mean(dim=0)
  time_targets_std=time_targets.std(dim=0)
  for dict_user in list_users:
    dict_user["time_target"]=(dict_user["time_target"]-time_targets_mean)/time_targets_std
  return list_users



def process_dataframe(name,vocab,fixed_time_encoding,input_position,full_dataset,with_repeated_connections,min_sequence_size,format=".pkl"):
  """Load one pickled DataFrame and convert it into per-user tensor dicts.

  Args:
    name: path of the pickled frame without extension.
    vocab: position -> id dict, extended with the positions found here.
    fixed_time_encoding: use sin/cos time columns instead of raw time columns.
    input_position: add normalised coordinates to the model input.
    full_dataset: selects the raw coordinate column layout.
    with_repeated_connections: forwarded to ``process_user_data``.
    min_sequence_size: minimum number of rows a user needs to be kept.
    format: file extension appended to ``name``.

  Returns:
    ``(list_users, vocab)``: the processed user dicts with normalised time
    targets, and the extended vocabulary.
  """
  # NOTE(review): read_pickle executes arbitrary pickled code; only load trusted files.
  df= pd.read_pickle(name+format)
  df=df.sort_values('start time')
  # Drops the 'month' column that comes with the loaded frame.
  df=df.drop(['month'],axis=1)

  df=get_coordinates(df,input_position,full_dataset)

  poses=get_joined_coordinates(df)
  vocab=get_vocab(poses,vocab)
  df['pos_id'] = df['pos'].apply(lambda pos: tokenize_pos(pos,vocab))

  # Duration of each connection, in seconds.
  df['time_to_end']=df['end time']-df['start time']
  df['time_to_end']=df['time_to_end'].dt.total_seconds()
  # Both helpers must return the DataFrame they annotate, since the result is rebound to df.
  if fixed_time_encoding:
    df=get_fix_time_encoding(df)
  else:
    df=get_time_data(df)

  col_to_drop_in_df,col_to_drop_in_dict,col_in_input,col_to_add_to_dict=get_col_to_keep_and_drop(fixed_time_encoding,input_position,full_dataset)
  df=df.drop(col_to_drop_in_df, axis=1)

  df_user_group = df.groupby('user id')
  list_users=[]
  for user, df_user in df_user_group:
    # Skip users with too few rows or with any missing x coordinate.
    if len(df_user)>=min_sequence_size and not df_user['x'].isnull().values.any():
        prossessed_user_data=process_user_data(df_user,vocab,col_in_input,col_to_drop_in_dict,col_to_add_to_dict,with_repeated_connections)
        # Processing drops at least one step, hence the min_sequence_size-1 threshold.
        if prossessed_user_data["pos_id"].shape[0]>=min_sequence_size-1:
          list_users.append(prossessed_user_data)
  # Statistics are computed over the users of this frame only.
  list_users=normalize_output(list_users)

  return list_users,vocab

def runcmd(cmd, verbose = False, *args, **kwargs):
    """Run ``cmd`` through the shell and wait for it to finish.

    Args:
        cmd: command line, executed with ``shell=True``.
        verbose: when truthy, print the stripped stdout followed by stderr.
        *args, **kwargs: accepted but not used.

    Returns:
        None. The exit status is not inspected.
    """
    pipes = dict(stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    process = subprocess.Popen(cmd, shell=True, text=True, **pipes)
    std_out, std_err = process.communicate()
    if not verbose:
        return
    print(std_out.strip(), std_err)

def get_raw_data(directory,src_directory,full_dataset):
  """Fetch the raw dataset.

  With ``full_dataset`` the tree at ``src_directory`` is copied to
  ``directory``; otherwise the public telecom archive is downloaded with
  ``wget`` and unpacked with ``unzip``.

  Args:
    directory: destination of the copy (full dataset only).
    src_directory: source tree to copy (full dataset only).
    full_dataset: selects between the copy and the download.
  """
  if not full_dataset:
    download_commands = (
        'wget http://sguangwang.com/dataset/telecom.zip',
        'unzip /content/telecom.zip',
    )
    for command in download_commands:
      runcmd(command, verbose = False)
    return
  shutil.copytree(src_directory,directory)#telecomDataset6mont

def get_processed_dataset(load_dataset_path):
  """Load a previously saved ``list_users`` / ``vocab`` pair.

  Args:
    load_dataset_path: directory containing the ``list_users`` and ``vocab``
      files written with ``torch.save``.

  Returns:
    ``(list_users, vocab)`` as loaded by ``torch.load``.
  """
  # NOTE(review): torch.load unpickles its input; only point this at trusted files.
  paths = [os.path.join(load_dataset_path, file_name) for file_name in ("list_users", "vocab")]
  print("loading already preprocessed data: ")
  for path in paths:
    print(path)
  list_users, vocab = (torch.load(path) for path in paths)
  return list_users,vocab

def process_raw_data(src_directory_raw_data,directory_raw_data,fixed_time_encoding,input_position,full_dataset,with_repeated_connections,min_sequence_size):
  """Process every raw file of ``directory_raw_data`` into user sequences.

  The raw data is fetched first when the directory does not exist. Each entry
  that does not end in ``.pkl`` is cached as a pickle and then processed; the
  vocabulary is shared and grown across files.

  Args:
    src_directory_raw_data: where the raw data is fetched from if needed.
    directory_raw_data: directory scanned for raw files.
    fixed_time_encoding, input_position, full_dataset,
    with_repeated_connections, min_sequence_size: forwarded to
      ``process_dataframe``.

  Returns:
    ``(list_users, vocab)`` accumulated over all processed files.
  """
  if not os.path.exists(directory_raw_data):
    print('getting raw data at: '+src_directory_raw_data)
    get_raw_data(directory_raw_data,src_directory_raw_data,full_dataset)

  list_users, vocab = [], {}
  for file_name in os.listdir(directory_raw_data):
    if file_name.endswith(".pkl"):
      continue
    # Strip the last extension to get the base path shared by the .xlsx and .pkl files.
    stem = ".".join(file_name.split(".")[:-1])
    complete_name = os.path.join(directory_raw_data, stem)
    print("processing dataframe: "+complete_name)
    read_dataframe(complete_name)
    new_list_users, vocab = process_dataframe(
        complete_name,
        vocab,
        fixed_time_encoding=fixed_time_encoding,
        input_position=input_position,
        full_dataset=full_dataset,
        with_repeated_connections=with_repeated_connections,
        min_sequence_size=min_sequence_size,
    )
    list_users.extend(new_list_users)
  return list_users,vocab

def split_long_sequences(list_users,max_sequence_length):
  """Cut sequences of at least ``max_sequence_length`` steps into chunks.

  A long user becomes ``seq_length // max_sequence_length`` consecutive
  chunks of exactly ``max_sequence_length`` steps. The remaining steps form
  one extra chunk only when there are more than 2 of them; shorter
  remainders are discarded. Shorter users are passed through untouched.

  Args:
    list_users: list of dicts whose values are equally long, sliceable
      sequences. The length is read from ``"input"`` when present, otherwise
      from ``"pos_id"``.
    max_sequence_length: chunk size, a positive integer.

  Returns:
    A new list of user dicts; every chunk is its own dict.
  """
  new_list_users=[]
  for user in list_users:
    length_key = "input" if "input" in user else "pos_id"
    seq_length = user[length_key].shape[0]
    if seq_length < max_sequence_length:
      new_list_users.append(user)
      continue

    nb_of_seq, rest = divmod(seq_length, max_sequence_length)
    # Build a fresh dict per chunk: `n * [{}]` would repeat one shared dict,
    # leaving every chunk equal to the last slice written.
    for chunk_index in range(nb_of_seq):
      start = max_sequence_length * chunk_index
      stop = start + max_sequence_length
      new_list_users.append({key: values[start:stop] for key, values in user.items()})
    if rest > 2:
      new_list_users.append({key: values[-rest:] for key, values in user.items()})

  return new_list_users



def save_processed_data(list_users,vocab,path_to_save_dataset):
    """Write the processed users and the vocabulary with ``torch.save``.

    The target directory is created if needed; the two objects are stored as
    the files ``list_users`` and ``vocab`` inside it.

    Args:
        list_users: processed user dicts.
        vocab: position -> id dict.
        path_to_save_dataset: destination directory.
    """
    print("creating directory: "+path_to_save_dataset)
    os.makedirs(path_to_save_dataset,exist_ok=True)
    print("saving processed data at: ")
    targets = (
        (list_users, os.path.join(path_to_save_dataset,"list_users")),
        (vocab, os.path.join(path_to_save_dataset,"vocab")),
    )
    for _, target_path in targets:
        print(target_path)
    for payload, target_path in targets:
        torch.save(payload, target_path)

def get_processed_data(src_directory_raw_data,directory_raw_data,fixed_time_encoding,input_position,full_dataset,spliting_long_sequences,with_repeated_connections,max_sequence_length=100,min_sequence_size=1,save=False,path_to_save_dataset="/content/drive/MyDrive/telecomDataset6month",download=False,load_dataset_path="/content/drive/MyDrive/telecomDataset6month"):
  """Entry point: obtain the processed user sequences and vocabulary.

  Args:
    src_directory_raw_data, directory_raw_data, fixed_time_encoding,
    input_position, full_dataset, with_repeated_connections,
    min_sequence_size: forwarded to ``process_raw_data`` when ``download``
      is truthy.
    spliting_long_sequences: when truthy, long sequences are split into
      chunks of ``max_sequence_length`` steps.
    max_sequence_length: chunk size used for splitting.
    save: when truthy, the result is written to ``path_to_save_dataset``.
    path_to_save_dataset: output directory used when saving.
    download: when truthy the raw data is processed; otherwise an already
      processed dataset is loaded from ``load_dataset_path``.
    load_dataset_path: directory of the already processed dataset.

  Returns:
    ``(list_users, vocab)``.
  """
  if download:
    list_users,vocab = process_raw_data(src_directory_raw_data,directory_raw_data,fixed_time_encoding,input_position,full_dataset,with_repeated_connections,min_sequence_size=min_sequence_size)
  else:
    list_users,vocab = get_processed_dataset(load_dataset_path)

  if spliting_long_sequences:
    print("spliting sequences longuer than : "+str(max_sequence_length)+ " steps")
    list_users = split_long_sequences(list_users,max_sequence_length)

  if save:
    save_processed_data(list_users,vocab,path_to_save_dataset)

  return list_users,vocab

In [None]:
# Load the processed dataset used by the rest of the notebook.
# With download=False the data is read from load_dataset_path, and the raw-data
# arguments (directories, encoding and filtering flags) are not used by the load itself.
# save=False means path_to_save_dataset is not written to.
list_users,vocab=get_processed_data(src_directory_raw_data="drive/MyDrive/Shanghai-Telcome-Six-Months-DataSet",
                                    directory_raw_data='/content/dataset-telecom-6month',
                                    fixed_time_encoding=False,
                                    input_position=True,
                                    full_dataset=True,
                                    spliting_long_sequences=False,
                                    with_repeated_connections=False,
                                    max_sequence_length=100,
                                    min_sequence_size=2,
                                    save=False,
                                    path_to_save_dataset="/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3",
                                    download=False,
                                    load_dataset_path="/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3",)

loading already preprocessed data: 
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/list_users
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/vocab


In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Sequence-length statistics: mean length, then mean/std/max/min, then a 100-bin histogram.
len_list = []
for user in list_users:
  len_list.append(len(user['pos_id']))
sum_len = sum(len_list)
print(sum_len/len(list_users))
len_array = np.array(len_list)
print(len_array.mean(),len_array.std(),len_array.max(),len_array.min())
plt.hist(len_array,100)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Same sequence-length statistics as the cell above (this cell repeats it verbatim).
len_list = []
for user in list_users:
  len_list.append(len(user['pos_id']))
sum_len = sum(len_list)
print(sum_len/len(list_users))
len_array = np.array(len_list)
print(len_array.mean(),len_array.std(),len_array.max(),len_array.min())
plt.hist(len_array,100)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Same sequence-length statistics as the two cells above (third copy of the same cell).
len_list = []
for user in list_users:
  len_list.append(len(user['pos_id']))
sum_len = sum(len_list)
print(sum_len/len(list_users))
len_array = np.array(len_list)
print(len_array.mean(),len_array.std(),len_array.max(),len_array.min())
plt.hist(len_array,100)

In [None]:
# Among sequences of length 1, the share whose only step targets the station it is already on.
nb_repeated_end_of_sequence=0
len_1=0
for user in list_users:
  if len(user['pos_id'])==1:
    len_1+=1
    if user['pos_id'][-1]==user['pos_id_target'][-1]:
      nb_repeated_end_of_sequence+=1
# Without any length-1 sequence the ratio is undefined; print NaN instead of raising ZeroDivisionError.
print(nb_repeated_end_of_sequence/len_1 if len_1 else float("nan"))

In [None]:
# Repeats the previous cell: share of length-1 sequences whose step targets its own station.
nb_repeated_end_of_sequence=0
len_1=0
for user in list_users:
  if len(user['pos_id'])==1:
    len_1+=1
    if user['pos_id'][-1]==user['pos_id_target'][-1]:
      nb_repeated_end_of_sequence+=1
# Without any length-1 sequence the ratio is undefined; print NaN instead of raising ZeroDivisionError.
print(nb_repeated_end_of_sequence/len_1 if len_1 else float("nan"))

# Dataset

## Reproducibility seed

In [None]:
import hashlib
import string
import random
def get_reproducible_seeds(name="ProjectLong",nb_seeds=100):
    """Derive a deterministic list of integer seeds from a project name.

    The SHA-256 digest of ``name`` seeds Python's global ``random`` module,
    from which ``nb_seeds`` integers in ``[0, 10000]`` are drawn.

    Args:
        name: text the seeds are derived from.
        nb_seeds: number of seeds to generate.

    Returns:
        list[int]: the generated seeds.
    """
    digest = hashlib.sha256(name.encode()).hexdigest()
    # Re-seeds the module-level generator, so later `random` calls are affected too.
    random.seed(int(digest, 16))
    reproducibility_seeds = []
    for _ in range(nb_seeds):
        reproducibility_seeds.append(random.randint(0,10000))
    return reproducibility_seeds

In [None]:
# Notebook-wide seed: the first of the seeds derived from the default project name.
reproducibility_seed=get_reproducible_seeds()[0]

## Dataset

In [None]:
from torch.utils.data import Dataset, DataLoader
class VariableLengthDatasetWithPosID(Dataset):
    """Dataset view over a list of per-user dicts of variable-length sequences.

    Items are returned exactly as stored; padding is left to the collate
    function of the DataLoader.
    """

    def __init__(self, time_series, transform=None):
        """Store the sequences.

        Args:
            time_series: indexable collection of user dicts.
            transform: accepted but not used.
        """
        self.times_series = time_series

    def __len__(self):
        """Number of stored sequences."""
        return len(self.times_series)

    def __getitem__(self, idx):
        """Return the user dict stored at position ``idx``."""
        return self.times_series[idx]

def create_dataset(list_users,split=[0.8,0.1,0.1]):
  """Wrap the users in a dataset and split it reproducibly.

  The split is drawn with a generator seeded by the notebook-level
  ``reproducibility_seed``.

  Args:
    list_users: list of processed user dicts.
    split: lengths or fractions handed to ``torch.utils.data.random_split``.
      The default gives an 80/10/10 split.

  Returns:
    The list of subsets, one per entry of ``split``.
  """
  dataset=VariableLengthDatasetWithPosID(list_users)
  generator = torch.Generator().manual_seed(reproducibility_seed)
  # Honour the caller's `split` (it was previously ignored in favour of a
  # hard-coded list); pass a copy so the shared default list is never exposed.
  dataset_list=torch.utils.data.random_split(dataset,list(split),generator)
  return dataset_list


## Dataloader

In [None]:
def collate_fn_padd(batch_dict):
    '''
    Pad a batch of variable-length user sequences into batch-first tensors.

    Each key of the user dicts is gathered across the batch and padded to the
    longest sequence. Padding values are chosen outside the range of real
    values: 0 for input/month/day, 24 for hour, 60 for minute and second,
    -1 for time_target, and len(vocab) (notebook-level vocabulary) for
    pos_id and pos_id_target. A "lengths" tensor with the unpadded lengths is
    added.

    Only keys that are present in the batch are padded, so batches without
    "input" or without the raw time columns are supported. Lengths are read
    from "input" when present and from "pos_id" otherwise.

    Args:
        batch_dict: non-empty list of user dicts holding tensors.

    Returns:
        dict of padded tensors plus "lengths".
    '''
    dict_batch={key: [d[key] for d in batch_dict] for key in batch_dict[0]}

    length_key = "input" if "input" in dict_batch else "pos_id"
    dict_batch["lengths"] = torch.tensor([ user[length_key].shape[0] for user in batch_dict ])

    station_padding = len(vocab)
    padding_values = {
        "input": 0,
        "month": 0,
        "day": 0,
        "hour": 24,
        "minute": 60,
        "second": 60,
        "time_target": -1,
        "pos_id": station_padding,
        "pos_id_target": station_padding,
    }
    for key, padding_value in padding_values.items():
      if key in dict_batch:
        dict_batch[key] = torch.nn.utils.rnn.pad_sequence(dict_batch[key],batch_first=True,padding_value=padding_value)
    return dict_batch

## Instantiate

In [None]:
# Split the users once, then wrap each subset in a DataLoader that pads its batches.
dataset_list = create_dataset(list_users)
train_dataset, valid_dataset, test_dataset = dataset_list[0], dataset_list[1], dataset_list[2]
# Only the training loader shuffles; evaluation loaders use larger batches.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=collate_fn_padd)
valid_dataloader = DataLoader(valid_dataset, batch_size=256, shuffle=False, collate_fn=collate_fn_padd)
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=False, collate_fn=collate_fn_padd)

# Model

## Transformer Encoder followed by LSTM

### transformer

In [None]:
def get_mask(bath_size,sequence_length,lengths,device):
  """Build an additive padding mask: 0 on real steps, -inf on padded ones.

  Args:
    bath_size: number of rows of the mask.
    sequence_length: number of columns of the mask.
    lengths: iterable with the number of valid steps of each row.
    device: device the mask is placed on.

  Returns:
    Float tensor of shape ``(bath_size, sequence_length)``.
  """
  mask = torch.zeros(bath_size, sequence_length, device=device)
  minus_infinity = float('-inf')
  for row, valid_steps in enumerate(lengths):
    mask[row, valid_steps:] = minus_infinity
  return mask

#### Positional encoding

In [None]:
from torch import nn, Tensor
class VanillaPositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding followed by dropout.

    The table is stored in the ``pe`` buffer with shape
    ``[max_len, 1, d_model]``: even feature indices hold sines, odd ones
    cosines.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 500):
        super().__init__()

        self.dropout = nn.Dropout(p=dropout)

        # One frequency per pair of features.
        frequencies = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = torch.arange(max_len).unsqueeze(1) * frequencies
        table = torch.zeros(max_len, 1, d_model)
        table[:, 0, 0::2] = torch.sin(angles)
        table[:, 0, 1::2] = torch.cos(angles)
        self.register_buffer('pe', table)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``; it is
                transposed internally so that the encoding is indexed by the
                second axis.
        """
        sequence_first = x.transpose(0,1)
        sequence_first = sequence_first + self.pe[:sequence_first.size(0)]
        return self.dropout(sequence_first.transpose(0,1))

In [None]:
class LearnablePositionalEncoding(nn.Module):
    """Learned positional embedding added to the input, followed by dropout."""

    def __init__(self,d_model: int, dropout: float = 0.1, max_len: int = 500):
        """
        Arguments:
            d_model: embedding size.
            dropout: dropout probability applied to the output.
            max_len: number of positions that have an embedding.
        """
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.positional_embedding=nn.Embedding(num_embeddings=max_len,embedding_dim= d_model)
    @property
    def device(self):
      """Device of the module's parameters."""
      return next(self.parameters()).device
    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size,seq_len, embedding_dim]``

        Returns:
            A new tensor; ``x`` itself is left untouched.
        """
        positions = torch.arange(0, x.shape[1], device=self.device)
        # Out-of-place add: `x += ...` would overwrite the caller's tensor.
        x = x + self.positional_embedding(positions)
        return self.dropout(x)

In [None]:
def get_PositionalEncoding(d_model: int, dropout: float = 0.1, max_len: int = 2000, learnable=False):
  """Build the positional-encoding module selected by ``learnable``.

  Args:
    d_model: embedding size.
    dropout: dropout probability of the encoding module.
    max_len: number of positions supported.
    learnable: truthy for the learned embedding, falsy for the sinusoidal one.

  Returns:
    A ``LearnablePositionalEncoding`` or a ``VanillaPositionalEncoding``.
  """
  encoding_class = LearnablePositionalEncoding if learnable else VanillaPositionalEncoding
  return encoding_class(d_model, dropout, max_len)

#### model

In [None]:
class Encoder_Decoder_Transformer(nn.Module):
    def __init__(self,d_model,num_layers=3,nhead=10,dropout=0.1,batch_first=True):
      super().__init__()
      self.transformer=torch.nn.Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_layers, num_decoder_layers=num_layers,  dropout=dropout, batch_first=batch_first)
    def forward(self,x,mask,src_key_padding_mask,is_causal):
      return self.transformer(x,
                       x,
                       src_mask=mask,
                       tgt_mask=mask,
                       memory_mask=mask,
                       src_key_padding_mask=src_key_padding_mask,
                       tgt_key_padding_mask=src_key_padding_mask,
                       memory_key_padding_mask=src_key_padding_mask,
                       src_is_causal=is_causal,
                       tgt_is_causal=is_causal,
                       memory_is_causal=is_causal)



def get_Transformer_architecture(d_model,encoder_only=False,num_layers=3,nhead=10,dropout=0.1,batch_first=True):
  """Build either an encoder-only or an encoder-decoder transformer.

  Args:
    d_model: embedding size.
    encoder_only: truthy for an ``nn.TransformerEncoder``, falsy for the
      ``Encoder_Decoder_Transformer`` wrapper.
    num_layers: number of layers per stack.
    nhead: number of attention heads.
    dropout: dropout probability, applied in both architectures.
    batch_first: whether tensors are ``[batch, seq, feature]``.

  Returns:
    The constructed module.
  """
  if encoder_only:
    # `dropout` must be forwarded here too; otherwise the encoder layer
    # silently falls back to its own default and the argument has no effect.
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout, batch_first=batch_first)
    return nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
  else:
    return Encoder_Decoder_Transformer(d_model,num_layers,nhead,dropout,batch_first=batch_first)

### feature embedding

In [None]:
class TimeStampEmbedding(nn.Module):
  """Embed a timestamp as the sum of month, day, hour, minute and second embeddings.

  Each table has one extra row compared with the number of real values
  (13, 32, 25, 61, 61 rows).
  """
  def __init__(self,embedding_dim,dropout):
    """
    Args:
      embedding_dim: size of every component embedding and of the output.
      dropout: dropout probability applied to the summed embedding.
    """
    super().__init__()
    self.dropout = nn.Dropout(p=dropout)
    self.month_embedding = nn.Embedding(num_embeddings=13,embedding_dim=embedding_dim)
    self.day_embedding = nn.Embedding(num_embeddings=32,embedding_dim=embedding_dim)
    self.hour_embedding = nn.Embedding(num_embeddings=25,embedding_dim=embedding_dim)
    self.minute_embedding = nn.Embedding(num_embeddings=61,embedding_dim=embedding_dim)
    self.second_embedding = nn.Embedding(num_embeddings=61,embedding_dim=embedding_dim)

  def forward(self,dict_batch):
    """Sum the five component embeddings of the batch and apply dropout.

    Args:
      dict_batch: dict with integer tensors under ``month``, ``day``,
        ``hour``, ``minute`` and ``second``.

    Returns:
      Tensor with a trailing ``embedding_dim`` axis.
    """
    # Accumulate with real additions: `embedding=+ x` is an assignment of
    # unary plus and would keep only the last component.
    embedding = self.month_embedding(dict_batch['month'])
    embedding = embedding + self.day_embedding(dict_batch['day'])
    embedding = embedding + self.hour_embedding(dict_batch['hour'])
    embedding = embedding + self.minute_embedding(dict_batch['minute'])
    embedding = embedding + self.second_embedding(dict_batch['second'])
    return self.dropout(embedding)
class StationIdEmbedding(nn.Module):
  """Learned embedding of station ids, followed by dropout."""

  def __init__(self,embedding_dim,nb_of_pos_ids,dropout):
    """
    Args:
      embedding_dim: size of the embedding vectors.
      nb_of_pos_ids: number of rows of the embedding table.
      dropout: dropout probability applied to the looked-up vectors.
    """
    super().__init__()
    self.dropout = nn.Dropout(p=dropout)
    self.stationIdEmbedding = nn.Embedding(
        num_embeddings=nb_of_pos_ids,
        embedding_dim=embedding_dim,
    )

  def forward(self,dict_batch):
    """Look up ``dict_batch["pos_id"]`` and apply dropout."""
    station_ids = dict_batch["pos_id"]
    return self.dropout(self.stationIdEmbedding(station_ids))

In [None]:
# NOTE: this cell re-declares the StationIdEmbedding class already defined in
# the previous cell with the same body; the definition executed last wins.
class StationIdEmbedding(nn.Module):
  """Learned embedding of station ids, followed by dropout."""

  def __init__(self,embedding_dim,nb_of_pos_ids,dropout):
    """
    Args:
      embedding_dim: size of the embedding vectors.
      nb_of_pos_ids: number of rows of the embedding table.
      dropout: dropout probability applied to the looked-up vectors.
    """
    super().__init__()
    self.dropout = nn.Dropout(p=dropout)
    self.stationIdEmbedding = nn.Embedding(nb_of_pos_ids, embedding_dim)

  def forward(self,dict_batch):
    """Look up ``dict_batch["pos_id"]`` and apply dropout."""
    looked_up = self.stationIdEmbedding(dict_batch["pos_id"])
    return self.dropout(looked_up)


#### graph_deepLearning

In [None]:
# libpysal provides voronoi_frames and the contiguity weights imported by a later cell.
!pip install libpysal

Collecting libpysal
  Downloading libpysal-4.10-py3-none-any.whl (2.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: libpysal
Successfully installed libpysal-4.10


In [None]:
import os
import torch
# Install torch-scatter, torch-sparse, torch-geometric and ogb, unless the
# IS_GRADESCOPE_ENV environment variable is set.
if 'IS_GRADESCOPE_ENV' not in os.environ:
  # The wheel index URL is keyed by the installed torch version string.
  torch_version = str(torch.__version__)
  # Both packages use the same index URL.
  scatter_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
  sparse_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
  !pip install torch-scatter -f $scatter_src
  !pip install torch-sparse -f $sparse_src
  !pip install torch-geometric
  !pip install ogb

Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu121.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_scatter-2.1.2%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt21cu121
Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu121.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_sparse-0.6.18%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt21cu121
Collecting torch-geometric
  Downloading torch_geometric-2.5.0-py3-none-any.whl (1.1 MB)
[2K     [9

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx
from libpysal.cg import voronoi_frames
from libpysal import weights, examples
from torch_geometric.utils import from_networkx
from torch_geometric.nn import GCNConv
import numpy as np

def get_net(vocab):
  """Build a station graph from the coordinates used as ``vocab`` keys.

  Voronoi cells are computed for the coordinates, cells sharing a border are
  connected (Rook contiguity), and the resulting graph is converted to a
  torch_geometric object. Nodes carry a ``coordinates`` attribute and edges
  a ``distance`` attribute holding the Euclidean distance between their end
  points.

  Args:
    vocab: mapping whose keys are coordinate pairs.

  Returns:
    The graph converted by ``from_networkx``.
  """
  first_components = [key[0] for key in vocab]
  second_components = [key[1] for key in vocab]
  coordinates = np.column_stack((first_components, second_components))

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay_graph = weights.Rook.from_dataframe(cells).to_networkx()

  node_coordinates = dict(zip(delaunay_graph.nodes, coordinates))
  nx.set_node_attributes(delaunay_graph, node_coordinates, "coordinates")

  def stack_endpoints(position):
    # Coordinates of the `position`-th end point of every edge, one row per edge.
    rows = [delaunay_graph.nodes[edge[position]]["coordinates"][None,:] for edge in delaunay_graph.edges]
    return np.concatenate(rows, axis=0)

  distance = np.linalg.norm(stack_endpoints(0) - stack_endpoints(1), axis=1)
  edge_distances = dict(zip(delaunay_graph.edges, distance))
  nx.set_edge_attributes(delaunay_graph, edge_distances, "distance")

  return from_networkx(delaunay_graph)

class GCN(nn.Module):
  """Three-layer graph convolution producing one embedding per station.

  The graph comes from ``get_net(vocab)``. Node features are the
  standardised station coordinates and edge weights are the standardised
  edge distances shifted by +1 and halved.
  """

  def __init__(self, hidden_dim1, hidden_dim2, output_dim,vocab,dropout,device):
    """
    Args:
      hidden_dim1: output size of the first convolution.
      hidden_dim2: output size of the second convolution.
      output_dim: size of the final station embedding.
      vocab: mapping whose keys are station coordinates.
      dropout: dropout probability applied to the returned embeddings.
      device: device the graph tensors are moved to.
    """
    super().__init__()
    net = get_net(vocab)
    self.dropout = nn.Dropout(p=dropout)
    # Graph tensors are plain attributes (not buffers), placed on `device` here.
    self.edge_index = net.edge_index.long().to(device)

    raw_distance = net.distance.float().to(device)
    standardised_distance = (raw_distance - raw_distance.mean()) / raw_distance.std()
    self.distance = (standardised_distance + 1) / 2

    raw_coordinates = net.coordinates.float().to(device)
    coordinate_mean = raw_coordinates.mean(dim=0).unsqueeze(0)
    coordinate_std = raw_coordinates.std(dim=0).unsqueeze(0)
    self.coordinates = (raw_coordinates - coordinate_mean) / coordinate_std

    self.conv1 = GCNConv(2, hidden_dim1)
    self.conv2 = GCNConv(hidden_dim1, hidden_dim2)
    self.conv3 = GCNConv(hidden_dim2, output_dim)

  @property
  def device(self):
    """Device of the module's parameters."""
    return next(self.parameters()).device

  def forward(self, dic_batch):
    """Compute all node embeddings and gather those of ``dic_batch["pos_id"]``.

    A row of zeros is appended after the node embeddings, so the index equal
    to the number of nodes maps to a zero vector.
    """
    x = self.coordinates
    for hidden_conv in (self.conv1, self.conv2):
      x = hidden_conv(x, self.edge_index, self.distance)
      x = F.relu(x)
      # Hidden layers use a fixed dropout of 0.5, independent of `dropout`.
      x = F.dropout(x, p=0.5, training=self.training)
    x = self.conv3(x, self.edge_index, self.distance)

    zero_row = torch.zeros(1, x.shape[1]).to(self.device)
    x = torch.cat((x, zero_row), dim=0)
    return self.dropout(x[dic_batch["pos_id"]])


#### Combine feature embeddings

In [None]:
class Feature_embedding(nn.Module):
  """Combine station-id, timestamp and (optionally) GCN embeddings.

  The component embeddings are either concatenated along the feature axis
  or summed. When ``keep_input_positions`` is set, two features are reserved
  and ``dic_batch["input"]`` is prepended to the result.
  """

  def __init__(self,d_model,nb_of_pos_ids,use_gcn,vocab,hidden_dim1, hidden_dim2,batch_first,concatenate_features,keep_input_positions,dropout,device):
    """
    Args:
      d_model: total feature size targeted for the combined embedding.
      nb_of_pos_ids: number of rows of the station-id embedding table.
      use_gcn: whether a GCN embedding is added as a third component.
      vocab, hidden_dim1, hidden_dim2, device: forwarded to ``GCN``.
      batch_first: accepted but not used here.
      concatenate_features: concatenate the components instead of summing.
      keep_input_positions: prepend ``dic_batch["input"]`` to the embedding.
      dropout: dropout probability of every component.
    """
    super().__init__()
    self.num_features = 2 + use_gcn
    self.concatenate_features = concatenate_features
    self.keep_input_positions = keep_input_positions

    component_dim = d_model
    if keep_input_positions:
      # Leave room for the two raw input features prepended in forward.
      component_dim = component_dim - 2
    self.embedding_dim = component_dim
    if self.concatenate_features:
      # Each component gets an equal share of the available width.
      self.embedding_dim = int(self.embedding_dim/self.num_features)

    components = [
        StationIdEmbedding(self.embedding_dim, nb_of_pos_ids, dropout),
        TimeStampEmbedding(self.embedding_dim, dropout),
    ]
    if use_gcn:
      components.append(GCN(hidden_dim1, hidden_dim2, self.embedding_dim, vocab, dropout, device))
    self.list_feature_embedding = nn.ModuleList(components)

  @property
  def device(self):
    """Device of the module's parameters."""
    return next(self.parameters()).device

  def forward(self,dic_batch):
    """Embed the batch and return the combined feature tensor."""
    if self.concatenate_features:
      parts = [component(dic_batch) for component in self.list_feature_embedding]
      embedding = torch.cat(parts, dim=2)
    else:
      embedding = torch.zeros(*dic_batch["pos_id"].shape,self.embedding_dim).to(self.device)
      for component in self.list_feature_embedding:
        embedding += component(dic_batch)

    if not self.keep_input_positions:
      return embedding
    return torch.cat((dic_batch["input"],embedding),dim=2)

### model

In [None]:
from torch import nn
from torch.nn import Embedding, LSTM



class MLP(nn.Module):
  """Two-layer perceptron: expand to ``2 * d_model``, ReLU, project back."""

  def __init__(self,d_model):
    """
    Args:
      d_model: size of the input and of the output features.
    """
    super().__init__()
    self.dim_perceptron = 2*d_model
    self.linear_perceptron_in = nn.Linear(d_model, self.dim_perceptron)
    self.linear_perceptron_out = nn.Linear(self.dim_perceptron, d_model)

  def forward(self,x):
    """Apply the perceptron to the last axis of ``x``."""
    hidden = self.linear_perceptron_in(x)
    activated = F.relu(hidden)
    return self.linear_perceptron_out(activated)


class Transformer_LSTM_Layer(nn.Module):
  """Residual LSTM block operating on packed sequences.

  The block adds the LSTM output to its input, optionally applies layer
  normalisation with dropout, and optionally follows with a residual MLP
  (itself followed by the same normalisation when enabled).
  """
  def __init__(self,d_model,output_regression_size,output_classfication_size,num_layers,lstm_layer_with_perceptron,lstm_layer_with_layer_norm,dropout,batch_first):
    """
    Args:
      d_model: feature size of the input, the LSTM state and the output.
      output_regression_size, output_classfication_size, num_layers:
        accepted but not used by this block.
      lstm_layer_with_perceptron: add the residual MLP sub-block.
      lstm_layer_with_layer_norm: add layer normalisation and dropout.
      dropout: dropout probability (also handed to the single-layer LSTM).
      batch_first: layout flag of the LSTM.
    """
    super().__init__()

    self.lstm=LSTM(input_size=d_model, hidden_size=d_model,batch_first=batch_first,num_layers=1,dropout=dropout)
    self.lstm_layer_with_perceptron=lstm_layer_with_perceptron
    self.lstm_layer_with_layer_norm=lstm_layer_with_layer_norm
    if self.lstm_layer_with_layer_norm:
      self.layer_normalisation=torch.nn.LayerNorm(d_model)
    if self.lstm_layer_with_perceptron:
      self.mlp=MLP(d_model)
    self.dropout=nn.Dropout(p=dropout)

  def _normalise_and_repack(self,x,lengths):
    """Unpack, apply layer normalisation and dropout, and pack again."""
    x,_=torch.nn.utils.rnn.pad_packed_sequence(x, batch_first=True, padding_value=0.0)
    x=self.layer_normalisation(x)
    x=self.dropout(x)
    return torch.nn.utils.rnn.pack_padded_sequence(x, lengths=lengths,batch_first=True, enforce_sorted=False)

  def forward(self,x,batch_sizes,sorted_indices,unsorted_indices,lengths):
    """Apply the block to a packed sequence.

    Args:
      x: ``PackedSequence`` input.
      batch_sizes, sorted_indices, unsorted_indices: packing metadata used to
        rebuild a ``PackedSequence`` around the raw data tensor.
      lengths: unpadded sequence lengths, used when re-packing.

    Returns:
      ``PackedSequence`` with the same packing as the input.
    """
    # Residual connection on the flat packed data.
    x=self.lstm(x)[0].data+x.data
    x=torch.nn.utils.rnn.PackedSequence(x, batch_sizes, sorted_indices, unsorted_indices)
    if self.lstm_layer_with_layer_norm:
      x=self._normalise_and_repack(x,lengths)
    if self.lstm_layer_with_perceptron:
      x=x.data
      x=self.mlp(x)+x
      x=torch.nn.utils.rnn.PackedSequence(x, batch_sizes, sorted_indices, unsorted_indices)
      # Test the flag, not the attribute: `layer_normalisation` only exists
      # when layer norm is enabled, so reading it otherwise raises AttributeError.
      if self.lstm_layer_with_layer_norm:
        x=self._normalise_and_repack(x,lengths)
    return x






class  Transformer_encoder_LSTM_decoder(nn.Module):
  """Sequence model: feature embedding, optional transformer stack, optional residual LSTM stack, linear heads.

  The module owns a classification head (`output_classfication_size` logits), a regression
  head (`output_regression_size` values) and, when `new_station_binary_classification` is
  set, a single-logit binary head. Every output of `forward` is a `PackedSequence`.
  """
  def __init__(self,d_model,nb_of_pos_ids,output_regression_size,output_classfication_size,num_layers_lstm,lstm_layer_with_perceptron,lstm_layer_with_layer_norm,num_layers_transformer,encoder_only,nhead,learnable_pos_encoding,new_station_binary_classification,use_gcn,vocab,hidden_dim1, hidden_dim2,max_len,dropout,batch_first,concatenate_features,keep_input_positions,device):
    super().__init__()
    # NOTE: `self.dropout` and `self.layer_normalisation` are not used by `forward`; they are
    # kept so the module's attributes (and the LayerNorm entries of its state_dict) are unchanged.
    self.dropout=nn.Dropout(p=dropout)
    self.layer_normalisation=torch.nn.LayerNorm(d_model)
    self.feature_embedding=Feature_embedding(d_model,nb_of_pos_ids,use_gcn,vocab,hidden_dim1, hidden_dim2,batch_first,concatenate_features,keep_input_positions,dropout,device)

    self.num_layers_transformer=num_layers_transformer
    if num_layers_transformer>0:
      self.pos_encoder = get_PositionalEncoding(d_model, dropout, max_len,learnable_pos_encoding)
      self.transformer_model=get_Transformer_architecture(d_model,encoder_only,num_layers_transformer,nhead,dropout,batch_first)

    self.num_layers_lstm=num_layers_lstm
    if num_layers_lstm>0:
      self.transformer_lstm__list = nn.ModuleList([Transformer_LSTM_Layer(d_model,output_regression_size,output_classfication_size,num_layers_lstm,lstm_layer_with_perceptron,lstm_layer_with_layer_norm,dropout,batch_first) for layer in range(num_layers_lstm)])
    self.linear_reg=nn.Linear(d_model,output_regression_size)
    self.classifier=nn.Linear(d_model,output_classfication_size)

    self.new_station_binary_classification=new_station_binary_classification
    if self.new_station_binary_classification:
      self.binary_classifier=nn.Linear(d_model,1)

  @property
  def device(self):
    """Device of the first registered parameter."""
    return next(self.parameters()).device


  def forward(self,dic_batch,reg):
    """Compute the packed outputs for one batch.

    Args:
      dic_batch: batch dictionary; this method reads `dic_batch["lengths"]` and passes the
        whole dictionary to the feature embedding.
      reg: when true, the output also contains `"time_regression"` (exp of the regression head).

    Returns:
      dict with `"next_station"` logits and, depending on `reg` and the constructor flag,
      `"time_regression"` and `"new_station"`, all as `PackedSequence`.
    """
    if self.num_layers_transformer>0:
      x=self.feature_embedding(dic_batch)
      x=self.pos_encoder(x)
      with torch.no_grad():
        # presumably a key-padding mask built from the sequence lengths -- confirm in get_mask
        mask_x = get_mask(x.shape[0],x.shape[1],dic_batch["lengths"],self.device)
        causal_mask=torch.nn.Transformer.generate_square_subsequent_mask(x.shape[1],device=self.device)
      x=self.transformer_model(x,causal_mask,mask_x,is_causal=True)
      if self.num_layers_lstm>0:
        # Residual connection from the raw embedding into the LSTM stack.
        x+=self.feature_embedding(dic_batch)
    else:
      # No transformer: the embedding feeds the LSTM stack, or the heads directly when there is
      # no LSTM either. Previously `x` was left undefined in that last case (NameError).
      x=self.feature_embedding(dic_batch)

    x=torch.nn.utils.rnn.pack_padded_sequence(x, lengths=dic_batch["lengths"],batch_first=True, enforce_sorted=False)
    batch_sizes=x.batch_sizes
    sorted_indices=x.sorted_indices
    unsorted_indices=x.unsorted_indices
    if self.num_layers_lstm>0:
      for transformer_lstm in self.transformer_lstm__list:
        x=transformer_lstm(x,batch_sizes,sorted_indices,unsorted_indices,dic_batch["lengths"])
    # From here on `x` is the flat packed data tensor shared by all heads.
    x=F.relu(x.data)
    out={}
    out["next_station"]=torch.nn.utils.rnn.PackedSequence(self.classifier(x), batch_sizes, sorted_indices, unsorted_indices)
    if reg:
      # exp keeps the regression output strictly positive.
      out["time_regression"]=torch.nn.utils.rnn.PackedSequence(torch.exp(self.linear_reg(x)), batch_sizes, sorted_indices, unsorted_indices)
    if self.new_station_binary_classification:
      out["new_station"]=  torch.nn.utils.rnn.PackedSequence( self.binary_classifier(x), batch_sizes, sorted_indices, unsorted_indices)
    return out



## Baselines

In [None]:
from torch import nn
import torch.nn.functional as F
class Baseline_model(nn.Module):
  """Parameter-free baseline whose next-station scores are the one-hot encoding of the current station id."""

  def __init__(self,nb_of_pos_ids):
    super().__init__()
    self.nb_of_pos_ids=nb_of_pos_ids

  def forward(self,dic_batch,reg):
    """Return `{"next_station": PackedSequence}` built from `dic_batch["pos_id"]`; `reg` is ignored."""
    one_hot_scores=F.one_hot(dic_batch["pos_id"],self.nb_of_pos_ids).float()
    packed_scores=torch.nn.utils.rnn.pack_padded_sequence(
        one_hot_scores,
        lengths=dic_batch["lengths"],
        batch_first=True,
        enforce_sorted=False,
    )
    return {"next_station":packed_scores}

In [None]:
# Evaluate the copy-current-station baseline on the validation set.
# NOTE: this cell uses the `Total_loss(new_station_binary_classification)` and
# `evaluate(model, dataloader, upsampling, criterion, device, reg)` definitions from the
# "loss" / "evaluation" cells of the training section below; run those cells first.
model=Baseline_model(len(vocab)+1)
criterion=Total_loss(False)
# The previous call omitted `upsampling`, shifting every later positional argument by one.
evaluate(model,valid_dataloader,False,criterion,device,reg=False)

TypeError: Total_loss.__init__() takes 1 positional argument but 2 were given

# Training

In [None]:
# @title loss
from torch import nn
class Loss_next_station_classification(nn.Module):
  """Cross-entropy on next-station logits, restricted to a chosen subset of packed positions."""

  def __init__(self) -> None:
    super().__init__()
    # Targets equal to len(vocab) are ignored by the loss (reads the notebook-level `vocab`).
    ignored_label=len(vocab)
    self.criterion=torch.nn.CrossEntropyLoss(ignore_index=ignored_label)

  def forward(self, out, target_pos_ids, index_training_element):
    """Index both packed `.data` tensors with `index_training_element` and return the cross-entropy."""
    selected_logits=out.data[index_training_element]
    selected_labels=target_pos_ids.data[index_training_element]
    return self.criterion(selected_logits,selected_labels)

class Loss_time_regression(nn.Module):
  """Masked mean-squared error on packed time targets; entries equal to -1 contribute zero."""

  def __init__(self) -> None:
    super().__init__()
    # Element-wise losses are needed so the mask can be applied before averaging.
    self.criterion = nn.MSELoss(reduction='none')

  def forward(self,out,dict_batch):
    """Return the masked MSE between `out.data` and the packed `dict_batch["time_target"]`.

    The average is taken over every entry, masked ones included (they count as zero error).
    """
    packed_targets=torch.nn.utils.rnn.pack_padded_sequence(
        dict_batch["time_target"],
        lengths=dict_batch["lengths"],
        batch_first=True,
        enforce_sorted=False,
    ).data
    valid_entries=(packed_targets != -1).float()
    squared_error=self.criterion(out.data,packed_targets)
    return (squared_error * valid_entries).mean()

class Loss_new_station_binary_classification(nn.Module):
  """Binary cross-entropy (with logits) between the packed binary-head output and binary labels."""

  def __init__(self, ) -> None:
    super().__init__()
    self.criterion =  nn.BCEWithLogitsLoss()

  def forward(self,out,target):
    """Return the BCE-with-logits loss.

    Args:
      out: object whose `.data` holds one logit per packed element (e.g. shape (N, 1)).
      target: tensor with one label per packed element.
    """
    # Flatten instead of a bare squeeze(): squeeze() turns a single-element batch (1, 1) into
    # a 0-d tensor, which no longer matches the (1,) target and makes the loss raise.
    logits=out.data.reshape(-1)
    # Labels may have been created on another device than the logits; align them.
    labels=target.reshape(-1).float().to(logits.device)
    return self.criterion(logits,labels)

def get_repetition_labels(target_pos_ids,pos_ids):
  """Return an int64 tensor that is 1 where the target id equals the current id and 0 elsewhere.

  Both arguments expose a `.data` tensor (e.g. `PackedSequence`). The result stays on the
  device of the inputs: `.long()` is used instead of `.type(torch.LongTensor)`, which
  converts to a CPU tensor type and therefore moved GPU labels back to the host.
  """
  return (target_pos_ids.data==pos_ids.data).long()

def upsampling_strategy(target, epoch, epochs_new_station_only,pourcentage_of_repeat_training_elment):
  """Select the packed positions used for the classification loss.

  Positions whose label is 0 are always kept. From epoch `epochs_new_station_only` onwards a
  random subset of the non-zero positions is added, sized so that the non-zero positions make
  up about `pourcentage_of_repeat_training_elment` of the selection (or all of them if fewer
  exist).

  Args:
    target: label tensor; assumed 1-D (one label per packed element) -- TODO confirm.
    epoch: current epoch index.
    epochs_new_station_only: number of initial epochs that use only the zero-label positions.
    pourcentage_of_repeat_training_elment: requested fraction of non-zero positions; must be
      below 1 once `epoch >= epochs_new_station_only`.

  Returns:
    A 1-D index tensor (always 1-D, even for a single selected position).

  Raises:
    ZeroDivisionError: if the fraction equals 1 when the non-zero positions are sampled.
  """
  # as_tuple=True returns 1-D indices directly. The previous nonzero().squeeze() collapsed a
  # single match to a 0-d tensor, which broke `.shape[0]` and `torch.cat`.
  index_non_repeat=(target==0).nonzero(as_tuple=True)[0]
  if epoch<epochs_new_station_only:
    return index_non_repeat

  index_repeat=target.nonzero(as_tuple=True)[0]
  coeff=pourcentage_of_repeat_training_elment/(1-pourcentage_of_repeat_training_elment)
  nb_repeat_kept=int(coeff*index_non_repeat.shape[0])
  # Random subset without replacement; slicing past the end simply keeps every repeat.
  shuffled=torch.randperm(index_repeat.shape[0])[:nb_repeat_kept]
  slice_repeat=index_repeat[shuffled]
  return torch.cat((index_non_repeat,slice_repeat))


class Total_loss(nn.Module):
  """Sum of the classification loss and the optional binary and time-regression losses."""

  def __init__(self,new_station_binary_classification) -> None:
    super().__init__()
    self.loss_next_station_classification = Loss_next_station_classification()
    self.loss_time_regression = Loss_time_regression()
    self.new_station_binary_classification=new_station_binary_classification
    if self.new_station_binary_classification:
      self.loss_new_station_binary_classification=Loss_new_station_binary_classification()

  def forward(self, out, dict_batch, upsampling,upsampling_strategy, reg=False):
    """Compute every loss term for one batch.

    Args:
      out: model output dict (`"next_station"`, optionally `"new_station"` and `"time_regression"`).
      dict_batch: batch dict; reads `"pos_id_target"`, `"lengths"` and, when repetition labels
        are needed, `"pos_id"`.
      upsampling: when true, `upsampling_strategy(target)` chooses the positions used for the
        classification loss; otherwise every packed position is used.
      upsampling_strategy: callable taking the repetition labels and returning indices.
      reg: when true, the time-regression loss is added.

    Returns:
      dict with `"classification"`, `"total"` and the optional `"new_station"` /
      `"time_regression"` terms.
    """
    loss={}
    target_pos_ids=torch.nn.utils.rnn.pack_padded_sequence(dict_batch["pos_id_target"], lengths=dict_batch["lengths"],batch_first=True, enforce_sorted=False)
    if self.new_station_binary_classification or upsampling:
      pos_ids=torch.nn.utils.rnn.pack_padded_sequence(dict_batch["pos_id"], lengths=dict_batch["lengths"],batch_first=True, enforce_sorted=False)
      target=get_repetition_labels(target_pos_ids,pos_ids)
    else:
      target=None

    if upsampling:
      index_training_element=upsampling_strategy(target)
    else:
      index_training_element=torch.arange(0,target_pos_ids.data.shape[0])

    loss["classification"]=self.loss_next_station_classification(out["next_station"],target_pos_ids,index_training_element)
    # Accumulate out of place: `total` starts as the very same tensor object as
    # `classification`, so an in-place `+=` also overwrote the reported classification loss.
    loss["total"]=loss["classification"]
    if self.new_station_binary_classification:
      loss["new_station"]=self.loss_new_station_binary_classification(out["new_station"],target)
      loss["total"]=loss["total"]+loss["new_station"]

    if reg:
      loss["time_regression"]=self.loss_time_regression(out["time_regression"],dict_batch)
      loss["total"]=loss["total"]+loss["time_regression"]

    return loss





In [None]:
# @title evaluation
from torch import autocast
def evaluate(model,dataloader,upsampling,criterion,device,reg=True):
  """Run `model` over `dataloader` without gradients and return the averaged losses plus accuracy.

  Each loss term returned by `criterion` is summed over the batches and divided by the total
  number of packed elements; `"acc"` is the fraction of packed elements whose arg-max
  prediction equals the target id.
  """
  model.eval()
  correct=0
  nb_points=0
  valid_results={}
  with torch.no_grad():
    for dict_batch in dataloader:
      # Move every tensor entry to the device; "lengths" is left where it is.
      for key, value in dict_batch.items():
        if key!="lengths":
          dict_batch[key]=value.to(device)
      with autocast(device_type=device.type):
        out=model(dict_batch,reg=reg)
        batch_losses=criterion(out,dict_batch,upsampling,None,reg=reg)
        valid_results=get_sum_valid_results(valid_results,batch_losses)
        target_pos_ids=torch.nn.utils.rnn.pack_padded_sequence(
            dict_batch["pos_id_target"],
            lengths=dict_batch["lengths"],
            batch_first=True,
            enforce_sorted=False,
        )
        predictions=out["next_station"].data.argmax(dim=1)
        correct+=(predictions==target_pos_ids.data).sum().item()
        nb_points+=out["next_station"].data.shape[0]
    valid_results=get_mean_valid_results(valid_results,nb_points)
    valid_results["acc"]=correct/nb_points

    return valid_results

In [None]:
# @title training
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch import autocast
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
def train(
          epochs_classifcation_only,
          epochs_complete_problem,
          input_size,
          num_heads,
          d_model,
          nb_of_pos_ids,
          num_layers_lstm,
          lstm_layer_with_perceptron,
          lstm_layer_with_layer_norm,
          num_layers_transformer,
          encoder_only,
          output_regression_size,
          output_classfication_size,
          nb_batchs,
          dropout,
          max_len,
          weight_decay,
          lr,
          learnable_pos_encoding,
          new_station_binary_classification,
          use_gcn,
          vocab,hidden_dim1, hidden_dim2,
          batch_first,
          concatenate_features,
          keep_input_positions,
          upsampling,
          upsampling_strategy,
          epochs_new_station_only,
          pourcentage_of_repeat_training_elment,
          save_best_model,
          path_best_model,
          batch_size,
          device):
  """Build a `Transformer_encoder_LSTM_decoder`, train it and return the best validation results.

  Training runs `epochs_classifcation_only` epochs without the time-regression loss followed by
  `epochs_complete_problem` epochs with it. Each epoch processes at most `nb_batchs` batches of
  the notebook-level `train_dataloader` and is followed by an evaluation on the notebook-level
  `valid_dataloader`.

  Notes:
    * `input_size` is accepted but not used.
    * `batch_size` only rescales the reported training losses (see `get_epoch_loss`).
    * When `save_best_model` is true, checkpoints are written under `path_best_model`.

  Returns:
    dict mapping each validation metric to its best value seen so far.
  """
  epochs=epochs_complete_problem+ epochs_classifcation_only
  model=Transformer_encoder_LSTM_decoder(d_model=d_model,
                                         nb_of_pos_ids=nb_of_pos_ids,
                                         output_regression_size=output_regression_size,
                                         output_classfication_size=output_classfication_size,
                                         num_layers_lstm=num_layers_lstm,
                                         lstm_layer_with_perceptron=lstm_layer_with_perceptron,
                                         # Was wired to `lstm_layer_with_perceptron`, so the
                                         # layer-norm setting was silently ignored.
                                         lstm_layer_with_layer_norm=lstm_layer_with_layer_norm,
                                         num_layers_transformer=num_layers_transformer,
                                         encoder_only=encoder_only,
                                         nhead=num_heads,
                                         learnable_pos_encoding=learnable_pos_encoding,
                                         new_station_binary_classification=new_station_binary_classification,
                                         use_gcn=use_gcn,
                                         vocab=vocab,
                                         hidden_dim1=hidden_dim1,
                                         hidden_dim2=hidden_dim2,
                                         max_len=max_len,
                                         dropout=dropout,
                                         batch_first = batch_first,
                                         concatenate_features = concatenate_features,
                                         keep_input_positions = keep_input_positions,device=device
                                         ).to(device)
  if save_best_model:
    os.makedirs(path_best_model,exist_ok =True)
  optimizer_encoder = optim.Adam( model.parameters(), lr=lr, weight_decay=weight_decay)
  criterion = Total_loss( new_station_binary_classification = new_station_binary_classification)
  train_losses, valid_results = {},{}
  best_results={}
  for epoch in range(epochs):
    # The regression loss is switched on once the classification-only phase is over.
    reg=epoch >= epochs_classifcation_only
    epoch_losses={}
    model.train()
    for batch_index, dict_batch in enumerate(train_dataloader):
      # Train on exactly `nb_batchs` batches; the previous counter stopped one batch early.
      if batch_index>=nb_batchs:
        break
      optimizer_encoder.zero_grad()
      dict_batch=set_dic_to(dict_batch,device)
      # Autocast wraps only the forward pass and the loss; backward and the optimizer step
      # run outside of it, as recommended by the PyTorch AMP documentation.
      with autocast(device_type=device.type):
        out=model(dict_batch, reg)
        loss=criterion(out, dict_batch,upsampling,lambda target: upsampling_strategy(target,epoch,epochs_new_station_only,pourcentage_of_repeat_training_elment) ,reg)
      loss["total"].backward()
      optimizer_encoder.step()
      epoch_losses=update_epoch_losses(epoch_losses,loss)
      # Drop references to the batch tensors before the next iteration.
      dict_batch.clear()
      loss.clear()
      out.clear()
      del out, loss,dict_batch
    epoch_loss=get_epoch_loss(epoch_losses,batch_size)
    train_losses=update_train_losses(train_losses,epoch_loss,epoch)
    valid_result = evaluate(model,valid_dataloader,upsampling,criterion,device)
    best_results = update_best(model,valid_result,best_results,save_best_model,path_best_model)
    valid_results = update_valid_results(valid_results,valid_result)
    print_results(epoch_loss,valid_result,epoch)

  return best_results

In [None]:
# @title utils

def set_dic_to(dict_batch,device):
  """Move every entry of `dict_batch` except "lengths" to `device`, in place, and return the dict."""
  for key, value in dict_batch.items():
    if key=="lengths":
      continue
    dict_batch[key]=value.to(device)
  return dict_batch

def is_better(valid_result,best_result,key):
  """Return True when `valid_result` improves on `best_result`: higher for "acc", lower for every other key."""
  if key=="acc":
    return valid_result>best_result
  return valid_result<best_result

def update_best(model,valid_result,best_results,save_best_model,path_best_model):
  """Update `best_results` in place with every improved metric and optionally checkpoint the model.

  On the first call (empty `best_results`) every metric is recorded. Afterwards a metric is
  replaced only when `is_better` says so. Whenever a metric is recorded and `save_best_model`
  is true, the model is saved under that metric's name.
  """
  first_evaluation=not best_results
  for key, value in valid_result.items():
    if first_evaluation or is_better(value,best_results[key],key):
      best_results[key]=value
      if save_best_model:
        save_model(model,path_best_model,key)
  return best_results

def save_model(model,path_best_model,key):
  """Write `model.state_dict()` to `<path_best_model>/<key>.pth`."""
  checkpoint_file=os.path.join(path_best_model,key)+".pth"
  torch.save(model.state_dict(), checkpoint_file)

def get_sum_valid_results(valid_result,valid_result_batch):
  """Add the scalar value (`.item()`) of each batch metric to the running sums in `valid_result`.

  An empty `valid_result` is initialised with the batch values; the dict is updated in place
  and returned.
  """
  starting_fresh=not valid_result
  for key, batch_value in valid_result_batch.items():
    scalar=batch_value.item()
    if starting_fresh:
      valid_result[key]=scalar
    else:
      valid_result[key]=valid_result[key]+scalar
  return valid_result

def get_mean_valid_results(sum_valid_result,nb_element):
  """Divide every value of `sum_valid_result` by `nb_element`, in place, and return the dict."""
  for key, total in sum_valid_result.items():
    sum_valid_result[key]=total/nb_element
  return sum_valid_result

def update_epoch_losses(dict_of_list,dic):
  """Append the scalar value (`.item()`) of each loss in `dic` to its history list in `dict_of_list`.

  An empty `dict_of_list` gets a new one-element list per key; the dict is updated in place
  and returned.
  """
  first_batch=not dict_of_list
  for key, value in dic.items():
    if first_batch:
      dict_of_list[key]=[value.item()]
    else:
      dict_of_list[key].append(value.item())
  return dict_of_list

def update_valid_results(dict_of_list,dic):
  """Append each value of `dic` to its history list in `dict_of_list`.

  An empty `dict_of_list` gets a new one-element list per key; the dict is updated in place
  and returned.
  """
  first_epoch=not dict_of_list
  for key, value in dic.items():
    if first_epoch:
      dict_of_list[key]=[value]
    else:
      dict_of_list[key].append(value)
  return dict_of_list

def get_epoch_loss(epoch_losses,batch_size):
  """Return, for each key, the mean of its per-batch losses divided by `batch_size`."""
  return {
      key: np.array(batch_values).mean()/batch_size
      for key, batch_values in epoch_losses.items()
  }

def print_results(epoch_loss,valid_result,epoch):
  """Print the epoch number, then one tab-separated line of training losses and one of validation results."""
  print("\nepoch: ",epoch)
  for header, metrics in (("train :", epoch_loss), ("\nvalid :", valid_result)):
    print(header, end="\t")
    for name, value in metrics.items():
      print(name,value, end="\t")

def update_train_losses(train_losses,epoch_loss,epoch):
  """Append this epoch's losses to the per-key histories in `train_losses`.

  Args:
    train_losses: dict mapping a loss name to its list of per-epoch values (updated in place).
    epoch_loss: dict mapping a loss name to its value for the current epoch.
    epoch: zero-based index of the current epoch.

  Returns:
    `train_losses`. A key that first appears at epoch `e` is back-filled with `e` NaNs so
    that every history has one entry per epoch.
  """
  if not train_losses:
    for key, value in epoch_loss.items():
      train_losses[key]=[value]
    return train_losses

  for key, value in epoch_loss.items():
    if key in train_losses:
      train_losses[key].append(value)
    else:
      # Epochs 0..epoch-1 are missing for this key: pad with exactly `epoch` NaNs. The
      # previous `epoch+1` padding made the new history one entry longer than the others.
      train_losses[key]=[float('nan')]*epoch+[value]
  return train_losses

## Instance of training

In [None]:
# @title Default title
# Launch one training run with a fixed configuration.
# NOTE: `train` returns its `best_results` dictionary, so despite its name `model` is bound
# to the best validation metrics, not to a network.
model=train(
          epochs_classifcation_only=100,  # epochs run before the regression loss is enabled
          epochs_complete_problem =100,  # additional epochs run with the regression loss
          input_size=2,  # accepted by `train` but not used in its body
          num_heads=12,
          d_model=1200,
          nb_of_pos_ids=len(vocab)+1,
          num_layers_lstm=6,
          lstm_layer_with_perceptron=False,
          lstm_layer_with_layer_norm=False,
          num_layers_transformer=6,
          encoder_only=True,
          output_regression_size=2,
          output_classfication_size=len(vocab)+1,
          nb_batchs=100,  # caps the number of training batches per epoch
          dropout=0.1,
          max_len=100,
          weight_decay=0,
          lr=3e-4,
          learnable_pos_encoding=True,
          new_station_binary_classification=False,
          use_gcn=False,
          vocab=vocab, hidden_dim1=128, hidden_dim2=256,
          batch_first= True,
          concatenate_features = False,
          keep_input_positions = False,
          upsampling=False,
          upsampling_strategy=upsampling_strategy,
          epochs_new_station_only=0,
          pourcentage_of_repeat_training_elment=0.1,
          save_best_model=True,  # checkpoints are written under `path_best_model`
          path_best_model="test_0.5",
          device=device,
          batch_size=64  # only used by `train` to rescale the reported training losses
          )

# hyperparameter tuning

## model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx
from libpysal.cg import voronoi_frames
from libpysal import weights, examples
from torch_geometric.utils import from_networkx
from torch_geometric.nn.models import GCN,GAT,GraphSAGE
import numpy as np

def get_net(vocab):
  """Build a torch_geometric graph from the coordinate pairs used as keys of `vocab`.

  Voronoi cells are computed for the coordinates, Rook contiguity between the cells gives the
  edges, and the resulting networkx graph is annotated with a per-node "coordinates" attribute
  and a per-edge Euclidean "distance" attribute before conversion with `from_networkx`.
  """
  coordinates=np.column_stack(([key[0] for key in vocab],[key[1] for key in vocab]))
  cells, _generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay_graph = weights.Rook.from_dataframe(cells).to_networkx()

  # Node i receives the i-th coordinate pair.
  nx.set_node_attributes(delaunay_graph,dict(zip(delaunay_graph.nodes, coordinates)),"coordinates")

  # Edge length = Euclidean distance between the coordinates of its two endpoints.
  node_data=delaunay_graph.nodes
  edge_starts=np.concatenate([node_data[edge[0]]["coordinates"][None,:] for edge in delaunay_graph.edges], axis=0)
  edge_ends=np.concatenate([node_data[edge[1]]["coordinates"][None,:] for edge in delaunay_graph.edges], axis=0)
  distance=np.linalg.norm(edge_starts-edge_ends, axis=1)
  nx.set_edge_attributes(delaunay_graph,dict(zip(delaunay_graph.edges,distance)),"distance")

  return from_networkx(delaunay_graph)
def get_layer(layer_type):
  """Print `layer_type` and return the matching torch_geometric model class (None if unknown)."""
  print(layer_type)
  if layer_type=="GraphSAGE":
    return GraphSAGE
  if layer_type=="GCNConv":
    return GCN
  if layer_type=="GAT":
    return GAT
  return None

class GCN_embedding(nn.Module):
  """Station embedding produced by a graph network run over the station graph `net`.

  The graph tensors (edge index, standardised edge distances, standardised node coordinates)
  are stored as plain attributes already placed on `device`.
  """
  def __init__(self,output_dim,layer_type,num_layers_gcn,hidden_channels,activation_gcn,norm,net,device,normalize_features_independantly,dropout):
    super().__init__()
    self.normalize_features_independantly=normalize_features_independantly
    if self.normalize_features_independantly:
      self.layer_normalisation=torch.nn.LayerNorm(output_dim)

    self.edge_index=net.edge_index.long().to(device)

    # Edge distances: z-score, then shift and halve -> ((d - mean) / std + 1) / 2.
    raw_distance=net.distance.float().to(device)
    self.distance=(((raw_distance-raw_distance.mean())/raw_distance.std())+1)/2

    # Node coordinates: z-score per coordinate column.
    raw_coordinates=net.coordinates.float().to(device)
    column_mean=raw_coordinates.mean(dim=0).unsqueeze(0)
    column_std=raw_coordinates.std(dim=0).unsqueeze(0)
    self.coordinates=(raw_coordinates-column_mean)/column_std

    graph_model_class=get_layer(layer_type)
    self.model=graph_model_class(in_channels=2, out_channels=output_dim, act=activation_gcn, norm=norm, num_layers=num_layers_gcn, hidden_channels=hidden_channels,dropout=dropout)

  @property
  def device(self):
    """Device of the first registered parameter."""
    return next(self.parameters()).device

  def forward(self, dic_batch):
    """Return the graph embedding looked up at `dic_batch["pos_id"]` (layer-normalised if configured)."""
    # The distances are passed as the third positional argument of the graph model.
    node_embeddings=self.model(self.coordinates,self.edge_index,self.distance)
    # One extra all-zero row is appended after the node rows.
    # NOTE(review): row i holds node i, so ids are used directly as node indices here --
    # confirm this matches the id numbering of `vocab`.
    zero_row=torch.zeros(1,node_embeddings.shape[1]).to(self.device)
    lookup_table=torch.cat((node_embeddings,zero_row),dim=0)
    embedding=lookup_table[dic_batch["pos_id"]]
    if self.normalize_features_independantly:
      embedding=self.layer_normalisation(embedding)
    return embedding

In [None]:
class TimeStampEmbedding(nn.Module):
  """Sum of learned embeddings for the month, day, hour, minute and second of a timestamp.

  Args:
    embedding_dim: width of each embedding (and of the result).
    normalize_features_independantly: when true, LayerNorm is applied to the summed embedding.
  """
  def __init__(self,embedding_dim,normalize_features_independantly):
    super().__init__()
    self.month_embedding = nn.Embedding(num_embeddings=13,embedding_dim=embedding_dim)
    self.day_embedding = nn.Embedding(num_embeddings=32,embedding_dim=embedding_dim)
    self.hour_embedding = nn.Embedding(num_embeddings=25,embedding_dim=embedding_dim)
    self.minute_embedding = nn.Embedding(num_embeddings=61,embedding_dim=embedding_dim)
    self.second_embedding = nn.Embedding(num_embeddings=61,embedding_dim=embedding_dim)
    self.normalize_features_independantly=normalize_features_independantly
    if self.normalize_features_independantly:
      self.layer_normalisation=torch.nn.LayerNorm(embedding_dim)

  def forward(self,dict_batch):
    """Embed `dict_batch['month'|'day'|'hour'|'minute'|'second']` and return their sum."""
    # The original used `embedding=+ ...` (assignment of a unary plus), which discarded every
    # component except the last one; the five embeddings are now actually summed.
    embedding = self.month_embedding(dict_batch['month'])
    embedding = embedding + self.day_embedding(dict_batch['day'])
    embedding = embedding + self.hour_embedding(dict_batch['hour'])
    embedding = embedding + self.minute_embedding(dict_batch['minute'])
    embedding = embedding + self.second_embedding(dict_batch['second'])
    if self.normalize_features_independantly:
      embedding = self.layer_normalisation(embedding)
    return embedding
class StationIdEmbedding(nn.Module):
  """Learned embedding of the station id found in `dict_batch["pos_id"]`, optionally layer-normalised."""

  def __init__(self,embedding_dim,nb_of_pos_ids,normalize_features_independantly):
    super().__init__()
    self.normalize_features_independantly=normalize_features_independantly
    # The LayerNorm is registered before the embedding table, as in the original ordering.
    if normalize_features_independantly:
      self.layer_normalisation=torch.nn.LayerNorm(embedding_dim)
    self.stationIdEmbedding=nn.Embedding(num_embeddings=nb_of_pos_ids,embedding_dim=embedding_dim)

  def forward(self,dict_batch):
    """Look up the station ids and apply LayerNorm when it is enabled."""
    station_vectors=self.stationIdEmbedding(dict_batch["pos_id"])
    if not self.normalize_features_independantly:
      return station_vectors
    return self.layer_normalisation(station_vectors)

In [None]:

class Feature_embedding(nn.Module):
  """Combine the station-id, timestamp and (optional) graph embeddings of a batch.

  With `config["concatenate_features"]` the per-feature embeddings are concatenated on the
  last dimension, each having width `int(d_model / num_features)`; otherwise they all have
  width `d_model` and are summed.
  """

  def __init__(self,config,net,device):
    super().__init__()
    # Two features are always present; the graph embedding adds a third one.
    self.num_features=2+config["use_gcn"]
    self.concatenate_features=config["concatenate_features"]
    self.embedding_dim=config["d_model"]
    if self.concatenate_features:
      self.embedding_dim=int(self.embedding_dim/self.num_features)

    normalize=config["normalize_features_independantly"]
    station_embedding=StationIdEmbedding(self.embedding_dim,config["nb_of_pos_ids"],normalize)
    time_embedding=TimeStampEmbedding(self.embedding_dim,normalize)
    feature_modules=[station_embedding,time_embedding]
    if config["use_gcn"]:
      feature_modules.append(GCN_embedding( self.embedding_dim,config["layer_type"],config["num_layers_gcn"],config["hidden_channels"],config["activation_gcn"],config["norm"],net,device,normalize,config['dropout']))
    self.list_feature_embedding=nn.ModuleList(feature_modules)

  @property
  def device(self):
    """Device of the first registered parameter."""
    return next(self.parameters()).device

  def forward(self,dic_batch):
    """Return the concatenated or summed feature embeddings for `dic_batch`."""
    if self.concatenate_features:
      return torch.cat([feature(dic_batch) for feature in self.list_feature_embedding],dim=2)

    # Summation starts from a zero tensor shaped like pos_id plus the embedding dimension.
    embedding=torch.zeros(*dic_batch["pos_id"].shape,self.embedding_dim).to(self.device)
    for feature in self.list_feature_embedding:
      embedding+=feature(dic_batch)
    return embedding


In [None]:
class Encoder_Decoder_Transformer(nn.Module):
  """Wrapper around `torch.nn.Transformer` that feeds the same sequence as source and target."""

  def __init__(self,d_model,num_layers_transformer,num_heads,dropout_transformers,activation_transformers,batch_first=True):
    super().__init__()
    activation=get_activation(activation_transformers)
    # Encoder and decoder use the same number of layers.
    self.transformer=torch.nn.Transformer(
        d_model=d_model,
        nhead=num_heads,
        num_encoder_layers=num_layers_transformer,
        num_decoder_layers=num_layers_transformer,
        dropout=dropout_transformers,
        activation=activation,
        batch_first=batch_first,
    )

  def forward(self,x,mask,src_key_padding_mask,is_causal):
    """Call the transformer with `x` as both source and target.

    `mask`, `src_key_padding_mask` and `is_causal` are each reused for the source, target and
    memory arguments.
    """
    shared_arguments=dict(
        src_mask=mask,
        tgt_mask=mask,
        memory_mask=mask,
        src_key_padding_mask=src_key_padding_mask,
        tgt_key_padding_mask=src_key_padding_mask,
        memory_key_padding_mask=src_key_padding_mask,
        src_is_causal=is_causal,
        tgt_is_causal=is_causal,
        memory_is_causal=is_causal,
    )
    return self.transformer(x,x,**shared_arguments)

def get_Transformer_architecture(d_model,encoder_only,num_layers_transformer,num_heads,dropout_transformers,activation_transformers,batch_first=True):
  """Return an `nn.TransformerEncoder` when `encoder_only` is true, else an `Encoder_Decoder_Transformer`."""
  if not encoder_only:
    return Encoder_Decoder_Transformer(d_model,num_layers_transformer,num_heads,dropout_transformers,activation_transformers,batch_first=batch_first)

  encoder_layer = nn.TransformerEncoderLayer(
      d_model=d_model,
      nhead=num_heads,
      batch_first=batch_first,
      activation=get_activation(activation_transformers),
      dropout=dropout_transformers,
  )
  return nn.TransformerEncoder(encoder_layer, num_layers=num_layers_transformer)

In [None]:
# @title Model
from torch import nn
from torch.nn import Embedding, LSTM



class MLP(nn.Module):
  """Two-layer perceptron d_model -> 2*d_model -> d_model with a configurable activation in between."""

  def __init__(self,d_model,activation_lstm):
    super().__init__()
    # The hidden width is fixed at twice the model width.
    hidden_width=2*d_model
    self.dim_perceptron=hidden_width
    self.linear_perceptron_in=nn.Linear(d_model,hidden_width)
    self.linear_perceptron_out=nn.Linear(hidden_width,d_model)
    self.activation=get_activation(activation_lstm)

  def forward(self,x):
    """Expand `x`, apply the activation, and project back to d_model."""
    hidden=self.linear_perceptron_in(x)
    hidden=self.activation(hidden)
    return self.linear_perceptron_out(hidden)


class Transformer_LSTM_Layer(nn.Module):
  """Residual LSTM block on packed sequences, with optional LayerNorm/dropout and residual MLP.

  `output_regression_size`, `output_classfication_size` and `num_layers` are accepted but not
  used by this layer.
  """
  def __init__(self,d_model,output_regression_size,output_classfication_size,num_layers,lstm_layer_with_perceptron,lstm_layer_with_layer_norm,dropout,activation_lstm,batch_first):
    super().__init__()

    self.lstm=LSTM(input_size=d_model, hidden_size=d_model,batch_first=batch_first,num_layers=1,dropout=dropout)
    self.lstm_layer_with_perceptron=lstm_layer_with_perceptron
    self.lstm_layer_with_layer_norm=lstm_layer_with_layer_norm
    if lstm_layer_with_layer_norm:
      self.layer_normalisation=torch.nn.LayerNorm(d_model)
    if lstm_layer_with_perceptron:
      self.mlp=MLP(d_model,activation_lstm)
    self.dropout=nn.Dropout(p=dropout)

  def _norm_dropout_repack(self,packed,lengths):
    """Unpack, apply LayerNorm then dropout on the padded tensor, and pack again."""
    padded,_=torch.nn.utils.rnn.pad_packed_sequence(packed, batch_first=True, padding_value=0.0)
    padded=self.dropout(self.layer_normalisation(padded))
    return torch.nn.utils.rnn.pack_padded_sequence(padded, lengths=lengths,batch_first=True, enforce_sorted=False)

  def forward(self,x,batch_sizes,sorted_indices,unsorted_indices,lengths):
    """Apply the block to the packed sequence `x` and return a packed sequence.

    `batch_sizes`, `sorted_indices` and `unsorted_indices` are the packing metadata used to wrap
    raw data tensors; `lengths` is used when re-packing after normalisation.
    """
    repack=torch.nn.utils.rnn.PackedSequence

    # Residual connection around the LSTM, on the flat packed data.
    lstm_residual=self.lstm(x)[0].data+x.data
    x=repack(lstm_residual, batch_sizes, sorted_indices, unsorted_indices)
    if self.lstm_layer_with_layer_norm:
      x=self._norm_dropout_repack(x,lengths)

    if not self.lstm_layer_with_perceptron:
      return x

    # Residual connection around the perceptron.
    flat=x.data
    x=repack(self.mlp(flat)+flat, batch_sizes, sorted_indices, unsorted_indices)
    if self.lstm_layer_with_layer_norm:
      x=self._norm_dropout_repack(x,lengths)
    return x
class Abs(nn.Module):
  """Module wrapper returning the element-wise absolute value of its input."""

  def forward(self,x):
    return x.abs()

class Exp(nn.Module):
  """Module wrapper returning the element-wise exponential of its input."""

  def forward(self,x):
    return x.exp()

class Sig(nn.Module):
  """Module wrapper returning the element-wise sigmoid of its input."""

  def forward(self,x):
    return x.sigmoid()


def get_positive_function(config):
  """Return a new module for `config["positive_function"]` ("relu", "abs", "exp" or "sig"); None otherwise."""
  name=config["positive_function"]
  if name=="relu":
    return nn.ReLU()
  if name=="abs":
    return Abs()
  if name=="exp":
    return Exp()
  if name=="sig":
    return Sig()
  return None


def get_activation(activation):
  """Return a new default-constructed `torch.nn` activation module for `activation`.

  The supported names are exactly the `torch.nn` class names listed below; any other value
  yields None.
  """
  supported_names=(
      "ReLU", "Tanh", "LeakyReLU", "SiLU", "GELU", "ELU", "Mish", "ReLU6",
      "PReLU", "SELU", "CELU", "Hardsigmoid", "Softplus", "Hardshrink",
      "Sigmoid", "Hardtanh", "Tanhshrink", "RReLU", "Softshrink", "Softsign",
      "LogSigmoid", "Softmin", "Hardswish",
  )
  for name in supported_names:
    if activation==name:
      # Each supported name is also the name of the corresponding class in torch.nn.
      return getattr(nn,name)()
  return None

class  Transformer_encoder_LSTM_decoder(nn.Module):
  """Config-driven sequence model: feature embedding, optional transformer, optional residual LSTM stack, linear heads.

  Keys read from `config` by this class: "dropout", "normalize_features_globally", "d_model",
  "activation", "reg", "transformers_model", "lstm_model", "nb_of_pos_ids" and, depending on
  the enabled parts, "num_layers_transformer", "dropout_transformers",
  "learnable_pos_encoding", "encoder_only", "num_heads", "activation_transformers",
  "num_layers_lstm", "lstm_layer_with_perceptron", "lstm_layer_with_layer_norm",
  "dropout_lstm" and "activation_lstm". `get_positive_function` is called only when
  `config["reg"]` is true.
  """
  def __init__(self,config,net,device):
    super().__init__()
    self.dropout=nn.Dropout(p=config["dropout"])

    self.normalize_features_globally=config["normalize_features_globally"]
    if self.normalize_features_globally:
      self.global_layer_normalisation=torch.nn.LayerNorm(config["d_model"])
    self.feature_embedding=Feature_embedding(config,net,device)
    self.activation=get_activation(config["activation"])
    # The positivity function only exists when the regression head is configured.
    if config["reg"]:
      self.positive_function=get_positive_function(config)
    self.transformers_model=config["transformers_model"]
    if self.transformers_model>0:
      self.num_layers_transformer=config["num_layers_transformer"]
      # The positional encoding is built for a maximum length of 100.
      self.pos_encoder = get_PositionalEncoding(config["d_model"], config["dropout_transformers"], 100,config["learnable_pos_encoding"])
      self.transformer_model=get_Transformer_architecture(config["d_model"],config["encoder_only"],config["num_layers_transformer"],config["num_heads"],config["dropout_transformers"],config["activation_transformers"],True,)

    self.lstm_model=config["lstm_model"]
    if self.lstm_model>0:
      self.num_layers_lstm=config["num_layers_lstm"]
      self.transformer_lstm__list = nn.ModuleList([Transformer_LSTM_Layer(config["d_model"],2,config["nb_of_pos_ids"],config["num_layers_lstm"],config["lstm_layer_with_perceptron"],config["lstm_layer_with_layer_norm"],config["dropout_lstm"],config["activation_lstm"],True) for layer in range(config["num_layers_lstm"])])
    self.linear_reg=nn.Linear(config["d_model"],2)
    self.classifier=nn.Linear(config["d_model"],config["nb_of_pos_ids"])

  @property
  def device(self):
    """Device of the first registered parameter."""
    return next(self.parameters()).device


  def forward(self,dic_batch,reg):
    """Compute the packed outputs for one batch.

    Args:
      dic_batch: batch dictionary; this method reads `dic_batch["lengths"]` and passes the
        whole dictionary to the feature embedding.
      reg: when true, the output also contains `"time_regression"`; this requires the model
        to have been built with `config["reg"]` true.

    Returns:
      dict with `"next_station"` logits and optionally `"time_regression"`, as `PackedSequence`.
    """
    if self.transformers_model:
      x=self.feature_embedding(dic_batch)
      if self.normalize_features_globally:
        x= self.global_layer_normalisation(x)
      x=self.dropout(x)
      x=self.pos_encoder(x)
      with torch.no_grad():
        # presumably a key-padding mask built from the sequence lengths -- confirm in get_mask
        mask_x = get_mask(x.shape[0],x.shape[1],dic_batch["lengths"],self.device)
        causal_mask=torch.nn.Transformer.generate_square_subsequent_mask(x.shape[1],device=self.device)
      x=self.transformer_model(x,causal_mask,mask_x,is_causal=True)
    # The embedding is needed again as the LSTM input / residual, and it is also the only
    # input available when no transformer is used. Previously `x` stayed undefined (NameError)
    # when both the transformer and the LSTM stack were disabled.
    if self.lstm_model or not self.transformers_model:
      if self.transformers_model:
        x+=self.feature_embedding(dic_batch)
      else:
        x=self.feature_embedding(dic_batch)
      if self.normalize_features_globally:
        x= self.global_layer_normalisation(x)
      x=self.dropout(x)
    x=torch.nn.utils.rnn.pack_padded_sequence(x, lengths=dic_batch["lengths"],batch_first=True, enforce_sorted=False)
    batch_sizes=x.batch_sizes
    sorted_indices=x.sorted_indices
    unsorted_indices=x.unsorted_indices
    if self.lstm_model>0:
      for transformer_lstm in self.transformer_lstm__list:
        x=transformer_lstm(x,batch_sizes,sorted_indices,unsorted_indices,dic_batch["lengths"])
    # From here on `x` is the flat packed data tensor shared by both heads.
    x=self.activation(x.data)
    out={}
    out["next_station"]=torch.nn.utils.rnn.PackedSequence(self.classifier(x), batch_sizes, sorted_indices, unsorted_indices)
    if reg:
      out["time_regression"]=torch.nn.utils.rnn.PackedSequence(self.positive_function(self.linear_reg(x)), batch_sizes, sorted_indices, unsorted_indices)
    return out

## training

In [None]:
from torch import autocast
def evaluate(model,dataloader,device,reg=False):
  """Compute the next-station accuracy of ``model`` over ``dataloader``.

  Args:
    model: module called as ``model(dict_batch, reg=reg)``; its output must expose
      ``out["next_station"].data`` with one row of class scores per packed time step.
    dataloader: iterable of batch dictionaries containing ``"pos_id_target"`` and ``"lengths"``.
    device: ``torch.device`` the batch tensors are moved to (also selects the autocast backend).
    reg: forwarded to the model.

  Returns:
    Fraction of packed time steps whose arg-max prediction equals the target,
    or ``0.0`` when the dataloader yields no time step at all.
  """
  model.eval()
  nb_correct=0
  nb_points=0
  with torch.no_grad():
    for dict_batch in dataloader:
      # "lengths" is left untouched; every other entry is moved to the target device.
      for key in dict_batch:
        if key!="lengths":
          dict_batch[key]=dict_batch[key].to(device)
      with autocast(device_type=device.type):
        out=model(dict_batch,reg=reg)
        # Pack the targets the same way as the predictions so both line up row by row.
        target_pos_ids=torch.nn.utils.rnn.pack_padded_sequence(dict_batch["pos_id_target"], lengths=dict_batch["lengths"],batch_first=True, enforce_sorted=False)
        predictions=out["next_station"].data.argmax(dim=1)
        nb_correct+=(predictions==target_pos_ids.data).sum().item()
        nb_points+=predictions.shape[0]
  # An empty dataloader used to raise ZeroDivisionError here.
  if nb_points==0:
    return 0.0
  return nb_correct/nb_points

In [None]:
# @title loss
from torch import nn
class Loss_next_station_classification(nn.Module):
  """Cross-entropy between packed next-station scores and packed target ids."""

  def __init__(self) -> None:
    super().__init__()
    self.criterion=nn.CrossEntropyLoss()

  def forward(self, out, target_pos_ids):
    """Apply the criterion to the ``.data`` tensors of both arguments and return the loss."""
    scores=out.data
    labels=target_pos_ids.data
    return self.criterion(scores,labels)

class Loss_time_regression(nn.Module):
  """Masked mean-squared error on the packed time targets.

  Target entries equal to ``-1`` are treated as missing and do not contribute to the loss.
  """

  def __init__(self, ) -> None:
    super().__init__()
    # Element-wise errors are needed so that missing targets can be masked out.
    self.criterion = nn.MSELoss(reduction='none')

  def forward(self,out,dict_batch):
    """Return the mean squared error over the valid (non ``-1``) target entries.

    Args:
      out: object whose ``.data`` holds the predictions, row-aligned with the packed targets.
      dict_batch: batch dictionary providing ``"time_target"`` and ``"lengths"``.

    Returns:
      Scalar tensor; ``0`` when every target entry is masked.
    """
    time_targets=torch.nn.utils.rnn.pack_padded_sequence(dict_batch["time_target"], lengths=dict_batch["lengths"],batch_first=True, enforce_sorted=False)
    squared_errors=self.criterion(out.data,time_targets.data)
    valid=(time_targets.data != -1).to(squared_errors.dtype)
    # Average over the valid entries only: dividing by the total number of entries
    # (as a plain .mean() does) shrinks the loss in proportion to the share of missing targets.
    nb_valid=valid.sum().clamp(min=1)
    return (squared_errors * valid).sum() / nb_valid

class Total_loss(nn.Module):
  """Classification loss, optionally augmented with the time-regression loss."""

  def __init__(self) -> None:
    super().__init__()
    self.loss_next_station_classification = Loss_next_station_classification()
    self.loss_time_regression = Loss_time_regression()

  def forward(self, out, dict_batch, reg=False):
    """Return the classification loss, plus the regression loss when ``reg`` is truthy.

    ``out`` must provide ``"next_station"`` (and ``"time_regression"`` when ``reg``);
    ``dict_batch`` must provide ``"pos_id_target"`` and ``"lengths"``.
    """
    packed_targets=torch.nn.utils.rnn.pack_padded_sequence(
        dict_batch["pos_id_target"],
        lengths=dict_batch["lengths"],
        batch_first=True,
        enforce_sorted=False,
    )
    total=self.loss_next_station_classification(out["next_station"],packed_targets)
    if not reg:
      return total
    total+=self.loss_time_regression(out["time_regression"],dict_batch)
    return total

In [None]:
# @title utils

def f_unpack_dict(dct):
    """Flatten nested dictionaries into one level.

    Keys whose value is a dict are dropped and replaced by the (recursively
    flattened) content of that dict; a later key overrides an earlier one.
    """
    flat = {}
    for key, value in dct.items():
        if not isinstance(value, dict):
            flat[key] = value
            continue
        flat.update(f_unpack_dict(value))
    return flat

def get_file_name(name,path="."):
  """Return the first ``<path>/<name>_<idx>`` (idx = 0, 1, 2, ...) that does not exist yet."""
  idx=0
  candidate=os.path.join(path,name+"_"+str(idx))
  while os.path.exists(candidate):
    idx+=1
    candidate=os.path.join(path,name+"_"+str(idx))
  return candidate


def get_last_file_name(name,path="."):
  """Return the last existing ``<path>/<name>_<idx>`` of the unbroken run idx = 0, 1, 2, ...

  Returns ``None`` when ``<name>_0`` does not exist.
  """
  previous=None
  idx=0
  while True:
    candidate=os.path.join(path,name+"_"+str(idx))
    if not os.path.exists(candidate):
      return previous
    previous=candidate
    idx+=1

def get_file_name_2(name,path="."):
  """Return ``<path>/<name>_<i>`` where ``i`` is one more than the largest index already used.

  Only regular files of ``path`` whose name starts with ``name`` are considered. The index
  of a file is its second-to-last ``_``-separated field; files without such a field, or
  whose field is not numeric, are ignored. ``i`` is never smaller than 1.
  """
  next_idx=1
  for entry in os.listdir(path):
    if not (os.path.isfile(os.path.join(path,entry)) and entry.startswith(name)):
      continue
    fields=entry.split("_")
    # A matching file without any "_" has a single field; indexing [-2] on it used to raise IndexError.
    if len(fields)<2:
      continue
    idx=fields[-2]
    if idx.isdigit():
      next_idx=max(next_idx,int(idx)+1)
  return os.path.join(path,name+"_"+str(next_idx))


## Hyperparameter search (Ray Tune + HyperOpt)

In [None]:
!pip install "ray[tune]"

Collecting ray[tune]
  Downloading ray-2.9.3-cp310-cp310-manylinux2014_x86_64.whl (64.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 MB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboardX>=1.9 (from ray[tune])
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorboardX, ray
Successfully installed ray-2.9.3 tensorboardX-2.6.2.2


In [None]:
import os
import torch.optim as optim
from ray import train, tune
from ray.tune.schedulers import ASHAScheduler,AsyncHyperBandScheduler
from ray.util.accelerators import NVIDIA_TESLA_V100
from hyperopt import hp,Trials
import ray
from ray.tune.search.hyperopt import HyperOptSearch

def get_model(config,net,device):
  """Build a ``Transformer_encoder_LSTM_decoder`` from ``config`` and move it to ``device``."""
  model=Transformer_encoder_LSTM_decoder(config,net=net,device=device)
  return model.to(device)

def update_best_acc(model,valid_acc,best_acc,nb_epochs_without_improvement):
    """Return the updated ``(best_acc, nb_epochs_without_improvement)`` pair.

    A strictly better ``valid_acc`` becomes the new best and resets the counter;
    otherwise the best is kept and the counter grows by one. ``model`` is not used.
    """
    improved=valid_acc > best_acc
    if not improved:
      return best_acc,nb_epochs_without_improvement+1
    return valid_acc,0
def get_LRScheduler(optimizer,config,epochs):
  match config["scheduler"]:
    case None:
      return None
    case "StepLR":
      return optim.lr_scheduler.StepLR(optimizer,step_size=config["step_size"],gamma=config["gamma"])
    case "ReduceLROnPlateau":
      return optim.lr_scheduler.ReduceLROnPlateau(optimizer,factor=config["factor"],patience=config["patience"],threshold=config["threshold"],cooldown=config["cooldown"])
    case "ExponentialLR":
      return optim.lr_scheduler.ExponentialLR(optimizer,gamma=config["gamma"])
    case "CosineAnnealingLR":
      return optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=config["T_max"],eta_min=config["eta_min"])
    case "CyclicLR":
      return optim.lr_scheduler.CyclicLR(optimizer,base_lr=config["base_lr"],max_lr=config["max_lr"],step_size_up=config["step_size_up"],mode=config["mode"],cycle_momentum=False)

def get_otimizer(parameters,config):
  match config["optimizer"]:
    case "Adam":
      return optim.Adam(parameters,lr=config["lr"],betas=(config["beta_1"],config["beta_2"]),eps=config["eps"],weight_decay=config["weight_decay"],amsgrad=config["amsgrad"])
    case "AdamW":
      return optim.AdamW(parameters,lr=config["lr"],weight_decay=config["weight_decay"],amsgrad=config["amsgrad"])
    case "SGD":
      return optim.SGD(parameters,lr=config["lr"],momentum=config["momentum"],weight_decay=config["weight_decay"],nesterov=config["nesterov"])
    case "RMSprop":
      return optim.RMSprop(parameters,lr=config["lr"],alpha=config["alpha"],eps=config["eps"],weight_decay=config["weight_decay"],momentum=config["momentum"],centered=config["centered"])
def apply_lr_scheduler(lr_scheduler,acc,config):
  """Advance ``lr_scheduler`` by one step according to ``config["scheduler"]``.

  ``"ReduceLROnPlateau"`` is stepped with ``acc``, a ``None`` scheduler name is a no-op,
  and every other scheduler is stepped without argument.
  """
  scheduler_name=config["scheduler"]
  if scheduler_name is None:
    return
  if scheduler_name == "ReduceLROnPlateau":
    lr_scheduler.step(acc)
  else:
    lr_scheduler.step()

def train_(config,model,dataloaders):
    """Train ``model`` and return the best validation accuracy reached.

    Args:
      config: flat configuration. Keys read here: ``"epochs_classifcation_only"``, ``"reg"``,
        ``"epochs_complete_problem"`` (only when ``"reg"`` is truthy), ``"nb_batchs"`` and
        ``"early_stopping"``; the optimizer and scheduler factories read their own keys.
      model: module called as ``model(dict_batch, reg)``.
      dataloaders: dict with a ``"train"`` and a ``"valid"`` dataloader.

    Returns:
      The best validation accuracy, or ``-1`` if the loss became NaN.
    """
    print(config)
    device=get_device()
    epochs= config["epochs_classifcation_only"]
    if config["reg"]:
      epochs+=config["epochs_complete_problem"]
    optimizer_encoder = get_otimizer(model.parameters(),config)
    lr_scheduler=get_LRScheduler(optimizer_encoder,config,epochs)
    criterion = Total_loss()
    best_acc=-1
    nb_epochs_without_improvement=0
    for epoch in range(epochs):
      # The regression term joins the loss once the classification-only epochs are over.
      reg=epoch >= config["epochs_classifcation_only"]
      epoch_losses=[]
      model.train()
      for batch_idx,dict_batch in enumerate(dataloaders["train"]):
        # Train on exactly ``nb_batchs`` batches per epoch (the previous counter was
        # incremented before the check and therefore stopped one batch early).
        if batch_idx>=config["nb_batchs"]:
          break
        optimizer_encoder.zero_grad()
        dict_batch=set_dic_to(dict_batch,device)
        # Only the forward pass and the loss run under autocast; the backward pass and the
        # optimizer step are kept outside, as recommended by the PyTorch AMP documentation.
        # NOTE(review): no GradScaler is used, so float16 autocast on CUDA may under/overflow — confirm this is intended.
        with autocast(device_type=device.type):
          out=model(dict_batch, reg)
          loss=criterion(out,dict_batch, reg)
        if loss.isnan():
          print("loss is undifined")
          return -1
        loss.backward()
        optimizer_encoder.step()

        epoch_losses.append(loss.item())

        # Drop references to the batch tensors before the next iteration.
        dict_batch.clear()
        out.clear()
        del out, loss, dict_batch
      # An epoch without any batch has no mean loss; report NaN instead of triggering a numpy warning.
      epoch_loss=np.array(epoch_losses).mean() if epoch_losses else float("nan")
      valid_acc = evaluate(model,dataloaders["valid"],device)
      apply_lr_scheduler(lr_scheduler,valid_acc,config)
      best_acc, nb_epochs_without_improvement = update_best_acc(model,valid_acc,best_acc,nb_epochs_without_improvement)
      if config["early_stopping"]< nb_epochs_without_improvement:
        return best_acc

      print("epoch: ", epoch, "loss : ", epoch_loss, "acc: ", valid_acc)
    return best_acc


def get_datasets():
    """Load the preprocessed data and split it 80/10/10 with a seeded generator.

    Returns:
      ``(splits, net, nb_of_pos_ids)`` where ``splits`` is the list returned by
      ``random_split``, ``net`` comes from ``get_net(vocab)`` and
      ``nb_of_pos_ids`` is ``len(vocab)+1``.
    """
    dataset_directory="/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3"
    list_users,vocab=get_processed_data(
        src_directory_raw_data="drive/MyDrive/Shanghai-Telcome-Six-Months-DataSet",
        directory_raw_data='/content/dataset-telecom-6month',
        fixed_time_encoding=False,
        input_position=True,
        full_dataset=True,
        spliting_long_sequences=False,
        with_repeated_connections=False,
        max_sequence_length=100,
        min_sequence_size=2,
        save=False,
        path_to_save_dataset=dataset_directory,
        download=False,
        load_dataset_path=dataset_directory,
    )
    net=get_net(vocab)
    seed=get_reproducible_seeds()[0]
    full_dataset=VariableLengthDatasetWithPosID(list_users)
    split_generator=torch.Generator().manual_seed(seed)
    splits=torch.utils.data.random_split(full_dataset,[0.8,0.1,0.1],split_generator)
    nb_of_pos_ids=len(vocab)+1
    return splits,net,nb_of_pos_ids

def get_dataloaders(datasets,batch_size):
    """Wrap ``datasets[0]`` / ``datasets[1]`` into the ``"train"`` / ``"valid"`` dataloaders.

    The training loader is shuffled and uses ``batch_size``; the validation loader
    keeps its order and always uses batches of 256.
    """
    train_dataset,valid_dataset=datasets[0],datasets[1]
    return {
        "train":DataLoader(train_dataset,batch_size=batch_size,collate_fn=collate_fn_padd,shuffle=True),
        "valid":DataLoader(valid_dataset,batch_size=256,collate_fn=collate_fn_padd,shuffle=False),
    }

def eval_config(config,data=None,net=None):
    """Train one model for a (possibly nested) ``config`` and report its best validation accuracy.

    The configuration is flattened first; ``net`` is only handed to the model
    when ``use_gcn`` is truthy. Returns ``{"valid_accuracy": best_acc}``.
    """
    device=get_device()
    flat_config=f_unpack_dict(config)
    dataloaders=get_dataloaders(data,flat_config["batch_size"])
    graph=net if flat_config["use_gcn"] else None
    model=get_model(flat_config,graph,device)
    return {"valid_accuracy": train_(flat_config,model,dataloaders)}

def run_xp(xp_name,storage_path,algo,num_samples=10, max_num_epochs=10, gpus_per_trial=1, test=True):
  """Run one Ray Tune experiment over the HyperOpt search space defined below.

  Args:
    xp_name: experiment name given to ``train.RunConfig``.
    storage_path: directory given to ``train.RunConfig`` as ``storage_path``.
    algo: search algorithm to reuse; when ``None`` a new ``HyperOptSearch`` maximising
      ``valid_accuracy`` is created on the search space.
    num_samples: number of trials when ``test`` is false.
    max_num_epochs: not used by this function; kept so existing callers keep working.
    gpus_per_trial: number of GPUs requested for each trial.
    test: when true, a single trial is run regardless of ``num_samples``.

  Returns:
    ``(results, algo)``: the value returned by ``tuner.fit()`` and the search algorithm used.
  """
  os.environ["OMP_NUM_THREADS"] = '1'
  print(os.environ["OMP_NUM_THREADS"])
  datasets,net,nb_of_pos_ids=get_datasets()
  ray.shutdown()
  # Activation names shared by every activation hyper-parameter (the GCN one additionally accepts
  # 'swish' as first option). The order is significant: hp.choice samples an index into the list.
  activations=['ReLU6', 'PReLU', 'SELU', 'ELU', 'Mish', 'CELU', 'ReLU', 'Hardsigmoid', 'Tanh', 'LeakyReLU', 'Softplus', 'Hardshrink','Sigmoid', 'Hardtanh', 'SiLU', 'Tanhshrink', 'RReLU', 'Softshrink', 'Softsign', 'LogSigmoid', 'Softmin', 'GELU', 'Hardswish']
  config_dict= {
        "max_len":100,
        "nb_of_pos_ids":nb_of_pos_ids,
        "batch_size":2**hp.uniformint("batch_size",4,7),
        "nb_batchs":12*hp.uniformint("nb_batchs",1,15),
        "early_stopping":5,
        "epochs_classifcation_only":50,
        "reg_choice":hp.choice("reg_choice",
                        [
                            {"reg":True,"epochs_complete_problem":hp.uniformint("epochs_complete_problem",0,50)},
                            {"reg":False},
                        ]),
        "learning_rate_scheduler_choice": hp.choice(
          "learning_rate_scheduler_choice",
          [
              {
                  "scheduler": "StepLR",
                  "step_size": hp.uniformint("step_size", 1, 30),
                  "gamma": hp.uniform("gamma_slr", 0, 0.99),
              },
              {
                  "scheduler": "ReduceLROnPlateau",
                  "factor": hp.uniform("factor", 0, 0.9),
                  "patience": hp.uniformint("patience", 1, 10),
                  "threshold": hp.loguniform("threshold",-12,-1),
                  "cooldown":hp.uniformint("cooldown",0,10)
              },
              {
                  "scheduler":"ExponentialLR",
                  "gamma":hp.uniform("gamma_elr", 0, 0.9),
              },
            {"scheduler": None}  # No scheduler
        ]
    ),
      "optimizer": "AdamW", "lr": hp.loguniform("lr", -15, -2),"weight_decay":hp.loguniform("weight_decay_adam",-21,-1),"amsgrad":hp.choice("amsgrad",[True,False]),
      "input_size":2,
      "d_model":24*hp.uniformint("d_model",1,34),
      "dropout":hp.uniform("dropout",0,0.5),
      "normalize_features_independantly":hp.choice("normalize_features_independantly",[True,False]),
      "normalize_features_globally":hp.choice("normalize_features_globally",[True,False]),
      "concatenate_features":hp.choice("concatenate_features",[True,False]),
      "use_gcn_choice":hp.choice("use_gcn_choice",
                        [
                            {"use_gcn":True,
                             "layer_type":hp.choice("layer_type",["GCNConv","GraphSAGE","GAT"]),
                             "num_layers_gcn":hp.uniformint("num_layers_gcn",1,10),
                             "activation_gcn": hp.choice("activation_gcn",['swish']+activations),
                             "norm": hp.choice("norm",
                                               ['BatchNorm', 'GraphNorm', 'LayerNorm', 'PairNorm', 'InstanceNorm']
                             ),
                             "dropout_gcn":hp.uniform("dropout_gcn",0,0.5),
                             "hidden_channels":2**hp.uniformint("hidden_channels",4,9)
                             },
                            {"use_gcn":False}
                        ]),
      "activation": hp.choice("activation",activations),
      "positive_function":hp.choice("positive_function",["relu","exp","abs","sig"]),
      "transformers_model":True,
      "num_layers_transformer":hp.uniformint("num_layers_transformer",1,5),
      "encoder_only":hp.choice("encoder_only",[True,False]),
      "num_heads":3*2**hp.uniformint('num_heads', 0, 3),
      "learnable_pos_encoding": hp.choice("learnable_pos_encoding",[True,False]),
      "activation_transformers": hp.choice("activation_transformers",activations),
      "dropout_transformers":hp.uniform("dropout_transformers",0,0.5),

      "lstm_model_choice": hp.choice("lstm_model_choice",
                                     [{"lstm_model":True,
                                       "num_layers_lstm":hp.uniformint("num_layers_lstm",1,5),
                                       "lstm_layer_with_perceptron":
                                        hp.choice("lstm_layer_with_perceptron",
                                         [{"lstm_layer_with_perceptron":True,
                                           "activation_lstm":hp.choice("activation_lstm",activations),},
                                          {"lstm_layer_with_perceptron":False,
                                           "activation_lstm":None}]),
                                       "lstm_layer_with_layer_norm":hp.choice("lstm_layer_with_layer_norm",[True,False]),
                                       "dropout_lstm":hp.uniform("dropout_lstm",0,0.5),
                                       },
                                      {"lstm_model":False}])}
  if algo is None:
    algo = HyperOptSearch(space=config_dict, metric="valid_accuracy", mode="max", random_state_seed=get_reproducible_seeds()[0])
  # The GPU request follows the ``gpus_per_trial`` argument (it used to be hard-coded to 1).
  trainable_with_gpu = tune.with_resources(eval_config, {"cpu": 2, "gpu": gpus_per_trial})
  tuner = tune.Tuner(
        tune.with_parameters(trainable_with_gpu,data=datasets,net=net),
        tune_config=tune.TuneConfig(
                                search_alg=algo,
                                max_concurrent_trials=1,
                                num_samples=1 if test else num_samples,
                                    ),
        run_config=train.RunConfig(
            name=xp_name,
            storage_path=storage_path,
            verbose=0)
    )
  results = tuner.fit()
  return results, algo


def save_config_xps_to_drive(xps_name,drive_path,xp_size,xps_number,accuracy_target,max_num_epochs):
  """Create ``<drive_path>/<xps_name>`` and, if absent, its ``xps_configs`` state file.

  An existing state file is left untouched. ``accuracy_target`` and
  ``max_num_epochs`` are accepted but not stored.

  Returns:
    ``(xps_path, xps_configs)``: the experiment folder and the state-file path.
  """
  xps_path=os.path.join(drive_path,xps_name)
  xps_configs=os.path.join(xps_path,"xps_configs")
  os.makedirs(xps_path,exist_ok=True)
  if os.path.exists(xps_configs):
    return xps_path,xps_configs
  initial_state={
      "xps_name":xps_name,
      "xp_size":xp_size,
      "xps_number":xps_number,
      "current_xp": -1,
      "best_xp": {"idx":-1, "valid_accuracy": -1},
  }
  torch.save(initial_state,xps_configs)
  return xps_path,xps_configs


def update_config_dictionnary(xps_configs,best_results,num_xp):
  """Record ``num_xp`` as current experiment in the state file and update the best one if beaten.

  The state stored at ``xps_configs`` is loaded, modified and written back in place.
  """
  state=torch.load(xps_configs)
  state["current_xp"]=num_xp
  best_xp=state["best_xp"]
  if best_xp["valid_accuracy"]<best_results:
    best_xp["valid_accuracy"]=best_results
    best_xp["idx"]=num_xp
  torch.save(state,xps_configs)



def update_and_save(xp_name,xps_path,xps_configs,storage_path,results,algo,num_xp,accuracy_target):
  """Persist a finished experiment and tell whether tuning should go on.

  The state file is updated with the best ``valid_accuracy`` of ``results``, the experiment
  folder is copied from ``storage_path`` to ``xps_path``, the copy of the previous experiment
  (``xp_num_<num_xp-1>``) is removed from ``xps_path`` and the local folder is deleted.
  ``algo`` is not used.

  Returns:
    ``True`` while the best accuracy is still below ``accuracy_target``.
  """
  best_results=results.get_best_result(metric='valid_accuracy',mode='max').metrics['valid_accuracy']
  accarucy_target_not_reached= best_results< accuracy_target
  update_config_dictionnary(xps_configs,best_results,num_xp)
  shutil.copytree(os.path.join(storage_path,xp_name),os.path.join(xps_path,xp_name),dirs_exist_ok=True)
  if num_xp>=1:
    previous_xp_dir=os.path.join(xps_path,"xp_num_"+str(num_xp-1))
    # The previous copy may be missing (e.g. when resuming from a later ``num_xp``);
    # that must not abort the run after the new results were already saved.
    if os.path.isdir(previous_xp_dir):
      shutil.rmtree(previous_xp_dir)
  shutil.rmtree(os.path.join(storage_path,xp_name))
  return accarucy_target_not_reached


def run_all_xp(xps_name="hyperparameter_tuning_projet_long", num_xp=0,algo=None, xp_size=10, xps_number=10, accuracy_target=0.98, max_num_epochs=30, storage_path='/content/',drive_path="/content/drive/MyDrive"):
    """Chain tuning experiments until ``xps_number`` is reached or the accuracy target is met.

    Each experiment ``xp_num_<i>`` runs ``xp_size`` trials and reuses the search algorithm
    returned by the previous one; its results are saved under ``<drive_path>/<xps_name>``.

    Returns:
      ``(results, algo)`` of the last experiment run; ``results`` is ``None`` when no
      experiment was run (``num_xp >= xps_number``).
    """
    accarucy_target_not_reached=True
    # Defined up front so the final return is valid even if the loop body never executes.
    results=None
    xps_path,xps_configs=save_config_xps_to_drive(xps_name,drive_path,xp_size,xps_number,accuracy_target,max_num_epochs)
    while num_xp<xps_number and accarucy_target_not_reached:
      xp_name= "xp_num_"+str(num_xp)
      results,algo=run_xp(xp_name,storage_path,algo,num_samples=xp_size, max_num_epochs=max_num_epochs, gpus_per_trial=1, test=False)
      accarucy_target_not_reached=update_and_save(xp_name,xps_path,xps_configs,storage_path,results,algo,num_xp,accuracy_target)
      num_xp+=1
    return results,algo

In [None]:
import os

# Check whether the experiment folder already exists (the result is shown as the cell output).
xps_path=os.path.join("/content/drive/MyDrive","hyperparameter_tuning_projet_long")
os.path.exists(xps_path)

True

# Test hyperparameter tuning

In [None]:
# @title test tuning
from hyperopt import hp, pyll
# Smoke test: sample one configuration from a HyperOpt space and train it once with
# eval_config, without going through Ray Tune.
datasets,net,nb_of_pos_ids=get_datasets()
# This space is not the one built in run_xp: e.g. the optimizer is sampled among Adam/AdamW,
# early stopping and the number of classification epochs are sampled, and dropouts go up to 1.
space={
        "max_len":100,
        "nb_of_pos_ids":nb_of_pos_ids,
        "batch_size":2**hp.uniformint("batch_size",4,7),
        "nb_batchs":12*hp.uniformint("nb_batchs",1,16),
        "early_stopping":hp.uniformint("early_stopping",1,10),
        "epochs_classifcation_only":hp.uniformint("epochs_classifcation_only",1,80),
        "reg_choice":hp.choice("reg_choice",
                        [
                            {"reg":True,"epochs_complete_problem":hp.uniformint("epochs_complete_problem",0,50)},
                            {"reg":False},
                        ]),
        "learning_rate_scheduler_choice": hp.choice(
          "learning_rate_scheduler_choice",
          [
              {
                  "scheduler": "StepLR",
                  "step_size": hp.uniformint("step_size", 1, 30),
                  "gamma": hp.uniform("gamma_slr", 0, 0.99),
              },
              {
                  "scheduler": "ReduceLROnPlateau",
                  "factor": hp.uniform("factor", 0, 0.9),
                  "patience": hp.uniformint("patience", 1, 10),
                  "threshold": hp.loguniform("threshold",-12,-1),
                  "cooldown":hp.uniformint("cooldown",0,10)
              },
              {
                  "scheduler":"ExponentialLR",
                  "gamma":hp.uniform("gamma_elr", 0, 0.9),
              },
            {"scheduler": None}  # No scheduler
        ]
    ),
      "optimizer": hp.choice("optimizer",["Adam","AdamW"]), "lr": hp.loguniform("lr", -17, -2),"beta_1":hp.uniform("beta_1", 0.8, 1), "beta_2" : hp.uniform("beta_2", 0.95, 1),"eps": hp.loguniform("eps_adam", -20, -12),"weight_decay":hp.loguniform("weight_decay_adam",-20,-1),"amsgrad":hp.choice("amsgrad",[True,False]),
      "input_size":2,
      "d_model":24*hp.uniformint("d_model",1,60),
      "dropout":hp.uniform("dropout",0,1),
      "normalize_features_independantly":hp.choice("normalize_features_independantly",[True,False]),
      "normalize_features_globally":hp.choice("normalize_features_globally",[True,False]),
      "concatenate_features":hp.choice("concatenate_features",[True,False]),
      # Single option: every sampled configuration has use_gcn=True.
      "use_gcn_choice":hp.choice("use_gcn_choice",
                        [
                            {"use_gcn":True,
                             "layer_type":hp.choice("layer_type",["GCNConv","GraphSAGE","GAT"]),
                             "num_layers_gcn":hp.uniformint("num_layers_gcn",1,10),
                             "activation_gcn": hp.choice("activation_gcn",
                              ['swish', 'ReLU6', 'PReLU', 'SELU', 'ELU', 'Mish', 'CELU', 'ReLU', 'Hardsigmoid', 'Tanh', 'LeakyReLU', 'Softplus', 'Hardshrink','Sigmoid', 'Hardtanh', 'SiLU', 'Tanhshrink', 'RReLU', 'Softshrink', 'Softsign', 'LogSigmoid', 'Softmin', 'GELU', 'Hardswish']
                             ),
                             "norm": hp.choice("norm",
                                               ['BatchNorm', 'GraphNorm', 'LayerNorm', 'PairNorm', 'InstanceNorm']
                             ),
                             "dropout_gcn":hp.uniform("dropout_gcn",0,1),
                             "hidden_channels":2**hp.uniformint("hidden_channels",6,11)
                             },

                        ]),
      "activation": hp.choice(
                "activation",
                 ['ReLU6', 'PReLU', 'SELU', 'ELU', 'Mish', 'CELU', 'ReLU', 'Hardsigmoid', 'Tanh', 'LeakyReLU', 'Softplus', 'Hardshrink','Sigmoid', 'Hardtanh', 'SiLU', 'Tanhshrink', 'RReLU', 'Softshrink', 'Softsign', 'LogSigmoid', 'Softmin', 'GELU', 'Hardswish']),
      "positive_function":hp.choice("positive_function",["relu","exp","abs","sig"]),
      "transformers_model":True,
      "num_layers_transformer":hp.uniformint("num_layers_transformer",1,6),
      "encoder_only":hp.choice("encoder_only",[True,False]),
      "num_heads":3*2**hp.uniformint('num_heads', 0, 3),
      "learnable_pos_encoding": hp.choice("learnable_pos_encoding",[True,False]),
      "activation_transformers": hp.choice("activation_transformers",['ReLU6', 'PReLU', 'SELU', 'ELU', 'Mish', 'CELU', 'ReLU', 'Hardsigmoid', 'Tanh', 'LeakyReLU', 'Softplus', 'Hardshrink','Sigmoid', 'Hardtanh', 'SiLU', 'Tanhshrink', 'RReLU', 'Softshrink', 'Softsign', 'LogSigmoid', 'Softmin', 'GELU', 'Hardswish']),
      "dropout_transformers":hp.uniform("dropout_transformers",0,1),

      "lstm_model_choice": hp.choice("lstm_model_choice",
                                     [{"lstm_model":True,
                                       "num_layers_lstm":hp.uniformint("num_layers_lstm",1,6),
                                       "lstm_layer_with_perceptron":
                                        hp.choice("lstm_layer_with_perceptron",
                                         [{"lstm_layer_with_perceptron":True,
                                           "activation_lstm":hp.choice("activation_lstm",['ReLU6', 'PReLU', 'SELU', 'ELU', 'Mish', 'CELU', 'ReLU', 'Hardsigmoid', 'Tanh', 'LeakyReLU', 'Softplus', 'Hardshrink','Sigmoid', 'Hardtanh', 'SiLU', 'Tanhshrink', 'RReLU', 'Softshrink', 'Softsign', 'LogSigmoid', 'Softmin', 'GELU', 'Hardswish']),},
                                          {"lstm_layer_with_perceptron":False,
                                           "activation_lstm":None}]),
                                       "lstm_layer_with_layer_norm":hp.choice("lstm_layer_with_layer_norm",[True,False]),
                                       "dropout_lstm":hp.uniform("dropout_lstm",0,1),
                                       },
                                      {"lstm_model":False}])}
# Draw one concrete (still nested) configuration; eval_config flattens it before use.
config=pyll.stochastic.sample(space)
#print(config)
eval_config(config,data=datasets,net=net)

In [None]:
# Single-trial run of the Ray Tune pipeline (num_samples=1 with test=False yields one trial).
run_xp('test',"/content/test",None,num_samples=1, max_num_epochs=1, gpus_per_trial=1, test=False)

In [None]:
# Full campaign: up to 30 experiments of 10 trials each, stopping early once the
# best validation accuracy reaches 0.98.
run_all_xp(xps_name="hyperparameter_tuning_projet_long_with_reduced_search_space_and_layer_normalization", num_xp=0, algo=None, xp_size=10, xps_number=30, accuracy_target=0.98, max_num_epochs=None, storage_path='/content/tuning',drive_path="/content/drive/MyDrive")

1
loading already preprocessed data: 
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/list_users
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/vocab


  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
  data_dict[key] = torch.as_tensor(value)
2024-03-10 15:58:48,418	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-10 15:59:03,888	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-10 15:59:03,892	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_0        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_0
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_0`
[36m(eval_config pid=3581)[0m CUDA is available. Using GPU.
[36m(eval_config pid=3581)[0m GraphSAGE




[36m(eval_config pid=3581)[0m {'activation': 'CELU', 'activation_transformers': 'LeakyReLU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 240, 'dropout': 0.3308533620831129, 'dropout_transformers': 0.10530742664412235, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 8, 'factor': 0.22615437911339334, 'patience': 4, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.15438065049384908, 'lr': 0.0034272191380994447, 'dropout_lstm': 0.351885955804556, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'GELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 24, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 3, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_gc



[36m(eval_config pid=3581)[0m {'activation': 'Hardtanh', 'activation_transformers': 'SELU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 552, 'dropout': 0.12898822103770274, 'dropout_transformers': 0.07317228017938249, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.2380489027418383, 'scheduler': 'StepLR', 'step_size': 17, 'lr': 0.002929487539519571, 'dropout_lstm': 0.04776205521636728, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'Softshrink', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': True, 'num_heads': 6, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'abs', 'reg': False, 'transformers_model': True, 'activation_gcn': 'LogSigmoid', 'dropout_gcn': 0.47232527547133546,



[36m(eval_config pid=3581)[0m {'activation': 'SiLU', 'activation_transformers': 'Softsign', 'amsgrad': False, 'batch_size': 32, 'concatenate_features': False, 'd_model': 792, 'dropout': 0.35107325126754746, 'dropout_transformers': 0.0702825014820036, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.7333551390392534, 'scheduler': 'StepLR', 'step_size': 22, 'lr': 0.0018692153030350851, 'dropout_lstm': 0.4884235046925847, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': True, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 34, 'reg': True, 'transformers_model': True, 'activation_gcn': 'Softplus', 'dropout_gcn': 



[36m(eval_config pid=3581)[0m epoch:  0 loss :  7.856042272261991 acc:  0.0010048455887278656
[36m(eval_config pid=3581)[0m epoch:  1 loss :  7.304978257827177 acc:  0.005091217649554518
[36m(eval_config pid=3581)[0m epoch:  2 loss :  7.269775288705607 acc:  0.003572784315476855
[36m(eval_config pid=3581)[0m epoch:  3 loss :  7.257794321948335 acc:  0.0035951142174485856
[36m(eval_config pid=3581)[0m epoch:  4 loss :  7.258833812393305 acc:  0.005560145590960856
[36m(eval_config pid=3581)[0m epoch:  5 loss :  7.2352835604252705 acc:  0.0061183931402541145
[36m(eval_config pid=3581)[0m epoch:  6 loss :  7.237831323201419 acc:  0.003572784315476855
[36m(eval_config pid=3581)[0m epoch:  7 loss :  7.224393695365381 acc:  0.0061183931402541145
[36m(eval_config pid=3581)[0m loss is undifined
[36m(eval_config pid=3581)[0m CUDA is available. Using GPU.
[36m(eval_config pid=3581)[0m {'activation': 'ReLU6', 'activation_transformers': 'Hardtanh', 'amsgrad': False, 'batch_siz

[36m(eval_config pid=3581)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=3581)[0m epoch:  0 loss :  8.091187409951653 acc:  0.0004465980394346069
[36m(eval_config pid=3581)[0m epoch:  1 loss :  8.08701518555762 acc:  0.0004465980394346069
[36m(eval_config pid=3581)[0m epoch:  2 loss :  8.068031284171091 acc:  0.0004689279414063372
[36m(eval_config pid=3581)[0m epoch:  3 loss :  8.084559836857755 acc:  0.0004912578433780676
[36m(eval_config pid=3581)[0m epoch:  4 loss :  8.083657472905978 acc:  0.0004912578433780676
[36m(eval_config pid=3581)[0m epoch:  5 loss :  8.068970707100881 acc:  0.000513587745349798
[36m(eval_config pid=3581)[0m epoch:  6 loss :  8.075658173628256 acc:  0.000513587745349798
[36m(eval_config pid=3581)[0m epoch:  7 loss :  8.06440319813473 acc:  0.0005359176473215282
[36m(eval_config pid=3581)[0m epoch:  8 loss :  8.084796999541807 acc:  0.0005359176473215282
[36m(eval_config pid=3581)[0m epoch:  9 loss :  8.079370915050237 acc:  0.0005359176473215282
[36m(eval_config pid=3581)[0m epoch:  10 l

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-10 16:24:28,464	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-10 16:24:43,587	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-10 16:24:43,589	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_1        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_1
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_1`
[36m(eval_config pid=10506)[0m CUDA is available. Using GPU.
[36m(eval_config pid=10506)[0m GAT




[36m(eval_config pid=10506)[0m {'activation': 'Softshrink', 'activation_transformers': 'LeakyReLU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 168, 'dropout': 0.01834855461248658, 'dropout_transformers': 0.09168808713381432, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.8931843056885272, 'scheduler': 'ExponentialLR', 'lr': 0.02947138104242833, 'dropout_lstm': 0.3839730900124464, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'Softshrink', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'abs', 'reg': False, 'transformers_model': True, 'activation_gcn': 'Hardshrink', 'dropout_gcn': 0.38965632820385276, '



[36m(eval_config pid=10506)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'GELU', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': False, 'd_model': 504, 'dropout': 0.20452309705094546, 'dropout_transformers': 0.3225236367445611, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.7734896597011514, 'scheduler': 'StepLR', 'step_size': 3, 'lr': 0.00022473415354181788, 'dropout_lstm': 0.32969834865691566, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 48, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'relu', 'reg': False, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 2.5750105157114694e-05}
[36m(eval



[36m(eval_config pid=10506)[0m CUDA is available. Using GPU.
[36m(eval_config pid=10506)[0m GCNConv
[36m(eval_config pid=10506)[0m {'activation': 'Mish', 'activation_transformers': 'RReLU', 'amsgrad': False, 'batch_size': 64, 'concatenate_features': True, 'd_model': 216, 'dropout': 0.43759535220262225, 'dropout_transformers': 0.27297184335365693, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.3028036681167195, 'scheduler': 'StepLR', 'step_size': 27, 'lr': 5.155488148806924e-06, 'dropout_lstm': 0.24901146005644376, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 24, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': True, 'num_heads': 12, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'abs', 'reg': False,

[36m(eval_config pid=10506)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=10506)[0m epoch:  0 loss :  8.01722040376463 acc:  0.0008262063729540227
[36m(eval_config pid=10506)[0m epoch:  1 loss :  8.009111351066537 acc:  0.0012504745104168992
[36m(eval_config pid=10506)[0m epoch:  2 loss :  8.00702942001236 acc:  0.0012951343143603599
[36m(eval_config pid=10506)[0m epoch:  3 loss :  8.007453601677101 acc:  0.0013174642163320902
[36m(eval_config pid=10506)[0m epoch:  4 loss :  8.00717492537065 acc:  0.0013397941183038206
[36m(eval_config pid=10506)[0m epoch:  5 loss :  8.007135894748714 acc:  0.0013397941183038206
[36m(eval_config pid=10506)[0m epoch:  6 loss :  8.006846868074858 acc:  0.0013397941183038206
[36m(eval_config pid=10506)[0m epoch:  7 loss :  8.006878249295108 acc:  0.0013397941183038206
[36m(eval_config pid=10506)[0m epoch:  8 loss :  8.007093713000105 acc:  0.0013397941183038206
[36m(eval_config pid=10506)[0m epoch:  9 loss :  8.007102562830998 acc:  0.0013397941183038206
[36m(eval_config pid=10506)[0m 



[36m(eval_config pid=10506)[0m epoch:  0 loss :  7.416847728123175 acc:  0.005001898041667597
[36m(eval_config pid=10506)[0m epoch:  1 loss :  7.319793304550314 acc:  0.003438804903646473
[36m(eval_config pid=10506)[0m epoch:  2 loss :  7.3094570168824955 acc:  0.003438804903646473
[36m(eval_config pid=10506)[0m epoch:  3 loss :  7.304258449055324 acc:  0.005001898041667597
[36m(eval_config pid=10506)[0m epoch:  4 loss :  7.29687814623396 acc:  0.0011388250005582475
[36m(eval_config pid=10506)[0m epoch:  5 loss :  7.297992291851578 acc:  0.003438804903646473

1
loading already preprocessed data: 
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/list_users
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/vocab


  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-10 17:06:20,373	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-10 17:06:35,189	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-10 17:06:35,191	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_2        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_2
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_2`




[36m(eval_config pid=21340)[0m CUDA is available. Using GPU.
[36m(eval_config pid=21340)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'Softsign', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 48, 'dropout': 0.09506613328743097, 'dropout_transformers': 0.32149113081430036, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.6485194651290492, 'scheduler': 'StepLR', 'step_size': 2, 'lr': 0.0002617397335834688, 'dropout_lstm': 0.48302290230585493, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 48, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 1, 'reg': True, 'tr



[36m(eval_config pid=21340)[0m {'activation': 'Hardshrink', 'activation_transformers': 'GELU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 672, 'dropout': 0.4919623830688768, 'dropout_transformers': 0.02034306998237919, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.9860525136032905, 'scheduler': 'StepLR', 'step_size': 23, 'lr': 0.00012358054116552142, 'dropout_lstm': 0.4949008960616998, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 36, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 1.636



[36m(eval_config pid=21340)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Hardshrink', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 672, 'dropout': 0.47980410140855057, 'dropout_transformers': 0.0023039197429331575, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.9310843233951526, 'scheduler': 'StepLR', 'step_size': 25, 'lr': 5.105097948135802e-05, 'dropout_lstm': 0.4995154709818789, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 38, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay



[36m(eval_config pid=21340)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Hardswish', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 672, 'dropout': 0.4943367716741284, 'dropout_transformers': 0.014475647249882387, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 1, 'factor': 0.02196817428216241, 'patience': 10, 'scheduler': 'ReduceLROnPlateau', 'threshold': 9.41660336346162e-06, 'lr': 4.41197896125534e-05, 'dropout_lstm': 0.4938554968480045, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 40, 'reg': True, 



[36m(eval_config pid=21340)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Hardshrink', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 600, 'dropout': 0.47875861007010395, 'dropout_transformers': 0.001056604010270197, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.9892408182937804, 'scheduler': 'StepLR', 'step_size': 30, 'lr': 6.162707580225076e-05, 'dropout_lstm': 0.3982964302529492, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 26, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay'



[36m(eval_config pid=21340)[0m {'activation': 'Tanh', 'activation_transformers': 'Mish', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 576, 'dropout': 0.4774059345215137, 'dropout_transformers': 0.031348393792711995, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.9887138711964137, 'scheduler': 'StepLR', 'step_size': 21, 'lr': 9.939420944917358e-07, 'dropout_lstm': 0.4145860362751082, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 24, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 9.42163982



[36m(eval_config pid=21340)[0m {'activation': 'PReLU', 'activation_transformers': 'Softplus', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 744, 'dropout': 0.2878184730369306, 'dropout_transformers': 0.040858527434671194, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 1, 'factor': 0.8200856692298591, 'patience': 2, 'scheduler': 'ReduceLROnPlateau', 'threshold': 2.3740073111010183e-05, 'lr': 1.3394809372498067e-05, 'dropout_lstm': 0.4113590082702857, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 15, 'reg': True, 'tra



[36m(eval_config pid=21340)[0m {'activation': 'ELU', 'activation_transformers': 'SiLU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 600, 'dropout': 0.4625860131892625, 'dropout_transformers': 0.14115466591035342, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.9787567498808057, 'scheduler': 'StepLR', 'step_size': 28, 'lr': 0.00010617509843213127, 'dropout_lstm': 0.43186679821727514, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 26, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 8.8556766



[36m(eval_config pid=21340)[0m {'activation': 'ELU', 'activation_transformers': 'SiLU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 480, 'dropout': 0.31452836904348236, 'dropout_transformers': 0.13470346099768687, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 4, 'factor': 0.49121603560687493, 'patience': 7, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.00045583150797639247, 'lr': 0.00014312359413428769, 'dropout_lstm': 0.3035380004777843, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 43, 'reg': True, 'transfo



[36m(eval_config pid=21340)[0m {'activation': 'RReLU', 'activation_transformers': 'Sigmoid', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 672, 'dropout': 0.4499734506126374, 'dropout_transformers': 0.13137301920984235, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.4168773933005001, 'scheduler': 'StepLR', 'step_size': 30, 'lr': 1.2851115021366461e-05, 'dropout_lstm': 0.4472187317194874, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 30, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 6.990

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-10 18:31:40,375	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-10 18:31:54,547	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-10 18:31:54,549	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_3        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_3
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_3`
[36m(eval_config pid=42630)[0m CUDA is available. Using GPU.




[36m(eval_config pid=42630)[0m {'activation': 'GELU', 'activation_transformers': 'Softmin', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 816, 'dropout': 0.3438531616179985, 'dropout_transformers': 0.14165092081334543, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'lr': 0.0004420097344922277, 'dropout_lstm': 0.13136606724099803, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 19, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 4.231115356342549e-06}
[36m(eval_config pid=42630)[0m 

[36m(eval_config pid=42630)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=42630)[0m epoch:  0 loss :  7.533016605911968 acc:  0.026170645110867963
[36m(eval_config pid=42630)[0m epoch:  1 loss :  6.65168462289828 acc:  0.12477949221802916
[36m(eval_config pid=42630)[0m epoch:  2 loss :  5.345523395270945 acc:  0.23254359913359982
[36m(eval_config pid=42630)[0m epoch:  3 loss :  4.391495143141702 acc:  0.26728892660161224
[36m(eval_config pid=42630)[0m epoch:  4 loss :  3.85149136881962 acc:  0.3090011834848045
[36m(eval_config pid=42630)[0m epoch:  5 loss :  3.5210009089140133 acc:  0.3290087756514749
[36m(eval_config pid=42630)[0m epoch:  6 loss :  3.2909327221808033 acc:  0.3435902016390148
[36m(eval_config pid=42630)[0m epoch:  7 loss :  3.0009961083670644 acc:  0.35428622468347365
[36m(eval_config pid=42630)[0m epoch:  8 loss :  2.981705583144571 acc:  0.3683094031217203
[36m(eval_config pid=42630)[0m epoch:  9 loss :  2.9166000245887544 acc:  0.37538798204675883
[36m(eval_config pid=42630)[0m epoch:  10 loss : 



[36m(eval_config pid=42630)[0m {'activation': 'ELU', 'activation_transformers': 'ELU', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 528, 'dropout': 0.3051023074058299, 'dropout_transformers': 0.27717582117318007, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.5152221102552028, 'scheduler': 'StepLR', 'step_size': 20, 'lr': 4.8629247907774085e-06, 'dropout_lstm': 0.4456332286951031, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 28, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 4.013053419



[36m(eval_config pid=42630)[0m {'activation': 'CELU', 'activation_transformers': 'PReLU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 624, 'dropout': 0.4948613522047571, 'dropout_transformers': 0.047406444433991904, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.9715754455123032, 'scheduler': 'StepLR', 'step_size': 26, 'lr': 0.00010561099548622995, 'dropout_lstm': 0.18277793578830526, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 10, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 2.353230



[36m(eval_config pid=42630)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'SiLU', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 480, 'dropout': 0.06724681533227633, 'dropout_transformers': 0.1208493852023012, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'lr': 6.6332456692384125e-06, 'dropout_lstm': 0.3249174116888139, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 23, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 2.2047127209201305e-08}
[36m(eval_config pid=42630)[



[36m(eval_config pid=42630)[0m {'activation': 'Softsign', 'activation_transformers': 'LogSigmoid', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 720, 'dropout': 0.3685605833636331, 'dropout_transformers': 0.25674047210316275, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.0433654566444151, 'scheduler': 'StepLR', 'step_size': 24, 'lr': 0.12918973204548329, 'dropout_lstm': 0.28474731249573326, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 37, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.0



[36m(eval_config pid=42630)[0m {'activation': 'Hardswish', 'activation_transformers': 'ReLU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 552, 'dropout': 0.45335554038458487, 'dropout_transformers': 0.1550292893382258, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 5, 'factor': 0.0001703930088358674, 'patience': 7, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.0018072287515277542, 'lr': 2.3140061076331447e-05, 'dropout_lstm': 0.4487368858733206, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 43, 'reg': True, 'tr



[36m(eval_config pid=42630)[0m epoch:  0 loss :  7.718835362764162 acc:  0.005269856865328361
[36m(eval_config pid=42630)[0m epoch:  1 loss :  7.346097478242678 acc:  0.00649800147377353
[36m(eval_config pid=42630)[0m epoch:  2 loss :  7.121283780748599 acc:  0.044324855413884735
[36m(eval_config pid=42630)[0m epoch:  3 loss :  6.8225647088523225 acc:  0.08011968827456847
[36m(eval_config pid=42630)[0m epoch:  4 loss :  6.434593075903777 acc:  0.10890293191612889
[36m(eval_config pid=42630)[0m epoch:  5 loss :  6.056678593715775 acc:  0.142018176540205
[36m(eval_config pid=42630)[0m epoch:  6 loss :  5.496078397626075 acc:  0.17370430743809034
[36m(eval_config pid=42630)[0m epoch:  7 loss :  5.1609861694763755 acc:  0.1989817564700891
[36m(eval_config pid=42630)[0m epoch:  8 loss :  4.872577281755822 acc:  0.23133778442712635
[36m(eval_config pid=42630)[0m epoch:  9 loss :  4.580361831968076 acc:  0.25114440747605116
[36m(eval_config pid=42630)[0m epoch:  10 loss 



[36m(eval_config pid=42630)[0m {'activation': 'Hardtanh', 'activation_transformers': 'ReLU6', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': True, 'd_model': 792, 'dropout': 0.4236397583190712, 'dropout_transformers': 0.1106874617714919, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'lr': 0.0030331599014896678, 'dropout_lstm': 0.36807632135382384, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 20, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 7.192611057108053e-06}
[36m(eval_config pid=42630)[0m CU



[36m(eval_config pid=42630)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Tanh', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 456, 'dropout': 0.2658760496573009, 'dropout_transformers': 0.24292985463551653, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.6547226603222032, 'scheduler': 'StepLR', 'step_size': 17, 'lr': 0.001187794341727599, 'dropout_lstm': 0.448211321629359, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 42, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 5.8185439



[36m(eval_config pid=42630)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Tanh', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 360, 'dropout': 0.26116961672473477, 'dropout_transformers': 0.3671638101350774, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.576195809198437, 'scheduler': 'StepLR', 'step_size': 8, 'lr': 0.0012538223841012035, 'dropout_lstm': 0.2762220113891003, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'Hardtanh', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 10, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 7.59

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-10 19:51:35,035	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-10 19:51:49,483	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-10 19:51:49,484	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_4        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_4
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_4`
[36m(eval_config pid=62604)[0m CUDA is available. Using GPU.




[36m(eval_config pid=62604)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Tanh', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 432, 'dropout': 0.11755847073077369, 'dropout_transformers': 0.3057191508007018, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.3846357023516324, 'scheduler': 'StepLR', 'step_size': 17, 'lr': 0.005275982960861348, 'dropout_lstm': 0.35143685329930685, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 49, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.00051



[36m(eval_config pid=62604)[0m epoch:  0 loss :  7.40297600530809 acc:  0.006051403434338924
[36m(eval_config pid=62604)[0m epoch:  1 loss :  7.028814131213773 acc:  0.01038340441685461
[36m(eval_config pid=62604)[0m epoch:  2 loss :  6.865513976927726 acc:  0.009445548534041935
[36m(eval_config pid=62604)[0m epoch:  3 loss :  6.814328393628521 acc:  0.009445548534041935
[36m(eval_config pid=62604)[0m epoch:  4 loss :  6.7755461692810055 acc:  0.010807672554317487
[36m(eval_config pid=62604)[0m epoch:  5 loss :  7.340303950155935 acc:  0.0012951343143603599
[36m(eval_config pid=62604)[0m epoch:  6 loss :  7.352220147655856 acc:  0.0023223098050599556
[36m(eval_config pid=62604)[0m epoch:  7 loss :  7.355152216265278 acc:  0.005001898041667597
[36m(eval_config pid=62604)[0m epoch:  8 loss :  7.357634332103114 acc:  0.003438804903646473
[36m(eval_config pid=62604)[0m epoch:  9 loss :  7.354564660595309 acc:  0.0028135676484380232
[36m(eval_config pid=62604)[0m CUDA 



[36m(eval_config pid=62604)[0m {'activation': 'Tanhshrink', 'activation_transformers': 'SELU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 456, 'dropout': 0.2135259807810757, 'dropout_transformers': 0.4003117953103801, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.7413029520849147, 'scheduler': 'ExponentialLR', 'lr': 0.0008268571870904207, 'dropout_lstm': 0.20109631189942853, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'abs', 'epochs_complete_problem': 29, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.0163457029252



[36m(eval_config pid=62604)[0m {'activation': 'ReLU6', 'activation_transformers': 'Softshrink', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 528, 'dropout': 0.16076125580011255, 'dropout_transformers': 0.16388874148420207, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.7296125901052692, 'scheduler': 'StepLR', 'step_size': 6, 'lr': 0.0004817049099515965, 'dropout_lstm': 0.07727071494945559, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'LogSigmoid', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'relu', 'epochs_complete_problem': 15, 'reg': True, 'transformers_model': True, 'activation_gcn': 'SiLU', 'd



[36m(eval_config pid=62604)[0m {'activation': 'Mish', 'activation_transformers': 'CELU', 'amsgrad': False, 'batch_size': 16, 'concatenate_features': False, 'd_model': 624, 'dropout': 0.35770031028184623, 'dropout_transformers': 0.48513433264790473, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.6809469080431109, 'scheduler': 'StepLR', 'step_size': 10, 'lr': 0.02079692342505266, 'dropout_lstm': 0.46879329030674655, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'abs', 'epochs_complete_problem': 46, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 3.478495928



[36m(eval_config pid=62604)[0m {'activation': 'Softmin', 'activation_transformers': 'LogSigmoid', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 720, 'dropout': 0.2671760123766716, 'dropout_transformers': 0.19032810329621433, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.496851166761099, 'scheduler': 'StepLR', 'step_size': 16, 'lr': 0.001885332967648025, 'dropout_lstm': 0.3592150257979735, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'ReLU6', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'relu', 'epochs_complete_problem': 6, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 5.7

[36m(eval_config pid=62604)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=62604)[0m epoch:  0 loss :  7.948126430971077 acc:  0.0023223098050599556
[36m(eval_config pid=62604)[0m epoch:  1 loss :  7.788612268057214 acc:  0.004354330884487417
[36m(eval_config pid=62604)[0m epoch:  2 loss :  7.667368136256574 acc:  0.005001898041667597
[36m(eval_config pid=62604)[0m epoch:  3 loss :  7.58743409076369 acc:  0.005001898041667597
[36m(eval_config pid=62604)[0m epoch:  4 loss :  7.503788861883692 acc:  0.005001898041667597
[36m(eval_config pid=62604)[0m epoch:  5 loss :  7.459180780203946 acc:  0.005001898041667597
[36m(eval_config pid=62604)[0m epoch:  6 loss :  7.410375106765564 acc:  0.005001898041667597
[36m(eval_config pid=62604)[0m epoch:  7 loss :  7.380524020597159 acc:  0.003438804903646473
[36m(eval_config pid=62604)[0m CUDA is available. Using GPU.
[36m(eval_config pid=62604)[0m {'activation': 'Hardsigmoid', 'activation_transformers': 'ReLU6', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': False, 'd

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-10 20:44:22,127	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-10 20:44:36,328	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-10 20:44:36,330	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_5        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_5
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_5`
[36m(eval_config pid=75976)[0m CUDA is available. Using GPU.
[36m(eval_config pid=75976)[0m GAT




[36m(eval_config pid=75976)[0m {'activation': 'ReLU', 'activation_transformers': 'SiLU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': True, 'd_model': 456, 'dropout': 0.07266906133920159, 'dropout_transformers': 0.3904479455693327, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.6907463133520679, 'scheduler': 'ExponentialLR', 'lr': 9.512721851361545e-07, 'dropout_lstm': 0.3293871038514281, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 35, 'reg': True, 'transformers_model': True, 'activation_gcn': 'Hardsigmoid', 'dropout_gcn': 0.485178



[36m(eval_config pid=75976)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'ELU', 'amsgrad': False, 'batch_size': 16, 'concatenate_features': False, 'd_model': 504, 'dropout': 0.39678315228417826, 'dropout_transformers': 0.263568563890213, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.5673346444820138, 'scheduler': 'StepLR', 'step_size': 7, 'lr': 0.0001926940701022216, 'dropout_lstm': 0.38198661215791296, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'ELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 36, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 6, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'abs', 'reg': False, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.004740523607723396}
[36m(eval_conf

[36m(eval_config pid=75976)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=75976)[0m loss is undifined
[36m(eval_config pid=75976)[0m CUDA is available. Using GPU.




[36m(eval_config pid=75976)[0m {'activation': 'PReLU', 'activation_transformers': 'Hardswish', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': True, 'd_model': 360, 'dropout': 0.0013605650986124318, 'dropout_transformers': 0.18204438676651752, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.8971030315371361, 'scheduler': 'StepLR', 'step_size': 19, 'lr': 7.680677314027735e-05, 'dropout_lstm': 0.4246886902953353, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'relu', 'epochs_complete_problem': 39, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 2.2



[36m(eval_config pid=75976)[0m {'activation': 'Tanh', 'activation_transformers': 'Mish', 'amsgrad': False, 'batch_size': 32, 'concatenate_features': False, 'd_model': 648, 'dropout': 0.22450431868258813, 'dropout_transformers': 0.2773618759806947, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.2758069608728557, 'scheduler': 'StepLR', 'step_size': 28, 'lr': 0.0566058506719609, 'dropout_lstm': 0.1465377556397805, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 3, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.000238583408865439}
[36m(eval_config pid



[36m(eval_config pid=75976)[0m loss is undifined
[36m(eval_config pid=75976)[0m CUDA is available. Using GPU.
[36m(eval_config pid=75976)[0m {'activation': 'GELU', 'activation_transformers': 'Tanhshrink', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 432, 'dropout': 0.038439441106465994, 'dropout_transformers': 0.0752189495556401, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.39880987944432933, 'scheduler': 'StepLR', 'step_size': 15, 'lr': 2.6339252400357853e-05, 'dropout_lstm': 0.2951617995835757, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 's



[36m(eval_config pid=75976)[0m epoch:  0 loss :  7.961953834748604 acc:  0.002858227452381484
[36m(eval_config pid=75976)[0m epoch:  1 loss :  7.620114514525508 acc:  0.008574682357144451
[36m(eval_config pid=75976)[0m epoch:  2 loss :  7.253664419684611 acc:  0.03597347207645758
[36m(eval_config pid=75976)[0m epoch:  3 loss :  6.931489931025975 acc:  0.07076345934841347
[36m(eval_config pid=75976)[0m epoch:  4 loss :  6.5600646314486655 acc:  0.10347676573699842
[36m(eval_config pid=75976)[0m epoch:  5 loss :  6.235202144569074 acc:  0.13460464908559053
[36m(eval_config pid=75976)[0m epoch:  6 loss :  5.922251708070997 acc:  0.1544782618404305
[36m(eval_config pid=75976)[0m epoch:  7 loss :  5.7219705447344715 acc:  0.1755800192037157
[36m(eval_config pid=75976)[0m epoch:  8 loss :  5.4788090880488 acc:  0.19319831185941094
[36m(eval_config pid=75976)[0m epoch:  9 loss :  5.315964389854754 acc:  0.2103923363776433
[36m(eval_config pid=75976)[0m epoch:  10 loss : 



[36m(eval_config pid=75976)[0m {'activation': 'CELU', 'activation_transformers': 'ReLU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 576, 'dropout': 0.2732600885089167, 'dropout_transformers': 0.15233159194610477, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 6, 'factor': 0.8941761721781163, 'patience': 8, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.006517438431592051, 'lr': 0.0005688977478552451, 'dropout_lstm': 0.2678404530925982, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_gcn':

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-10 21:54:48,353	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-10 21:55:02,634	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-10 21:55:02,636	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_6        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_6
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_6`
[36m(eval_config pid=93639)[0m CUDA is available. Using GPU.
[36m(eval_config pid=93639)[0m GraphSAGE




[36m(eval_config pid=93639)[0m {'activation': 'CELU', 'activation_transformers': 'ReLU', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': False, 'd_model': 768, 'dropout': 0.20204198964704126, 'dropout_transformers': 0.3449150229115517, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 2, 'factor': 0.6595196235441989, 'patience': 9, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.020116107551875935, 'lr': 0.0010911025374218794, 'dropout_lstm': 0.08068125592597508, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'Hardshrink', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': True, 'num_heads': 12, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activati



[36m(eval_config pid=93639)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'RReLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 528, 'dropout': 0.17412175297489957, 'dropout_transformers': 0.19838064135090316, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 9, 'factor': 0.26224131909955195, 'patience': 5, 'scheduler': 'ReduceLROnPlateau', 'threshold': 5.319753960742555e-05, 'lr': 0.004548158862865442, 'dropout_lstm': 0.22550709338547384, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 12, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activa



[36m(eval_config pid=93639)[0m {'activation': 'CELU', 'activation_transformers': 'PReLU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 696, 'dropout': 0.3206722790585691, 'dropout_transformers': 0.22226395207853425, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 7, 'factor': 0.3571618992257808, 'patience': 6, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.0036004120305573257, 'lr': 0.00027612290356720436, 'dropout_lstm': 0.17228845793452424, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 24, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 3, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_g



[36m(eval_config pid=93639)[0m {'activation': 'Hardswish', 'activation_transformers': 'LeakyReLU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 576, 'dropout': 0.3044560022875151, 'dropout_transformers': 0.09960567806785092, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 3, 'factor': 0.6141674018123261, 'patience': 3, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.11479720568384098, 'lr': 0.01164008748957068, 'dropout_lstm': 0.021401173048457706, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 6, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'abs', 'reg': False, 'transformers_model': True, 'activatio



[36m(eval_config pid=93639)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Softshrink', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 552, 'dropout': 0.2829696194730347, 'dropout_transformers': 0.15129776963597713, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 10, 'factor': 0.41015244313608307, 'patience': 6, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.08307402570199798, 'lr': 0.00015856161129523333, 'dropout_lstm': 0.393105242715615, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activa



[36m(eval_config pid=93639)[0m {'activation': 'Hardtanh', 'activation_transformers': 'Tanh', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 600, 'dropout': 0.27465477203148375, 'dropout_transformers': 0.1680128657423158, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 3, 'factor': 0.7696770693885421, 'patience': 8, 'scheduler': 'ReduceLROnPlateau', 'threshold': 6.29713406492609e-06, 'lr': 9.458291266836354e-05, 'dropout_lstm': 0.26581508315419833, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_



[36m(eval_config pid=93639)[0m {'activation': 'ELU', 'activation_transformers': 'SELU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 600, 'dropout': 0.3544267159284486, 'dropout_transformers': 0.0608174733685599, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.18517139700046586, 'scheduler': 'StepLR', 'step_size': 5, 'lr': 0.0004164137802881084, 'dropout_lstm': 0.3145092864232036, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_gcn': 'ELU', 'dropout_gcn': 0.37646887009333274, 'hidden_channels'



[36m(eval_config pid=93639)[0m {'activation': 'Softshrink', 'activation_transformers': 'Hardshrink', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 648, 'dropout': 0.41781825072781487, 'dropout_transformers': 0.11086795068186289, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.6995526000139187, 'scheduler': 'StepLR', 'step_size': 9, 'lr': 3.0191680630530223e-06, 'dropout_lstm': 0.4642991032056453, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 36, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_gcn': 'SELU', 'dropout_gcn': 0.22548489430575608, 'h



[36m(eval_config pid=93639)[0m {'activation': 'SELU', 'activation_transformers': 'Hardtanh', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 456, 'dropout': 0.24281122261877675, 'dropout_transformers': 0.12365117691740052, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.8917593344250823, 'scheduler': 'ExponentialLR', 'lr': 0.0015377549761848856, 'dropout_lstm': 0.24689690900676325, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 27, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 3.4610806714958

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-10 23:46:00,400	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-10 23:46:14,549	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-10 23:46:14,550	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_7        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_7
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_7`
[36m(eval_config pid=121239)[0m CUDA is available. Using GPU.




[36m(eval_config pid=121239)[0m {'activation': 'SELU', 'activation_transformers': 'Hardtanh', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 456, 'dropout': 0.22585919485981243, 'dropout_transformers': 0.12466197208910665, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.8879154786357151, 'scheduler': 'ExponentialLR', 'lr': 0.001526703057546723, 'dropout_lstm': 0.25365793514655316, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.00015029778450067682}
[36m(eval_config pid=



[36m(eval_config pid=121239)[0m {'activation': 'SELU', 'activation_transformers': 'Hardtanh', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 504, 'dropout': 0.24776133367731995, 'dropout_transformers': 0.2105592530141508, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.6912536642488581, 'scheduler': 'ExponentialLR', 'lr': 0.0025742074952065158, 'dropout_lstm': 0.19913593856792824, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 35, 'reg': True, 'transformers_model': True, 'activation_gcn': 'LogSigmoid', 'dropout_gcn': 0.



[36m(eval_config pid=121239)[0m {'activation': 'Mish', 'activation_transformers': 'Hardtanh', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 384, 'dropout': 0.18099421942942612, 'dropout_transformers': 0.17991263697501522, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.8156713419571193, 'scheduler': 'ExponentialLR', 'lr': 0.005916960712104591, 'dropout_lstm': 0.10010231561909952, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 40, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.00029210179103



[36m(eval_config pid=121239)[0m {'activation': 'Tanhshrink', 'activation_transformers': 'Hardsigmoid', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 336, 'dropout': 0.19838711781432616, 'dropout_transformers': 0.28916877785087963, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.5730024342581277, 'scheduler': 'ExponentialLR', 'lr': 0.0007112725952758572, 'dropout_lstm': 0.2291817531635105, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 47, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.0010



[36m(eval_config pid=121239)[0m {'activation': 'SELU', 'activation_transformers': 'Tanh', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 408, 'dropout': 0.29938404026768234, 'dropout_transformers': 0.2556445630415152, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.35011231217735594, 'scheduler': 'ExponentialLR', 'lr': 0.01627940131047381, 'dropout_lstm': 0.1592649143437635, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 48, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_gcn': 'Softplus', 'dropout_gcn': 0.49931270154902485, 'hidden_channels': 6



[36m(eval_config pid=121239)[0m {'activation': 'Softmin', 'activation_transformers': 'ReLU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 552, 'dropout': 0.1470030865119585, 'dropout_transformers': 0.025262161576319137, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.5788761022609024, 'scheduler': 'ExponentialLR', 'lr': 0.0026930668395546936, 'dropout_lstm': 0.20482727024980574, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'relu', 'epochs_complete_problem': 3, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 1.1094080402224

[36m(eval_config pid=121239)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=121239)[0m epoch:  0 loss :  7.928429778193085 acc:  0.0012951343143603599
[36m(eval_config pid=121239)[0m epoch:  1 loss :  7.765673234429158 acc:  0.0019650313735122705
[36m(eval_config pid=121239)[0m epoch:  2 loss :  7.680766401156573 acc:  0.005001898041667597
[36m(eval_config pid=121239)[0m epoch:  3 loss :  7.642520340395645 acc:  0.005001898041667597
[36m(eval_config pid=121239)[0m epoch:  4 loss :  7.62420894730259 acc:  0.005001898041667597
[36m(eval_config pid=121239)[0m epoch:  5 loss :  7.615174750207176 acc:  0.005001898041667597
[36m(eval_config pid=121239)[0m epoch:  6 loss :  7.611563548235826 acc:  0.005001898041667597
[36m(eval_config pid=121239)[0m epoch:  7 loss :  7.604552866707386 acc:  0.005001898041667597
[36m(eval_config pid=121239)[0m CUDA is available. Using GPU.




[36m(eval_config pid=121239)[0m {'activation': 'Softplus', 'activation_transformers': 'GELU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 432, 'dropout': 0.2215496096511651, 'dropout_transformers': 0.15277980124902346, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.3691067560545723, 'scheduler': 'ExponentialLR', 'lr': 0.0008714400035539075, 'dropout_lstm': 0.3408175468019146, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 28, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.005413808816502



[36m(eval_config pid=121239)[0m {'activation': 'ReLU6', 'activation_transformers': 'CELU', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 288, 'dropout': 0.2584418436329703, 'dropout_transformers': 0.2454966060176655, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'lr': 3.390403057771875e-05, 'dropout_lstm': 0.2502401610492202, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 3.0641641809573105e-05}
[36m(eval_config pid=121239)[0m CUDA is available. Using GPU.
[3



[36m(eval_config pid=121239)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Softsign', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 504, 'dropout': 0.3183435737220369, 'dropout_transformers': 0.049738070988202904, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.7821206822083046, 'scheduler': 'ExponentialLR', 'lr': 0.007549887189556344, 'dropout_lstm': 0.12247684025378468, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 24, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 32, 'reg': True, 'transformers_model': True, 'activation_gcn': 'Hardtanh', 'dropout_gcn': 0



[36m(eval_config pid=121239)[0m epoch:  0 loss :  7.771523611169112 acc:  0.0033941450997030122
[36m(eval_config pid=121239)[0m epoch:  1 loss :  7.415456445593583 acc:  0.003438804903646473
[36m(eval_config pid=121239)[0m epoch:  2 loss :  7.420500529439826 acc:  0.003438804903646473
[36m(eval_config pid=121239)[0m epoch:  3 loss :  7.320869641554983 acc:  0.003438804903646473
[36m(eval_config pid=121239)[0m epoch:  4 loss :  7.270996058614631 acc:  0.003438804903646473
[36m(eval_config pid=121239)[0m epoch:  5 loss :  7.259149947919344 acc:  0.003438804903646473
[36m(eval_config pid=121239)[0m epoch:  6 loss :  7.249860100997122 acc:  0.003438804903646473

1
loading already preprocessed data: 
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/list_users
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/vocab


  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 00:44:22,213	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 00:44:36,046	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 00:44:36,048	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_8        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_8
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_8`
[36m(eval_config pid=135969)[0m CUDA is available. Using GPU.




[36m(eval_config pid=135969)[0m {'activation': 'Hardsigmoid', 'activation_transformers': 'Hardtanh', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 696, 'dropout': 0.2083994655886317, 'dropout_transformers': 0.2216541258787829, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.8873298374231895, 'scheduler': 'ExponentialLR', 'lr': 0.000581406783041363, 'dropout_lstm': 0.3131458507885086, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'abs', 'epochs_complete_problem': 22, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.0003059



[36m(eval_config pid=135969)[0m {'activation': 'SiLU', 'activation_transformers': 'Sigmoid', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 648, 'dropout': 0.2762281162612265, 'dropout_transformers': 0.007890055936593476, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'lr': 0.0002438642006944799, 'dropout_lstm': 0.21269774486388573, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 36, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'relu', 'epochs_complete_problem': 41, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 1.212848278087511e-06}
[36m(eval_config pid=135969)[0



[36m(eval_config pid=135969)[0m {'activation': 'PReLU', 'activation_transformers': 'Softplus', 'amsgrad': False, 'batch_size': 64, 'concatenate_features': True, 'd_model': 456, 'dropout': 0.3355812817762327, 'dropout_transformers': 0.13487569043590217, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.6427774676784119, 'scheduler': 'ExponentialLR', 'lr': 6.782136900125635e-05, 'dropout_lstm': 0.1864833911294741, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 37, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 3.898752818867



[36m(eval_config pid=135969)[0m {'activation': 'Tanh', 'activation_transformers': 'LogSigmoid', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': False, 'd_model': 528, 'dropout': 0.16635300480932494, 'dropout_transformers': 0.19523446093467753, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 8, 'factor': 0.8982314784940164, 'patience': 9, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.0007466183185447512, 'lr': 0.11177604324524067, 'dropout_lstm': 0.03700277171787508, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 48, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'sig', 'reg': False, 'transformers_model': True, 'activa



[36m(eval_config pid=135969)[0m epoch:  0 loss :  7.766161037703692 acc:  0.003103856374070518
[36m(eval_config pid=135969)[0m epoch:  1 loss :  7.464499562473621 acc:  0.0008262063729540227
[36m(eval_config pid=135969)[0m epoch:  2 loss :  7.339039228730282 acc:  0.003438804903646473
[36m(eval_config pid=135969)[0m epoch:  3 loss :  7.2826818611662265 acc:  0.004912578433780675
[36m(eval_config pid=135969)[0m epoch:  4 loss :  7.201336125196037 acc:  0.001563093138021124
[36m(eval_config pid=135969)[0m epoch:  5 loss :  7.1669599565409 acc:  0.0037290936292789676
[36m(eval_config pid=135969)[0m epoch:  6 loss :  7.07232423556053 acc:  0.004265011276600496
[36m(eval_config pid=135969)[0m epoch:  7 loss :  6.986328391705529 acc:  0.009981466181363464
[36m(eval_config pid=135969)[0m epoch:  8 loss :  6.949727292788231 acc:  0.007301877944755822
[36m(eval_config pid=135969)[0m epoch:  9 loss :  6.830064765477585 acc:  0.008373713239398879
[36m(eval_config pid=135969)



[36m(eval_config pid=135969)[0m {'activation': 'SELU', 'activation_transformers': 'ReLU6', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': True, 'd_model': 480, 'dropout': 0.19149029316682123, 'dropout_transformers': 0.3255643791746741, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 9, 'factor': 0.54784651753612, 'patience': 3, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.044670491828729474, 'lr': 0.059837833155197695, 'dropout_lstm': 0.40669387162097065, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': True, 'num_heads': 12, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_gcn': 

[36m(eval_config pid=135969)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=135969)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Softmin', 'amsgrad': False, 'batch_size': 32, 'concatenate_features': False, 'd_model': 408, 'dropout': 0.3104455169651287, 'dropout_transformers': 0.3158706585885783, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.1020920265548837, 'scheduler': 'ExponentialLR', 'lr': 0.0019787782761147665, 'dropout_lstm': 0.2848152670448947, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'relu', 'epochs_complete_problem': 13, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 4.7430081919



[36m(eval_config pid=135969)[0m {'activation': 'GELU', 'activation_transformers': 'RReLU', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 744, 'dropout': 0.12811692101846764, 'dropout_transformers': 0.2825584891021054, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 5, 'factor': 0.33454279009409743, 'patience': 4, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.0016092135894030374, 'lr': 0.00017512683418077613, 'dropout_lstm': 0.12752508541105217, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 5, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 49, 'reg': True, 'trans

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 01:47:34,997	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 01:47:48,713	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 01:47:48,714	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_9        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_9
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_9`
[36m(eval_config pid=151880)[0m CUDA is available. Using GPU.
[36m(eval_config pid=151880)[0m GCNConv




[36m(eval_config pid=151880)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Tanh', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 576, 'dropout': 0.38253112636905273, 'dropout_transformers': 0.2642799262141535, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.08467818817593553, 'scheduler': 'StepLR', 'step_size': 11, 'lr': 1.6926383140951762e-05, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 12, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 6, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'activation_gcn': 'Tanhshrink', 'dropout_gcn': 0.14804889778497426, 'hidden_channels': 32, 'layer_type': 'GCNConv', 'norm': 'GraphNorm', 'num_layers_gcn': 4, 'use_gcn': True, 'weight_decay': 0.0016196747321426393}
[36m(eval_



[36m(eval_config pid=151880)[0m {'activation': 'Softsign', 'activation_transformers': 'PReLU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 312, 'dropout': 0.345453920009895, 'dropout_transformers': 0.14593165195521157, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lr': 0.038147765235227206, 'dropout_lstm': 0.24227724071166984, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'Hardswish', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 27, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.0004440440608510742}
[36m(eval_config pid=15188



[36m(eval_config pid=151880)[0m {'activation': 'Hardtanh', 'activation_transformers': 'LeakyReLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 624, 'dropout': 0.26797824417402283, 'dropout_transformers': 0.22826357004954914, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 0, 'factor': 0.11942543685069673, 'patience': 7, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.21498505467455392, 'lr': 3.9398320774809626e-05, 'dropout_lstm': 0.1101124771814363, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 48, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 24, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'abs', 'reg': False, 'transformers_model': True, 'us



[36m(eval_config pid=151880)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'ReLU', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 816, 'dropout': 0.24669987521755554, 'dropout_transformers': 0.20475723219376735, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.44448613375389306, 'scheduler': 'StepLR', 'step_size': 4, 'lr': 0.006536909720196118, 'dropout_lstm': 0.35243331780607634, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 24, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 30, 'reg': True, 'transformers_model': True, 'activation_gcn': 'Sigmoid', 'dropou



[36m(eval_config pid=151880)[0m {'activation': 'CELU', 'activation_transformers': 'Softshrink', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 552, 'dropout': 0.2167111192713454, 'dropout_transformers': 0.17695449683160008, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 7, 'factor': 0.7905352424881034, 'patience': 8, 'scheduler': 'ReduceLROnPlateau', 'threshold': 2.4110413565012488e-05, 'lr': 0.00012621117088966487, 'dropout_lstm': 0.1580930159648416, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 6, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'relu', 'reg': False, 'transformers_model': True, 'activ



[36m(eval_config pid=151880)[0m epoch:  0 loss :  115.2815236630647 acc:  0.000781546569010562
[36m(eval_config pid=151880)[0m epoch:  1 loss :  19.13190211420474 acc:  0.0021883303932295735
[36m(eval_config pid=151880)[0m epoch:  2 loss :  8.603037709775178 acc:  0.002411629412946877
[36m(eval_config pid=151880)[0m epoch:  3 loss :  7.9856661299000615 acc:  0.0023669696090034163
[36m(eval_config pid=151880)[0m epoch:  4 loss :  7.874470503433891 acc:  0.002232990197173034
[36m(eval_config pid=151880)[0m epoch:  5 loss :  7.865922844928244 acc:  0.002210660295201304
[36m(eval_config pid=151880)[0m epoch:  6 loss :  7.846844673156738 acc:  0.002389299510975147
[36m(eval_config pid=151880)[0m epoch:  7 loss :  7.871899791385816 acc:  0.0023669696090034163
[36m(eval_config pid=151880)[0m CUDA is available. Using GPU.
[36m(eval_config pid=151880)[0m {'activation': 'ReLU6', 'activation_transformers': 'Hardsigmoid', 'amsgrad': False, 'batch_size': 16, 'concatenate_feature



[36m(eval_config pid=151880)[0m epoch:  0 loss :  7.672063054156904 acc:  0.010629033338543644
[36m(eval_config pid=151880)[0m epoch:  1 loss :  7.186748985482865 acc:  0.021146417167228634
[36m(eval_config pid=151880)[0m epoch:  2 loss :  6.944905249010615 acc:  0.03193175981957439
[36m(eval_config pid=151880)[0m epoch:  3 loss :  6.877205203561222 acc:  0.047093763258379294
[36m(eval_config pid=151880)[0m epoch:  4 loss :  6.775949169607723 acc:  0.056025724047071436
[36m(eval_config pid=151880)[0m epoch:  5 loss :  6.7888534610011 acc:  0.061027622088739034
[36m(eval_config pid=151880)[0m epoch:  6 loss :  6.722851989649925 acc:  0.06178683875577786
[36m(eval_config pid=151880)[0m epoch:  7 loss :  6.678793262032902 acc:  0.06274702454056226
[36m(eval_config pid=151880)[0m epoch:  8 loss :  6.694450021791859 acc:  0.06285867405042092
[36m(eval_config pid=151880)[0m epoch:  9 loss :  6.765150106253744 acc:  0.06308197307013823
[36m(eval_config pid=151880)[0m epo



[36m(eval_config pid=151880)[0m {'activation': 'SELU', 'activation_transformers': 'Tanh', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 504, 'dropout': 0.44016298575690893, 'dropout_transformers': 0.1587011106612337, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lr': 0.003422356619225426, 'dropout_lstm': 0.4980330108705078, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 7, 'reg': True, 'transformers_model': True, 'activation_gcn': 'Mish', 'dropout_gcn': 0.395182377200492, 'hidden_channels': 128, 'layer_type'

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 02:47:08,211	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 02:47:21,975	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 02:47:21,977	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_10       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_10
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_10`
[36m(eval_config pid=166899)[0m CUDA is available. Using GPU.
[36m(eval_config pid=166899)[0m GCNConv




[36m(eval_config pid=166899)[0m {'activation': 'Softshrink', 'activation_transformers': 'CELU', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': False, 'd_model': 432, 'dropout': 0.05248749532458005, 'dropout_transformers': 0.3760524083554718, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.7544171744831277, 'scheduler': 'StepLR', 'step_size': 13, 'lr': 0.0003459615765822386, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 36, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 6, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'abs', 'reg': False, 'transformers_model': True, 'activation_gcn': 'Hardswish', 'dropout_gcn': 0.34940850382984456, 'hidden_channels': 64, 'layer_type': 'GCNConv', 'norm': 'LayerNorm', 'num_layers_gcn': 1, 'use_gcn': True, 'weight_decay': 3.1055534051358038e-06}
[36m(eval_



[36m(eval_config pid=166899)[0m CUDA is available. Using GPU.
[36m(eval_config pid=166899)[0m {'activation': 'Softshrink', 'activation_transformers': 'ReLU', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': True, 'd_model': 264, 'dropout': 0.09655459636420655, 'dropout_transformers': 0.18632265901093006, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.8103238177563501, 'scheduler': 'StepLR', 'step_size': 18, 'lr': 5.3672711992979084e-05, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 36, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'abs', 'epochs_complete_problem': 50, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 7.116553235331305e-06}
[36m(eval_config pid=166899)[0m CUDA is available. Using GPU.
[36m(eval

[36m(eval_config pid=166899)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=166899)[0m epoch:  0 loss :  7.978754286038673 acc:  0.0019650313735122705
[36m(eval_config pid=166899)[0m epoch:  1 loss :  7.650281340388928 acc:  0.003505794609561664
[36m(eval_config pid=166899)[0m epoch:  2 loss :  7.402517116675942 acc:  0.00388540294308108
[36m(eval_config pid=166899)[0m epoch:  3 loss :  7.303029108855684 acc:  0.004175691668713575
[36m(eval_config pid=166899)[0m epoch:  4 loss :  7.239663326134116 acc:  0.004220351472657035
[36m(eval_config pid=166899)[0m epoch:  5 loss :  7.220408366898359 acc:  0.008128084317709845
[36m(eval_config pid=166899)[0m epoch:  6 loss :  7.188087018869691 acc:  0.013509590692896858
[36m(eval_config pid=166899)[0m epoch:  7 loss :  7.134114612967281 acc:  0.019181385793716366
[36m(eval_config pid=166899)[0m epoch:  8 loss :  7.070006467528263 acc:  0.03389679119308666
[36m(eval_config pid=166899)[0m epoch:  9 loss :  7.003901360398632 acc:  0.04825491816090927
[36m(eval_config pid=166899)[0

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 03:51:38,802	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 03:51:52,564	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 03:51:52,567	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_11       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_11
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_11`
[36m(eval_config pid=183132)[0m CUDA is available. Using GPU.
[36m(eval_config pid=183132)[0m GraphSAGE
[36m(eval_config pid=183132)[0m {'activation': 'ReLU', 'activation_transformers': 'Hardtanh', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': False, 'd_model': 600, 'dropout': 0.0825693134742821, 'dropout_transformers': 0.17217142730319243, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_enc

[36m(eval_config pid=183132)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=183132)[0m epoch:  0 loss :  8.037662029266357 acc:  0.002099010785342652
[36m(eval_config pid=183132)[0m epoch:  1 loss :  7.875250007795251 acc:  0.006274702454056227
[36m(eval_config pid=183132)[0m epoch:  2 loss :  7.730203835860543 acc:  0.013018332849518791
[36m(eval_config pid=183132)[0m epoch:  3 loss :  7.590285384136697 acc:  0.02320076814862783
[36m(eval_config pid=183132)[0m epoch:  4 loss :  7.521693001622739 acc:  0.03059196570127057
[36m(eval_config pid=183132)[0m epoch:  5 loss :  7.450060844421387 acc:  0.038407431391376194
[36m(eval_config pid=183132)[0m epoch:  6 loss :  7.410533158675484 acc:  0.04515106178683875
[36m(eval_config pid=183132)[0m epoch:  7 loss :  7.354178242061449 acc:  0.052207310809905545
[36m(eval_config pid=183132)[0m epoch:  8 loss :  7.312455591948136 acc:  0.05707522943974276
[36m(eval_config pid=183132)[0m epoch:  9 loss :  7.266076544056768 acc:  0.06127325101042806
[36m(eval_config pid=183132)[0m e

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 05:24:03,397	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 05:24:17,220	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 05:24:17,222	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_12       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_12
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_12`
[36m(eval_config pid=206162)[0m CUDA is available. Using GPU.




[36m(eval_config pid=206162)[0m {'activation': 'GELU', 'activation_transformers': 'SiLU', 'amsgrad': True, 'batch_size': 16, 'concatenate_features': False, 'd_model': 552, 'dropout': 0.2566481705378929, 'dropout_transformers': 0.3561738931081586, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.8520830540904167, 'scheduler': 'StepLR', 'step_size': 17, 'lr': 0.00012598081526979583, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'relu', 'epochs_complete_problem': 39, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.001928504065077123}
[36m(eval_config pid=206162)[0m CUDA is available. Using GPU.
[36m(eval_config pid=206162)[0m epoch:  0 loss :  7.639690303269711 acc:  0.0

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 06:59:20,057	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 06:59:33,890	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 06:59:33,891	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_13       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_13
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_13`
[36m(eval_config pid=229885)[0m CUDA is available. Using GPU.




[36m(eval_config pid=229885)[0m {'activation': 'Hardswish', 'activation_transformers': 'Softshrink', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': False, 'd_model': 552, 'dropout': 0.1155659148805338, 'dropout_transformers': 0.38243086285753525, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.37595760571399034, 'scheduler': 'StepLR', 'step_size': 24, 'lr': 0.001004108348410396, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 16, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.3641134563118605}
[36m(eval_config pid=229885)[0m CUDA is available. Using GPU.
[36m(eval_config pid=229885)[0m epoch:  0 loss :  6.51509057191702 a



[36m(eval_config pid=229885)[0m epoch:  0 loss :  7.918905843205812 acc:  0.004108701962798384
[36m(eval_config pid=229885)[0m epoch:  1 loss :  7.607863338053727 acc:  0.005113547551526249
[36m(eval_config pid=229885)[0m epoch:  2 loss :  7.41976544035583 acc:  0.0051805372574414395
[36m(eval_config pid=229885)[0m epoch:  3 loss :  7.325107674638764 acc:  0.006698970591519103
[36m(eval_config pid=229885)[0m epoch:  4 loss :  7.25891989619792 acc:  0.0061183931402541145
[36m(eval_config pid=229885)[0m epoch:  5 loss :  7.221883673627837 acc:  0.008686331867003103
[36m(eval_config pid=229885)[0m epoch:  6 loss :  7.177859089955562 acc:  0.01636781814527834
[36m(eval_config pid=229885)[0m epoch:  7 loss :  7.125173753049193 acc:  0.03217738874126343
[36m(eval_config pid=229885)[0m epoch:  8 loss :  7.055893080575125 acc:  0.0451064019828953
[36m(eval_config pid=229885)[0m epoch:  9 loss :  7.0036762261591035 acc:  0.061742178951834405
[36m(eval_config pid=229885)[0m

[36m(eval_config pid=229885)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=229885)[0m epoch:  0 loss :  8.019123252111537 acc:  0.002545608824777259
[36m(eval_config pid=229885)[0m epoch:  1 loss :  8.016653861708313 acc:  0.0010718352946430564
[36m(eval_config pid=229885)[0m epoch:  2 loss :  8.012079173371992 acc:  0.0010718352946430564
[36m(eval_config pid=229885)[0m epoch:  3 loss :  8.009751854962065 acc:  0.0010718352946430564
[36m(eval_config pid=229885)[0m epoch:  4 loss :  8.006767451308155 acc:  0.0010718352946430564
[36m(eval_config pid=229885)[0m epoch:  5 loss :  8.005190649105392 acc:  0.0010718352946430564
[36m(eval_config pid=229885)[0m CUDA is available. Using GPU.
[36m(eval_config pid=229885)[0m {'activation': 'Mish', 'activation_transformers': 'Mish', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': False, 'd_model': 696, 'dropout': 0.0607619171604327, 'dropout_transformers': 0.4384552899543174, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learn

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 07:42:07,539	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 07:42:21,427	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 07:42:21,429	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_14       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_14
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_14`
[36m(eval_config pid=240819)[0m CUDA is available. Using GPU.




[36m(eval_config pid=240819)[0m {'activation': 'Hardsigmoid', 'activation_transformers': 'Sigmoid', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 408, 'dropout': 0.08364036537484446, 'dropout_transformers': 0.27240122602325756, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.49112342767287376, 'scheduler': 'StepLR', 'step_size': 20, 'lr': 2.850370433533623e-05, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'relu', 'epochs_complete_problem': 15, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.0008624478954328188}
[36m(eval_config pid=240819)[0m CUDA is available. Using GPU.
[36m(eval_config pid=240819)[0m epoch:  0 loss :  7.870938159127

[36m(eval_config pid=240819)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=240819)[0m epoch:  0 loss :  7.076406872657038 acc:  0.14221914565795057
[36m(eval_config pid=240819)[0m epoch:  1 loss :  5.199409915554908 acc:  0.2717102471920148
[36m(eval_config pid=240819)[0m epoch:  2 loss :  4.06711625899038 acc:  0.34149119085367213
[36m(eval_config pid=240819)[0m epoch:  3 loss :  3.549341933957992 acc:  0.375454971752674
[36m(eval_config pid=240819)[0m epoch:  4 loss :  3.2974767884900493 acc:  0.3937654913694929
[36m(eval_config pid=240819)[0m epoch:  5 loss :  3.13532693770624 acc:  0.3994596163722841
[36m(eval_config pid=240819)[0m epoch:  6 loss :  3.067410429062382 acc:  0.4081236183373155
[36m(eval_config pid=240819)[0m epoch:  7 loss :  2.951849543663763 acc:  0.412969207065181
[36m(eval_config pid=240819)[0m epoch:  8 loss :  2.888833245923442 acc:  0.4168322801062903
[36m(eval_config pid=240819)[0m epoch:  9 loss :  2.8384008038428523 acc:  0.42022642520599335
[36m(eval_config pid=240819)[0m epoch:  10 loss

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 08:39:05,507	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 08:39:19,406	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 08:39:19,408	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_15       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_15
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_15`
[36m(eval_config pid=255211)[0m CUDA is available. Using GPU.




[36m(eval_config pid=255211)[0m {'activation': 'PReLU', 'activation_transformers': 'Softmin', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 744, 'dropout': 0.37517339098161123, 'dropout_transformers': 0.3787838205825871, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.12352744640110168, 'scheduler': 'StepLR', 'step_size': 17, 'lr': 0.0002488219778548973, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 13, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 0.00035970202196859084}
[36m(eval_config pid=255211)[0m CUDA is available. Using GPU.


[36m(eval_config pid=255211)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=255211)[0m epoch:  0 loss :  7.364086484909057 acc:  0.039702565705736555
[36m(eval_config pid=255211)[0m epoch:  1 loss :  6.592517592356756 acc:  0.1906750329366054
[36m(eval_config pid=255211)[0m epoch:  2 loss :  5.503632329060481 acc:  0.26849474130808565
[36m(eval_config pid=255211)[0m epoch:  3 loss :  4.617004062579229 acc:  0.31757586584194897
[36m(eval_config pid=255211)[0m epoch:  4 loss :  4.0278824751193705 acc:  0.353013420271085
[36m(eval_config pid=255211)[0m epoch:  5 loss :  3.6452543863883387 acc:  0.37887144675434875
[36m(eval_config pid=255211)[0m epoch:  6 loss :  3.394614835885855 acc:  0.3995935957841145
[36m(eval_config pid=255211)[0m epoch:  7 loss :  3.2220194321412308 acc:  0.4099993301029409
[36m(eval_config pid=255211)[0m epoch:  8 loss :  3.0970316079946665 acc:  0.41803809481276377
[36m(eval_config pid=255211)[0m epoch:  9 loss :  3.003039921247042 acc:  0.42120894089274946
[36m(eval_config pid=255211)[0m epoch:

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 09:26:58,593	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 09:27:13,156	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 09:27:13,158	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_16       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_16
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_16`




[36m(eval_config pid=267397)[0m CUDA is available. Using GPU.
[36m(eval_config pid=267397)[0m {'activation': 'GELU', 'activation_transformers': 'SiLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 336, 'dropout': 0.28226871881079674, 'dropout_transformers': 0.3238904480542086, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.4153605845223523, 'scheduler': 'StepLR', 'step_size': 17, 'lr': 0.004061100598483793, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 3, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 3.0959718815457053e-06}
[36m(eval_config pid=267397)[0m CUDA is available. Using GPU.
[36m(eval_conf



[36m(eval_config pid=267397)[0m CUDA is available. Using GPU.
[36m(eval_config pid=267397)[0m {'activation': 'Softsign', 'activation_transformers': 'PReLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 288, 'dropout': 0.41701128346134164, 'dropout_transformers': 0.3880139179194972, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.13766905538402274, 'scheduler': 'StepLR', 'step_size': 21, 'lr': 0.0009901827069737036, 'dropout_lstm': 0.45934053481490406, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Mish', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 17, 'reg': True



[36m(eval_config pid=267397)[0m CUDA is available. Using GPU.
[36m(eval_config pid=267397)[0m {'activation': 'Hardshrink', 'activation_transformers': 'PReLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 288, 'dropout': 0.4838967722896893, 'dropout_transformers': 0.3084625562688811, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.7479402536073985, 'scheduler': 'StepLR', 'step_size': 21, 'lr': 0.0010529466865984967, 'dropout_lstm': 0.48091713613422116, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Hardsigmoid', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 6, 'reg'



[36m(eval_config pid=267397)[0m CUDA is available. Using GPU.
[36m(eval_config pid=267397)[0m {'activation': 'Softsign', 'activation_transformers': 'PReLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 120, 'dropout': 0.41253970608099116, 'dropout_transformers': 0.3847227124045911, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.3395385990561092, 'scheduler': 'StepLR', 'step_size': 19, 'lr': 0.07180824122993944, 'dropout_lstm': 0.45349153484569504, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Mish', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 9, 'reg': True, 't



[36m(eval_config pid=267397)[0m CUDA is available. Using GPU.
[36m(eval_config pid=267397)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'PReLU', 'amsgrad': False, 'batch_size': 32, 'concatenate_features': True, 'd_model': 216, 'dropout': 0.4964415671229234, 'dropout_transformers': 0.36944152413605036, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.20360100207657367, 'scheduler': 'ExponentialLR', 'lr': 0.003136543825653575, 'dropout_lstm': 0.41967075950939603, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'PReLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 16, 'reg': True, 'transfo



[36m(eval_config pid=267397)[0m loss is undifined
[36m(eval_config pid=267397)[0m CUDA is available. Using GPU.
[36m(eval_config pid=267397)[0m {'activation': 'Softsign', 'activation_transformers': 'Softplus', 'amsgrad': False, 'batch_size': 64, 'concatenate_features': True, 'd_model': 264, 'dropout': 0.4359533882589522, 'dropout_transformers': 0.2846257038729461, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lr': 0.010725306748563717, 'dropout_lstm': 0.428987788959831, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'LeakyReLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 14, 'reg': T



[36m(eval_config pid=267397)[0m epoch:  0 loss :  7.074214132208573 acc:  0.15543844762521491
[36m(eval_config pid=267397)[0m epoch:  1 loss :  5.570118442334627 acc:  0.20150503539289463
[36m(eval_config pid=267397)[0m epoch:  2 loss :  4.7062074661254885 acc:  0.2214902976575933
[36m(eval_config pid=267397)[0m epoch:  3 loss :  4.274358699196263 acc:  0.22209320501083
[36m(eval_config pid=267397)[0m epoch:  4 loss :  4.027489300778037 acc:  0.23441931089922516
[36m(eval_config pid=267397)[0m epoch:  5 loss :  3.841892285096018 acc:  0.24779492218029162
[36m(eval_config pid=267397)[0m epoch:  6 loss :  3.6856026122444554 acc:  0.25482884130138667
[36m(eval_config pid=267397)[0m epoch:  7 loss :  3.5779713254225882 acc:  0.26813746287653795
[36m(eval_config pid=267397)[0m epoch:  8 loss :  3.4978087952262475 acc:  0.2768461246455128
[36m(eval_config pid=267397)[0m epoch:  9 loss :  3.470077487042076 acc:  0.2839916932764665
[36m(eval_config pid=267397)[0m epoch:  



[36m(eval_config pid=267397)[0m epoch:  0 loss :  7.555942486379748 acc:  0.003706763727307237
[36m(eval_config pid=267397)[0m epoch:  1 loss :  7.2744783954085595 acc:  0.006029073532367193
[36m(eval_config pid=267397)[0m epoch:  2 loss :  7.179983112299554 acc:  0.01958332402920751
[36m(eval_config pid=267397)[0m epoch:  3 loss :  6.968090725836353 acc:  0.03563852354688163
[36m(eval_config pid=267397)[0m epoch:  4 loss :  6.577022592598032 acc:  0.07656923386106335
[36m(eval_config pid=267397)[0m epoch:  5 loss :  6.129046725335522 acc:  0.09918942455842619
[36m(eval_config pid=267397)[0m epoch:  6 loss :  5.788537546853039 acc:  0.11812518143045352
[36m(eval_config pid=267397)[0m epoch:  7 loss :  5.4551335539773245 acc:  0.1443181564432932
[36m(eval_config pid=267397)[0m epoch:  8 loss :  5.206766935152428 acc:  0.16399080008038766
[36m(eval_config pid=267397)[0m epoch:  9 loss :  4.967996497020543 acc:  0.18247995891298038
[36m(eval_config pid=267397)[0m epo



[36m(eval_config pid=267397)[0m {'activation': 'Hardtanh', 'activation_transformers': 'LeakyReLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 384, 'dropout': 0.399367972565674, 'dropout_transformers': 0.3455632035651406, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.28846233721517756, 'scheduler': 'StepLR', 'step_size': 16, 'lr': 0.001439786491238468, 'dropout_lstm': 0.4892240851759689, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Mish', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 18, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 



[36m(eval_config pid=267397)[0m CUDA is available. Using GPU.
[36m(eval_config pid=267397)[0m {'activation': 'Hardswish', 'activation_transformers': 'Hardshrink', 'amsgrad': False, 'batch_size': 32, 'concatenate_features': True, 'd_model': 240, 'dropout': 0.4893371274412556, 'dropout_transformers': 0.33090480903514535, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.3936936124230907, 'scheduler': 'StepLR', 'step_size': 20, 'lr': 0.008129832820973137, 'dropout_lstm': 0.4401446401729308, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Mish', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 16, 'reg': Tr

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 10:10:36,348	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 10:10:51,078	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 10:10:51,080	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_17       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_17
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_17`




[36m(eval_config pid=278540)[0m CUDA is available. Using GPU.
[36m(eval_config pid=278540)[0m {'activation': 'RReLU', 'activation_transformers': 'SELU', 'amsgrad': False, 'batch_size': 64, 'concatenate_features': True, 'd_model': 312, 'dropout': 0.47356564755638175, 'dropout_transformers': 0.23565132304611372, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.32670935294270237, 'scheduler': 'ExponentialLR', 'lr': 0.00018244539836324145, 'dropout_lstm': 0.36652763344461714, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Tanhshrink', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 8, 'reg': True, 'transf



[36m(eval_config pid=278540)[0m epoch:  0 loss :  7.402355971456576 acc:  0.0041310318647701134
[36m(eval_config pid=278540)[0m epoch:  1 loss :  7.057596487157485 acc:  0.014626085791483376
[36m(eval_config pid=278540)[0m epoch:  2 loss :  6.659712715309207 acc:  0.02730947011142621
[36m(eval_config pid=278540)[0m epoch:  3 loss :  6.279435362134661 acc:  0.04110934952995556
[36m(eval_config pid=278540)[0m epoch:  4 loss :  6.057931118652601 acc:  0.041890896098966124
[36m(eval_config pid=278540)[0m epoch:  5 loss :  5.989184111106296 acc:  0.0433423397271286
[36m(eval_config pid=278540)[0m epoch:  6 loss :  5.944240694286442 acc:  0.046356876493312195
[36m(eval_config pid=278540)[0m epoch:  7 loss :  5.888580791088713 acc:  0.05073353727977134
[36m(eval_config pid=278540)[0m epoch:  8 loss :  5.858397575987487 acc:  0.051046155907375566
[36m(eval_config pid=278540)[0m epoch:  9 loss :  5.8523978505815775 acc:  0.05245293973159458
[36m(eval_config pid=278540)[0m 



[36m(eval_config pid=278540)[0m epoch:  0 loss :  7.699511150213389 acc:  0.00777080588616216
[36m(eval_config pid=278540)[0m epoch:  1 loss :  7.160051173430222 acc:  0.04682580443471853
[36m(eval_config pid=278540)[0m epoch:  2 loss :  6.677337052271916 acc:  0.08753321572918295
[36m(eval_config pid=278540)[0m epoch:  3 loss :  6.000873352931096 acc:  0.16017238684322177
[36m(eval_config pid=278540)[0m epoch:  4 loss :  5.397197099832388 acc:  0.20438559274724785
[36m(eval_config pid=278540)[0m epoch:  5 loss :  4.978876682428213 acc:  0.23566978540964206
[36m(eval_config pid=278540)[0m epoch:  6 loss :  4.671181099231426 acc:  0.2615278118929058
[36m(eval_config pid=278540)[0m epoch:  7 loss :  4.435013090647184 acc:  0.2814460844516893
[36m(eval_config pid=278540)[0m epoch:  8 loss :  4.258253081028278 acc:  0.29555858249782285
[36m(eval_config pid=278540)[0m epoch:  9 loss :  4.112780195016128 acc:  0.3075720697586138
[36m(eval_config pid=278540)[0m epoch:  1



[36m(eval_config pid=278540)[0m epoch:  0 loss :  8.049807071685791 acc:  0.0027019181385793717
[36m(eval_config pid=278540)[0m epoch:  1 loss :  7.626627635955811 acc:  0.005738784806734698
[36m(eval_config pid=278540)[0m epoch:  2 loss :  7.441541069432309 acc:  0.00783779559207735
[36m(eval_config pid=278540)[0m epoch:  3 loss :  7.318670834993061 acc:  0.015742580890069892
[36m(eval_config pid=278540)[0m epoch:  4 loss :  7.236667602940609 acc:  0.02652792354241565
[36m(eval_config pid=278540)[0m epoch:  5 loss :  7.119360878593043 acc:  0.035459884331107784
[36m(eval_config pid=278540)[0m epoch:  6 loss :  6.989996528625488 acc:  0.051470424044838446
[36m(eval_config pid=278540)[0m epoch:  7 loss :  6.92373709427683 acc:  0.06585088091463279
[36m(eval_config pid=278540)[0m epoch:  8 loss :  6.686829712516383 acc:  0.07967309023513387
[36m(eval_config pid=278540)[0m epoch:  9 loss :  6.597689548291658 acc:  0.08902931916128888
[36m(eval_config pid=278540)[0m e



[36m(eval_config pid=278540)[0m CUDA is available. Using GPU.
[36m(eval_config pid=278540)[0m {'activation': 'Mish', 'activation_transformers': 'GELU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 192, 'dropout': 0.4238829795442378, 'dropout_transformers': 0.2729915989790161, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lr': 0.00013374349154575196, 'dropout_lstm': 0.30299781744469606, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'SiLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 17, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_

[36m(eval_config pid=278540)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=278540)[0m CUDA is available. Using GPU.
[36m(eval_config pid=278540)[0m {'activation': 'Softsign', 'activation_transformers': 'Hardswish', 'amsgrad': False, 'batch_size': 64, 'concatenate_features': True, 'd_model': 360, 'dropout': 0.372783434715439, 'dropout_transformers': 0.21646544352332928, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.7084135651818453, 'scheduler': 'StepLR', 'step_size': 19, 'lr': 0.030604381156659942, 'dropout_lstm': 0.3986106592081212, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Softmin', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 32, 'reg': Tr



[36m(eval_config pid=278540)[0m {'activation': 'Softmin', 'activation_transformers': 'Softsign', 'amsgrad': False, 'batch_size': 32, 'concatenate_features': True, 'd_model': 456, 'dropout': 0.49472151109376195, 'dropout_transformers': 0.30398234014288233, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.6729743005963787, 'scheduler': 'StepLR', 'step_size': 17, 'lr': 0.0026865137236515876, 'dropout_lstm': 0.43113457779595576, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'SELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 24, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 



[36m(eval_config pid=278540)[0m epoch:  0 loss :  7.716067037662538 acc:  0.005716454904762968
[36m(eval_config pid=278540)[0m epoch:  1 loss :  7.33573759704077 acc:  0.00520286715941317
[36m(eval_config pid=278540)[0m epoch:  2 loss :  7.227383092671883 acc:  0.013174642163320902
[36m(eval_config pid=278540)[0m epoch:  3 loss :  7.040570792029886 acc:  0.032333698055065536
[36m(eval_config pid=278540)[0m epoch:  4 loss :  6.82808463713702 acc:  0.04957238237724136
[36m(eval_config pid=278540)[0m epoch:  5 loss :  6.555294489660183 acc:  0.08416140053145167
[36m(eval_config pid=278540)[0m epoch:  6 loss :  6.178232180972059 acc:  0.11147087064287788
[36m(eval_config pid=278540)[0m epoch:  7 loss :  5.670135241596639 acc:  0.12361833731549919
[36m(eval_config pid=278540)[0m epoch:  8 loss :  5.578030530144186 acc:  0.1551928187035259
[36m(eval_config pid=278540)[0m epoch:  9 loss :  5.216079485516588 acc:  0.17254315253556038
[36m(eval_config pid=278540)[0m epoch:



[36m(eval_config pid=278540)[0m CUDA is available. Using GPU.
[36m(eval_config pid=278540)[0m {'activation': 'ELU', 'activation_transformers': 'Mish', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 336, 'dropout': 0.41463105538163153, 'dropout_transformers': 0.20246596977219092, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.448390260843404, 'scheduler': 'ExponentialLR', 'lr': 0.0008100456727013664, 'dropout_lstm': 0.4697974851634549, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Softplus', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 14, 'reg': True, 'transformer



[36m(eval_config pid=278540)[0m {'activation': 'Hardswish', 'activation_transformers': 'Tanhshrink', 'amsgrad': False, 'batch_size': 64, 'concatenate_features': True, 'd_model': 408, 'dropout': 0.3492998418880478, 'dropout_transformers': 0.4417864251563269, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 9, 'factor': 0.07336042631402362, 'patience': 3, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.0032489148617814946, 'lr': 0.0004044735745729699, 'dropout_lstm': 0.3426584497123991, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Softshrink', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 11, 'reg'

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 11:19:00,681	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 11:19:14,894	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 11:19:14,895	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_18       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_18
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_18`




[36m(eval_config pid=295713)[0m CUDA is available. Using GPU.
[36m(eval_config pid=295713)[0m {'activation': 'Sigmoid', 'activation_transformers': 'ELU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 240, 'dropout': 0.49914038621041334, 'dropout_transformers': 0.2502821712317257, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.5900053207023038, 'scheduler': 'StepLR', 'step_size': 22, 'lr': 0.0002316383897140383, 'dropout_lstm': 0.36099695838797724, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'PReLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 28, 'reg': True, '



[36m(eval_config pid=295713)[0m {'activation': 'Hardsigmoid', 'activation_transformers': 'LogSigmoid', 'amsgrad': False, 'batch_size': 32, 'concatenate_features': True, 'd_model': 384, 'dropout': 0.33444080886129024, 'dropout_transformers': 0.3418084126891912, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lr': 0.015073765226183589, 'dropout_lstm': 0.4519105057384538, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Softsign', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 21, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 3.854126529900434e-06}
[36m(eval_config pid



[36m(eval_config pid=295713)[0m {'activation': 'ReLU', 'activation_transformers': 'Softshrink', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 312, 'dropout': 0.3899359329276796, 'dropout_transformers': 0.4532240363117542, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.5468480973470571, 'scheduler': 'StepLR', 'step_size': 24, 'lr': 0.001147928926972868, 'dropout_lstm': 0.49760611033727187, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Tanh', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 19, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 5.



[36m(eval_config pid=295713)[0m {'activation': 'SiLU', 'activation_transformers': 'Softplus', 'amsgrad': False, 'batch_size': 64, 'concatenate_features': True, 'd_model': 528, 'dropout': 0.3643058290084673, 'dropout_transformers': 0.49086229734025066, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.6436057561775197, 'scheduler': 'StepLR', 'step_size': 25, 'lr': 5.1285625665280206e-05, 'dropout_lstm': 0.3764144674438583, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'LogSigmoid', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 22, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay'



[36m(eval_config pid=295713)[0m CUDA is available. Using GPU.
[36m(eval_config pid=295713)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'ReLU6', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 48, 'dropout': 0.43167051808142787, 'dropout_transformers': 0.4085320132395845, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.43656647830530976, 'scheduler': 'StepLR', 'step_size': 20, 'lr': 3.779822907191557e-05, 'dropout_lstm': 0.29199178526549857, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'ELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 30, 'reg': True



[36m(eval_config pid=295713)[0m {'activation': 'RReLU', 'activation_transformers': 'LeakyReLU', 'amsgrad': False, 'batch_size': 16, 'concatenate_features': True, 'd_model': 480, 'dropout': 0.30800021614726614, 'dropout_transformers': 0.36273808433775845, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 2, 'factor': 0.3233130592800654, 'patience': 10, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.3211198729005216, 'lr': 9.003547349055623e-05, 'dropout_lstm': 0.25661274634629566, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'RReLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'reg': False, 'transformers_model': True, 'acti



[36m(eval_config pid=295713)[0m {'activation': 'Hardshrink', 'activation_transformers': 'RReLU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 432, 'dropout': 0.4567714069620133, 'dropout_transformers': 0.43434786507788126, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.09430711391420205, 'scheduler': 'ExponentialLR', 'lr': 0.0009468108039063848, 'dropout_lstm': 0.4766541425719399, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 1, 'optimizer': 'AdamW', 'positive_function': 'exp', 'epochs_complete_problem': 25, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 1.994092690



[36m(eval_config pid=295713)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'SiLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 336, 'dropout': 0.40263773452452967, 'dropout_transformers': 0.4660203616355475, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.15754890999079812, 'scheduler': 'StepLR', 'step_size': 12, 'lr': 0.00018330178869909358, 'dropout_lstm': 0.3261206651696913, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'Hardshrink', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 17, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_deca



[36m(eval_config pid=295713)[0m {'activation': 'GELU', 'activation_transformers': 'PReLU', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': False, 'd_model': 456, 'dropout': 0.011854644777911427, 'dropout_transformers': 0.2901432719501403, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.789797312978834, 'scheduler': 'StepLR', 'step_size': 18, 'lr': 0.0006435251285857135, 'dropout_lstm': 0.39835295062948795, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 19, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 5.00720



[36m(eval_config pid=295713)[0m epoch:  0 loss :  6.3390464746035065 acc:  0.22854654668066007
[36m(eval_config pid=295713)[0m epoch:  1 loss :  3.7726616896115814 acc:  0.3058526673067905
[36m(eval_config pid=295713)[0m epoch:  2 loss :  3.099001688223619 acc:  0.3439474800705625
[36m(eval_config pid=295713)[0m epoch:  3 loss :  2.828851787860577 acc:  0.36618806243440594
[36m(eval_config pid=295713)[0m epoch:  4 loss :  2.6688114367998566 acc:  0.37969765312730275
[36m(eval_config pid=295713)[0m epoch:  5 loss :  2.5528691328488864 acc:  0.39425674921287096
[36m(eval_config pid=295713)[0m epoch:  6 loss :  2.463381218910217 acc:  0.399883884509747
[36m(eval_config pid=295713)[0m epoch:  7 loss :  2.3776013612747193 acc:  0.4037916173547998
[36m(eval_config pid=295713)[0m epoch:  8 loss :  2.322897533270029 acc:  0.4056226693164817
[36m(eval_config pid=295713)[0m epoch:  9 loss :  2.272776493659386 acc:  0.41044592814237546
[36m(eval_config pid=295713)[0m epoch: 

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 13:12:01,499	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 13:12:15,911	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 13:12:15,913	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_19       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_19
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_19`
[36m(eval_config pid=323753)[0m CUDA is available. Using GPU.
[36m(eval_config pid=323753)[0m GCNConv




[36m(eval_config pid=323753)[0m {'activation': 'Hardtanh', 'activation_transformers': 'Hardshrink', 'amsgrad': False, 'batch_size': 32, 'concatenate_features': True, 'd_model': 264, 'dropout': 0.2713825276869094, 'dropout_transformers': 0.39674897693108957, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lr': 0.001428428264605978, 'dropout_lstm': 0.4212373473574615, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': True, 'num_heads': 3, 'num_layers_transformer': 4, 'optimizer': 'AdamW', 'positive_function': 'sig', 'reg': False, 'transformers_model': True, 'activation_gcn': 'PReLU', 'dropout_gcn': 0.04455764197113807, 'hidden_channels': 256, 'layer_type': 'GCNConv', 'nor



[36m(eval_config pid=323753)[0m epoch:  0 loss :  8.012241082019116 acc:  0.00042426813746287653
[36m(eval_config pid=323753)[0m epoch:  1 loss :  7.905013469328363 acc:  0.0008038764709822924
[36m(eval_config pid=323753)[0m epoch:  2 loss :  7.820194617811456 acc:  0.0012281446084451688
[36m(eval_config pid=323753)[0m epoch:  3 loss :  7.737622559788716 acc:  0.0016747426478797758
[36m(eval_config pid=323753)[0m epoch:  4 loss :  7.6707618092916094 acc:  0.0021213406873143827
[36m(eval_config pid=323753)[0m epoch:  5 loss :  7.602218202797763 acc:  0.002210660295201304
[36m(eval_config pid=323753)[0m epoch:  6 loss :  7.5508525342826385 acc:  0.0027465779425228324
[36m(eval_config pid=323753)[0m epoch:  7 loss :  7.492411693894719 acc:  0.002858227452381484
[36m(eval_config pid=323753)[0m epoch:  8 loss :  7.450441504099283 acc:  0.002992206864211866
[36m(eval_config pid=323753)[0m epoch:  9 loss :  7.402127202734889 acc:  0.003126186276042248
[36m(eval_config pid



[36m(eval_config pid=323753)[0m {'activation': 'RReLU', 'activation_transformers': 'ReLU', 'amsgrad': False, 'batch_size': 128, 'concatenate_features': True, 'd_model': 480, 'dropout': 0.2986619795509168, 'dropout_transformers': 0.3725707191288807, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.6912571913858578, 'scheduler': 'StepLR', 'step_size': 27, 'lr': 0.006153166601226652, 'dropout_lstm': 0.2225255166922815, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Hardswish', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 12, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 2.75



[36m(eval_config pid=323753)[0m epoch:  0 loss :  6.966867589504919 acc:  0.20418462362950227
[36m(eval_config pid=323753)[0m epoch:  1 loss :  4.530090784358087 acc:  0.2278989795234799
[36m(eval_config pid=323753)[0m epoch:  2 loss :  3.8210463568428965 acc:  0.25348904718308285
[36m(eval_config pid=323753)[0m epoch:  3 loss :  3.4369778343450244 acc:  0.2850188687671661
[36m(eval_config pid=323753)[0m epoch:  4 loss :  3.16768317579109 acc:  0.31299823593774423
[36m(eval_config pid=323753)[0m epoch:  5 loss :  2.9666324686781267 acc:  0.3422727374226827
[36m(eval_config pid=323753)[0m epoch:  6 loss :  2.7727502729291116 acc:  0.3586852153719045
[36m(eval_config pid=323753)[0m epoch:  7 loss :  2.6087869283194856 acc:  0.38123841636335215
[36m(eval_config pid=323753)[0m epoch:  8 loss :  2.495952702014246 acc:  0.39291695509456714
[36m(eval_config pid=323753)[0m epoch:  9 loss :  2.419101088960594 acc:  0.3971373065672242
[36m(eval_config pid=323753)[0m epoch: 

[36m(eval_config pid=323753)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=323753)[0m {'activation': 'Softmin', 'activation_transformers': 'PReLU', 'amsgrad': True, 'batch_size': 32, 'concatenate_features': False, 'd_model': 504, 'dropout': 0.3797765193911646, 'dropout_transformers': 0.2955623689790295, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.5201233688471268, 'scheduler': 'StepLR', 'step_size': 15, 'lr': 0.0018499170739355057, 'dropout_lstm': 0.347154448756278, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 3, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'sig', 'epochs_complete_problem': 20, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 6.362921

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 14:09:26,914	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 14:09:41,015	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 14:09:41,017	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_20       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_20
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_20`




[36m(eval_config pid=338248)[0m CUDA is available. Using GPU.
[36m(eval_config pid=338248)[0m {'activation': 'ELU', 'activation_transformers': 'Mish', 'amsgrad': True, 'batch_size': 128, 'concatenate_features': False, 'd_model': 600, 'dropout': 0.23088325133538307, 'dropout_transformers': 0.19201447830589397, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lr': 0.0003940817058048093, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features_globally': True, 'normalize_features_independantly': False, 'num_heads': 6, 'num_layers_transformer': 2, 'optimizer': 'AdamW', 'positive_function': 'abs', 'epochs_complete_problem': 38, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 5.2857004951775224e-08}
[36m(eval_config pid=338248)[0m CUDA is available. Using GPU.
[36m(eval_config pid=338248)[0m epoch:  0 loss :  6.5783027905

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-11 15:37:59,816	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 15:38:14,483	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-11 15:38:14,485	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_21       |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 10              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_21
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_21`
[36m(eval_config pid=360327)[0m CUDA is available. Using GPU.




[36m(eval_config pid=360327)[0m {'activation': 'SiLU', 'activation_transformers': 'Tanh', 'amsgrad': True, 'batch_size': 64, 'concatenate_features': True, 'd_model': 624, 'dropout': 0.180672988078028, 'dropout_transformers': 0.07792733151736303, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lr': 0.00011369165460865716, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features_globally': False, 'normalize_features_independantly': False, 'num_heads': 12, 'num_layers_transformer': 3, 'optimizer': 'AdamW', 'positive_function': 'abs', 'epochs_complete_problem': 39, 'reg': True, 'transformers_model': True, 'use_gcn': False, 'weight_decay': 4.1294739512530716e-08}
[36m(eval_config pid=360327)[0m CUDA is available. Using GPU.
[36m(eval_config pid=360327)[0m epoch:  0 loss :  7.412035228489162 acc:  0.08958756671058214
[36m(eval_config pid=360327)[0

In [None]:
# Launch the hyperparameter-tuning experiment series.
# NOTE(review): run_all_xp is defined elsewhere in this notebook; the
# per-argument remarks below are assumptions to confirm against it.
run_all_xp(
    xps_name="hyperparameter_tuning_projet_long",
    algo=None,  # presumably selects a default search algorithm - TODO confirm
    xp_size=20,  # presumably trials per experiment (the cell output reports 20 trials)
    xps_number=10,  # presumably how many experiments to run - TODO confirm
    accuracy_target=0.98,
    max_num_epochs=None,
    storage_path='/content/tuning',
    drive_path="/content/drive/MyDrive",
)

1
loading already preprocessed data: 
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/list_users
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/vocab


  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-06 18:25:57,062	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-06 18:26:10,677	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-06 18:26:10,679	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_0        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 20              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_0
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_0`
[36m(eval_config pid=60669)[0m CUDA is available. Using GPU.




[36m(eval_config pid=60669)[0m {'activation': 'GELU', 'activation_transformers': 'Softplus', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1200, 'dropout': 0.25926108944253146, 'dropout_StationIdEmbedding': 0.6617067241662258, 'dropout_timeStampEmbedding': 0.703771911609112, 'dropout_transformers': 0.2106148532882447, 'early_stopping': 9, 'encoder_only': False, 'epochs_classifcation_only': 78, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.6455072704467397, 'scheduler': 'StepLR', 'step_size': 12, 'dropout_lstm': 0.7139129724354693, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'CELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 36, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 12, 'num_layers_transformer': 6, 'alpha': 0.9048161384865954, 'centered': False, 'eps': 2.830262974672483e-07, 'lr': 0.0001538268984097869, 'momentum': 0.015507939593735698, 'optimizer': 'RMSp

[36m(eval_config pid=60669)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=60669)[0m {'activation': 'Hardswish', 'activation_transformers': 'Hardswish', 'batch_size': 64, 'concatenate_features': True, 'd_model': 504, 'dropout': 0.3291271414458241, 'dropout_StationIdEmbedding': 0.25297478241770976, 'dropout_timeStampEmbedding': 0.3974229173352428, 'dropout_transformers': 0.233546966213382, 'early_stopping': 6, 'encoder_only': False, 'epochs_classifcation_only': 62, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 1, 'alpha': 0.9820220267903932, 'centered': True, 'eps': 1.3704179718814581e-08, 'lr': 0.0035072715527129047, 'momentum': 0.49900480812856207, 'optimizer': 'RMSprop', 'weight_decay': 0.032580408773536185, 'positive_function': 'relu', 'epochs_complete_problem': 36, 'reg': True, 'transformers_model': True, 'activation_gcn': 'Softmin', 'dropout_gcn': 0.191598303652297



[36m(eval_config pid=60669)[0m {'activation': 'ReLU6', 'activation_transformers': 'SiLU', 'batch_size': 32, 'concatenate_features': True, 'd_model': 576, 'dropout': 0.9606034360693589, 'dropout_StationIdEmbedding': 0.7021465025350949, 'dropout_timeStampEmbedding': 0.9768470093851694, 'dropout_transformers': 0.1405650029640072, 'early_stopping': 9, 'encoder_only': False, 'epochs_classifcation_only': 54, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 8, 'factor': 0.6666864900356849, 'patience': 5, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.00018455992332687843, 'dropout_lstm': 0.42443929217145804, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 3, 'num_layers_transformer': 5, 'lr': 0.0006312750469574822, 'momentum': 0.2638135939945019, 'nesterov': False, 'optimizer': 'SGD', 



[36m(eval_config pid=60669)[0m {'activation': 'ELU', 'activation_transformers': 'Softplus', 'batch_size': 64, 'concatenate_features': True, 'd_model': 504, 'dropout': 0.3957791952489571, 'dropout_StationIdEmbedding': 0.702372492734025, 'dropout_timeStampEmbedding': 0.09474334414227226, 'dropout_transformers': 0.0463891876881809, 'early_stopping': 6, 'encoder_only': False, 'epochs_classifcation_only': 58, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.016169922985659214, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.2747819132178011, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'SiLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 12, 'num_layers_transformer': 3, 'lr': 1.1078303311413616e-05, 'momentum': 0.26188080369782574, 'nesterov': True, 'optimizer': 'SGD', 'weight_decay': 0.0004189190842914088, 'positive_function': 'relu',



[36m(eval_config pid=60669)[0m {'activation': 'ELU', 'activation_transformers': 'ReLU6', 'batch_size': 32, 'concatenate_features': False, 'd_model': 696, 'dropout': 0.4321527117415681, 'dropout_StationIdEmbedding': 0.03868435467749587, 'dropout_timeStampEmbedding': 0.6928207271119554, 'dropout_transformers': 0.014403770793150472, 'early_stopping': 3, 'encoder_only': False, 'epochs_classifcation_only': 39, 'input_size': 2, 'learnable_pos_encoding': True, 'T_max': 12, 'eta_min': 0.07250020026056199, 'scheduler': 'CosineAnnealingLR', 'dropout_lstm': 0.5503585263795484, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Softsign', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 6, 'num_layers_transformer': 2, 'alpha': 0.9113688075049072, 'centered': True, 'eps': 2.808398740903699e-06, 'lr': 4.10486923010962e-07, 'momentum': 0.1742385654185275, 'optimizer



[36m(eval_config pid=60669)[0m {'activation': 'Softmin', 'activation_transformers': 'SELU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1080, 'dropout': 0.43075567017964045, 'dropout_StationIdEmbedding': 0.3223490839324896, 'dropout_timeStampEmbedding': 0.11959456607335395, 'dropout_transformers': 0.32149231283350965, 'early_stopping': 9, 'encoder_only': False, 'epochs_classifcation_only': 31, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 3, 'factor': 0.5410076163403952, 'patience': 9, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.00010877615201678248, 'dropout_lstm': 0.6828398700211156, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 12, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 4, 'alpha': 0.9667077810181143, 'centered': False, 'eps': 3.516832501133623e-08, 'lr': 8.615261318640



[36m(eval_config pid=60669)[0m CUDA is available. Using GPU.
[36m(eval_config pid=60669)[0m {'activation': 'GELU', 'activation_transformers': 'RReLU', 'batch_size': 32, 'concatenate_features': False, 'd_model': 144, 'dropout': 0.30505538233349916, 'dropout_StationIdEmbedding': 0.5032306905819148, 'dropout_timeStampEmbedding': 0.8559672395122064, 'dropout_transformers': 0.6058989839613876, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 52, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.9428669887968822, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Tanh', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 12, 'num_layers_transformer': 5, 'alpha': 0.9132665251491426, 'centered': True, 'eps': 1.0681541341594436e-06, 'lr': 0.00022473415354181788, 'momentum': 0.09082775790003822, 'optim



[36m(eval_config pid=60669)[0m {'activation': 'ELU', 'activation_transformers': 'Softshrink', 'batch_size': 16, 'concatenate_features': False, 'd_model': 1056, 'dropout': 0.2450016010491265, 'dropout_StationIdEmbedding': 0.8751907044052445, 'dropout_timeStampEmbedding': 0.4980229201128875, 'dropout_transformers': 0.5459436867073139, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 25, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.04660905496478961, 'scheduler': 'StepLR', 'step_size': 17, 'dropout_lstm': 0.18390114890082632, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 12, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.8753430804674709, 'beta_2': 0.9604872687745583, 'eps': 2.2476785782047834e-06, 'lr': 0.0005002538302103282, 'optimizer': 'AdamW'



[36m(eval_config pid=60669)[0m {'activation': 'SELU', 'activation_transformers': 'Softshrink', 'batch_size': 16, 'concatenate_features': True, 'd_model': 1440, 'dropout': 0.424930653895912, 'dropout_StationIdEmbedding': 0.8356749901292031, 'dropout_timeStampEmbedding': 0.7748568722037691, 'dropout_transformers': 0.8864292197781687, 'early_stopping': 7, 'encoder_only': True, 'epochs_classifcation_only': 61, 'input_size': 2, 'learnable_pos_encoding': False, 'T_max': 5, 'eta_min': 0.000845888121512347, 'scheduler': 'CosineAnnealingLR', 'dropout_lstm': 0.8077015242197328, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Softmin', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 192, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 12, 'num_layers_transformer': 1, 'amsgrad': True, 'beta_1': 0.9854597264805187, 'beta_2': 0.9841297150729207, 'eps': 1.6678915826240303e-06, 'lr': 9.967385128087866e-06, 'optim



[36m(eval_config pid=60669)[0m {'activation': 'RReLU', 'activation_transformers': 'ReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 552, 'dropout': 0.31418065391016503, 'dropout_StationIdEmbedding': 0.9170122040932379, 'dropout_timeStampEmbedding': 0.7519874014575396, 'dropout_transformers': 0.24473354482957022, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 24, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.40693957329744346, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 12, 'num_layers_transformer': 1, 'alpha': 0.9877232284848576, 'centered': True, 'eps': 3.96938206445058e-07, 'lr': 0.0018253708553199364, 'momentum': 0.28568258748991526, 'optimizer': 'RMSprop', 'weight_decay': 0.1769820160131162, 'positive_fun

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-06 20:07:13,292	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-06 20:07:28,015	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-06 20:07:28,016	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_1        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 20              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_1
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_1`
[36m(eval_config pid=86188)[0m CUDA is available. Using GPU.
[36m(eval_config pid=86188)[0m GAT
[36m(eval_config pid=86188)[0m {'activation': 'SELU', 'activation_transformers': 'Softshrink', 'batch_size': 128, 'concatenate_features': False, 'd_model': 120, 'dropout': 0.8301701469867244, 'dropout_StationIdEmbedding': 0.5655882640525879, 'dropout_timeStampEmbedding': 0.2939189411543632, 'dropout_transformers': 0.4648875006740148, 'early_stopping': 7, 'encoder_only': T



[36m(eval_config pid=86188)[0m loss is undifined
[36m(eval_config pid=86188)[0m CUDA is available. Using GPU.
[36m(eval_config pid=86188)[0m GAT
[36m(eval_config pid=86188)[0m {'activation': 'ReLU', 'activation_transformers': 'LeakyReLU', 'batch_size': 128, 'concatenate_features': False, 'd_model': 288, 'dropout': 0.8209879365270651, 'dropout_StationIdEmbedding': 0.44365202679620575, 'dropout_timeStampEmbedding': 0.27366779202346486, 'dropout_transformers': 0.9729843269123895, 'early_stopping': 10, 'encoder_only': True, 'epochs_classifcation_only': 15, 'input_size': 2, 'learnable_pos_encoding': False, 'base_lr': 5.4378989153586717e-05, 'max_lr': 0.3093565932930617, 'mode': 'triangular2', 'scheduler': 'CyclicLR', 'step_size_up': 1, 'dropout_lstm': 0.8190940671037286, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Softmin', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_fe

[36m(eval_config pid=86188)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=86188)[0m loss is undifined
[36m(eval_config pid=86188)[0m CUDA is available. Using GPU.
[36m(eval_config pid=86188)[0m GAT
[36m(eval_config pid=86188)[0m {'activation': 'Hardtanh', 'activation_transformers': 'Hardswish', 'batch_size': 16, 'concatenate_features': False, 'd_model': 312, 'dropout': 0.5396895566751263, 'dropout_StationIdEmbedding': 0.8075607913239145, 'dropout_timeStampEmbedding': 0.35021425948049095, 'dropout_transformers': 0.45738321584959396, 'early_stopping': 7, 'encoder_only': True, 'epochs_classifcation_only': 45, 'input_size': 2, 'learnable_pos_encoding': False, 'base_lr': 0.014809498970484393, 'max_lr': 0.1296010430061614, 'mode': 'exp_range', 'scheduler': 'CyclicLR', 'step_size_up': 29, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 24, 'num_layers_transformer': 2, 'lr': 0.05041525421689873, 'momentum': 0.4714499827178006, 'nesterov': True, 'optimizer': 'SGD',



[36m(eval_config pid=86188)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Hardshrink', 'batch_size': 128, 'concatenate_features': True, 'd_model': 336, 'dropout': 0.02871094933921825, 'dropout_StationIdEmbedding': 0.5973034841166309, 'dropout_timeStampEmbedding': 0.9761405416663385, 'dropout_transformers': 0.9935022305177372, 'early_stopping': 8, 'encoder_only': True, 'epochs_classifcation_only': 76, 'input_size': 2, 'learnable_pos_encoding': False, 'T_max': 2, 'eta_min': 1.0163479384012925e-05, 'scheduler': 'CosineAnnealingLR', 'dropout_lstm': 0.5892482624658961, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'ReLU6', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 6, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 12, 'num_layers_transformer': 4, 'amsgrad': True, 'beta_1': 0.933261928544172, 'beta_2': 0.9825257647149878, 'eps': 5.267299521705942e-07, 'lr': 3.776445571903993e-07, 'op



[36m(eval_config pid=86188)[0m epoch:  0 loss :  8.686156132346705 acc:  0.0004465980394346069
[36m(eval_config pid=86188)[0m epoch:  1 loss :  8.60980284841437 acc:  0.0003572784315476855
[36m(eval_config pid=86188)[0m epoch:  2 loss :  8.49911426744963 acc:  0.0003126186276042248
[36m(eval_config pid=86188)[0m epoch:  3 loss :  8.424844149539345 acc:  0.0002456289216890338
[36m(eval_config pid=86188)[0m epoch:  4 loss :  8.371795523794074 acc:  0.000513587745349798
[36m(eval_config pid=86188)[0m epoch:  5 loss :  8.333946499071622 acc:  0.0006922269611236406
[36m(eval_config pid=86188)[0m epoch:  6 loss :  8.28223560734799 acc:  0.001027175490699596
[36m(eval_config pid=86188)[0m epoch:  7 loss :  8.218767999347888 acc:  0.002076680883370922
[36m(eval_config pid=86188)[0m epoch:  8 loss :  8.131423533590217 acc:  0.002925217158296675
[36m(eval_config pid=86188)[0m epoch:  9 loss :  8.020259084199605 acc:  0.0036174441194203157
[36m(eval_config pid=86188)[0m epoc



[36m(eval_config pid=86188)[0m epoch:  0 loss :  8.66463127869826 acc:  0.001027175490699596
[36m(eval_config pid=86188)[0m epoch:  1 loss :  8.464838145329402 acc:  0.0046222897081481815
[36m(eval_config pid=86188)[0m epoch:  2 loss :  8.20645870062021 acc:  0.007994104905879464
[36m(eval_config pid=86188)[0m epoch:  3 loss :  7.853411436080933 acc:  0.009400888730098474
[36m(eval_config pid=86188)[0m epoch:  4 loss :  7.59308888728802 acc:  0.01103097157403479
[36m(eval_config pid=86188)[0m epoch:  5 loss :  7.4211236183459945 acc:  0.01299600294754706
[36m(eval_config pid=86188)[0m epoch:  6 loss :  7.30122703405527 acc:  0.01752897304780832
[36m(eval_config pid=86188)[0m epoch:  7 loss :  7.204567021590012 acc:  0.024920170600451062
[36m(eval_config pid=86188)[0m epoch:  8 loss :  7.129659498654879 acc:  0.02279882991313668
[36m(eval_config pid=86188)[0m epoch:  9 loss :  7.078083097017728 acc:  0.055824754929325864
[36m(eval_config pid=86188)[0m epoch:  10 lo



[36m(eval_config pid=86188)[0m {'activation': 'SiLU', 'activation_transformers': 'Softmin', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1200, 'dropout': 0.8978344152517316, 'dropout_StationIdEmbedding': 0.6351256648989213, 'dropout_timeStampEmbedding': 0.5332537847067437, 'dropout_transformers': 0.7912071529729393, 'early_stopping': 9, 'encoder_only': True, 'epochs_classifcation_only': 71, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.8894909941303533, 'scheduler': 'StepLR', 'step_size': 5, 'dropout_lstm': 0.9948722394784901, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'ReLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 6, 'max_len': 100, 'nb_batchs': 48, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 12, 'num_layers_transformer': 2, 'amsgrad': True, 'beta_1': 0.9527327445081933, 'beta_2': 0.9710864404063841, 'eps': 1.052996476375188e-06, 'lr': 0.005467900278907302, 'optimizer': 'Adam', 'weig



[36m(eval_config pid=86188)[0m {'activation': 'Softshrink', 'activation_transformers': 'ELU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1224, 'dropout': 0.5671097847313655, 'dropout_StationIdEmbedding': 0.2431507577512859, 'dropout_timeStampEmbedding': 0.6683385154989354, 'dropout_transformers': 0.3828629598690466, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 80, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.7881903375504518, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 192, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 6, 'num_layers_transformer': 1, 'amsgrad': True, 'beta_1': 0.8477600838001569, 'beta_2': 0.990649735091279, 'eps': 1.4317116847659442e-07, 'lr': 1.0082885521871854e-05, 'optimizer': 'Adam', 'weight_decay': 2.891384205322503e-07, 'positive_fu



[36m(eval_config pid=86188)[0m {'activation': 'Softshrink', 'activation_transformers': 'Softshrink', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1272, 'dropout': 0.12854579791817278, 'dropout_StationIdEmbedding': 0.19238450021312126, 'dropout_timeStampEmbedding': 0.6538570582005737, 'dropout_transformers': 0.3992301696591553, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 80, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'dropout_lstm': 0.8247204716264702, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 6, 'num_layers_transformer': 1, 'amsgrad': True, 'beta_1': 0.8386744976190655, 'beta_2': 0.9907557316370873, 'eps': 1.6619452567615624e-07, 'lr': 1.0323229721054282e-05, 'optimizer': 'Adam', 'weight_decay': 1.3631584396945346e-07, 



[36m(eval_config pid=86188)[0m {'activation': 'Softshrink', 'activation_transformers': 'ELU', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1248, 'dropout': 0.49585900096956337, 'dropout_StationIdEmbedding': 0.2864036836777289, 'dropout_timeStampEmbedding': 0.7806548104755296, 'dropout_transformers': 0.13241337753429783, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 75, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.8303086957303105, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 192, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 1, 'amsgrad': True, 'beta_1': 0.8774676219672991, 'beta_2': 0.9907819147418413, 'eps': 4.9234655156455195e-06, 'lr': 1.770748858481716e-06, 'optimizer': 'Adam', 'weight_decay': 2.247764259217346e-09, 'positive_func



[36m(eval_config pid=86188)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'ELU', 'batch_size': 16, 'concatenate_features': True, 'd_model': 1008, 'dropout': 0.3605725842176154, 'dropout_StationIdEmbedding': 0.3609065767551209, 'dropout_timeStampEmbedding': 0.570607302955783, 'dropout_transformers': 0.5624993018383793, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 59, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'dropout_lstm': 0.6593645563236669, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 6, 'num_layers_transformer': 1, 'amsgrad': True, 'beta_1': 0.8365637728609997, 'beta_2': 0.9770824657171174, 'eps': 5.985097869887318e-07, 'lr': 6.341197488531829e-05, 'optimizer': 'Adam', 'weight_decay': 0.00016958082404274464, 'positive_fu



[36m(eval_config pid=86188)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'ELU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 984, 'dropout': 0.36137238237566216, 'dropout_StationIdEmbedding': 0.001895044032539983, 'dropout_timeStampEmbedding': 0.368372085802816, 'dropout_transformers': 0.5720927204261574, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 73, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.6820397738894634, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 2, 'amsgrad': True, 'beta_1': 0.8358288102519749, 'beta_2': 0.9748144032174882, 'eps': 6.165560646873057e-08, 'lr': 9.43401259328304e-05, 'optimizer': 'Adam', 'weight_decay': 5.366267094044646e-05, 'positive_functio



[36m(eval_config pid=86188)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'ELU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1176, 'dropout': 0.24085212538021228, 'dropout_StationIdEmbedding': 0.002762140268031453, 'dropout_timeStampEmbedding': 0.2054455380730971, 'dropout_transformers': 0.3616520701255228, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 71, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.46012352861682093, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': True, 'beta_1': 0.8303874274237837, 'beta_2': 0.9643748194109263, 'eps': 6.434880517763961e-08, 'lr': 5.506564773909709e-05, 'optimizer': 'Adam', 'weight_decay': 1.4197120191748532e-05, 'positive_fu



[36m(eval_config pid=86188)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'ELU', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1152, 'dropout': 0.011013474382048194, 'dropout_StationIdEmbedding': 0.00790098293560111, 'dropout_timeStampEmbedding': 0.2214004849006575, 'dropout_transformers': 0.2814219152974762, 'early_stopping': 2, 'encoder_only': False, 'epochs_classifcation_only': 72, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.43434834532781436, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8173809535101957, 'beta_2': 0.9629981549756063, 'eps': 5.535905596153284e-08, 'lr': 0.0019149273234474703, 'optimizer': 'Adam', 'weight_decay': 1.600197278886951e-05, 'positive_



[36m(eval_config pid=86188)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'PReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1320, 'dropout': 0.13199651356910314, 'dropout_StationIdEmbedding': 0.1349210556895789, 'dropout_timeStampEmbedding': 0.19303613251472868, 'dropout_transformers': 0.6966804329161463, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 66, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.30367792710605657, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': True, 'beta_1': 0.8963213758870979, 'beta_2': 0.9540348522499347, 'eps': 2.338940433303009e-09, 'lr': 0.00011960294116477681, 'optimizer': 'Adam', 'weight_decay': 0.00010384410442619273, 'positive_f



[36m(eval_config pid=86188)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'CELU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 648, 'dropout': 0.20522205289613513, 'dropout_StationIdEmbedding': 0.07569626942763033, 'dropout_timeStampEmbedding': 0.3486033441766751, 'dropout_transformers': 0.18105456410747045, 'early_stopping': 2, 'encoder_only': False, 'epochs_classifcation_only': 55, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.8085756537070063, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.5246383891759951, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': True, 'beta_1': 0.8208303674611096, 'beta_2': 0.9667559904178682, 'eps': 4.369601211192274e-08, 'lr': 0.010345308052864882, 'optimizer': 'Adam', 'weight_decay



[36m(eval_config pid=86188)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Tanhshrink', 'batch_size': 128, 'concatenate_features': True, 'd_model': 984, 'dropout': 0.08480769825751716, 'dropout_StationIdEmbedding': 0.17963035583085768, 'dropout_timeStampEmbedding': 0.4170156959961103, 'dropout_transformers': 0.5926570139752845, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 74, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.3222794518038893, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': True, 'beta_1': 0.8604000400634382, 'beta_2': 0.9754826879665086, 'eps': 3.304684136482976e-07, 'lr': 4.720358505500233e-05, 'optimizer': 'Adam', 'weight_decay': 3.0391831322775776e-05, 'positi

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-07 00:16:49,221	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-07 00:17:03,791	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-07 00:17:03,793	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_2        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 20              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_2
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_2`
[36m(eval_config pid=147718)[0m CUDA is available. Using GPU.




[36m(eval_config pid=147718)[0m {'activation': 'ReLU6', 'activation_transformers': 'Tanhshrink', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1440, 'dropout': 0.06481227396936776, 'dropout_StationIdEmbedding': 0.1708766743744856, 'dropout_timeStampEmbedding': 0.07422947228325949, 'dropout_transformers': 0.0794317889686042, 'early_stopping': 1, 'encoder_only': False, 'epochs_classifcation_only': 63, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.3770916843597625, 'scheduler': 'StepLR', 'step_size': 30, 'dropout_lstm': 0.3185351864747675, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'alpha': 0.938452613050594, 'centered': True, 'eps': 3.6555484290291812e-09, 'lr': 0.08737157533127207, 'momentum': 0.4793949095942875, 'optimizer': 'RMSprop



[36m(eval_config pid=147718)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Tanhshrink', 'batch_size': 128, 'concatenate_features': True, 'd_model': 816, 'dropout': 0.26070687905349665, 'dropout_StationIdEmbedding': 0.08653789669073876, 'dropout_timeStampEmbedding': 0.16094489916002702, 'dropout_transformers': 0.3143248311414082, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 79, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.1763145348079828, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.8964571198071845, 'beta_2': 0.9575675306205829, 'eps': 3.6159380704877956e-07, 'lr': 2.9024428536914103e-05, 'optimizer': 'AdamW', 'weight_decay': 7.080137709065855e-06, 'p



[36m(eval_config pid=147718)[0m {'activation': 'Hardshrink', 'activation_transformers': 'SiLU', 'batch_size': 128, 'concatenate_features': True, 'd_model': 792, 'dropout': 0.2680783994408866, 'dropout_StationIdEmbedding': 0.0739238728613087, 'dropout_timeStampEmbedding': 0.02209788354660225, 'dropout_transformers': 0.337638725755541, 'early_stopping': 3, 'encoder_only': False, 'epochs_classifcation_only': 69, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 10, 'factor': 0.017659676725530205, 'patience': 2, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.2702315615474546, 'dropout_lstm': 0.14415152640995627, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'alpha': 0.999167930780396, 'centered': False, 'eps': 2.695980735175652e-09, 'lr': 0.0369351670450



[36m(eval_config pid=147718)[0m CUDA is available. Using GPU.
[36m(eval_config pid=147718)[0m {'activation': 'CELU', 'activation_transformers': 'Softsign', 'batch_size': 128, 'concatenate_features': True, 'd_model': 816, 'dropout': 0.18102681845824922, 'dropout_StationIdEmbedding': 0.10392545818255872, 'dropout_timeStampEmbedding': 0.15437432061990405, 'dropout_transformers': 0.29882918903011946, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 78, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.020742256607691828, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.9024997607854329, 'beta_2': 0.9575548291052323, 'eps': 3.30971932219932e-07, 'lr': 0.00042685753975095584, 'optimi



[36m(eval_config pid=147718)[0m CUDA is available. Using GPU.
[36m(eval_config pid=147718)[0m {'activation': 'CELU', 'activation_transformers': 'Softsign', 'batch_size': 128, 'concatenate_features': True, 'd_model': 648, 'dropout': 0.16644297114258483, 'dropout_StationIdEmbedding': 0.11797401955244144, 'dropout_timeStampEmbedding': 0.14776856522258514, 'dropout_transformers': 0.2745678392768168, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 79, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.3017648723625681, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.008118261989633554, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.9073516314833748, 'beta_2': 0.9583504918664006, 'eps': 3.632032183754977e-0



[36m(eval_config pid=147718)[0m {'activation': 'CELU', 'activation_transformers': 'Softsign', 'batch_size': 128, 'concatenate_features': True, 'd_model': 792, 'dropout': 0.48146394546781046, 'dropout_StationIdEmbedding': 0.37022445255740244, 'dropout_timeStampEmbedding': 0.001576203477589161, 'dropout_transformers': 0.06945179989196446, 'early_stopping': 5, 'encoder_only': False, 'epochs_classifcation_only': 55, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.09006872349014237, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 4, 'alpha': 0.9493078422155424, 'centered': True, 'eps': 1.2815905764013671e-08, 'lr': 1.1795758089441892e-05, 'momentum': 0.3973259665301577, 'optimizer': 'RMSprop', 'weight_decay': 1.9495491873470937e-07, 'po



[36m(eval_config pid=147718)[0m CUDA is available. Using GPU.
[36m(eval_config pid=147718)[0m {'activation': 'Hardswish', 'activation_transformers': 'LogSigmoid', 'batch_size': 128, 'concatenate_features': True, 'd_model': 432, 'dropout': 0.28625512985266344, 'dropout_StationIdEmbedding': 0.2868159256091363, 'dropout_timeStampEmbedding': 0.058309692541478206, 'dropout_transformers': 0.10218858329978486, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 47, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.3635282361292788, 'scheduler': 'StepLR', 'step_size': 1, 'dropout_lstm': 0.1990058380600325, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.9297688430477632, 'beta_2': 0.951051691380568, 'eps': 1.03224



[36m(eval_config pid=147718)[0m {'activation': 'Softsign', 'activation_transformers': 'Tanhshrink', 'batch_size': 128, 'concatenate_features': True, 'd_model': 648, 'dropout': 0.04781246254922128, 'dropout_StationIdEmbedding': 0.22059608669842368, 'dropout_timeStampEmbedding': 0.1624345622398177, 'dropout_transformers': 0.026182522295456034, 'early_stopping': 6, 'encoder_only': True, 'epochs_classifcation_only': 77, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 1, 'factor': 0.025681708553783655, 'patience': 1, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.1549368317282937, 'dropout_lstm': 0.2394864782197647, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.8849779365933899, 'beta_2': 0.957263514555617, 'eps': 2.530107059



[36m(eval_config pid=147718)[0m epoch:  0 loss :  8.122256311319642 acc:  0.00037960833351941584
[36m(eval_config pid=147718)[0m epoch:  1 loss :  8.05166454638465 acc:  0.000513587745349798
[36m(eval_config pid=147718)[0m epoch:  2 loss :  7.986280457448151 acc:  0.0005582475492932586
[36m(eval_config pid=147718)[0m epoch:  3 loss :  7.933377637701519 acc:  0.0008038764709822924
[36m(eval_config pid=147718)[0m epoch:  4 loss :  7.873404850394039 acc:  0.001116495098586517
[36m(eval_config pid=147718)[0m epoch:  5 loss :  7.823244935375149 acc:  0.001496103432105933
[36m(eval_config pid=147718)[0m epoch:  6 loss :  7.77035544282299 acc:  0.0021213406873143827
[36m(eval_config pid=147718)[0m epoch:  7 loss :  7.716384063332768 acc:  0.0027465779425228324
[36m(eval_config pid=147718)[0m epoch:  8 loss :  7.676770218348099 acc:  0.003438804903646473
[36m(eval_config pid=147718)[0m epoch:  9 loss :  7.6309798531613104 acc:  0.004354330884487417
[36m(eval_config pid=147



[36m(eval_config pid=147718)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Softsign', 'batch_size': 128, 'concatenate_features': True, 'd_model': 864, 'dropout': 0.15264521577592885, 'dropout_StationIdEmbedding': 0.09257820445673795, 'dropout_timeStampEmbedding': 0.25366459898702465, 'dropout_transformers': 0.5076449951348454, 'early_stopping': 3, 'encoder_only': False, 'epochs_classifcation_only': 66, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.2999990737302809, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.3620687895634662, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.9511006928012369, 'beta_2': 0.954996327858048, 'eps': 2.651662343920685e-07, 'lr': 1.2732979423880814e-06, 'optimizer': 'AdamW', 'weig



[36m(eval_config pid=147718)[0m CUDA is available. Using GPU.
[36m(eval_config pid=147718)[0m {'activation': 'CELU', 'activation_transformers': 'Tanh', 'batch_size': 128, 'concatenate_features': True, 'd_model': 936, 'dropout': 0.2144262302829353, 'dropout_StationIdEmbedding': 0.7438218963442336, 'dropout_timeStampEmbedding': 0.10865370973419583, 'dropout_transformers': 0.17973301367981903, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 53, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 5, 'factor': 0.27395867139810964, 'patience': 4, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.009191231847049753, 'dropout_lstm': 0.13567017543400267, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 1, 'alpha': 0.9957518132404564, 'centered



[36m(eval_config pid=147718)[0m CUDA is available. Using GPU.
[36m(eval_config pid=147718)[0m {'activation': 'ReLU', 'activation_transformers': 'ReLU', 'batch_size': 32, 'concatenate_features': True, 'd_model': 552, 'dropout': 0.3200467844444552, 'dropout_StationIdEmbedding': 0.33205066692000035, 'dropout_timeStampEmbedding': 0.1524203744540469, 'dropout_transformers': 0.0016516744885342605, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 41, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.0009188326006648317, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 12, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 4, 'amsgrad': False, 'beta_1': 0.9237934647699764, 'beta_2': 0.9603204025088408, 'eps': 9.798426465484697e-08, 'lr': 0.0003416372305808973, 'optimizer':



[36m(eval_config pid=147718)[0m CUDA is available. Using GPU.
[36m(eval_config pid=147718)[0m {'activation': 'PReLU', 'activation_transformers': 'LeakyReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 816, 'dropout': 0.24487312738527295, 'dropout_StationIdEmbedding': 0.4782462609563159, 'dropout_timeStampEmbedding': 0.4465247279357142, 'dropout_transformers': 0.24187457152905742, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 50, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.20720656406138455, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 6, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.9450418529054735, 'beta_2': 0.965252582715933, 'eps': 2.7647566945256997e-08, 'lr': 2.678227992161398e-06, 'optim



[36m(eval_config pid=147718)[0m {'activation': 'Hardtanh', 'activation_transformers': 'Mish', 'batch_size': 128, 'concatenate_features': False, 'd_model': 1056, 'dropout': 0.40500501796957167, 'dropout_StationIdEmbedding': 0.40069081027562425, 'dropout_timeStampEmbedding': 0.054204397348860206, 'dropout_transformers': 0.507200548196927, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 2, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.8667331256154139, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.04588539207539638, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 5, 'amsgrad': False, 'beta_1': 0.9069226788355222, 'beta_2': 0.9702066855498683, 'eps': 3.963272655257734e-06, 'lr': 0.0010873151281478677, 'optimizer': 'AdamW', 'weight_dec



[36m(eval_config pid=147718)[0m {'activation': 'SiLU', 'activation_transformers': 'Hardtanh', 'batch_size': 32, 'concatenate_features': False, 'd_model': 600, 'dropout': 0.4561195515382004, 'dropout_StationIdEmbedding': 0.9417777919007059, 'dropout_timeStampEmbedding': 0.173417021958978, 'dropout_transformers': 0.6362686627523595, 'early_stopping': 3, 'encoder_only': False, 'epochs_classifcation_only': 64, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.13540938818987527, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 4, 'lr': 1.7055595170546112e-05, 'momentum': 0.3830425719103891, 'nesterov': False, 'optimizer': 'SGD', 'weight_decay': 2.398977035981425e-09, 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 

[36m(eval_config pid=147718)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=147718)[0m {'activation': 'Hardsigmoid', 'activation_transformers': 'Softmin', 'batch_size': 128, 'concatenate_features': True, 'd_model': 840, 'dropout': 0.623608665410623, 'dropout_StationIdEmbedding': 0.1564696070676831, 'dropout_timeStampEmbedding': 0.1023317236799374, 'dropout_transformers': 0.15929640622293462, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 78, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.6433319532151986, 'scheduler': 'StepLR', 'step_size': 10, 'dropout_lstm': 0.2697655062947037, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'RReLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 6, 'max_len': 100, 'nb_batchs': 36, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.9612234038792027, 'beta_2': 0.9623928139360105, 'eps': 9.72651506065075e-09, 'lr': 0.025347935396950034, 'optimizer': 'AdamW',

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-07 02:41:01,860	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-07 02:41:16,737	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-07 02:41:16,738	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_3        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 20              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_3
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_3`
[36m(eval_config pid=183394)[0m CUDA is available. Using GPU.




[36m(eval_config pid=183394)[0m {'activation': 'GELU', 'activation_transformers': 'ReLU6', 'batch_size': 64, 'concatenate_features': False, 'd_model': 456, 'dropout': 0.0012980306555653853, 'dropout_StationIdEmbedding': 0.10456912036775422, 'dropout_timeStampEmbedding': 0.5254810160750509, 'dropout_transformers': 0.4484911681975673, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 8, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.38031071144816575, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 1, 'lr': 0.011109562390026308, 'momentum': 0.09445856056773963, 'nesterov': False, 'optimizer': 'SGD', 'weight_decay': 8.65962469596313e-05, 'positive_function': 'exp', 'reg': False, 'transformers_model': True, 'us



[36m(eval_config pid=183394)[0m {'activation': 'Mish', 'activation_transformers': 'Sigmoid', 'batch_size': 128, 'concatenate_features': True, 'd_model': 504, 'dropout': 0.08794049450462536, 'dropout_StationIdEmbedding': 0.20865629698406893, 'dropout_timeStampEmbedding': 0.1322336147870886, 'dropout_transformers': 0.2581026005192587, 'early_stopping': 6, 'encoder_only': True, 'epochs_classifcation_only': 31, 'input_size': 2, 'learnable_pos_encoding': True, 'base_lr': 0.0009871197075032479, 'max_lr': 0.07635522331092133, 'mode': 'triangular2', 'scheduler': 'CyclicLR', 'step_size_up': 9, 'dropout_lstm': 0.484035234259669, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'LeakyReLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 4, 'amsgrad': False, 'beta_1': 0.885400090675543, 'beta_2': 0.9673373506900346, 'eps': 4.8589464719



[36m(eval_config pid=183394)[0m epoch:  0 loss :  8.169504349973021 acc:  0.0008931960788692137
[36m(eval_config pid=183394)[0m epoch:  1 loss :  7.3936777956345505 acc:  0.03097157403478999
[36m(eval_config pid=183394)[0m epoch:  2 loss :  6.798426992752972 acc:  0.18652167116986357
[36m(eval_config pid=183394)[0m epoch:  3 loss :  5.878395024467917 acc:  0.2126923162807315
[36m(eval_config pid=183394)[0m epoch:  4 loss :  5.279356479644775 acc:  0.22434852510997477
[36m(eval_config pid=183394)[0m epoch:  5 loss :  5.082611893405433 acc:  0.21514860549762185
[36m(eval_config pid=183394)[0m loss is undifined
[36m(eval_config pid=183394)[0m CUDA is available. Using GPU.
[36m(eval_config pid=183394)[0m GraphSAGE




[36m(eval_config pid=183394)[0m {'activation': 'Softmin', 'activation_transformers': 'Tanhshrink', 'batch_size': 128, 'concatenate_features': True, 'd_model': 696, 'dropout': 0.541300256763616, 'dropout_StationIdEmbedding': 0.32440931108325116, 'dropout_timeStampEmbedding': 0.3325215479777458, 'dropout_transformers': 0.34721351770104, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 62, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.5687294813995353, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.23713305944913743, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Hardswish', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 24, 'num_layers_transformer': 1, 'lr': 3.3272871705909365e-06, 'momentum': 0.39267756241044116, 'nesterov': False, 'optimizer': 'SGD', 'weight_decay': 0.016307081781112134, 'positive_function': 

[36m(eval_config pid=183394)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=183394)[0m epoch:  0 loss :  8.020785875409564 acc:  0.00033494852957595514
[36m(eval_config pid=183394)[0m epoch:  1 loss :  8.020754778496572 acc:  0.00033494852957595514
[36m(eval_config pid=183394)[0m epoch:  2 loss :  8.020906733575268 acc:  0.00033494852957595514
[36m(eval_config pid=183394)[0m CUDA is available. Using GPU.




[36m(eval_config pid=183394)[0m {'activation': 'Softsign', 'activation_transformers': 'Hardswish', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1392, 'dropout': 0.23486814482870977, 'dropout_StationIdEmbedding': 0.012869712632046393, 'dropout_timeStampEmbedding': 0.20990781763196148, 'dropout_transformers': 0.39982692079034005, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 71, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.4727041511204779, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8640403458355227, 'beta_2': 0.9594584700146656, 'eps': 2.2922850836244756e-08, 'lr': 2.8664721598807104e-05, 'optimizer': 'AdamW', 'weight_decay': 1.0190736848145976e-05, 'p



[36m(eval_config pid=183394)[0m {'activation': 'Softsign', 'activation_transformers': 'Hardswish', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1368, 'dropout': 0.21633943701146666, 'dropout_StationIdEmbedding': 0.06006319940401399, 'dropout_timeStampEmbedding': 0.24817736841872917, 'dropout_transformers': 0.21707783273911457, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 75, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.748792574455236, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.858421889895238, 'beta_2': 0.9604391015598143, 'eps': 1.7901681033341197e-08, 'lr': 2.7271004275205055e-05, 'optimizer': 'AdamW', 'weight_decay': 1.3929402018660987e-06, 'posi



[36m(eval_config pid=183394)[0m {'activation': 'Softsign', 'activation_transformers': 'Hardswish', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1392, 'dropout': 0.2132341800917258, 'dropout_StationIdEmbedding': 0.055069988690378546, 'dropout_timeStampEmbedding': 0.28081094060414435, 'dropout_transformers': 0.40518460279781665, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 75, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.7504566065967552, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8643925986139109, 'beta_2': 0.9503202673300509, 'eps': 1.7039507347789476e-08, 'lr': 4.313868257479694e-06, 'optimizer': 'AdamW', 'weight_decay': 1.4980452342895956e-06, 'pos



[36m(eval_config pid=183394)[0m {'activation': 'Softsign', 'activation_transformers': 'Hardswish', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1320, 'dropout': 0.2937572585945335, 'dropout_StationIdEmbedding': 0.14329899549129785, 'dropout_timeStampEmbedding': 0.24963007890636787, 'dropout_transformers': 0.22764380063168463, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 71, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.9180716770322955, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.8502403679384819, 'beta_2': 0.9593259292545429, 'eps': 1.653602809193038e-08, 'lr': 1.999907349664514e-05, 'optimizer': 'AdamW', 'weight_decay': 8.133674275845041e-07, 'positi



[36m(eval_config pid=183394)[0m {'activation': 'Softsign', 'activation_transformers': 'Hardswish', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1392, 'dropout': 0.39893655006454704, 'dropout_StationIdEmbedding': 0.03937634339952692, 'dropout_timeStampEmbedding': 0.1959278187612626, 'dropout_transformers': 0.5318811969083641, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 65, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.626037335541467, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8733727036005741, 'beta_2': 0.9530214850132347, 'eps': 5.326440476770292e-09, 'lr': 8.43784429544044e-07, 'optimizer': 'AdamW', 'weight_decay': 6.936782733401724e-05, 'positive_



[36m(eval_config pid=183394)[0m {'activation': 'RReLU', 'activation_transformers': 'Hardswish', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1272, 'dropout': 0.7045868659766934, 'dropout_StationIdEmbedding': 0.25997619658098026, 'dropout_timeStampEmbedding': 0.41481858473695277, 'dropout_transformers': 0.47035257882989645, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 69, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.8650828676885423, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8869391769498158, 'beta_2': 0.9619919102981779, 'eps': 2.86454790422147e-08, 'lr': 4.426161064719228e-06, 'optimizer': 'AdamW', 'weight_decay': 1.0093987311866204e-05, 'positive_



[36m(eval_config pid=183394)[0m {'activation': 'Hardswish', 'activation_transformers': 'PReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1320, 'dropout': 0.13591267613015862, 'dropout_StationIdEmbedding': 0.021355715096755643, 'dropout_timeStampEmbedding': 0.3141526519353306, 'dropout_transformers': 0.4009762494255571, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 80, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.7497613309921367, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.8574505286970864, 'beta_2': 0.9680012490499907, 'eps': 5.882632088579272e-09, 'lr': 1.88043262679481e-05, 'optimizer': 'AdamW', 'weight_decay': 2.236058148816801e-08, 'positive_f



[36m(eval_config pid=183394)[0m {'activation': 'Softsign', 'activation_transformers': 'Hardswish', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1032, 'dropout': 0.04206424576531012, 'dropout_StationIdEmbedding': 0.1969024329519362, 'dropout_timeStampEmbedding': 0.04114825969916583, 'dropout_transformers': 0.16057233067894292, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 76, 'input_size': 2, 'learnable_pos_encoding': True, 'base_lr': 0.000444893292663368, 'max_lr': 0.0840626223586766, 'mode': 'triangular', 'scheduler': 'CyclicLR', 'step_size_up': 23, 'dropout_lstm': 0.5846808633499627, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8725586338848395, 'beta_2': 0.9789791962061831, 'eps': 3.1531773



[36m(eval_config pid=183394)[0m epoch:  0 loss :  8.052920606059413 acc:  0.0006252372552084496
[36m(eval_config pid=183394)[0m epoch:  1 loss :  7.996616926500874 acc:  0.0008262063729540227
[36m(eval_config pid=183394)[0m epoch:  2 loss :  7.9272722151971635 acc:  0.0009825156867561352
[36m(eval_config pid=183394)[0m epoch:  3 loss :  7.850446845639136 acc:  0.0014514436281624723
[36m(eval_config pid=183394)[0m epoch:  4 loss :  7.764176485615392 acc:  0.0016970725498515061
[36m(eval_config pid=183394)[0m epoch:  5 loss :  7.683235746814359 acc:  0.0032155058839291695
[36m(eval_config pid=183394)[0m epoch:  6 loss :  7.601924914698447 acc:  0.004443650492374339
[36m(eval_config pid=183394)[0m epoch:  7 loss :  7.524986870058121 acc:  0.005091217649554518
[36m(eval_config pid=183394)[0m epoch:  8 loss :  7.470863508409069 acc:  0.005001898041667597
[36m(eval_config pid=183394)[0m epoch:  9 loss :  7.422743274319556 acc:  0.005001898041667597
[36m(eval_config pid=1



[36m(eval_config pid=183394)[0m {'activation': 'ReLU6', 'activation_transformers': 'LogSigmoid', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1440, 'dropout': 0.339943726147545, 'dropout_StationIdEmbedding': 0.1603580960400004, 'dropout_timeStampEmbedding': 0.22571627244022935, 'dropout_transformers': 0.32817603885627034, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 73, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.7127836068323692, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.824913797527221, 'beta_2': 0.9521636125462684, 'eps': 2.488835076286207e-08, 'lr': 4.628359076506855e-06, 'optimizer': 'AdamW', 'weight_decay': 2.1676028811482392e-07, 'positive_f



[36m(eval_config pid=183394)[0m {'activation': 'ELU', 'activation_transformers': 'SELU', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1200, 'dropout': 0.10530849752579123, 'dropout_StationIdEmbedding': 0.004648442177951087, 'dropout_timeStampEmbedding': 0.4944041311982972, 'dropout_transformers': 0.058679930965360616, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 51, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.19425452486587952, 'scheduler': 'StepLR', 'step_size': 22, 'dropout_lstm': 0.6344844716753606, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.9157157310582519, 'beta_2': 0.9607815413789651, 'eps': 7.839760337087477e-09, 'lr': 1.3483795641215889e-05, 'optimizer': 'AdamW', 'w



[36m(eval_config pid=183394)[0m {'activation': 'Softplus', 'activation_transformers': 'RReLU', 'batch_size': 64, 'concatenate_features': False, 'd_model': 1344, 'dropout': 0.44031019592152043, 'dropout_StationIdEmbedding': 0.13560210590149924, 'dropout_timeStampEmbedding': 0.18474624694175346, 'dropout_transformers': 0.25764684879082145, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 61, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 7, 'factor': 0.23634776462298565, 'patience': 3, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.024117131735883056, 'dropout_lstm': 0.9999884604343798, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.8547843145683762, 'beta_2': 0.9647424675391205, 'eps': 1.394732726



[36m(eval_config pid=183394)[0m {'activation': 'Softsign', 'activation_transformers': 'Tanhshrink', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1152, 'dropout': 0.06670883122058396, 'dropout_StationIdEmbedding': 0.23220397307259277, 'dropout_timeStampEmbedding': 0.5666722094324743, 'dropout_transformers': 0.6111331274044083, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 70, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.8860686225732174, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Hardtanh', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 2, 'lr': 3.734533104079965e-05, 'momentum': 0.4964813497282795, 'nesterov': False, 'optimizer': 'SGD', 'weight_decay': 1.2190803582560416e-06, 'positive_function': 'relu', 'reg': False, 'transformers_



[36m(eval_config pid=183394)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Tanh', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1272, 'dropout': 0.5230184003523776, 'dropout_StationIdEmbedding': 0.08866550197650655, 'dropout_timeStampEmbedding': 0.13104041522686613, 'dropout_transformers': 0.0382403423304013, 'early_stopping': 3, 'encoder_only': False, 'epochs_classifcation_only': 67, 'input_size': 2, 'learnable_pos_encoding': True, 'base_lr': 0.03014888867201773, 'max_lr': 0.35456745319479893, 'mode': 'triangular2', 'scheduler': 'CyclicLR', 'step_size_up': 12, 'dropout_lstm': 0.42729860920615326, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 192, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 6, 'num_layers_transformer': 3, 'alpha': 0.9216496905883678, 'centered': False, 'eps': 5.0370463085408695e-09, 'lr': 0.1292

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-07 06:14:49,636	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-07 06:15:04,347	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-07 06:15:04,348	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_4        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 20              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_4
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_4`
[36m(eval_config pid=236102)[0m CUDA is available. Using GPU.




[36m(eval_config pid=236102)[0m {'activation': 'ReLU', 'activation_transformers': 'LeakyReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 264, 'dropout': 0.18921447977489503, 'dropout_StationIdEmbedding': 0.8783451120581629, 'dropout_timeStampEmbedding': 0.4411108462953692, 'dropout_transformers': 0.4326701753045321, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 48, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.48496243866799893, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8027430683522431, 'beta_2': 0.9689786117017483, 'eps': 4.353000287425135e-08, 'lr': 1.751970616992533e-06, 'optimizer': 'AdamW', 'weight_decay': 6.286975977639781e-07, 'positive_fun



[36m(eval_config pid=236102)[0m {'activation': 'Tanh', 'activation_transformers': 'ReLU', 'batch_size': 64, 'concatenate_features': False, 'd_model': 1224, 'dropout': 0.5831576399529182, 'dropout_StationIdEmbedding': 0.2990453422552868, 'dropout_timeStampEmbedding': 0.271409010561531, 'dropout_transformers': 0.6896369677433796, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 77, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.19222104919759714, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.9507139578736439, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.9835849021157268, 'beta_2': 0.963711335805969, 'eps': 7.199556483162474e-08, 'lr': 6.9435874884811615e-06, 'optimizer': 'AdamW', 'weight_decay': 6



[36m(eval_config pid=236102)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Hardtanh', 'batch_size': 64, 'concatenate_features': True, 'd_model': 984, 'dropout': 0.29896751712271796, 'dropout_StationIdEmbedding': 0.3931028815211049, 'dropout_timeStampEmbedding': 0.6232814553531789, 'dropout_transformers': 0.4870216677368089, 'early_stopping': 1, 'encoder_only': False, 'epochs_classifcation_only': 80, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.7719569484872553, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 192, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 12, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8798492922675848, 'beta_2': 0.9811305851691509, 'eps': 1.2869855665917602e-08, 'lr': 4.774731308457301e-05, 'optimizer': 'AdamW', 'weight_decay': 0.0015817620111417982, 'posi



[36m(eval_config pid=236102)[0m {'activation': 'PReLU', 'activation_transformers': 'Mish', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1416, 'dropout': 0.3709266463526089, 'dropout_StationIdEmbedding': 0.5398274661428972, 'dropout_timeStampEmbedding': 0.3955817697346448, 'dropout_transformers': 0.3637237029431211, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 63, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 3, 'factor': 0.897258964909519, 'patience': 6, 'scheduler': 'ReduceLROnPlateau', 'threshold': 8.080584549844031e-06, 'dropout_lstm': 0.7105133455623256, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Tanhshrink', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 4, 'lr': 0.00039835427073885897, 'momentum': 0.05340950004697964, 'nesterov': True, 'optimizer': 'SGD'



[36m(eval_config pid=236102)[0m {'activation': 'Hardsigmoid', 'activation_transformers': 'Softplus', 'batch_size': 32, 'concatenate_features': True, 'd_model': 912, 'dropout': 0.1543828633140444, 'dropout_StationIdEmbedding': 0.1915331084813851, 'dropout_timeStampEmbedding': 0.21685185271604293, 'dropout_transformers': 0.15329681094697212, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 20, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.6773168859133127, 'scheduler': 'StepLR', 'step_size': 3, 'dropout_lstm': 0.3566787514886083, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 1, 'alpha': 0.9429014819244373, 'centered': False, 'eps': 5.4517615313317995e-08, 'lr': 2.3035100635708954e-06, 'momentum': 9.186930080154476e-05, 'optimize



[36m(eval_config pid=236102)[0m epoch:  0 loss :  7.259811984575712 acc:  0.05310050688877476
[36m(eval_config pid=236102)[0m epoch:  1 loss :  6.188090698535626 acc:  0.16238304713842305
[36m(eval_config pid=236102)[0m epoch:  2 loss :  5.191818420703594 acc:  0.22055244177478062
[36m(eval_config pid=236102)[0m epoch:  3 loss :  4.436965441703796 acc:  0.26385012169796573
[36m(eval_config pid=236102)[0m epoch:  4 loss :  3.928424211648794 acc:  0.30382064622736304
[36m(eval_config pid=236102)[0m epoch:  5 loss :  3.590215800358699 acc:  0.3307951678092133
[36m(eval_config pid=236102)[0m epoch:  6 loss :  3.346536452953632 acc:  0.34843579036688027
[36m(eval_config pid=236102)[0m epoch:  7 loss :  3.1650021663078896 acc:  0.362124020275551
[36m(eval_config pid=236102)[0m epoch:  8 loss :  3.040695260121272 acc:  0.3763928276354867
[36m(eval_config pid=236102)[0m epoch:  9 loss :  2.933615726691026 acc:  0.38257821048165597
[36m(eval_config pid=236102)[0m epoch:  1



[36m(eval_config pid=236102)[0m {'activation': 'Tanhshrink', 'activation_transformers': 'Sigmoid', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1248, 'dropout': 0.3378364401700178, 'dropout_StationIdEmbedding': 0.6271969873068627, 'dropout_timeStampEmbedding': 0.2435602882649876, 'dropout_transformers': 0.571146930296943, 'early_stopping': 4, 'encoder_only': False, 'epochs_classifcation_only': 67, 'input_size': 2, 'learnable_pos_encoding': True, 'base_lr': 1.0212463224330208e-05, 'max_lr': 0.20911668943063563, 'mode': 'triangular', 'scheduler': 'CyclicLR', 'step_size_up': 2, 'dropout_lstm': 0.3936608039544467, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 6, 'num_layers_transformer': 6, 'amsgrad': False, 'beta_1': 0.8447917293198515, 'beta_2': 0.9595394544281279, 'eps': 3.5366



[36m(eval_config pid=236102)[0m {'activation': 'Hardshrink', 'activation_transformers': 'Softmin', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1104, 'dropout': 0.22555105167793071, 'dropout_StationIdEmbedding': 0.057793441448606354, 'dropout_timeStampEmbedding': 0.11579851747214598, 'dropout_transformers': 0.19413105209070972, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 53, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.5147370506522114, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 24, 'num_layers_transformer': 3, 'alpha': 0.9787207652151018, 'centered': True, 'eps': 2.5263767063959107e-07, 'lr': 3.394482902763441e-05, 'momentum': 0.33355430267137515, 'optimizer': 'RMSprop', 'weight_decay': 1.036799336866004e-06

[36m(eval_config pid=236102)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=236102)[0m epoch:  0 loss :  7.015030369391808 acc:  0.1607083044905433
[36m(eval_config pid=236102)[0m epoch:  1 loss :  5.287693698589618 acc:  0.27023647366188064
[36m(eval_config pid=236102)[0m epoch:  2 loss :  4.328094405394334 acc:  0.3299019717303441
[36m(eval_config pid=236102)[0m epoch:  3 loss :  3.7455423538501447 acc:  0.3635977938056852
[36m(eval_config pid=236102)[0m epoch:  4 loss :  3.3674745853130634 acc:  0.38514614920840495
[36m(eval_config pid=236102)[0m epoch:  5 loss :  3.1079805465844963 acc:  0.40035281245115334
[36m(eval_config pid=236102)[0m epoch:  6 loss :  2.923987399614774 acc:  0.4103789384364603
[36m(eval_config pid=236102)[0m epoch:  7 loss :  2.7815703226969792 acc:  0.4176361565772726
[36m(eval_config pid=236102)[0m epoch:  8 loss :  2.6703746758974516 acc:  0.4207846727552866
[36m(eval_config pid=236102)[0m epoch:  9 loss :  2.5818268262423003 acc:  0.4249603644240002
[36m(eval_config pid=236102)[0m epoch: 



[36m(eval_config pid=236102)[0m {'activation': 'Softshrink', 'activation_transformers': 'ReLU6', 'batch_size': 16, 'concatenate_features': True, 'd_model': 1176, 'dropout': 0.5070853730140505, 'dropout_StationIdEmbedding': 0.16587801342984249, 'dropout_timeStampEmbedding': 0.29205325348036293, 'dropout_transformers': 0.12497726841988827, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 78, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.7103881081913198, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.456481359098961, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': True, 'beta_1': 0.935144399029113, 'beta_2': 0.9656618270983361, 'eps': 4.8574369406385944e-08, 'lr': 0.00010406861423571612, 'optimizer': 'AdamW', 'weight_de



[36m(eval_config pid=236102)[0m {'activation': 'Mish', 'activation_transformers': 'Hardswish', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1344, 'dropout': 0.19405620811699947, 'dropout_StationIdEmbedding': 0.12029553523734873, 'dropout_timeStampEmbedding': 0.33094222517985805, 'dropout_transformers': 0.7549187860890786, 'early_stopping': 1, 'encoder_only': False, 'epochs_classifcation_only': 42, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'dropout_lstm': 0.6479502864819442, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'SELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 4, 'amsgrad': False, 'beta_1': 0.9006984236608723, 'beta_2': 0.9742574372367876, 'eps': 7.950082241770404e-08, 'lr': 7.348729687445295e-06, 'optimizer': 'AdamW', 'weight_decay': 2.493938682682645e-05, 'positive



[36m(eval_config pid=236102)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'Tanhshrink', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1056, 'dropout': 0.6293800358023907, 'dropout_StationIdEmbedding': 0.024550087264256136, 'dropout_timeStampEmbedding': 0.5318561847851061, 'dropout_transformers': 0.8262808500245482, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 65, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.2041077143474148, 'scheduler': 'StepLR', 'step_size': 23, 'dropout_lstm': 0.10470707377267989, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 12, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.830053753345513, 'beta_2': 0.9540680567416152, 'eps': 1.3428144563166173e-07, 'lr': 6.600719555550121e-07, 'optimizer': '



[36m(eval_config pid=236102)[0m epoch:  0 loss :  8.021060694181003 acc:  0.0006252372552084496
[36m(eval_config pid=236102)[0m epoch:  1 loss :  8.020760345458985 acc:  0.0006922269611236406
[36m(eval_config pid=236102)[0m epoch:  2 loss :  8.020595594552846 acc:  0.000781546569010562
[36m(eval_config pid=236102)[0m epoch:  3 loss :  8.020387854942909 acc:  0.0008931960788692137
[36m(eval_config pid=236102)[0m epoch:  4 loss :  8.020163858853854 acc:  0.0009378558828126744
[36m(eval_config pid=236102)[0m epoch:  5 loss :  8.020004573235145 acc:  0.0010048455887278656
[36m(eval_config pid=236102)[0m epoch:  6 loss :  8.019789006159856 acc:  0.0011388250005582475
[36m(eval_config pid=236102)[0m epoch:  7 loss :  8.019576835632325 acc:  0.0012281446084451688
[36m(eval_config pid=236102)[0m epoch:  8 loss :  8.019430586007925 acc:  0.0012728044123886295
[36m(eval_config pid=236102)[0m epoch:  9 loss :  8.019220586923453 acc:  0.0012728044123886295
[36m(eval_config pid



[36m(eval_config pid=236102)[0m {'activation': 'Softsign', 'activation_transformers': 'CELU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 528, 'dropout': 0.794232607505785, 'dropout_StationIdEmbedding': 0.4210913947066786, 'dropout_timeStampEmbedding': 0.17741066482794376, 'dropout_transformers': 0.22589320678744376, 'early_stopping': 9, 'encoder_only': True, 'epochs_classifcation_only': 46, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.8633564207247018, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 6, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 4, 'alpha': 0.9001533125079709, 'centered': True, 'eps': 2.2772687555266254e-06, 'lr': 0.00029935152808090805, 'momentum': 0.19280486949470169, 'optimizer': 'RMSprop', 'weight_decay': 1.9016187326978194e-05, 'positiv



[36m(eval_config pid=236102)[0m {'activation': 'Hardswish', 'activation_transformers': 'PReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 960, 'dropout': 0.0709098155711059, 'dropout_StationIdEmbedding': 0.30945856059094634, 'dropout_timeStampEmbedding': 0.9294054882340348, 'dropout_transformers': 0.012087171029021349, 'early_stopping': 6, 'encoder_only': False, 'epochs_classifcation_only': 76, 'input_size': 2, 'learnable_pos_encoding': False, 'base_lr': 1.5884807139281368e-05, 'max_lr': 0.07091185906973357, 'mode': 'triangular', 'scheduler': 'CyclicLR', 'step_size_up': 5, 'dropout_lstm': 0.3281658900891189, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 24, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.839472343702114, 'beta_2': 0.9517423218372957, 'eps': 1.992314286



[36m(eval_config pid=236102)[0m {'activation': 'ELU', 'activation_transformers': 'LogSigmoid', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1440, 'dropout': 0.30684583624769574, 'dropout_StationIdEmbedding': 0.2655651355688701, 'dropout_timeStampEmbedding': 0.13842146147581633, 'dropout_transformers': 0.42152062585316596, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 12, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.28580213789945247, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'ELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.9228975514913261, 'beta_2': 0.9580603442310848, 'eps': 7.153323289813479e-09, 'lr': 0.003161554982354393, 'optimizer': 'AdamW', 'weight_decay': 1.5957521151853145e-05, 'positive



[36m(eval_config pid=236102)[0m epoch:  0 loss :  8.03023234535666 acc:  0.0913962887702923
[36m(eval_config pid=236102)[0m epoch:  1 loss :  7.705863131194556 acc:  0.14230846526583749
[36m(eval_config pid=236102)[0m epoch:  2 loss :  7.6544465938536055 acc:  0.14672978585624008
[36m(eval_config pid=236102)[0m epoch:  3 loss :  7.654393504647648 acc:  0.14789094075877007
[36m(eval_config pid=236102)[0m epoch:  4 loss :  7.6493695523558545 acc:  0.14827054909228948
[36m(eval_config pid=236102)[0m epoch:  5 loss :  7.654211388916528 acc:  0.14844918830806333
[36m(eval_config pid=236102)[0m epoch:  6 loss :  7.647995111321201 acc:  0.1483598687001764
[36m(eval_config pid=236102)[0m epoch:  7 loss :  7.636520105249741 acc:  0.148225889288346
[36m(eval_config pid=236102)[0m epoch:  8 loss :  7.645323436801173 acc:  0.14831520889623295
[36m(eval_config pid=236102)[0m epoch:  9 loss :  7.646690697229209 acc:  0.1483375387982047
[36m(eval_config pid=236102)[0m epoch:  10

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-07 09:24:55,526	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-07 09:25:10,292	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-07 09:25:10,293	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_5        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 20              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_5
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_5`
[36m(eval_config pid=282961)[0m CUDA is available. Using GPU.




[36m(eval_config pid=282961)[0m {'activation': 'Softplus', 'activation_transformers': 'RReLU', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1008, 'dropout': 0.16821195820533483, 'dropout_StationIdEmbedding': 0.14841863350246676, 'dropout_timeStampEmbedding': 0.3632658260507174, 'dropout_transformers': 0.5511763209829913, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 73, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.6676595277172814, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 24, 'num_layers_transformer': 2, 'amsgrad': True, 'beta_1': 0.9093179418325481, 'beta_2': 0.9564538859282321, 'eps': 4.289354539422935e-07, 'lr': 6.059474741974039e-05, 'optimizer': 'AdamW', 'weight_decay': 1.0038778269415936e-07, 'positive_



[36m(eval_config pid=282961)[0m {'activation': 'Softsign', 'activation_transformers': 'ReLU', 'batch_size': 64, 'concatenate_features': False, 'd_model': 1368, 'dropout': 0.5624398422114418, 'dropout_StationIdEmbedding': 0.35466211127264585, 'dropout_timeStampEmbedding': 0.48452930264041183, 'dropout_transformers': 0.18011858070552153, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 44, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 8, 'factor': 0.43227729393732117, 'patience': 6, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.027386100625346726, 'lstm_model': False, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'alpha': 0.9570807031054822, 'centered': False, 'eps': 7.349415435916268e-09, 'lr': 1.4219526344956858e-06, 'momentum': 0.44916068013737803, 'optimizer': 'RMSprop', 'weight_decay': 0.0015297187478221102, 'positive_function': 'exp', 'reg': False, 'transfo



[36m(eval_config pid=282961)[0m {'activation': 'SELU', 'activation_transformers': 'Tanhshrink', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1176, 'dropout': 0.2572423849012085, 'dropout_StationIdEmbedding': 0.7291587657077927, 'dropout_timeStampEmbedding': 0.653891060993218, 'dropout_transformers': 0.09311329840059308, 'early_stopping': 1, 'encoder_only': False, 'epochs_classifcation_only': 48, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.5125915866913783, 'scheduler': 'StepLR', 'step_size': 9, 'dropout_lstm': 0.9790457332190934, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 12, 'num_layers_transformer': 5, 'lr': 0.0025721432851743384, 'momentum': 0.16851136418441132, 'nesterov': False, 'optimizer': 'SGD', 'weight_decay': 2.44946659037716e-09, 'positive_function



[36m(eval_config pid=282961)[0m CUDA is available. Using GPU.
[36m(eval_config pid=282961)[0m {'activation': 'Tanh', 'activation_transformers': 'Tanh', 'batch_size': 128, 'concatenate_features': True, 'd_model': 768, 'dropout': 0.14314346166955466, 'dropout_StationIdEmbedding': 0.17727136502200258, 'dropout_timeStampEmbedding': 0.10401683259798905, 'dropout_transformers': 0.4469283567262584, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 55, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.7974220468877724, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.86529213639288, 'beta_2': 0.9659153731645884, 'eps': 2.6910702033406116e-07, 'lr': 0.00011208769197689375, 'optimizer': 



[36m(eval_config pid=282961)[0m CUDA is available. Using GPU.
[36m(eval_config pid=282961)[0m {'activation': 'ReLU', 'activation_transformers': 'Hardswish', 'batch_size': 128, 'concatenate_features': True, 'd_model': 336, 'dropout': 0.09855337001364473, 'dropout_StationIdEmbedding': 0.5748062449705604, 'dropout_timeStampEmbedding': 0.0894199242841272, 'dropout_transformers': 0.605230251754783, 'early_stopping': 6, 'encoder_only': True, 'epochs_classifcation_only': 36, 'input_size': 2, 'learnable_pos_encoding': True, 'T_max': 8, 'eta_min': 0.03614893051563325, 'scheduler': 'CosineAnnealingLR', 'dropout_lstm': 0.9173466203406977, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'GELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.8290402123059228, 'beta_2': 0.9775379365381849, 'eps': 1.3



[36m(eval_config pid=282961)[0m {'activation': 'Tanh', 'activation_transformers': 'Tanh', 'batch_size': 32, 'concatenate_features': True, 'd_model': 768, 'dropout': 0.04103687552901361, 'dropout_StationIdEmbedding': 0.4395743172978708, 'dropout_timeStampEmbedding': 0.036792508767713644, 'dropout_transformers': 0.6496266306883016, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 56, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.806981315373564, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.8150818212122708, 'beta_2': 0.9731752652478046, 'eps': 1.1805766476111629e-07, 'lr': 0.00021509996998583062, 'optimizer': 'AdamW', 'weight_decay': 0.025623470556296005, 'positive_functio



[36m(eval_config pid=282961)[0m CUDA is available. Using GPU.
[36m(eval_config pid=282961)[0m {'activation': 'Tanh', 'activation_transformers': 'Tanh', 'batch_size': 16, 'concatenate_features': False, 'd_model': 288, 'dropout': 0.1397743165799917, 'dropout_StationIdEmbedding': 0.18181095374153725, 'dropout_timeStampEmbedding': 0.8256580178311448, 'dropout_transformers': 0.7200530998825372, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 54, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.5555528912478276, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 3, 'num_layers_transformer': 1, 'amsgrad': True, 'beta_1': 0.8632594503835318, 'beta_2': 0.9701106869242241, 'eps': 1.1557901385771028e-06, 'lr': 0.000459284499588945, 'optimizer':



[36m(eval_config pid=282961)[0m {'activation': 'Tanh', 'activation_transformers': 'Tanh', 'batch_size': 64, 'concatenate_features': True, 'd_model': 888, 'dropout': 0.23038104592408168, 'dropout_StationIdEmbedding': 0.37824620541798815, 'dropout_timeStampEmbedding': 0.556171862073551, 'dropout_transformers': 0.6258576086909705, 'early_stopping': 7, 'encoder_only': False, 'epochs_classifcation_only': 59, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.12889488100863017, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.6925845555252007, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 1, 'alpha': 0.9380614590332889, 'centered': True, 'eps': 2.7652696346231813e-08, 'lr': 0.004579984015833657, 'momentum': 0.08043679108743235, 'optimizer': 'RMSprop', 'weight_deca



[36m(eval_config pid=282961)[0m CUDA is available. Using GPU.
[36m(eval_config pid=282961)[0m {'activation': 'Softsign', 'activation_transformers': 'LeakyReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 432, 'dropout': 0.3447829323125935, 'dropout_StationIdEmbedding': 0.23056574861372098, 'dropout_timeStampEmbedding': 0.39104866879312883, 'dropout_transformers': 0.4656398232078552, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 63, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.7793811260972364, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 6, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 1, 'lr': 1.9836763426450555e-06, 'momentum': 0.410560820313398, 'nesterov': True, 'optimizer': 'SGD', 'weight_decay': 0.000352682807235384, 'pos



[36m(eval_config pid=282961)[0m CUDA is available. Using GPU.
[36m(eval_config pid=282961)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Mish', 'batch_size': 128, 'concatenate_features': True, 'd_model': 72, 'dropout': 0.2081011915750497, 'dropout_StationIdEmbedding': 0.10584970923744956, 'dropout_timeStampEmbedding': 0.23603122081229674, 'dropout_transformers': 0.6769297756991051, 'early_stopping': 6, 'encoder_only': True, 'epochs_classifcation_only': 33, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 9, 'factor': 0.8002633200469347, 'patience': 9, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.0024560988206451776, 'dropout_lstm': 0.7493994813432109, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 192, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 12, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8410972



[36m(eval_config pid=282961)[0m epoch:  0 loss :  5.732600860253066 acc:  0.22524172118884397
[36m(eval_config pid=282961)[0m epoch:  1 loss :  3.6409438955569695 acc:  0.2976575932831655
[36m(eval_config pid=282961)[0m epoch:  2 loss :  3.1208380467877417 acc:  0.33186700310385636
[36m(eval_config pid=282961)[0m epoch:  3 loss :  2.83414418540315 acc:  0.3586628854699328
[36m(eval_config pid=282961)[0m epoch:  4 loss :  2.636648019630752 acc:  0.36761717616059664
[36m(eval_config pid=282961)[0m epoch:  5 loss :  2.4689917585806933 acc:  0.37829086930308375
[36m(eval_config pid=282961)[0m epoch:  6 loss :  2.332380659565954 acc:  0.3740928477323985
[36m(eval_config pid=282961)[0m epoch:  7 loss :  2.2372386284217147 acc:  0.3795860036174441
[36m(eval_config pid=282961)[0m epoch:  8 loss :  2.0898773427494985 acc:  0.38659759283656747
[36m(eval_config pid=282961)[0m epoch:  9 loss :  2.003453396037667 acc:  0.38161802469687156
[36m(eval_config pid=282961)[0m epoch:

[36m(eval_config pid=282961)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=282961)[0m epoch:  0 loss :  7.586122708571585 acc:  0.008172744121653306
[36m(eval_config pid=282961)[0m epoch:  1 loss :  7.243910222304494 acc:  0.011097961279949982
[36m(eval_config pid=282961)[0m epoch:  2 loss :  6.962121827978837 acc:  0.08255364758948708
[36m(eval_config pid=282961)[0m epoch:  3 loss :  6.44713884654798 acc:  0.14748900252327893
[36m(eval_config pid=282961)[0m epoch:  4 loss :  5.828361671849301 acc:  0.18623138244423107
[36m(eval_config pid=282961)[0m epoch:  5 loss :  5.4065143685591845 acc:  0.21682334814550164
[36m(eval_config pid=282961)[0m epoch:  6 loss :  5.085841653221532 acc:  0.229462072661501
[36m(eval_config pid=282961)[0m epoch:  7 loss :  4.892646741867066 acc:  0.2512337270839381
[36m(eval_config pid=282961)[0m epoch:  8 loss :  4.697303681624563 acc:  0.27369760846749885
[36m(eval_config pid=282961)[0m epoch:  9 loss :  4.612124832052934 acc:  0.27157626778018445
[36m(eval_config pid=282961)[0m epoch: 



[36m(eval_config pid=282961)[0m {'activation': 'Hardtanh', 'activation_transformers': 'Hardsigmoid', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1296, 'dropout': 0.0850547714230167, 'dropout_StationIdEmbedding': 0.2826457984476207, 'dropout_timeStampEmbedding': 0.4324662733899777, 'dropout_transformers': 0.24037013476907143, 'early_stopping': 7, 'encoder_only': False, 'epochs_classifcation_only': 52, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.8371274296683918, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8587365476729337, 'beta_2': 0.9607861888411813, 'eps': 1.0703283003693741e-08, 'lr': 3.063295314996003e-07, 'optimizer': 'AdamW', 'weight_decay': 3.735596068166369e-08, 'pos



[36m(eval_config pid=282961)[0m {'activation': 'Hardsigmoid', 'activation_transformers': 'Softshrink', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1416, 'dropout': 0.057200208245928724, 'dropout_StationIdEmbedding': 0.07242704379271506, 'dropout_timeStampEmbedding': 0.06121609413872914, 'dropout_transformers': 0.4132641473989971, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 70, 'input_size': 2, 'learnable_pos_encoding': True, 'T_max': 25, 'eta_min': 0.0027280909463625025, 'scheduler': 'CosineAnnealingLR', 'dropout_lstm': 0.4519857731800705, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 24, 'num_layers_transformer': 2, 'alpha': 0.9912484729167507, 'centered': False, 'eps': 6.198526123708583e-07, 'lr': 5.636483529357195e-05, 'momentum': 0.346409223583691,



[36m(eval_config pid=282961)[0m {'activation': 'GELU', 'activation_transformers': 'Hardshrink', 'batch_size': 64, 'concatenate_features': True, 'd_model': 672, 'dropout': 0.11608960109022144, 'dropout_StationIdEmbedding': 0.3313240807004375, 'dropout_timeStampEmbedding': 0.01888233950632834, 'dropout_transformers': 0.9462678346226849, 'early_stopping': 8, 'encoder_only': True, 'epochs_classifcation_only': 67, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.9111685055502249, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 1, 'lr': 0.10625506248268199, 'momentum': 0.33939904054899034, 'nesterov': False, 'optimizer': 'SGD', 'weight_decay': 2.8476802917942196e-06, 'positive_function': 'sig', 'reg': False, 'transformers_model': True



[36m(eval_config pid=282961)[0m CUDA is available. Using GPU.
[36m(eval_config pid=282961)[0m {'activation': 'Tanhshrink', 'activation_transformers': 'Softmin', 'batch_size': 128, 'concatenate_features': True, 'd_model': 864, 'dropout': 0.14917661543852637, 'dropout_StationIdEmbedding': 0.8238426599593924, 'dropout_timeStampEmbedding': 0.19787889535725717, 'dropout_transformers': 0.3308422612571747, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 64, 'input_size': 2, 'learnable_pos_encoding': True, 'base_lr': 0.00036922382080356434, 'max_lr': 0.10734852933029443, 'mode': 'triangular2', 'scheduler': 'CyclicLR', 'step_size_up': 25, 'dropout_lstm': 0.49918763640664365, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1'



[36m(eval_config pid=282961)[0m {'activation': 'Mish', 'activation_transformers': 'Sigmoid', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1344, 'dropout': 0.28831747197292956, 'dropout_StationIdEmbedding': 0.6773714722636933, 'dropout_timeStampEmbedding': 0.16480278848759067, 'dropout_transformers': 0.28781813764833997, 'early_stopping': 10, 'encoder_only': True, 'epochs_classifcation_only': 49, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.6932986004019901, 'lstm_layer_with_layer_norm': True, 'activation_lstm': 'Sigmoid', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8909514190846709, 'beta_2': 0.9696918477138865, 'eps': 2.4547074716140227e-06, 'lr': 1.0282087948458245e-05, 'optimizer': 'AdamW', 'weight_decay': 1.687709251382846e-08, 'po



[36m(eval_config pid=282961)[0m epoch:  0 loss :  8.277191999579678 acc:  0.0013174642163320902
[36m(eval_config pid=282961)[0m epoch:  1 loss :  8.07288722831662 acc:  0.0022999799030882255
[36m(eval_config pid=282961)[0m epoch:  2 loss :  7.8712977802052215 acc:  0.0038630730411093497
[36m(eval_config pid=282961)[0m epoch:  3 loss :  7.714090395374458 acc:  0.004934908335752406
[36m(eval_config pid=282961)[0m epoch:  4 loss :  7.596345248342562 acc:  0.004823258825893754
[36m(eval_config pid=282961)[0m epoch:  5 loss :  7.495501922960041 acc:  0.005582475492932586
[36m(eval_config pid=282961)[0m epoch:  6 loss :  7.495824833877948 acc:  0.0056271352968760464
[36m(eval_config pid=282961)[0m epoch:  7 loss :  7.431319561325202 acc:  0.006810620101377755
[36m(eval_config pid=282961)[0m epoch:  8 loss :  7.412263994457341 acc:  0.007636826474331778
[36m(eval_config pid=282961)[0m epoch:  9 loss :  7.4263102066617055 acc:  0.00908827010249425
[36m(eval_config pid=2829

  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-07 12:15:48,311	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-07 12:16:03,790	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-07 12:16:03,792	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_6        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 20              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_6
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_6`
[36m(eval_config pid=325129)[0m CUDA is available. Using GPU.




[36m(eval_config pid=325129)[0m {'activation': 'Softsign', 'activation_transformers': 'Hardswish', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1080, 'dropout': 0.6982788545664582, 'dropout_StationIdEmbedding': 0.621152124953612, 'dropout_timeStampEmbedding': 0.1175394105594772, 'dropout_transformers': 0.8938253628636652, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 54, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 0, 'factor': 0.5839932175187988, 'patience': 4, 'scheduler': 'ReduceLROnPlateau', 'threshold': 2.6073916866169487e-05, 'dropout_lstm': 0.7285644466682207, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 72, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 6, 'alpha': 0.976122219792859, 'centered': False, 'eps': 2.0314563783269275e-06, 'lr': 0.00074948510



[36m(eval_config pid=325129)[0m {'activation': 'Softmin', 'activation_transformers': 'PReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1032, 'dropout': 0.17382228778505543, 'dropout_StationIdEmbedding': 0.0012113331169516517, 'dropout_timeStampEmbedding': 0.28275818844789413, 'dropout_transformers': 0.4712260195060145, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 57, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.21555833725608312, 'scheduler': 'StepLR', 'step_size': 27, 'dropout_lstm': 0.4022500782633984, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 192, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 2, 'lr': 8.330030652151283e-06, 'momentum': 0.4577617790866179, 'nesterov': False, 'optimizer': 'SGD', 'weight_decay': 5.823050087710426e-08, 'positive_funct

[36m(eval_config pid=325129)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=325129)[0m epoch:  0 loss :  8.020934224752855 acc:  0.00017863921577384274
[36m(eval_config pid=325129)[0m epoch:  1 loss :  8.0206249696422 acc:  0.00017863921577384274
[36m(eval_config pid=325129)[0m epoch:  2 loss :  8.020755283495518 acc:  0.00017863921577384274
[36m(eval_config pid=325129)[0m epoch:  3 loss :  8.02079319579439 acc:  0.00017863921577384274
[36m(eval_config pid=325129)[0m CUDA is available. Using GPU.
[36m(eval_config pid=325129)[0m GCNConv
[36m(eval_config pid=325129)[0m {'activation': 'LeakyReLU', 'activation_transformers': 'CELU', 'batch_size': 64, 'concatenate_features': False, 'd_model': 1248, 'dropout': 0.41857592377860064, 'dropout_StationIdEmbedding': 0.9992517453967049, 'dropout_timeStampEmbedding': 0.3326713947103078, 'dropout_transformers': 0.2615123560582785, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 41, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'lstm_model': Fal



[36m(eval_config pid=325129)[0m {'activation': 'RReLU', 'activation_transformers': 'GELU', 'batch_size': 128, 'concatenate_features': True, 'd_model': 600, 'dropout': 0.7737635793072686, 'dropout_StationIdEmbedding': 0.17519000252911243, 'dropout_timeStampEmbedding': 0.4092491115886116, 'dropout_transformers': 0.21433876172616176, 'early_stopping': 3, 'encoder_only': False, 'epochs_classifcation_only': 62, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.8529788182319261, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.8511654164329339, 'beta_2': 0.9709067987954072, 'eps': 3.1628252883785764e-08, 'lr': 9.136265427713556e-07, 'optimizer': 'Adam', 'weight_decay': 3.602206475017811e-07, 'positive_functi



[36m(eval_config pid=325129)[0m {'activation': 'ReLU6', 'activation_transformers': 'Hardswish', 'batch_size': 32, 'concatenate_features': True, 'd_model': 936, 'dropout': 0.9981029205156808, 'dropout_StationIdEmbedding': 0.2033947301535048, 'dropout_timeStampEmbedding': 0.45583779151986803, 'dropout_transformers': 0.7113105692359017, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 65, 'input_size': 2, 'learnable_pos_encoding': True, 'T_max': 17, 'eta_min': 0.00021331642528687843, 'scheduler': 'CosineAnnealingLR', 'dropout_lstm': 0.8131903964889645, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'LogSigmoid', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8716061299135273, 'beta_2': 0.962573402846431, 'eps': 4.079150547378938e-09, 'lr': 1.622441544049761e-05, 'opti



[36m(eval_config pid=325129)[0m epoch:  0 loss :  8.112470403630683 acc:  0.00037960833351941584
[36m(eval_config pid=325129)[0m epoch:  1 loss :  7.3238808550733205 acc:  0.036531719625750844
[36m(eval_config pid=325129)[0m epoch:  2 loss :  6.431374367247236 acc:  0.14916374517115868
[36m(eval_config pid=325129)[0m epoch:  3 loss :  4.973196557227602 acc:  0.22758636089587567
[36m(eval_config pid=325129)[0m epoch:  4 loss :  4.123810671745463 acc:  0.2691423084652658
[36m(eval_config pid=325129)[0m epoch:  5 loss :  3.855647508134233 acc:  0.2763995266060782
[36m(eval_config pid=325129)[0m epoch:  6 loss :  3.7233513010309096 acc:  0.27488109327200055
[36m(eval_config pid=325129)[0m epoch:  7 loss :  3.7441188223818513 acc:  0.28041890896098964
[36m(eval_config pid=325129)[0m epoch:  8 loss :  3.7895849714887904 acc:  0.27374226827144227
[36m(eval_config pid=325129)[0m epoch:  9 loss :  3.821441650390625 acc:  0.2801509501373289
[36m(eval_config pid=325129)[0m e



[36m(eval_config pid=325129)[0m CUDA is available. Using GPU.
[36m(eval_config pid=325129)[0m {'activation': 'Softsign', 'activation_transformers': 'ELU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 96, 'dropout': 0.4916265034111731, 'dropout_StationIdEmbedding': 0.5093633260296117, 'dropout_timeStampEmbedding': 0.23955233203332024, 'dropout_transformers': 0.7369323958193752, 'early_stopping': 6, 'encoder_only': True, 'epochs_classifcation_only': 68, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.7999477690446972, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 5, 'alpha': 0.9190346505446495, 'centered': True, 'eps': 2.8605143217570725e-08, 'lr': 1.3991877761774403e-05, 'momentum': 0.14893249973402697, 'optimizer



[36m(eval_config pid=325129)[0m {'activation': 'ELU', 'activation_transformers': 'LogSigmoid', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1128, 'dropout': 0.869414927497665, 'dropout_StationIdEmbedding': 0.23601656858465375, 'dropout_timeStampEmbedding': 0.0032633030339148383, 'dropout_transformers': 0.05808652889000432, 'early_stopping': 2, 'encoder_only': False, 'epochs_classifcation_only': 24, 'input_size': 2, 'learnable_pos_encoding': False, 'gamma': 0.5933965392884422, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.6726020993135349, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 132, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.8009660756300333, 'beta_2': 0.985718006037852, 'eps': 2.5686189920203655e-09, 'lr': 6.894282909620469e-05, 'optimizer': 'AdamW', 'weight_



[36m(eval_config pid=325129)[0m {'activation': 'CELU', 'activation_transformers': 'SELU', 'batch_size': 16, 'concatenate_features': True, 'd_model': 1368, 'dropout': 0.6402435004726911, 'dropout_StationIdEmbedding': 0.26657991713671453, 'dropout_timeStampEmbedding': 0.2171513140168797, 'dropout_transformers': 0.13938663616720098, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 79, 'input_size': 2, 'learnable_pos_encoding': True, 'cooldown': 4, 'factor': 0.12105520029752503, 'patience': 10, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.00036144919635293046, 'dropout_lstm': 0.8865671310543172, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 96, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': True, 'beta_1': 0.9416612409919667, 'beta_2': 0.9646237783716565, 'eps': 2.1929131358148982



[36m(eval_config pid=325129)[0m {'activation': 'Softplus', 'activation_transformers': 'Hardswish', 'batch_size': 128, 'concatenate_features': True, 'd_model': 840, 'dropout': 0.07991189862618484, 'dropout_StationIdEmbedding': 0.3012024150538483, 'dropout_timeStampEmbedding': 0.365765290576924, 'dropout_transformers': 0.3641449571799821, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 47, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.5149354217727644, 'scheduler': 'StepLR', 'step_size': 14, 'dropout_lstm': 0.6530071716302206, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'Hardshrink', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 5, 'max_len': 100, 'nb_batchs': 60, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8958303830087536, 'beta_2': 0.9567207186565265, 'eps': 2.4698628288045266e-07, 'lr': 5.945264842115342e-06, 'optimizer': '



[36m(eval_config pid=325129)[0m {'activation': 'Softsign', 'activation_transformers': 'RReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1200, 'dropout': 0.24879735946171516, 'dropout_StationIdEmbedding': 0.8680940493691933, 'dropout_timeStampEmbedding': 0.18624064977254484, 'dropout_transformers': 0.6641887724303077, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 72, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.5716689527608813, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 84, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 3, 'num_layers_transformer': 4, 'amsgrad': False, 'beta_1': 0.818161808879219, 'beta_2': 0.9665781557047406, 'eps': 4.982400883965902e-08, 'lr': 0.0004017445004897519, 'optimizer': 'Adam', 'weight_decay': 8.868956217206096e-07, 'positive_f



[36m(eval_config pid=325129)[0m {'activation': 'ReLU', 'activation_transformers': 'ReLU', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1416, 'dropout': 0.22228639024716934, 'dropout_StationIdEmbedding': 0.10971197929876407, 'dropout_timeStampEmbedding': 0.3222707154597041, 'dropout_transformers': 0.39271621728371553, 'early_stopping': 1, 'encoder_only': False, 'epochs_classifcation_only': 52, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'dropout_lstm': 0.7276233255277926, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 12, 'num_layers_transformer': 1, 'lr': 0.033617422096279076, 'momentum': 0.19225390445007326, 'nesterov': True, 'optimizer': 'SGD', 'weight_decay': 8.263987570378271e-07, 'positive_function': 'abs', 'reg': False, 'transformers_model': True, '



[36m(eval_config pid=325129)[0m {'activation': 'Sigmoid', 'activation_transformers': 'Hardtanh', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1296, 'dropout': 0.11903135394656124, 'dropout_StationIdEmbedding': 0.0878556598123226, 'dropout_timeStampEmbedding': 0.6977913756529432, 'dropout_transformers': 0.4441800721114561, 'early_stopping': 9, 'encoder_only': True, 'epochs_classifcation_only': 74, 'input_size': 2, 'learnable_pos_encoding': True, 'base_lr': 0.0023825539308145183, 'max_lr': 0.2417778788122593, 'mode': 'triangular', 'scheduler': 'CyclicLR', 'step_size_up': 8, 'dropout_lstm': 0.3510214778367106, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 4, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 24, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8670226986493064, 'beta_2': 0.971778584754757, 'eps': 1.7233729079



[36m(eval_config pid=325129)[0m {'activation': 'PReLU', 'activation_transformers': 'LeakyReLU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1152, 'dropout': 0.00020035101392942845, 'dropout_StationIdEmbedding': 0.03822377555890902, 'dropout_timeStampEmbedding': 0.9945591160555007, 'dropout_transformers': 0.11062839502517029, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 34, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.23974620679000014, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.889898900589533, 'beta_2': 0.9614086272391106, 'eps': 1.9514414505961705e-06, 'lr': 3.386664318693659e-06, 'optimizer': 'AdamW', 'weight_decay': 3.602725746465123e-05, 'posit



[36m(eval_config pid=325129)[0m CUDA is available. Using GPU.
[36m(eval_config pid=325129)[0m {'activation': 'Hardtanh', 'activation_transformers': 'Softplus', 'batch_size': 64, 'concatenate_features': True, 'd_model': 792, 'dropout': 0.554402115029295, 'dropout_StationIdEmbedding': 0.9708884108804527, 'dropout_timeStampEmbedding': 0.5518884414527024, 'dropout_transformers': 0.15775173013744181, 'early_stopping': 2, 'encoder_only': True, 'epochs_classifcation_only': 29, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.7436442480648722, 'scheduler': 'ExponentialLR', 'dropout_lstm': 0.5984912469305215, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'RReLU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': True, 'beta_1': 0.8556593759718351, 'beta_2': 0.9500484337849381, 'eps': 6.627517235019971e-0



[36m(eval_config pid=325129)[0m {'activation': 'SiLU', 'activation_transformers': 'Hardswish', 'batch_size': 32, 'concatenate_features': True, 'd_model': 1008, 'dropout': 0.2801632291540205, 'dropout_StationIdEmbedding': 0.4591933754349636, 'dropout_timeStampEmbedding': 0.2643943131049443, 'dropout_transformers': 0.5806967525231004, 'early_stopping': 6, 'encoder_only': False, 'epochs_classifcation_only': 60, 'input_size': 2, 'learnable_pos_encoding': False, 'cooldown': 1, 'factor': 0.36347237537102384, 'patience': 7, 'scheduler': 'ReduceLROnPlateau', 'threshold': 0.08994590584201007, 'dropout_lstm': 0.41775579499746784, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 108, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 2, 'alpha': 0.9588217578087365, 'centered': False, 'eps': 2.206698910102262e-09, 'lr': 0.0456997212452



[36m(eval_config pid=325129)[0m {'activation': 'Tanhshrink', 'activation_transformers': 'Mish', 'batch_size': 64, 'concatenate_features': True, 'd_model': 1368, 'dropout': 0.31169826214676205, 'dropout_StationIdEmbedding': 0.13160859652748813, 'dropout_timeStampEmbedding': 0.29653531065964284, 'dropout_transformers': 0.27556631911990104, 'early_stopping': 1, 'encoder_only': True, 'epochs_classifcation_only': 77, 'input_size': 2, 'learnable_pos_encoding': True, 'gamma': 0.0011398799035657325, 'scheduler': 'StepLR', 'step_size': 7, 'dropout_lstm': 0.6329342251839823, 'lstm_layer_with_layer_norm': True, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 180, 'nb_of_pos_ids': 3043, 'normalize_features': 'after', 'num_heads': 3, 'num_layers_transformer': 2, 'lr': 0.0017645426016499638, 'momentum': 0.4264976557791812, 'nesterov': False, 'optimizer': 'SGD', 'weight_decay': 1.178830112551214e-08, 'positive_func




1
loading already preprocessed data: 
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/list_users
/content/drive/MyDrive/telecomDataset6month-splited-100-without-repeated-elements_3/vocab


  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
2024-03-07 14:02:26,262	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-07 14:02:40,934	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-03-07 14:02:40,935	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------+
| Configuration for experiment     xp_num_7        |
+--------------------------------------------------+
| Search algorithm                 SearchGenerator |
| Scheduler                        FIFOScheduler   |
| Number of trials                 20              |
+--------------------------------------------------+

View detailed results here: /content/tuning/xp_num_7
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/xp_num_7`
[36m(eval_config pid=351644)[0m CUDA is available. Using GPU.




[36m(eval_config pid=351644)[0m {'activation': 'Tanh', 'activation_transformers': 'Tanh', 'batch_size': 128, 'concatenate_features': True, 'd_model': 960, 'dropout': 0.45725375589169126, 'dropout_StationIdEmbedding': 0.019039350643153267, 'dropout_timeStampEmbedding': 0.8800060290461524, 'dropout_transformers': 0.07657673102719267, 'early_stopping': 4, 'encoder_only': True, 'epochs_classifcation_only': 80, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.47297946496673576, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 6, 'max_len': 100, 'nb_batchs': 120, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 6, 'num_layers_transformer': 5, 'amsgrad': False, 'beta_1': 0.8357575159138049, 'beta_2': 0.9638104898314652, 'eps': 8.467320827795803e-09, 'lr': 0.0005690481895893917, 'optimizer': 'AdamW', 'weight_decay': 0.00011981508075403076, 'positive_fun



[36m(eval_config pid=351644)[0m {'activation': 'GELU', 'activation_transformers': 'Softshrink', 'batch_size': 64, 'concatenate_features': True, 'd_model': 888, 'dropout': 0.519654137315249, 'dropout_StationIdEmbedding': 0.07186650563184206, 'dropout_timeStampEmbedding': 0.16745292021889577, 'dropout_transformers': 0.4974269998477895, 'early_stopping': 5, 'encoder_only': True, 'epochs_classifcation_only': 68, 'input_size': 2, 'learnable_pos_encoding': True, 'T_max': 9, 'eta_min': 0.013620024809027471, 'scheduler': 'CosineAnnealingLR', 'dropout_lstm': 0.5106880460589658, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 24, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 1, 'amsgrad': False, 'beta_1': 0.9223522397587934, 'beta_2': 0.9588926512353584, 'eps': 4.1642074338271694e-08, 'lr': 0.0002467082635844658, 'optimizer': '

[36m(eval_config pid=351644)[0m   return self._call_impl(*args, **kwargs)


[36m(eval_config pid=351644)[0m epoch:  0 loss :  8.05355120207134 acc:  0.0005359176473215282
[36m(eval_config pid=351644)[0m epoch:  1 loss :  7.944669362118369 acc:  0.0013397941183038206
[36m(eval_config pid=351644)[0m epoch:  2 loss :  7.851455723611932 acc:  0.002545608824777259
[36m(eval_config pid=351644)[0m epoch:  3 loss :  7.7583009719848635 acc:  0.004064042158854923
[36m(eval_config pid=351644)[0m epoch:  4 loss :  7.6610903237995345 acc:  0.007078578925038519
[36m(eval_config pid=351644)[0m epoch:  5 loss :  7.550861900731137 acc:  0.009333899024183284
[36m(eval_config pid=351644)[0m epoch:  6 loss :  7.486959261643259 acc:  0.010227095103052497
[36m(eval_config pid=351644)[0m epoch:  7 loss :  7.393215184462698 acc:  0.010673693142487105
[36m(eval_config pid=351644)[0m epoch:  8 loss :  7.34486003173025 acc:  0.012929013241631869
[36m(eval_config pid=351644)[0m epoch:  9 loss :  7.299941479532342 acc:  0.01491637451711587
[36m(eval_config pid=351644)



[36m(eval_config pid=351644)[0m {'activation': 'Softsign', 'activation_transformers': 'GELU', 'batch_size': 64, 'concatenate_features': True, 'd_model': 504, 'dropout': 0.1611289472808294, 'dropout_StationIdEmbedding': 0.1905259635966413, 'dropout_timeStampEmbedding': 0.0742556468227499, 'dropout_transformers': 0.7958061391173161, 'early_stopping': 10, 'encoder_only': False, 'epochs_classifcation_only': 46, 'input_size': 2, 'learnable_pos_encoding': False, 'scheduler': None, 'dropout_lstm': 0.7663443880786416, 'lstm_layer_with_layer_norm': False, 'activation_lstm': 'GELU', 'lstm_layer_with_perceptron': True, 'lstm_model': True, 'num_layers_lstm': 3, 'max_len': 100, 'nb_batchs': 156, 'nb_of_pos_ids': 3043, 'normalize_features': 'before', 'num_heads': 12, 'num_layers_transformer': 2, 'amsgrad': False, 'beta_1': 0.8414817558186823, 'beta_2': 0.9555569901737565, 'eps': 1.134557639865927e-08, 'lr': 0.052657538045344315, 'optimizer': 'AdamW', 'weight_decay': 2.556462947812415e-07, 'positiv



[36m(eval_config pid=351644)[0m {'activation': 'LogSigmoid', 'activation_transformers': 'Sigmoid', 'batch_size': 128, 'concatenate_features': True, 'd_model': 1320, 'dropout': 0.051143762192593006, 'dropout_StationIdEmbedding': 0.37233317315623277, 'dropout_timeStampEmbedding': 0.34507038426161707, 'dropout_transformers': 0.41721749831833316, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 66, 'input_size': 2, 'learnable_pos_encoding': True, 'base_lr': 0.04016026631212392, 'max_lr': 0.16261453880976653, 'mode': 'triangular2', 'scheduler': 'CyclicLR', 'step_size_up': 11, 'dropout_lstm': 0.3771393692002015, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 2, 'max_len': 100, 'nb_batchs': 168, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 4, 'alpha': 0.9316564273067711, 'centered': False, 'eps': 3.5253345000905124e-06, 'lr': 8.264



[36m(eval_config pid=351644)[0m {'activation': 'RReLU', 'activation_transformers': 'Tanhshrink', 'batch_size': 128, 'concatenate_features': True, 'd_model': 744, 'dropout': 0.18397777982436023, 'dropout_StationIdEmbedding': 0.14949347815709124, 'dropout_timeStampEmbedding': 0.15176209383182665, 'dropout_transformers': 0.3343336144074971, 'early_stopping': 3, 'encoder_only': True, 'epochs_classifcation_only': 70, 'input_size': 2, 'learnable_pos_encoding': True, 'scheduler': None, 'dropout_lstm': 0.04863489924196046, 'lstm_layer_with_layer_norm': False, 'activation_lstm': None, 'lstm_layer_with_perceptron': False, 'lstm_model': True, 'num_layers_lstm': 1, 'max_len': 100, 'nb_batchs': 144, 'nb_of_pos_ids': 3043, 'normalize_features': None, 'num_heads': 3, 'num_layers_transformer': 3, 'amsgrad': False, 'beta_1': 0.9665370508517013, 'beta_2': 0.9575836482210145, 'eps': 3.5296129260855e-07, 'lr': 1.2253304990255833e-05, 'optimizer': 'AdamW', 'weight_decay': 1.4896539037074301e-06, 'positiv

In [None]:
import pickle

# Load the tuner from the pickle file
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so
# only load a tuner.pkl that was written by this project's own tuning runs.
# The path is a hardcoded Google Drive location for experiment xp_num_9; the
# cells below index the loaded object as tuner['_tune_config'].
with open('/content/drive/MyDrive/hyperparameter_tuning_projet_long/xp_num_9/tuner.pkl', 'rb') as f:
    tuner = pickle.load(f)


In [None]:
# Scratch check: build a small integer tensor and convert it to a plain Python list.
torch.tensor([1,2,3]).tolist()

In [None]:
# Read the 'num_layers' entry from the best trial's misc/vals record held by the
# search algorithm's `_hpopt_trials` object (the recorded output of this cell is
# an empty list). NOTE(review): presumably a hyperopt Trials object - confirm.
tuner['_tune_config'].search_alg._hpopt_trials.best_trial['misc']['vals']['num_layers']


[]

In [None]:
# List the attribute and method names exposed by the search algorithm's trials
# object, to find out which accessors (best_trial, losses, vals, ...) exist.
print(dir(tuner['_tune_config'].search_alg._hpopt_trials))


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_dynamic_trials', '_exp_key', '_ids', '_insert_trial_docs', '_trials', 'aname', 'argmin', 'assert_valid_trial', 'asynchronous', 'attachments', 'average_best_error', 'best_trial', 'count_by_state_synced', 'count_by_state_unsynced', 'delete_all', 'fmin', 'idxs', 'idxs_vals', 'insert_trial_doc', 'insert_trial_docs', 'losses', 'miscs', 'new_trial_docs', 'new_trial_ids', 'refresh', 'results', 'source_trial_docs', 'specs', 'statuses', 'tids', 'trial_attachments', 'trials', 'vals', 'view']


## check performance on new station

In [None]:
def evaluate_repeat(model,dataloader,device,reg=True):
  """Break next-station accuracy down by "repeat" vs "not repeat" targets.

  A point is a "repeat" when its target id (`pos_id_target`) equals the input
  id at the same step (`pos_id`); every other point is a "not repeat".
  Padding is removed by packing both tensors with the batch lengths.

  Args:
    model: called as `model(dict_batch, reg=reg)`. Its output must expose
      `out["next_station"].data`; the argmax over dim 1 is taken as the
      predicted id (assumed to follow the same packed order as the targets
      -- TODO confirm against the model).
    dataloader: iterable of dict batches holding tensors under "pos_id" and
      "pos_id_target" and the sequence lengths under "lengths".
      The tensors of each batch dict are replaced in place by their copies
      on `device`.
    device: torch.device the tensors are moved to; its type also selects the
      autocast backend.
    reg: forwarded unchanged to the model.

  Returns:
    dict with the raw counts and the ratios that are printed. A ratio whose
    denominator is zero is reported as NaN instead of raising.
  """
  def _ratio(numerator,denominator):
    # Empty categories (e.g. a loader without any "not repeat" point) must not
    # crash the whole evaluation with a ZeroDivisionError.
    return numerator/denominator if denominator else float("nan")

  model.eval()
  with torch.no_grad():
    acc=0
    nb_points=0
    repeat=0
    not_repeat=0
    correct_not_repeat=0
    correct_repeat=0
    incorrect_not_repeat_as_repeat=0
    incorrect_not_repeat=0
    for dict_batch in dataloader:
      for key in dict_batch:
        # "lengths" is left where it is; every other entry goes to `device`.
        if key!="lengths":
          dict_batch[key]=dict_batch[key].to(device)
      with autocast(device_type=device.type):
        out=model(dict_batch,reg=reg)
      target=torch.nn.utils.rnn.pack_padded_sequence(dict_batch["pos_id_target"], lengths=dict_batch["lengths"],batch_first=True, enforce_sorted=False).data
      current=torch.nn.utils.rnn.pack_padded_sequence(dict_batch["pos_id"], lengths=dict_batch["lengths"],batch_first=True, enforce_sorted=False).data
      pred=out["next_station"].data.argmax(dim=1)

      # Boolean masks replace the former per-element Python loop: same counts,
      # but a handful of reductions per batch instead of one sync per point.
      is_repeat=target==current
      is_correct=pred==target
      predicted_repeat=pred==current

      acc+=is_correct.sum().item()
      nb_points+=pred.shape[0]
      repeat+=is_repeat.sum().item()
      not_repeat+=(~is_repeat).sum().item()
      correct_repeat+=(is_repeat&is_correct).sum().item()
      correct_not_repeat+=(~is_repeat&is_correct).sum().item()
      incorrect_not_repeat+=(~is_repeat&~is_correct).sum().item()
      # Wrong "not repeat" points where the model simply predicted the
      # current station again.
      incorrect_not_repeat_as_repeat+=(~is_repeat&predicted_repeat).sum().item()

    valid_results={
      "nb_points":nb_points,
      "accuracy":_ratio(acc,nb_points),
      "repeat":repeat,
      "not_repeat":not_repeat,
      "correct_repeat":correct_repeat,
      "correct_not_repeat":correct_not_repeat,
      "incorrect_not_repeat":incorrect_not_repeat,
      "incorrect_not_repeat_as_repeat":incorrect_not_repeat_as_repeat,
      "correct_repeat_rate":_ratio(correct_repeat,repeat),
      "correct_not_repeat_rate":_ratio(correct_not_repeat,not_repeat),
      "incorrect_not_repeat_as_repeat_rate":_ratio(incorrect_not_repeat_as_repeat,incorrect_not_repeat),
    }
    print(nb_points,"repeat: ",repeat," not_repeat: ",not_repeat," correct_repeat/repeat: ",valid_results["correct_repeat_rate"]," correct_not_repeat/not_repeat: ",valid_results["correct_not_repeat_rate"],valid_results["incorrect_not_repeat_as_repeat_rate"])
    return valid_results

In [None]:
# Build the network for the checkpoint being evaluated (d_model=768, no LSTM
# layer, 6 transformer layers). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=768,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=0,
    num_layers_transformer=6,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test_0.5/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.6509191238701378  correct_not_repeat/not_repeat:  0.2762021385930769 0.4746223564954683


{}

In [None]:
# Build the network for the checkpoint being evaluated (d_model=600, 12 LSTM
# layers, no transformer layer). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=600,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=12,
    num_layers_transformer=0,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test_0.5/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.572683570872406  correct_not_repeat/not_repeat:  0.24545712973693992 0.38929461542920074


{}

In [None]:
# Build the network for the checkpoint being evaluated (d_model=600, 10 LSTM
# layers, no transformer layer). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=600,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=10,
    num_layers_transformer=0,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test_0.5/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.5703307491790515  correct_not_repeat/not_repeat:  0.22293411471430757 0.40792435839711844


{}

In [None]:
# Build the network for the checkpoint being evaluated (d_model=600, 2 LSTM
# layers, no transformer layer). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=600,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=2,
    num_layers_transformer=0,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test_0.5/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.44894038389925184  correct_not_repeat/not_repeat:  0.29502962979160746 0.2873538261112317


{}

In [None]:
# Build the network for the checkpoint being evaluated (d_model=888, 2 LSTM
# layers, 5 transformer layers). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=888,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=2,
    num_layers_transformer=5,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


KeyboardInterrupt: 

In [None]:
# Build the network for the checkpoint being evaluated (d_model=1008, 6 LSTM
# layers, 6 transformer layers). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=1008,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=6,
    num_layers_transformer=6,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test_0.5/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.7462507193879279  correct_not_repeat/not_repeat:  0.23080623646979073 0.5669490561746645


{}

In [None]:
# Build the network for the checkpoint being evaluated (d_model=1008, 2 LSTM
# layers, no transformer layer). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=1008,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=2,
    num_layers_transformer=0,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test_0.5/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.4839957344527574  correct_not_repeat/not_repeat:  0.35011261507511315 0.2974764468371467


{}

In [None]:
# Build the network for the checkpoint being evaluated (d_model=1008, 6 LSTM
# layers, 6 transformer layers). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=1008,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=6,
    num_layers_transformer=6,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test_0.5/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.6290835844138258  correct_not_repeat/not_repeat:  0.26852681988148086 0.44345460524349045


{}

In [None]:
# Build the network for the checkpoint being evaluated (d_model=888, 2 LSTM
# layers, 5 transformer layers). The sizes must match the saved weights,
# otherwise load_state_dict raises.
model=Transformer_encoder_LSTM_decoder(
    d_model=888,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=2,
    num_layers_transformer=5,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.5781001387995531  correct_not_repeat/not_repeat:  0.3046073779274453 0.4137291280148423


{}

In [None]:
# Build the network for the checkpoint being evaluated (d_model=888, 3 LSTM
# layers, 6 transformer layers).
# NOTE(review): the recorded run of this cell fails in load_state_dict with
# missing keys for transformer layer index 5 and LSTM block index 2, i.e. the
# file at "test/acc.pth" appears to hold a smaller network than the one built
# here. Confirm which checkpoint this configuration was meant for (or lower
# the layer counts to match the file) before trusting this cell.
model=Transformer_encoder_LSTM_decoder(
    d_model=888,
    nb_of_pos_ids=len(vocab)+1,
    output_regression_size=2,
    output_classfication_size=len(vocab)+1,
    num_layers_lstm=3,
    num_layers_transformer=6,
    encoder_only=False,
    nhead=12,
    learnable_pos_encoding=True,
    new_station_binary_classification=False,
    use_gcn=True,
    vocab=vocab,
    hidden_dim1=128,
    hidden_dim2=256,
    max_len=100,
    dropout=0.1,
    batch_first=True,
    concatenate_features=False,
    keep_input_positions=False,
    device=device,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("test/acc.pth", map_location=device))
evaluate_repeat(model,test_dataloader,device)

  delaunay = weights.Rook.from_dataframe(cells)


RuntimeError: Error(s) in loading state_dict for Transformer_encoder_LSTM_decoder:
	Missing key(s) in state_dict: "transformer_model.transformer.encoder.layers.5.self_attn.in_proj_weight", "transformer_model.transformer.encoder.layers.5.self_attn.in_proj_bias", "transformer_model.transformer.encoder.layers.5.self_attn.out_proj.weight", "transformer_model.transformer.encoder.layers.5.self_attn.out_proj.bias", "transformer_model.transformer.encoder.layers.5.linear1.weight", "transformer_model.transformer.encoder.layers.5.linear1.bias", "transformer_model.transformer.encoder.layers.5.linear2.weight", "transformer_model.transformer.encoder.layers.5.linear2.bias", "transformer_model.transformer.encoder.layers.5.norm1.weight", "transformer_model.transformer.encoder.layers.5.norm1.bias", "transformer_model.transformer.encoder.layers.5.norm2.weight", "transformer_model.transformer.encoder.layers.5.norm2.bias", "transformer_model.transformer.decoder.layers.5.self_attn.in_proj_weight", "transformer_model.transformer.decoder.layers.5.self_attn.in_proj_bias", "transformer_model.transformer.decoder.layers.5.self_attn.out_proj.weight", "transformer_model.transformer.decoder.layers.5.self_attn.out_proj.bias", "transformer_model.transformer.decoder.layers.5.multihead_attn.in_proj_weight", "transformer_model.transformer.decoder.layers.5.multihead_attn.in_proj_bias", "transformer_model.transformer.decoder.layers.5.multihead_attn.out_proj.weight", "transformer_model.transformer.decoder.layers.5.multihead_attn.out_proj.bias", "transformer_model.transformer.decoder.layers.5.linear1.weight", "transformer_model.transformer.decoder.layers.5.linear1.bias", "transformer_model.transformer.decoder.layers.5.linear2.weight", "transformer_model.transformer.decoder.layers.5.linear2.bias", "transformer_model.transformer.decoder.layers.5.norm1.weight", "transformer_model.transformer.decoder.layers.5.norm1.bias", "transformer_model.transformer.decoder.layers.5.norm2.weight", "transformer_model.transformer.decoder.layers.5.norm2.bias", 
"transformer_model.transformer.decoder.layers.5.norm3.weight", "transformer_model.transformer.decoder.layers.5.norm3.bias", "transformer_lstm__list.2.layer_normalisation.weight", "transformer_lstm__list.2.layer_normalisation.bias", "transformer_lstm__list.2.lstm.weight_ih_l0", "transformer_lstm__list.2.lstm.weight_hh_l0", "transformer_lstm__list.2.lstm.bias_ih_l0", "transformer_lstm__list.2.lstm.bias_hh_l0", "transformer_lstm__list.2.mlp.linear_perceptron_in.weight", "transformer_lstm__list.2.mlp.linear_perceptron_in.bias", "transformer_lstm__list.2.mlp.linear_perceptron_out.weight", "transformer_lstm__list.2.mlp.linear_perceptron_out.bias". 

In [None]:
# Score whichever `model` is currently bound in the kernel on test_dataloader.
# Classification targets equal to len(vocab) are ignored by the loss.
criterion_classification = torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
# Unreduced (per-element) squared error; `mse_loss` stays bound as an alias.
mse_loss = nn.MSELoss(reduction='none')
criterion_regression = mse_loss
evaluate(model, test_dataloader, criterion_classification, criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.8564101696062832  correct_not_repeat/not_repeat:  0.09422492401215805 0.8021341316208778


(0.643730131126935, 2.2387092113494873, 4.385555267333984)

In [None]:
# Score whichever `model` is currently bound in the kernel on test_dataloader.
# Classification targets equal to len(vocab) are ignored by the loss.
criterion_classification = torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
# Unreduced (per-element) squared error; `mse_loss` stays bound as an alias.
mse_loss = nn.MSELoss(reduction='none')
criterion_regression = mse_loss
evaluate(model, test_dataloader, criterion_classification, criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.6269508107925116  correct_not_repeat/not_repeat:  0.24020904856661782 0.4275024463247568


(0.5190344566683142, 3.329756021499634, 2.0473709106445312)

In [None]:
# Score whichever `model` is currently bound in the kernel on test_dataloader.
# Classification targets equal to len(vocab) are ignored by the loss.
criterion_classification = torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
# Unreduced (per-element) squared error; `mse_loss` stays bound as an alias.
mse_loss = nn.MSELoss(reduction='none')
criterion_regression = mse_loss
evaluate(model, test_dataloader, criterion_classification, criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.7019025694844104  correct_not_repeat/not_repeat:  0.2555815529946863 0.5174044590664747


(0.5773612306040138, 2.5278093814849854, 2.7385761737823486)

In [None]:
# Score whichever `model` is currently bound in the kernel on test_dataloader.
# Classification targets equal to len(vocab) are ignored by the loss.
criterion_classification = torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
# Unreduced (per-element) squared error; `mse_loss` stays bound as an alias.
mse_loss = nn.MSELoss(reduction='none')
criterion_regression = mse_loss
evaluate(model, test_dataloader, criterion_classification, criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.7179745421307424  correct_not_repeat/not_repeat:  0.2436203013273272 0.5562012142237641


(0.5856108172094187, 2.1441447734832764, 1.2037302255630493)

In [None]:
# Score whichever `model` is currently bound in the kernel on test_dataloader.
# Classification targets equal to len(vocab) are ignored by the loss.
criterion_classification = torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
# Unreduced (per-element) squared error; `mse_loss` stays bound as an alias.
mse_loss = nn.MSELoss(reduction='none')
criterion_regression = mse_loss
evaluate(model, test_dataloader, criterion_classification, criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.8789227800534886  correct_not_repeat/not_repeat:  0.11254947409853272 0.8136211314803864


(0.6650741059388481, 1.82258939743042, 2.1341636180877686)

In [None]:
# Score whichever `model` is currently bound in the kernel on test_dataloader.
# Classification targets equal to len(vocab) are ignored by the loss.
criterion_classification = torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
# Unreduced (per-element) squared error; `mse_loss` stays bound as an alias.
mse_loss = nn.MSELoss(reduction='none')
criterion_regression = mse_loss
evaluate(model, test_dataloader, criterion_classification, criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.8187565591252243  correct_not_repeat/not_repeat:  0.1462683956178522 0.7372573126376722


(0.6311055788439596, 2.027265787124634, 2.6414260864257812)

In [None]:
# Score whichever `model` is currently bound in the kernel on test_dataloader.
# Classification targets equal to len(vocab) are ignored by the loss.
criterion_classification = torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
# Unreduced (per-element) squared error; `mse_loss` stays bound as an alias.
mse_loss = nn.MSELoss(reduction='none')
criterion_regression = mse_loss
evaluate(model, test_dataloader, criterion_classification, criterion_regression)


163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.3789143166661024  correct_not_repeat/not_repeat:  0.3284642802475345 0.28316509280364704


(0.3648367472709855, 2.700303077697754, 3.602348566055298)

In [None]:
# Score whichever `model` is currently bound in the kernel on test_dataloader.
# Classification targets equal to len(vocab) are ignored by the loss.
criterion_classification = torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
# Unreduced (per-element) squared error; `mse_loss` stays bound as an alias.
mse_loss = nn.MSELoss(reduction='none')
criterion_regression = mse_loss
evaluate(model, test_dataloader, criterion_classification, criterion_regression)



163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.4562527506009005  correct_not_repeat/not_repeat:  0.30379829874702063 0.3240153275959545


(0.4137118868488654, 2.806699752807617, 3.5753602981567383)

## clean cache

In [None]:
import gc

# Collect unreachable Python objects first: only once the tensors they held
# are freed can empty_cache() release the corresponding cached CUDA blocks.
gc.collect()
torch.cuda.empty_cache()
# Bytes still occupied by live tensors (anything still referenced from the
# kernel namespace, such as a loaded model, is not freed by the calls above).
torch.cuda.memory_allocated()

1772397056