# Preprocessing

In [2]:
from os import makedirs
import torch
import math
import os
import string
import shutil
import subprocess
import pandas as pd
import numpy as np
import pandas as pd

def get_x(value):

    if isinstance(value, str):
        return float(value.split("/")[0])
    elif isinstance(value, float):
        return value

def get_y(value):

    if isinstance(value, str):
        return float(value.split("/")[1])
    elif isinstance(value, float):
        return value

def read_dataframe(name):
  if not os.path.exists(name+".pkl"):
    print("reading dataframe: "+name+".xlsx")
    df=pd.read_excel(name+".xlsx")
    df.to_pickle(name+".pkl")
  else:
    print("using already read daframe")

def get_vocab(poses,vocab):
  for pos in poses:
    if pos not in vocab and not any(isinstance(n, float) and math.isnan(n) for n in pos):
        vocab[pos]=len(vocab)+1
  return vocab

def get_fix_time_encoding(df):

  df['month_sin'] = np.sin(2 * np.pi * df["start time"].dt.month / 12)
  df['month_cos'] = np.cos(2 * np.pi * df["start time"].dt.month / 12)

  df['day_sin'] = np.sin(2 * np.pi * df["start time"].dt.day / 31)
  df['day_cos'] = np.cos(2 * np.pi * df["start time"].dt.day / 31)

  df['hour_sin'] = np.sin(2 * np.pi * df["start time"].dt.hour / 24)
  df['hour_cos'] = np.cos(2 * np.pi * df["start time"].dt.hour / 24)

  df['minute_sin'] = np.sin(2 * np.pi * df["start time"].dt.minute / 60)
  df['minute_cos'] = np.cos(2 * np.pi * df["start time"].dt.minute / 60)

  df['second_sin'] = np.sin(2 * np.pi * df["start time"].dt.second / 60)
  df['second_cos'] = np.cos(2 * np.pi * df["start time"].dt.second / 60)
def get_time_data(df):
  df['month'] =  df["start time"].dt.month
  df['day'] =  df["start time"].dt.day
  df['hour'] =  df["start time"].dt.hour
  df['minute'] = df["start time"].dt.minute
  df['second'] = df["start time"].dt.second
  return df


def tokenize_pos(pos,vocab):

  if math.isnan(pos[0]) and math.isnan(pos[1]):
    return len(vocab)
  else:
    return vocab[pos]

def get_coordinates(df,input_position,full_dataset):

  if full_dataset:
    df['x'] = df['latitude']
    df['y'] = df['longitude']
  else:
    df['x'] = df['location(latitude/lontitude)'].apply(get_x)
    df['y'] = df['location(latitude/lontitude)'].apply(get_y)


  if input_position:
    df['x_normalised']=(df['x']-df['x'].mean())/(df['x'].std())
    df['y_normalised']=(df['y']-df['y'].mean())/df['y'].std()

  return df

def get_joined_coordinates(df):

  df['pos']= list(zip(df['x'],df['y']))
  poses=df['pos'].unique()

  return poses

def get_col_to_keep_and_drop(fixed_time_encoding,input_position,full_dataset):
  col_to_drop_in_df=['date', 'end time','pos']
  col_to_drop_in_dict=['x','y', 'time_to_end', 'time_to_next','start time', 'user id']
  col_to_add_to_dict=[]
  col_in_input=[]
  if not full_dataset:
    col_to_drop_in_df+=['location(latitude/lontitude)']
  else:
    col_to_drop_in_df+=['latitude','longitude']
  if fixed_time_encoding:
    col_to_drop_in_df+=[]
    col_to_drop_in_dict+=['month_sin', 'month_cos', 'day_sin', 'day_cos', 'hour_sin', 'hour_cos', 'minute_sin', 'minute_cos', 'second_sin', 'second_cos']
    col_in_input+=['month_sin', 'month_cos', 'day_sin', 'day_cos', 'hour_sin', 'hour_cos', 'minute_sin', 'minute_cos', 'second_sin', 'second_cos']
  else:
    col_to_add_to_dict+=['month','day','hour','minute','second']
  if input_position:
    col_to_drop_in_dict += ['x_normalised', 'y_normalised']
    col_in_input+=['x_normalised', 'y_normalised']
  return col_to_drop_in_df,col_to_drop_in_dict,col_in_input,col_to_add_to_dict

def process_user_data(df_user,vocab,col_in_input,col_to_drop_in_dict,col_to_add_to_dict):
  #get the time to next connection
  df_user["time_to_next"] =  df_user["start time"].diff(-1).dt.total_seconds()
  dict_user=df_user.to_dict('list')
  #create input
  dict_user["pos_id"],dict_user["pos_id_target"]=torch.tensor(dict_user["pos_id"][:-1]),torch.tensor(dict_user["pos_id"][1:])

  if col_in_input:
    dict_user["input"]=torch.tensor([dict_user[col] for col in col_in_input]).T
    dict_user["input"][:-1]
  if col_to_add_to_dict:
    for col in col_to_add_to_dict:
      dict_user[col]=torch.tensor(dict_user[col])
      dict_user[col][:-1]

  #print(dict_user["input"].shape,dict_user["input"].T.shape)
  dict_user["time_target"]=torch.tensor([dict_user["time_to_end"],dict_user["time_to_next"]]).T
  dict_user["time_target"]=dict_user["time_target"][:-1]
  #clean dictionnary
  for e in col_to_drop_in_dict:
    dict_user.pop(e)
  return dict_user

def normalize_output(list_users):
  #get means and stds
  time_targets=torch.cat([dict_user["time_target"] for dict_user in list_users],dim=0)
  time_targets_mean=time_targets.mean(dim=0)
  time_targets_std=time_targets.std(dim=0)
  #normalize
  for i in range(len(list_users)):
    list_users[i]["time_target"]=(list_users[i]["time_target"]-time_targets_mean)/time_targets_std
  return list_users



def process_dataframe(name,vocab,fixed_time_encoding=False,input_position=False,full_dataset=True,format=".pkl"):
  df= pd.read_pickle(name+format)
  df=df.sort_values('start time')
  df=df.drop(['month'],axis=1)

  df=get_coordinates(df,input_position,full_dataset)

  poses=get_joined_coordinates(df)
  vocab=get_vocab(poses,vocab)
  df['pos_id'] = df['pos'].apply(lambda pos: tokenize_pos(pos,vocab))

  df['time_to_end']=df['end time']-df['start time']
  df['time_to_end']=df['time_to_end'].dt.total_seconds()
  if fixed_time_encoding:
    df=get_fix_time_encoding(df)
  else:
    df=get_time_data(df)

  col_to_drop_in_df,col_to_drop_in_dict,col_in_input,col_to_add_to_dict=get_col_to_keep_and_drop(fixed_time_encoding,input_position,full_dataset)
  df=df.drop(col_to_drop_in_df, axis=1)

  df_user_group = df.groupby('user id')
  list_users=[]
  for user, df_user in df_user_group:
    if len(df_user)>=2 and not df_user['x'].isnull().values.any():
        list_users.append(process_user_data(df_user,vocab,col_in_input,col_to_drop_in_dict,col_to_add_to_dict))
  list_users=normalize_output(list_users)

  return list_users,vocab

def runcmd(cmd, verbose = False, *args, **kwargs):

    process = subprocess.Popen(
        cmd,
        stdout = subprocess.PIPE,
        stderr = subprocess.PIPE,
        text = True,
        shell = True
    )
    std_out, std_err = process.communicate()
    if verbose:
        print(std_out.strip(), std_err)
    pass

def get_raw_data(directory,src_directory,full_dataset):
  if  full_dataset:
    shutil.copytree(src_directory,directory)#telecomDataset6mont
  else:
    runcmd('wget http://sguangwang.com/dataset/telecom.zip', verbose = False)
    runcmd('unzip /content/telecom.zip')

def get_processed_dataset(load_dataset_path):
  saved_list_user_path = os.path.join(load_dataset_path,"list_users")
  saved_vocab_path = os.path.join(load_dataset_path,"vocab")
  print("loading already preprocessed data: ")
  print(saved_list_user_path)
  print(saved_vocab_path)
  list_users=torch.load(saved_list_user_path)
  vocab=torch.load(saved_vocab_path)
  return list_users,vocab

def process_raw_data(src_directory_raw_data,directory_raw_data,fixed_time_encoding,input_position,full_dataset):
  list_users=[]
  vocab={}
  if not os.path.exists(directory_raw_data):
    print('getting raw data at: '+src_directory_raw_data)
    get_raw_data(directory_raw_data,src_directory_raw_data,full_dataset)
  for name in os.listdir(directory_raw_data):
    if not name.endswith(".pkl"):
      complete_name=os.path.join(directory_raw_data,".".join(name.split(".")[:-1]))
      print("processing dataframe: "+complete_name)
      read_dataframe(complete_name)
      new_list_users,vocab= process_dataframe(complete_name,vocab,fixed_time_encoding=fixed_time_encoding,input_position=input_position,full_dataset=full_dataset)
      list_users+=new_list_users
  return list_users,vocab

def split_long_sequences(list_users,max_sequence_length):
  new_list_users=[]
  for i in range(len(list_users)):
    seq_length=list_users[i]["input"].shape[0]
    if seq_length>=max_sequence_length:
      nb_of_seq=seq_length//max_sequence_length
      rest=seq_length%max_sequence_length
      list_splitted_seq=nb_of_seq*[{}]
      rest_splitted={}
      for key in list_users[i]:
        for j in range(nb_of_seq):
          list_splitted_seq[j][key]=list_users[i][key][max_sequence_length*j:max_sequence_length*(j+1)]
        if rest>2:
          rest_splitted[key]= list_users[i][key][-rest:]
      new_list_users=new_list_users+list_splitted_seq
      if len(rest_splitted)>0:
        new_list_users+=[rest_splitted]
    else:
      new_list_users.append(list_users[i])

  return new_list_users

def save_processed_data(list_users,vocab,path_to_save_dataset):
    print("creating directory: "+path_to_save_dataset)
    os.makedirs(path_to_save_dataset,exist_ok=True)
    print("saving processed data at: ")
    save_list_user_path = os.path.join(path_to_save_dataset,"list_users")
    save_vocab_path = os.path.join(path_to_save_dataset,"vocab")
    print(save_list_user_path)
    print(save_vocab_path)
    torch.save(list_users,save_list_user_path)
    torch.save(vocab,save_vocab_path)

def get_processed_data(src_directory_raw_data="drive/MyDrive/Shanghai-Telcome-Six-Months-DataSet",directory_raw_data='/content/dataset-telecom',fixed_time_encoding=False,input_position=False,full_dataset=True,spliting_long_sequences=True,max_sequence_length=100,min_sequence_length=3,save=False,path_to_save_dataset="/content/drive/MyDrive/telecomDataset6month",download=False,load_dataset_path="/content/drive/MyDrive/telecomDataset6month"):
  if not download:
    list_users,vocab = get_processed_dataset(load_dataset_path)
  else:
    list_users,vocab=process_raw_data(src_directory_raw_data,directory_raw_data,fixed_time_encoding,input_position,full_dataset)
  if spliting_long_sequences:
    print("spliting sequences longuer than : "+str(max_sequence_length)+ " steps")
    list_users=split_long_sequences(list_users,max_sequence_length)
  if save:
    save_processed_data(list_users,vocab,path_to_save_dataset)
  return list_users,vocab

In [3]:
list_users,vocab=get_processed_data(src_directory_raw_data="drive/MyDrive/Shanghai-Telcome-Six-Months-DataSet",
                                    directory_raw_data='/content/dataset-telecom-6month',
                                    fixed_time_encoding=False,
                                    input_position=True,
                                    full_dataset=True,
                                    spliting_long_sequences=True,
                                    max_sequence_length=100,
                                    min_sequence_length=3,
                                    save=True,
                                    path_to_save_dataset="/content/drive/MyDrive/telecomDataset6month-splited-100",
                                    download=False,
                                    load_dataset_path="/content/drive/MyDrive/telecomDataset6month-splited-100",)

loading already preprocessed data: 
/content/drive/MyDrive/telecomDataset6month-splited-100/list_users
/content/drive/MyDrive/telecomDataset6month-splited-100/vocab
spliting sequences longuer than : 100 steps
creating directory: /content/drive/MyDrive/telecomDataset6month-splited-100
saving processed data at: 
/content/drive/MyDrive/telecomDataset6month-splited-100/list_users
/content/drive/MyDrive/telecomDataset6month-splited-100/vocab


In [4]:
for user in list_users:
  if user["input"].shape[0]>100:
    print(user["input"].shape[0])


In [5]:
vocab

{(31.098701, 121.582178): 1,
 (31.159304, 121.358718): 2,
 (31.401391, 121.463758): 3,
 (31.059755, 121.389723): 4,
 (31.149677, 121.729053): 5,
 (31.252264, 121.38269): 6,
 (31.111396, 121.379353): 7,
 (31.124501, 121.32791): 8,
 (30.731903, 121.334173): 9,
 (31.274417, 121.257132): 10,
 (31.121127, 121.243829): 11,
 (31.158515, 121.1384): 12,
 (31.01217, 121.265575): 13,
 (31.176807, 121.251063): 14,
 (31.19382, 121.49524): 15,
 (31.140384, 121.620758): 16,
 (31.163432, 121.354451): 17,
 (31.145387, 121.61623): 18,
 (31.127217, 121.335543): 19,
 (31.199721, 121.325753): 20,
 (30.887807, 121.346419): 21,
 (31.202883, 121.476296): 22,
 (31.051898, 121.769904): 23,
 (31.159756, 121.525433): 24,
 (31.021462, 121.321255): 25,
 (31.203987, 121.520591): 26,
 (31.339114, 121.321261): 27,
 (31.277512, 121.287969): 28,
 (30.844397, 121.528845): 29,
 (31.246097, 121.39928): 30,
 (31.346801, 121.577398): 31,
 (31.2676, 121.541803): 32,
 (31.036623, 121.377512): 33,
 (31.171101, 121.507145): 34,


# Dataset

## Reproducibility seed

In [6]:
import hashlib
import string
import random
def get_reproducible_seeds(name="ProjectLong",nb_seeds=100):
    # Calculate SHA-256 hash
    sha256_hash = hashlib.sha256(name.encode()).hexdigest()
    # Define character sets
    digits = string.digits
    # Use the hash to seed the random number generator
    hash_as_int = int(sha256_hash, 16)
    random.seed(hash_as_int)
    # Generate a random list of seed of desired length
    reproducibility_seeds = [random.randint(0,10000) for _ in range(nb_seeds)]

    return reproducibility_seeds

In [7]:
reproducibility_seed=get_reproducible_seeds()[0]

## Dataset

In [8]:
from torch.utils.data import Dataset, DataLoader
class VariableLengthDatasetWithPosID(Dataset):
    def __init__(self, time_series, transform=None):
        self.times_series=time_series

    def __len__(self):
        return len(self.times_series)

    def __getitem__(self, idx):
        user_dict=self.times_series[idx]

        return  user_dict['input'],user_dict['month'],user_dict['day'],user_dict['hour'],user_dict['minute'],user_dict['second'],user_dict['pos_id'],user_dict['pos_id_target'], user_dict['time_target']

def create_dataset(list_users,split=[0.8,0.1,0.1]):
  dataset=VariableLengthDatasetWithPosID(list_users)
  generator = torch.Generator().manual_seed(reproducibility_seed)
  dataset_list=torch.utils.data.random_split(dataset,[0.8,0.1,0.1],generator)
  return dataset_list


## Dataloader

In [9]:
def collate_fn_padd(batch):
    '''
    Padds batch of variable length

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    ## get sequence lengths
    inputs,month,day,hour,minute,second,pos_ids,pos_id_targets,time_targets=zip(*batch)
    lengths = torch.tensor([ input.shape[0] for input in inputs ])
    inputs = torch.nn.utils.rnn.pad_sequence(inputs,batch_first=True,padding_value=0)
    month = torch.nn.utils.rnn.pad_sequence(month,batch_first=True,padding_value=13)
    day = torch.nn.utils.rnn.pad_sequence(day,batch_first=True,padding_value=32)
    hour = torch.nn.utils.rnn.pad_sequence(hour,batch_first=True,padding_value=25)
    minute = torch.nn.utils.rnn.pad_sequence(minute,batch_first=True,padding_value=60)
    second = torch.nn.utils.rnn.pad_sequence(second,batch_first=True,padding_value=60)


    time_targets = torch.nn.utils.rnn.pad_sequence(time_targets,batch_first=True,padding_value=-1)
    pos_ids = torch.nn.utils.rnn.pad_sequence(pos_ids,batch_first=True,padding_value=len(vocab))
    pos_id_targets = torch.nn.utils.rnn.pad_sequence(pos_id_targets,batch_first=True,padding_value=len(vocab))


    return inputs,month,day,hour,minute,second, pos_ids, time_targets, pos_id_targets, lengths

## Instanciate

In [10]:
dataset_list=create_dataset(list_users)
train_dataset=dataset_list[0]
valid_dataset=dataset_list[1]
test_dataset=dataset_list[2]
train_dataloader=DataLoader(train_dataset,batch_size=64,collate_fn=collate_fn_padd,shuffle=True)
valid_dataloader=DataLoader(valid_dataset,batch_size=256,collate_fn=collate_fn_padd,shuffle=False)
test_dataloader=DataLoader(test_dataset,batch_size=256,collate_fn=collate_fn_padd,shuffle=False)

# Model

## Transformer Encoder followed by LSTM

In [11]:
from torch import nn, Tensor
class VanillaPositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 500):
        super().__init__()

        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        x = (x.transpose(0,1) + self.pe[:x.transpose(0,1).size(0)]).transpose(0,1)
        return self.dropout(x)

In [12]:
class LearnablePositionalEncoding(nn.Module):

    def __init__(self,d_model: int, dropout: float = 0.1, max_len: int = 500):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.positional_embedding=nn.Embedding(num_embeddings=max_len,embedding_dim= d_model)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size,seq_len, embedding_dim]``
        """
        #print(x.shape[1])
        x = x  + self.positional_embedding(torch.arange(0,x.shape[1]).cuda())
        return self.dropout(x)

In [13]:
def get_PositionalEncoding(d_model: int, dropout: float = 0.1, max_len: int = 2000, learnable=False):
  if learnable:
    return LearnablePositionalEncoding(d_model, dropout, max_len)
  else:
    return VanillaPositionalEncoding(d_model, dropout, max_len)

In [14]:
class Encoder_Decoder_Transformer(nn.Module):
    def __init__(self,d_model,num_layers=3,nhead=10,dropout=0.1,batch_first=True):
      super().__init__()
      self.transformer=torch.nn.Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_layers, num_decoder_layers=num_layers,  dropout=dropout, batch_first=batch_first)
    def forward(self,x,mask,src_key_padding_mask,is_causal):
      return self.transformer(x,
                       x,
                       src_mask=mask,
                       tgt_mask=mask,
                       memory_mask=mask,
                       src_key_padding_mask=src_key_padding_mask,
                       tgt_key_padding_mask=src_key_padding_mask,
                       memory_key_padding_mask=src_key_padding_mask,
                       src_is_causal=is_causal,
                       tgt_is_causal=is_causal,
                       memory_is_causal=is_causal)



def get_Transformer_architecture(d_model,encoder_only=False,num_layers=3,nhead=10,dropout=0.1,batch_first=True):
  if encoder_only:
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,batch_first=batch_first)
    return nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
  else:
    return Encoder_Decoder_Transformer(d_model,num_layers,nhead,dropout,batch_first=batch_first)

In [26]:
from torch import nn
from torch.nn import Embedding, LSTM

def get_mask(bath_size,sequence_length,lengths):
  mask=torch.zeros(bath_size,sequence_length).cuda()
  for i, length in enumerate(lengths):
    mask[i,length:]=1
  return mask.bool()

class MLP(nn.Module):
  def __init__(self,d_model):
    super().__init__()
    self.dim_perceptron=2*d_model
    self.linear_perceptron_in=nn.Linear(d_model,self.dim_perceptron)
    self.linear_perceptron_out=nn.Linear(self.dim_perceptron,d_model)

  def forward(self,x):
    return self.linear_perceptron_out(F.relu(self.linear_perceptron_in(x)))

class TimeStampEmbedding(nn.Module):
  def __init__(self,d_model):
    super().__init__()
    self.month_embedding = nn.Embedding(num_embeddings=13,embedding_dim=d_model)
    self.day_embedding = nn.Embedding(num_embeddings=32,embedding_dim=d_model)
    self.hour_embedding = nn.Embedding(num_embeddings=25,embedding_dim=d_model)
    self.minute_embedding = nn.Embedding(num_embeddings=60,embedding_dim=d_model)
    self.second_embedding = nn.Embedding(num_embeddings=60,embedding_dim=d_model)

  def forward(self,x,month,day,hour,minute,second):
    return x
    + self.month_embedding(month)
    + self.day_embedding(day)
    + self.hour_embedding(hour)
    + self.minute_embedding(minute)
    + self.second_embedding(second)


class Transformer_LSTM_Layer(nn.Module):
  def __init__(self,d_model,output_regression_size,output_classfication_size,embedding_dim,num_layers,num_heads,dropout=0.1,batch_first=True):
    super().__init__()
    self.layer_normalisation=torch.nn.LayerNorm(d_model)
    self.lstm=LSTM(input_size=d_model, hidden_size=d_model,batch_first=batch_first,num_layers=1,dropout=dropout)
    self.mlp=MLP(d_model)
    self.dropout=nn.Dropout(p=dropout)

  def forward(self,x,batch_sizes,sorted_indices,unsorted_indices,lengths):
    x=self.lstm(x)[0].data+x.data
    x=torch.torch.nn.utils.rnn.PackedSequence(x, batch_sizes, sorted_indices, unsorted_indices)
    x,_=torch.nn.utils.rnn.pad_packed_sequence(x, batch_first=True, padding_value=0.0)
    x=self.layer_normalisation(x)
    x=self.dropout(x)
    x=torch.nn.utils.rnn.pack_padded_sequence(x, lengths=lengths,batch_first=True, enforce_sorted=False)
    x=x.data
    x=self.mlp(x)+x
    x=torch.torch.nn.utils.rnn.PackedSequence(x, batch_sizes, sorted_indices, unsorted_indices)
    x,_=torch.nn.utils.rnn.pad_packed_sequence(x, batch_first=True, padding_value=0.0)
    x=self.layer_normalisation(x)
    x=self.dropout(x)
    return torch.nn.utils.rnn.pack_padded_sequence(x, lengths=lengths,batch_first=True, enforce_sorted=False)


class  Transformer_encoder_LSTM_decoder(nn.Module):
  def __init__(self,d_model,nb_of_pos_ids,output_regression_size,output_classfication_size,embedding_dim,num_layers_lstm,num_layers_transformer,encoder_only,nhead,learnable_pos_encoding,new_station_binary_classification,use_gcn,vocab,hidden_dim1, hidden_dim2,max_len=500,dropout=0.1,batch_first=True,concatenate_feature=True):
    super().__init__()
    self.dropout=nn.Dropout(p=dropout)
    self.layer_normalisation=torch.nn.LayerNorm(d_model)
    self.station_embeddings=nn.Embedding(num_embeddings=nb_of_pos_ids,embedding_dim=embedding_dim)
    self.timeEmbedding=TimeStampEmbedding(d_model)
    self.num_feature=2
    if num_layers_transformer>0:
      self.pos_encoder = get_PositionalEncoding(d_model, dropout, max_len,learnable_pos_encoding)
      self.transformer_model=get_Transformer_architecture(d_model,encoder_only,num_layers_transformer,nhead,dropout,batch_first)
    if num_layers_lstm>0:
      self.transformer_lstm__list = nn.ModuleList([Transformer_LSTM_Layer(d_model,output_regression_size,output_classfication_size,embedding_dim,1,nhead,dropout=dropout,batch_first=batch_first) for layer in range(num_layers_lstm)])
    self.linear_reg=nn.Linear(d_model,output_regression_size)
    self.classifier=nn.Linear(d_model,output_classfication_size)
    self.new_station_binary_classification=new_station_binary_classification
    if self.new_station_binary_classification:
      self.binary_classifier=nn.Linear(d_model,1)
    self.num_layers_transformer=num_layers_transformer
    self.num_layers_lstm=num_layers_lstm
    if self.use_gcn:
      self.num_feature+=1
      self.gcn = GCN(hidden_dim1, hidden_dim2, d_model,vocab)

  def forward(self,x,month,day,hour,minute,second,pos_id,lengths,reg=True):
    #BEFORE: x.shape=(batch_size, max_sequence_length,input_size); pos_id.shape=(batch_size,max_sequence_length)
    x=self.station_embeddings(pos_id)
    x=self.timeEmbedding(x,month,day,hour,minute,second)
    #print(self.gcn(pos_id).isnan().sum())
    #flf
    x+=self.gcn(pos_id)
    if self.num_layers_transformer>0:
      self.pos_encoder(x)
      x=self.pos_encoder(x)
      with torch.no_grad():
        mask_x = get_mask(x.shape[0],x.shape[1],lengths)
        causal_mask=torch.nn.Transformer.generate_square_subsequent_mask(x.shape[1],device= torch.device('cuda'))
      x=self.transformer_model(x,causal_mask,mask_x,is_causal=True)
      x=self.timeEmbedding(x,month,day,hour,minute,second)
      x+=self.station_embeddings(pos_id)
      x+=self.gcn(pos_id)
    #print(x.shape)
    x=torch.nn.utils.rnn.pack_padded_sequence(x, lengths=lengths,batch_first=True, enforce_sorted=False)
    batch_sizes=x.batch_sizes
    sorted_indices=x.sorted_indices
    unsorted_indices=x.unsorted_indices
    if self.num_layers_lstm>0:
      for transformer_lstm in self.transformer_lstm__list:
        x=transformer_lstm(x,batch_sizes,sorted_indices,unsorted_indices,lengths)
    x=F.relu(x.data)
    out={}
    out["next_station"]=torch.nn.utils.rnn.PackedSequence(self.classifier(x), batch_sizes, sorted_indices, unsorted_indices)
    if reg:
      out["time_regression"]=torch.nn.utils.rnn.PackedSequence(torch.exp(self.linear_reg(x)), batch_sizes, sorted_indices, unsorted_indices)
    if self.new_station_binary_classification:
      out["new_station"]=  torch.nn.utils.rnn.PackedSequence( self.binary_classifier(x), batch_sizes, sorted_indices, unsorted_indices)
    return out



## graph_deepLearning

In [16]:
!pip install libpysal

Collecting libpysal
  Downloading libpysal-4.9.2-py3-none-any.whl (2.8 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/2.8 MB[0m [31m9.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.8/2.8 MB[0m [31m44.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: libpysal
Successfully installed libpysal-4.9.2


In [17]:

x_array=[key[0] for key in vocab]
y_array=[key[1] for key in vocab]
coordinates=np.column_stack((x_array,y_array))
cells, generators = voronoi_frames(coordinates, clip="convex hull")
delaunay = weights.Rook.from_dataframe(cells)
delaunay_graph = delaunay.to_networkx()
positions = dict(zip(delaunay_graph.nodes, coordinates))
nx.set_node_attributes(delaunay_graph,positions,"coordinates")


NameError: name 'voronoi_frames' is not defined

In [18]:
import os
import torch
if 'IS_GRADESCOPE_ENV' not in os.environ:
  torch_version = str(torch.__version__)
  scatter_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
  sparse_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
  !pip install torch-scatter -f $scatter_src
  !pip install torch-sparse -f $sparse_src
  !pip install torch-geometric
  !pip install ogb

Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu121.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_scatter-2.1.2%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m73.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt21cu121
Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu121.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_sparse-0.6.18%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m39.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt21cu121
Collecting torch-geometric
  Downloading torch_geometric-2.5.0-py3-none-any.whl (1.1 MB)
[2K     [9

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx
from libpysal.cg import voronoi_frames
from libpysal import weights, examples
from torch_geometric.utils import from_networkx
from torch_geometric.nn import GCNConv
import numpy as np
def get_net(vocab):
  x_array=[key[0] for key in vocab]
  y_array=[key[1] for key in vocab]
  coordinates=np.column_stack((x_array,y_array))
  cells, generators = voronoi_frames(coordinates, clip="convex hull")
  delaunay = weights.Rook.from_dataframe(cells)
  delaunay_graph = delaunay.to_networkx()
  positions = dict(zip(delaunay_graph.nodes, coordinates))
  nx.set_node_attributes(delaunay_graph,positions,"coordinates")
  distance=np.linalg.norm(np.concatenate([delaunay_graph.nodes[index[0]]["coordinates"][None,:] for index in delaunay_graph.edges], axis=0)-np.concatenate([delaunay_graph.nodes[index[1]]["coordinates"][None,:] for index in delaunay_graph.edges], axis=0), axis=1)
  nx.set_edge_attributes(delaunay_graph,dict(zip(delaunay_graph.edges,distance)),"distance")
  net=from_networkx(delaunay_graph)
  return net

class GCN(nn.Module):
    def __init__(self, hidden_dim1, hidden_dim2, output_dim,vocab):
        super(GCN, self).__init__()
        net=get_net(vocab)
        self.edge_index=edge_index = net.edge_index.long().cuda()
        self.distance= net.distance.float().cuda()
        self.coordinates=net.coordinates.float().cuda()
        mean_distance=self.distance.mean()
        std_distance=self.distance.std()
        self.distance=(((self.distance-mean_distance)/std_distance)+1)/2

        mean_coordinates=self.coordinates.mean(dim=0)
        std_coordinates=self.coordinates.std(dim=0)
        #print(self.coordinates.shape,mean_coordinates.unsqueeze(0).shape)
        self.coordinates=(self.coordinates-mean_coordinates.unsqueeze(0))/std_coordinates.unsqueeze(0)
        #print(self.distance.isnan().sum(),"eer",self.coordinates.isnan().sum())

        self.conv1 = GCNConv(input_dim, hidden_dim1)
        self.conv2 = GCNConv(hidden_dim1, hidden_dim2)
        self.conv3 = GCNConv(hidden_dim2, output_dim)

    def forward(self, pos_id):
        x = self.conv1(self.coordinates, self.edge_index,)#self.distance)
        #print("test",x.isnan().sum())
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, self.edge_index,self.distance)
        #print(x.isnan().sum())
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv3(x, self.edge_index,self.distance)
        x=torch.cat((x,torch.zeros(1,x.shape[1]).cuda()),dim=0)
        #print(x.isnan().sum())
        return x[pos_id]


# Example usage
input_dim = 2  # Latitude and Longitude
hidden_dim1 = 128
hidden_dim2 = 256
output_dim = 768
model = GCN( hidden_dim1, hidden_dim2, output_dim,vocab).cuda()
# Assuming you have your graph data in appropriate format

output = model(torch.randint(len(vocab)+1,size=[64,100]).cuda())

  delaunay = weights.Rook.from_dataframe(cells)
  data_dict[key] = torch.as_tensor(value)


In [None]:
import torch
predicted=torch.randint(2,(100,))
pos_ids_target=torch.randint(2,(100,))
index_1=pos_ids_target.nonzero()
index_0=(pos_ids_target==0).nonzero()
slice_index_O=torch.randperm(index_0.shape[0])[:index_1.shape[0]]
predicted[index]


# Trainning

## loss

In [21]:
class Loss_next_station_classification(nn.Module):
  def __init__(self, ) -> None:
    super().__init__()
    self.criterion=torch.nn.CrossEntropyLoss(ignore_index=len(vocab))

  def forward(self,x,target_pos_ids,lengths,target):
    target_pos_ids=torch.nn.utils.rnn.pack_padded_sequence(target_pos_ids, lengths=lengths,batch_first=True, enforce_sorted=False)
    index_repeat=target.nonzero()
    index_non_repeat=(target==0).nonzero()
    nb_non_repeat=index_non_repeat.shape[0]
    slice_repeat=index_repeat[torch.randperm(index_repeat.shape[0])[:int(nb_non_repeat/3)]]
    loss_classification=self.criterion(x.data[index_non_repeat.squeeze()],target_pos_ids.data[index_non_repeat.squeeze()])
    #loss_classification+=self.criterion(x.data[slice_repeat.squeeze()],target_pos_ids.data[slice_repeat.squeeze()])
    #loss_classification=self.criterion(x.data,target_pos_ids.data)
    return loss_classification

class Loss_time_regression(nn.Module):
  def __init__(self, ) -> None:
    super().__init__()
    self.criterion = nn.MSELoss(reduction='none')
  def forward(self,y,time_targets,lengths):
    time_targets=torch.nn.utils.rnn.pack_padded_sequence(time_targets, lengths=lengths,batch_first=True, enforce_sorted=False)
    mask_time_targets = (time_targets.data != -1)
    loss_regression=self.criterion(y.data,time_targets.data)
    loss_regression = (loss_regression * mask_time_targets.float()).mean()
    return loss_regression

class Loss_new_station_binary_classification(nn.Module):
  def __init__(self, ) -> None:
    super().__init__()
    self.criterion =  nn.BCEWithLogitsLoss()
  def forward(self,z,target_pos_ids,pos_ids,lengths):
    #print(target_pos_ids)
    target_pos_ids=torch.nn.utils.rnn.pack_padded_sequence(target_pos_ids, lengths=lengths,batch_first=True, enforce_sorted=False)
    pos_ids=torch.nn.utils.rnn.pack_padded_sequence(pos_ids, lengths=lengths,batch_first=True, enforce_sorted=False)
    target=(target_pos_ids.data==pos_ids.data).type(torch.LongTensor).cuda()
    loss_classification=self.criterion(z.data.squeeze(),target.float())
    return loss_classification,target


class Total_loss(nn.Module):
  def __init__(self,new_station_binary_classification=False) -> None:
    super().__init__()
    self.loss_next_station_classification = Loss_next_station_classification()
    self.loss_time_regression = Loss_time_regression()
    self.new_station_binary_classification=new_station_binary_classification
    if self.new_station_binary_classification:
      self.loss_new_station_binary_classification=Loss_new_station_binary_classification()


  def forward(self, out, target_pos_ids, time_targets, pos_ids, lengths, reg=False):
    loss={}
    if self.new_station_binary_classification:
      loss["new_station"],target=self.loss_new_station_binary_classification(out["new_station"],target_pos_ids,pos_ids,lengths)
      #loss["total"]=loss["new_station"]
    loss["classification"]=self.loss_next_station_classification(out["next_station"],target_pos_ids,lengths,target)
    loss["total"]=loss["classification"]

    if reg:
      loss["time_regression"]=self.loss_time_regression(out["time_regression"],time_targets,lengths)
      loss["total"]+=loss["time_regression"]

    return loss





## evalution

In [22]:
def evaluate(model,dataloader,criterion):
  model.eval()
  with torch.no_grad():
    acc=0
    nb_points=0
    for x, month, day, hour, minute, second, pos_ids, time_targets, target_pos_ids, lengths in dataloader:
      batch_size=x.shape[0]
      x=x.float().cuda()
      pos_ids=pos_ids.cuda()
      time_targets = time_targets.cuda()
      target_pos_ids = target_pos_ids.cuda()
      with autocast(device_type="cuda"):
        out=model(x,month,day,hour,minute,second,pos_ids,lengths,reg=True)
        loss=criterion(out, target_pos_ids, time_targets, pos_ids, lengths,reg=True)
        target_pos_ids=torch.nn.utils.rnn.pack_padded_sequence(target_pos_ids, lengths=lengths,batch_first=True, enforce_sorted=False)
        acc+=(out["next_station"].data.argmax(dim=1)==target_pos_ids.data).sum()
        nb_points+=out["next_station"].data.shape[0]
    return acc.item()/(nb_points),loss["classification"].item(),loss["time_regression"].item()

##training

In [23]:
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch import autocast
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
def train(
          epochs_classifcation_only,
          epochs_complete_problem,
          input_size,
          num_heads,
          d_model,
          nb_of_pos_ids,
          num_layers_lstm,
          num_layers_transformer,
          encoder_only,
          output_regression_size,
          output_classfication_size,
          nb_batchs,
          dropout,
          max_len,
          weight_decay,
          lr,
          learnable_pos_encoding,
          new_station_binary_classification,
          vocab,hidden_dim1, hidden_dim2
          ):

  embedding_dim=d_model
  epochs=epochs_complete_problem+ epochs_classifcation_only
  epochs=epochs_complete_problem+ epochs_classifcation_only
  model=Transformer_encoder_LSTM_decoder(d_model=d_model,
                                         nb_of_pos_ids=nb_of_pos_ids,
                                         output_regression_size=output_regression_size,
                                         output_classfication_size=output_classfication_size,
                                         embedding_dim=embedding_dim,
                                         num_layers_lstm=num_layers_lstm,
                                         num_layers_transformer=num_layers_transformer,
                                         encoder_only=encoder_only,
                                         nhead=num_heads,
                                         learnable_pos_encoding=learnable_pos_encoding,
                                         new_station_binary_classification=new_station_binary_classification,
                                         vocab=vocab,hidden_dim1=hidden_dim1, hidden_dim2=hidden_dim2,
                                         max_len=max_len,
                                         dropout=dropout,
                                         ).cuda()
  optimizer_encoder = optim.Adam(model.parameters(),lr=lr,weight_decay=weight_decay)
  criterion= Total_loss(new_station_binary_classification=new_station_binary_classification)
  train_losses=[]
  valid_accs=[]
  valid_losses_classification=[]
  valid_losses_regression=[]
  for epoch in range(epochs):
    model.train()
    epoch_losses=[]
    i=0
    for x, month, day, hour, minute, second,pos_ids, time_targets, target_pos_ids, lengths in train_dataloader:
      #print(x.shape)
      i+=1
      if i>=nb_batchs:
        break
      optimizer_encoder.zero_grad()
      x=x.float().cuda()
      pos_ids=pos_ids.cuda()
      time_targets = time_targets.cuda()
      target_pos_ids = target_pos_ids.cuda()
      with autocast(device_type="cuda"):
        reg=epoch >= epochs_classifcation_only
        #print(x)
        out=model(x,month,day,hour,minute,second,pos_ids,lengths, reg)
        #print(out)
        #yv
        loss=criterion(out, target_pos_ids, time_targets, pos_ids, lengths, reg)
        loss["total"].backward()
        optimizer_encoder.step()
      epoch_losses.append(loss["total"].cpu().item())
      del loss
      del x
      del pos_ids
      del target_pos_ids
    loss_epoch=np.mean(epoch_losses)
    train_losses.append(loss_epoch)
    valid_acc,valid_loss_classification,valid_loss_regression=evaluate(model,valid_dataloader,criterion)
    valid_accs.append(valid_acc)
    valid_losses_classification.append(valid_loss_classification)
    valid_losses_regression.append(valid_loss_regression)
    print("epoch: "+str(epoch)+" train loss: "+str(loss_epoch)+" valid_acc: "+str(valid_acc)," valid_loss_classification: "+str(valid_loss_classification)+" valid_loss_regression: "+str(valid_loss_regression))
  return model

## Instance of training

In [41]:
model=train(
          epochs_classifcation_only=11,
          epochs_complete_problem =0,
          input_size=2,
          num_heads=12,
          d_model=888,
          nb_of_pos_ids=len(vocab)+1,
          num_layers_lstm=6,
          num_layers_transformer=8,
          encoder_only=False,
          output_regression_size=2,
          output_classfication_size=len(vocab)+1,
          nb_batchs=70,
          dropout=0.1,
          max_len=100,
          weight_decay=0,
          lr=1e-4,
          learnable_pos_encoding=True,
          new_station_binary_classification=True,
          vocab=vocab, hidden_dim1=128, hidden_dim2=256,
          #concatenate_feature=True
          )

  delaunay = weights.Rook.from_dataframe(cells)


epoch: 0 train loss: 7.65563462437063 valid_acc: 0.002446235722510971  valid_loss_classification: 9.432498931884766 valid_loss_regression: 2.30960750579834
epoch: 1 train loss: 7.284949129906254 valid_acc: 0.02267592045065513  valid_loss_classification: 10.262377738952637 valid_loss_regression: 3.6408164501190186
epoch: 2 train loss: 6.868906332098919 valid_acc: 0.07460085275901777  valid_loss_classification: 7.939478397369385 valid_loss_regression: 2.449186325073242
epoch: 3 train loss: 6.207157314687535 valid_acc: 0.13517164109427032  valid_loss_classification: 7.274097442626953 valid_loss_regression: 2.5340797901153564
epoch: 4 train loss: 5.54513268539871 valid_acc: 0.17883041299679436  valid_loss_classification: 6.845264434814453 valid_loss_regression: 2.5623276233673096
epoch: 5 train loss: 4.899852531543677 valid_acc: 0.227344309234073  valid_loss_classification: 6.284029006958008 valid_loss_regression: 2.514657974243164
epoch: 6 train loss: 4.565951654876488 valid_acc: 0.244866

In [None]:
model=train(
          epochs_classifcation_only=50,
          epochs_complete_problem =50,
          input_size=2,
          num_heads=10,
          d_model=300,
          nb_of_pos_ids=len(vocab)+1,
          num_layers_lstm=1,
          num_layers_transformer=0,
          encoder_only=False,
          output_regression_size=2,
          output_classfication_size=len(vocab)+1,
          nb_batchs=40,
          dropout=0,
          max_len=100,
          weight_decay=0,
          lr=5e-4,
          learnable_pos_encoding=True,
          new_station_binary_classification=False
          )

epoch: 0 train loss: 7.329884394621238 valid_acc: 0.19729233450561762  valid_loss_classification: 7.7301788330078125 valid_loss_regression: 1.5886894464492798
epoch: 1 train loss: 5.976874155875964 valid_acc: 0.4469141950141608  valid_loss_classification: 6.434479713439941 valid_loss_regression: 1.7368848323822021
epoch: 2 train loss: 4.723571055974716 valid_acc: 0.5731972238648034  valid_loss_classification: 5.147871971130371 valid_loss_regression: 1.7278903722763062
epoch: 3 train loss: 3.8915583231510262 valid_acc: 0.6259998132644486  valid_loss_classification: 4.435739994049072 valid_loss_regression: 1.745749831199646
epoch: 4 train loss: 3.27751027009426 valid_acc: 0.6592574149575177  valid_loss_classification: 4.031399726867676 valid_loss_regression: 1.8383245468139648
epoch: 5 train loss: 2.878502729611519 valid_acc: 0.6748934051227786  valid_loss_classification: 3.7951579093933105 valid_loss_regression: 1.8549515008926392
epoch: 6 train loss: 2.607596727517935 valid_acc: 0.6838

In [64]:
model=train(
          epochs_classifcation_only=30,
          epochs_complete_problem =0,
          input_size=2,
          num_heads=10,
          d_model=300,
          nb_of_pos_ids=len(vocab)+1,
          num_layers_lstm=1,
          num_layers_transformer=0,
          encoder_only=False,
          output_regression_size=2,
          output_classfication_size=len(vocab)+1,
          nb_batchs=70,
          dropout=0.1,
          max_len=100,
          weight_decay=0,
          lr=5e-4,
          learnable_pos_encoding=True,
          new_station_binary_classification=True,
          vocab=vocab, hidden_dim1=128, hidden_dim2=256,
          )


  delaunay = weights.Rook.from_dataframe(cells)


epoch: 0 train loss: 14.458231829214787 valid_acc: 0.27099063209984126  valid_loss_classification: 13.874046325683594 valid_loss_regression: 1.8129009008407593
epoch: 1 train loss: 11.706424934276637 valid_acc: 0.4846223273474215  valid_loss_classification: 10.610663414001465 valid_loss_regression: 1.8791038990020752
epoch: 2 train loss: 9.63578942893208 valid_acc: 0.5779029597584887  valid_loss_classification: 9.373394012451172 valid_loss_regression: 2.0248842239379883
epoch: 3 train loss: 8.174956107485121 valid_acc: 0.6242133764899941  valid_loss_classification: 8.772051811218262 valid_loss_regression: 1.958383321762085
epoch: 4 train loss: 7.144522929537123 valid_acc: 0.6485948149761912  valid_loss_classification: 7.653692245483398 valid_loss_regression: 2.0124785900115967
epoch: 5 train loss: 6.341517552085545 valid_acc: 0.6618219165292085  valid_loss_classification: 7.010973930358887 valid_loss_regression: 1.9042747020721436
epoch: 6 train loss: 5.905943448992743 valid_acc: 0.670

In [43]:
def evaluate(model,dataloader,criterion_classification,criterion_regression):
  model.eval()
  with torch.no_grad():
    acc=0
    nb_points=0
    repeat=0
    not_repeat=0
    correct_not_repeat=0
    correct_repeat=0
    incorrect_not_repeat_as_repeat=0
    incorrect_not_repeat=0
    for x, month, day, hour, minute, second, pos_ids, time_targets, target_pos_ids, lengths in dataloader:
      batch_size=x.shape[0]
      x=x.float().cuda()
      pos_ids=pos_ids.cuda()
      time_targets = time_targets.cuda()
      target_pos_ids = target_pos_ids.cuda()
      with autocast(device_type="cuda"):
        out=model(x,month,day,hour,minute,second,pos_ids,lengths,reg=True)
        x=out["next_station"]
        y=out["time_regression"]

        target_pos_ids=torch.nn.utils.rnn.pack_padded_sequence(target_pos_ids, lengths=lengths,batch_first=True, enforce_sorted=False)
        time_targets=torch.nn.utils.rnn.pack_padded_sequence(time_targets, lengths=lengths,batch_first=True, enforce_sorted=False)
        ## compute mask
        mask_time_targets = (time_targets.data != -1)
        loss_classification=criterion_classification(x.data,target_pos_ids.data)
        #print("predicted : ",x.data.argmax(dim=1),"ground truth : ",target_pos_ids.data)
        acc+=(x.data.argmax(dim=1)==target_pos_ids.data).sum()
        pred=x.data.argmax(dim=1)
        pos_ids=torch.nn.utils.rnn.pack_padded_sequence(pos_ids, lengths=lengths,batch_first=True, enforce_sorted=False)
        for i in range(len(target_pos_ids.data)):
          if target_pos_ids.data[i]==pos_ids.data[i]:
            repeat+=1

            if target_pos_ids.data[i]==pred[i]:
              correct_repeat+=1
          else:
            not_repeat+=1
            if target_pos_ids.data[i]==pred[i]:
              correct_not_repeat+=1
            if target_pos_ids.data[i]!=pred[i]:
              incorrect_not_repeat+=1

          if pred[i]==pos_ids.data[i] and target_pos_ids.data[i]!=pos_ids.data[i]:
            incorrect_not_repeat_as_repeat+=1




        nb_points+=x.data.shape[0]

        loss_regression=criterion_regression(y.data,time_targets.data)
        loss_regression = (loss_regression * mask_time_targets.data.float()).mean()
        loss=loss_classification+loss_regression
    print(nb_points,"repeat: ",repeat," not_repeat: ",not_repeat," correct_repeat/repeat: ",correct_repeat/repeat," correct_not_repeat/not_repeat: ",correct_not_repeat/not_repeat,incorrect_not_repeat_as_repeat/incorrect_not_repeat)
    return acc.item()/(nb_points),loss_classification.item(),loss_regression.item()

In [None]:
criterion_classification=torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
criterion_regression=mse_loss = nn.MSELoss(reduction='none')
evaluate(model,test_dataloader,criterion_classification,criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.9290768137039168  correct_not_repeat/not_repeat:  0.1201810588003761 0.8826394929787499


(0.703362682824141, 1.4782196283340454, 0.7996112108230591)

In [None]:
criterion_classification=torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
criterion_regression=mse_loss = nn.MSELoss(reduction='none')
evaluate(model,test_dataloader,criterion_classification,criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.9280019635058736  correct_not_repeat/not_repeat:  0.12199602020511251 0.8838912133891214


(0.7030942051535509, 1.4916588068008423, 0.8002792000770569)

In [59]:
criterion_classification=torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
criterion_regression=mse_loss = nn.MSELoss(reduction='none')
evaluate(model,test_dataloader,criterion_classification,criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.8000778631639527  correct_not_repeat/not_repeat:  0.2599330869650784 0.6543552771540007


(0.6493559586788458, 1.520933985710144, 0.8001168966293335)

In [67]:
criterion_classification=torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
criterion_regression=mse_loss = nn.MSELoss(reduction='none')
evaluate(model,test_dataloader,criterion_classification,criterion_regression)

163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.844417549680084  correct_not_repeat/not_repeat:  0.20434716056941682 0.7496014950805254


(0.6658124195329709, 1.6400545835494995, 1.670668125152588)

In [44]:
criterion_classification=torch.nn.CrossEntropyLoss(ignore_index=len(vocab))
criterion_regression=mse_loss = nn.MSELoss(reduction='none')
evaluate(model,test_dataloader,criterion_classification,criterion_regression)



163887 repeat:  118156  not_repeat:  45731  correct_repeat/repeat:  0.25378313416161685  correct_not_repeat/not_repeat:  0.32549036758435196 0.19419049471568436


(0.2737923081147376, 3.320213556289673, 2.4352128505706787)

In [38]:
import gc
#del target_pos_ids
del model
del optimizer_encoder
#del pos_ids
#del x
del loss



NameError: name 'model' is not defined

In [40]:
torch.cuda.empty_cache()
gc.collect()
torch.cuda.memory_allocated()

50673152