# Temporal Node to Vec
Authors: Mohammad Ghassemi (ghassem3@msu.edu), Sanaz Hasanzadeh

## Step 1: Import Required Libraries

In [3]:
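# NOTE: this cell currently has to be run twice. The reason is not documented;
# TODO: investigate so the notebook works with "Restart Kernel -> Run All".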
from math import ceil
from config import params
from metrics import get_metrics
from utils import graph_utils
import loader
import models

In [4]:
#!pip install pandasql
import pandas   as pd
import pandasql as ps
import networkx as nx
import numpy    as np
import datetime as datetime

## Step 2: Import Required Data

In [12]:
# Read data/people_data_u19.csv into `df`. The formatting step further down
# uses its pub_date, name_key_a and name_key_b columns.
df = pd.read_csv(filepath_or_buffer='data/people_data_u19.csv')
df

Unnamed: 0,pmid,pub_date,year,time_string,paper_post_2017,author_had_u19_in_2017,is_pi_a,name_key_a,papers_before_2017_a,papers_after_2017_a,is_pi_b,name_key_b,author_had_u19_in_2017_b,papers_before_2017_b,papers_after_2017_b
0,25565280,2015-01-21,2015,"datetime(2015,1,21,0,0)",0,0,0,G Broussard,3,13,0,R Liang,0,3,3
1,25565280,2015-01-21,2015,"datetime(2015,1,21,0,0)",0,0,1,L Tian,3,19,0,R Liang,0,3,3
2,25565280,2015-01-21,2015,"datetime(2015,1,21,0,0)",0,0,0,G Broussard,3,13,1,L Tian,0,3,19
3,25663667,2015-02-06,2015,"datetime(2015,2,6,0,0)",0,0,1,C Eroglu,20,36,0,W Chung,0,3,0
4,25663667,2015-02-06,2015,"datetime(2015,2,6,0,0)",0,0,0,N Allen,3,8,0,W Chung,0,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19592,30482694,2018-12-05,2018,"datetime(2018,12,5,0,0)",1,1,1,C Brody,5,17,1,D Tank,1,11,27
19593,30482694,2018-12-05,2018,"datetime(2018,12,5,0,0)",1,1,0,C Guo,4,2,1,D Tank,1,11,27
19594,30592258,2018-12-28,2018,"datetime(2018,12,28,0,0)",1,1,0,D Hampton,0,2,0,J Haley,1,0,2
19595,30592258,2018-12-28,2018,"datetime(2018,12,28,0,0)",1,1,0,E Marder,6,4,0,J Haley,1,0,2


## Step 3: Format the data

In [6]:
# Build the three-column edge table used to create the graph:
#   time -> publication year (taken from `pub_date`)
#   id1  -> `name_key_a`
#   id2  -> `name_key_b`
# Only the first MAX_EDGE_ROWS rows of `df` are kept.
# NOTE(review): this cap truncates the input -- confirm it is intentional.
MAX_EDGE_ROWS = 10000

# Native-pandas equivalent of
#   SELECT pub_date AS time, name_key_a AS id1, name_key_b AS id2 FROM df LIMIT 10000
# without copying the whole frame through SQLite.
graph_df = (
    df.loc[:, ['pub_date', 'name_key_a', 'name_key_b']]
      .head(MAX_EDGE_ROWS)
      .rename(columns={'pub_date': 'time',
                       'name_key_a': 'id1',
                       'name_key_b': 'id2'})
      .reset_index(drop=True)  # fresh 0..n-1 index, as a query result would have
)

# Keep only the year of each publication date (vectorised, no Python loop).
graph_df['time'] = pd.to_datetime(graph_df['time']).dt.year
graph_df

Unnamed: 0,time,id1,id2
0,2015,G Broussard,R Liang
1,2015,L Tian,R Liang
2,2015,G Broussard,L Tian
3,2015,C Eroglu,W Chung
4,2015,N Allen,W Chung
...,...,...,...
9995,2015,R Bruno,W Grueber
9996,2015,R Mann,W Grueber
9997,2015,V Voleti,W Grueber
9998,2015,C Lacefield,R Mann


## Step 4: Generate the NetworkX Graph

In [7]:
# Convert the edge table into a graph via the project loader.
# 'id1', 'id2' and 'time' are column names of `graph_df`; presumably they are the
# two endpoint columns and the edge timestamp column -- verify against
# loader.dataset_loader.df2graph.
# A fresh nx.Graph() instance is handed over as `create_using`.
graph_nx = loader.dataset_loader.df2graph(
    graph_df,
    'id1',
    'id2',
    'time',
    create_using=nx.Graph(),
)

## Step 5: Generate Node Embeddings

In [8]:
# Instantiate the tNodeEmbed model on `graph_nx`, configured with the
# 'temporal_link_prediction' task and 'results' as its dump folder.
tnodeembed = models.tNodeEmbed(
    graph_nx,
    task='temporal_link_prediction',
    dump_folder='results',
)

[2015 2016 2017 2018]
2018
2018


In [9]:
# Ask the model wrapper for its dataset. In the following cells X is indexed
# with 'train' and 'test', and y with 'train'.
(X, y) = tnodeembed.get_dataset()

In [10]:
# Overwrite the training inputs with the result of nodes2embeddings
# (presumably node entries -> embedding vectors with `dimensions` components;
# verify in utils.graph_utils), reusing the graph and train time steps held by
# the model object.
# NOTE: X['train'] is replaced in place, so this cell is not meant to be re-run
# without first re-running the get_dataset() cell.
X['train'] = graph_utils.nodes2embeddings(
    X['train'],
    graph_nx=tnodeembed.graph_nx,
    train_time_steps=tnodeembed.train_time_steps,
    dimensions=128,
)

# Train the model on the transformed inputs and the training labels.
tnodeembed.fit(X['train'], y['train'])


2021-08-24 17:39:13.045683: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-08-24 17:39:13.237568: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)




In [11]:
# Apply the same nodes2embeddings transformation to the test inputs, with the
# same graph, train time steps and dimensions (128) as used for training.
# NOTE: X['test'] is replaced in place, so this cell is not meant to be re-run
# without first re-running the get_dataset() cell.
X['test'] = graph_utils.nodes2embeddings(
    X['test'],
    graph_nx=tnodeembed.graph_nx,
    train_time_steps=tnodeembed.train_time_steps,
    dimensions=128,
)

# Display the model's predictions for the test inputs.
tnodeembed.predict(X['test'])

array([[6.99065626e-01],
       [7.52841234e-01],
       [7.97684193e-01],
       [8.25304747e-01],
       [5.89627028e-03],
       [1.45763159e-04],
       [6.63841729e-06],
       [7.04576313e-01],
       [7.50896275e-01],
       [7.74086833e-01],
       [2.61683954e-06],
       [7.24922666e-06],
       [6.51258230e-03],
       [1.54405832e-04],
       [7.64765620e-01],
       [7.56891847e-01],
       [8.51655602e-01],
       [2.02634931e-03],
       [4.04198468e-02],
       [8.85966659e-01],
       [9.99860823e-01],
       [6.45560026e-03],
       [4.01914120e-03],
       [1.57484412e-03],
       [2.11572647e-03],
       [9.99534965e-01],
       [9.99580383e-01],
       [9.99460816e-01],
       [9.97514248e-01],
       [9.97310698e-01],
       [2.67288089e-03],
       [1.99141860e-01],
       [7.31655955e-03],
       [5.56439161e-04],
       [3.36956382e-02],
       [9.99401867e-01],
       [9.95731413e-01],
       [9.95176017e-01],
       [1.11672878e-02],
       [5.62119782e-02],
