# Trend prediction for scientific concepts

In [3]:
import pickle
import random
import numpy as np
from scipy import sparse, special
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

from datetime import date, timedelta, datetime
import time
import requests
# random.seed(42)

In [6]:
# ---------------- Training arguments ----------------
SEQSIZE = 3

RATIO = 0.5  # training 0/1 ratio

NN_BATCHSIZE = 500
EPOCHS = 10
CUTOFFTYPE = 'undirctf'  # one of: 'dirctf', 'undirctf'

# ---------------- Preprocessing ----------------
NUM_OF_VERTICES = 64719  # number of vertices of the semantic net
VERBOSE = 2

number_of_years = 3
number_of_months = 1
time_snapshots = 3

raw_data_connected_ratio = 0.5

EDGES_USED = 10**6
FILTERUNCONNECTED = True
TESTSIZE = 10**3

NUM_OF_FEATURES = 5

BATCH_SIZE = 10**6
ratio = 0.5
train_test_connected_number = list()

# Random ordering of the indices 0 .. TESTSIZE-1.
rndidtest = np.random.permutation(range(TESTSIZE))

## Dataset names

Please download the datasets as a zip file from the URL below, and extract the `*.pkl` files into the root directory of this notebook.

In [None]:
# One pickle per combination of the three fields encoded in the file name,
# ordered with delta varying slowest and minedge varying fastest.
file_names = [
    f'SemanticGraph_delta_{delta}_cutoff_{cutoff}_minedge_{minedge}.pkl'
    for delta in (1, 3, 5)
    for cutoff in (0, 5, 25)
    for minedge in (1, 3)
]

In [None]:
def get_undir(matrix):
    """Return the sum of ``matrix`` and its transpose."""
    transposed = matrix.T
    return matrix + transposed

def get_matrix(pairs, number_of_vertices = 64719, convert_to_undirected = True):
    """
    Build a square CSR matrix with a 1 stored for every row of ``pairs``.

    ``pairs[:, 0]`` gives the row index and ``pairs[:, 1]`` the column index.
    When ``convert_to_undirected`` is true the result is passed through
    ``get_undir`` (matrix plus its transpose) before being returned.
    """
    row_ids, col_ids = pairs[:, 0], pairs[:, 1]
    ones = np.ones(len(pairs))
    dims = (number_of_vertices, number_of_vertices)
    adjacency = sparse.csr_matrix((ones, (row_ids, col_ids)), shape=dims)
    return get_undir(adjacency) if convert_to_undirected else adjacency

def get_degs(matrix, convert_to_undirected = True):
    """
    Return the column sums of ``matrix`` as a 1-D array.

    When ``convert_to_undirected`` is true the matrix is first passed through
    ``get_undir`` (matrix plus its transpose).
    """
    source = get_undir(matrix) if convert_to_undirected else matrix
    column_totals = source.sum(0)
    return np.array(column_totals)[0]

def get_percentiles(arr, percentile = 1):
    """
    Return the percentiles of ``arr`` at 0, ``percentile``, 2*``percentile``, ...
    (all multiples strictly below 100).
    """
    cut_points = np.arange(0, 100, percentile)
    return np.percentile(arr, cut_points)

In [None]:
VERBOSE = 0
def print_progress(print_output, importance_rank = 2, verbose = VERBOSE):
    '''
    Print ``print_output`` when ``importance_rank`` does not exceed ``verbose``.

    A tuple is unpacked into separate ``print`` arguments; anything else is
    printed as a single object.

    Verbosity levels:
    2: Every thing
    1: Only completion of each stage.
    0: Nothing

    The default for ``verbose`` is the value VERBOSE had when this function
    was defined, so later reassignments of VERBOSE only take effect when the
    caller passes it explicitly.
    '''
    if importance_rank > verbose:
        return
    if type(print_output) is tuple:
        print(*print_output)
        return
    print(print_output)

<a id='train'></a>
# 3. Creating historic training data

In [None]:
def create_training_data(full_graph,year_start,years_delta,filter_data = True, edges_used=100000,
                         vertex_degree_cutoff=10, connected_ratio = 0.5, return_graphs = False, unique_samples = False, weight = 3):
    """
    Build labelled vertex pairs for predicting links ``years_delta`` years ahead.

    Two snapshots are built from ``full_graph``: the rows whose third column is
    smaller than the number of days between 1990-01-01 and Dec 31 of
    ``year_start``, and the same for ``year_start + years_delta``. Only pairs
    with no edge in the first snapshot are kept. A pair is labelled True when
    the second snapshot has that edge with a 'weight' greater than
    ``weight - 1``.

    Reads the module-level names NUM_OF_VERTICES, CUTOFFTYPE,
    FILTERUNCONNECTED and VERBOSE.

    Parameters
    ----------
    full_graph : array indexed as [:, 0] and [:, 1] (vertex indices) and
        [:, 2] (compared against a day count since 1990-01-01).
    year_start : year of the first snapshot.
    years_delta : number of years between the two snapshots.
    filter_data : if True, positive pairs come from the second snapshot's
        edge list and negative pairs are sampled at random; if False, pairs
        are sampled at random and labelled afterwards.
    edges_used : target number of pairs.
    vertex_degree_cutoff : minimum degree (in the first snapshot) both
        vertices of a pair must have.
    connected_ratio : share of ``edges_used`` reserved for positive pairs
        (only used when ``filter_data`` is True).
    return_graphs : if True, also return the two networkx graphs.
    unique_samples : if True, duplicate candidate rows are removed first.
    weight : minimum edge weight in the second snapshot for a positive label.

    Returns
    -------
    (edges of the first snapshot, int32 array of vertex pairs, int array of
    0/1 labels), plus the list of both graphs when ``return_graphs`` is True.
    """

    # day_curr=date(2014,12,31)
    day_origin = date(1990,1,1)
    years=[year_start,year_start+years_delta]    
    
    all_G=[]
    all_edge_lists=[]
    all_sparse=[]
    for yy in years:
        print_progress(f'years_delta create: {years_delta}', 1, VERBOSE)
        print_progress(('Create Graph for ', yy),2, VERBOSE)
        day_curr=date(yy,12,31)
        # Keep every edge dated before Dec 31 of the snapshot year.
        all_edges_curr=full_graph[full_graph[:,2]<(day_curr-day_origin).days]
        # Each edge row contributes a 1; presumably repeated (v1, v2) rows add up
        # to the edge weight -- confirm against scipy's duplicate handling.
        adj_mat_sparse_curr = sparse.csr_matrix((np.ones(len(all_edges_curr)), (all_edges_curr[:,0], all_edges_curr[:,1])), shape=(NUM_OF_VERTICES,NUM_OF_VERTICES))
        # NOTE(review): from_scipy_sparse_matrix appears to have been removed in
        # NetworkX 3.0 (replaced by from_scipy_sparse_array) -- confirm the pinned version.
        G_curr=nx.from_scipy_sparse_matrix(adj_mat_sparse_curr, parallel_edges=False, create_using=None, edge_attribute='weight')

        all_G.append(G_curr)
        all_sparse.append(adj_mat_sparse_curr)
        all_edge_lists.append(all_edges_curr)

        print_progress(('    Done: Create Graph for ', yy),2, VERBOSE)
        print_progress(('    num of edges: ', G_curr.number_of_edges()),2, VERBOSE)


    # Degrees are taken from the FIRST snapshot only.
    # NOTE(review): all_degs stays undefined if CUTOFFTYPE is neither value.
    if CUTOFFTYPE == 'dirctf':
        # Column sums of the directed adjacency matrix.
        all_degs=np.array(all_sparse[0].sum(0))[0]
    ######## 
    # Modify degree calculation to adapt undirected graph
    if CUTOFFTYPE == 'undirctf':
        # Column sums of the symmetrised matrix (matrix + transpose).
        all_degs=np.array((all_sparse[0]+all_sparse[0].T).sum(0))[0]


    ## Create all edges to be predicted
    all_vertices=np.array(range(NUM_OF_VERTICES))

    unconnected_vertex_pairs=[]
    unconnected_vertex_pairs_solution=[]
    # Filtering the pairs by degree cutoff and ratio of connected to unconnected
    if filter_data == True:
        # Candidates: edges of the SECOND snapshot whose endpoints both reach the
        # degree cutoff in the first snapshot.
        large_deg_pairs = []
        for pair in all_edge_lists[1][:,0:2]: #full_graph[:,0:2]: >>> debugged for faster execution 20-11-2021
            if all_degs[pair[0]]>=vertex_degree_cutoff and all_degs[pair[1]]>=vertex_degree_cutoff:
                large_deg_pairs.append(pair)
        large_deg_pairs = np.array(large_deg_pairs)
        print_progress((f'    Cutoff size: {vertex_degree_cutoff}'),2, VERBOSE)       
        print_progress((f'    Potential pairs after cutoff: {large_deg_pairs.shape[0]}'),2, VERBOSE)

        vertex_large_degs=all_vertices[all_degs>=vertex_degree_cutoff] # vertices whose degree reaches the cutoff

        ################
        # enable or disable the unique edges here
        if unique_samples:
            large_deg_pairs = np.unique(large_deg_pairs, axis=0)
        print_progress((f'    Number of pairs: {large_deg_pairs.shape[0]}'),2, VERBOSE)
        
        # Shuffle the large deg pairs
        idxrnd = np.random.permutation(np.arange(len(large_deg_pairs)))
        large_deg_pairs = large_deg_pairs[idxrnd]

        time_start=time.time()

        for pair in large_deg_pairs:
            v1,v2 = pair[0], pair[1]

            edge_current = all_G[0].has_edge(v1,v2) # 0/1 (Boolean like): if v1 and v2 are connected now
          
            # Label: edge exists in the second snapshot with weight > weight-1.
            edge_sol = (all_G[1].get_edge_data(v1,v2)['weight'] > (weight-1)) if all_G[1].has_edge(v1, v2) else False

            if v1!=v2 and not edge_current:

                ###### Filter unconnected pairs out
                if FILTERUNCONNECTED == True:
                    # Keep positives only, up to edges_used*connected_ratio of them.
                    if edge_sol: #(not edge_sol and random.random()<0.005) or :
                        unconnected_vertex_pairs.append((v1,v2))
                        unconnected_vertex_pairs_solution.append(edge_sol)

                        if len(unconnected_vertex_pairs)%10**6==0:
                            time_end=time.time()
                            print_progress(('    edge progress (',time_end-time_start,'sec): ',len(unconnected_vertex_pairs)/10**6,'M/',edges_used/10**6,'M'),2, VERBOSE)
                            time_start=time.time()
                        if len(unconnected_vertex_pairs)>= (edges_used*connected_ratio):
                            break
                else:
                    # Keep every currently-unconnected candidate with its label.
                    # NOTE(review): this branch has no cap on the number of pairs.
                    unconnected_vertex_pairs.append((v1,v2))
                    unconnected_vertex_pairs_solution.append(edge_sol)
                    if len(unconnected_vertex_pairs)%10**6==0:
                        time_end=time.time()
                        print_progress(('    edge progress (',time_end-time_start,'sec): ',len(unconnected_vertex_pairs)/10**6,'M/',edges_used/10**6,'M'),2, VERBOSE)
                        time_start=time.time()
        ##### generating negative sample (unconnected at end year)
        train_0_samples = []
        train_0_samples_solutions = []
        vertex_large_degs_list = [v for v in vertex_large_degs]

        # Rejection sampling: random pairs of large-degree vertices that are not
        # connected in the first snapshot and have weight < `weight` (or no edge)
        # in the second one.
        # NOTE(review): random.sample raises ValueError if fewer than two vertices
        # reach the cutoff, and the loop cannot finish if no pair qualifies.
        while len(train_0_samples) < edges_used*(1-connected_ratio): #len(unconnected_vertex_pairs):
            v1,v2=random.sample(vertex_large_degs_list, 2)
            if not all_G[0].has_edge(v1,v2) and ((all_G[1].get_edge_data(v1,v2)['weight'] < weight) if all_G[1].has_edge(v1, v2) else True):
                train_0_samples.append([v1,v2])
                train_0_samples_solutions.append(0)
                if len(train_0_samples)%10**6==0:
                    print_progress((f'Negative sample size: {len(train_0_samples)}') ,2 , VERBOSE)                    
        unconnected_vertex_pairs = unconnected_vertex_pairs + train_0_samples
        unconnected_vertex_pairs_solution = unconnected_vertex_pairs_solution + train_0_samples_solutions
    #######
    # end of filtered data selection
    if filter_data == False:
        # Unfiltered mode: sample random large-degree pairs that are unconnected in
        # the first snapshot and label them from the second snapshot.
        vertex_large_degs=all_vertices[all_degs>=vertex_degree_cutoff] # vertices whose degree reaches the cutoff
        vertex_large_degs_list = [v for v in vertex_large_degs]
        all_vertices_list = [v for v in all_vertices]
        while len(unconnected_vertex_pairs) < edges_used: #len(unconnected_vertex_pairs):
            v1,v2=random.sample(vertex_large_degs_list, 2)

            if v1!=v2 and not all_G[0].has_edge(v1,v2):
                unconnected_vertex_pairs.append((v1,v2))
                temp_sol = (all_G[1].get_edge_data(v1,v2)['weight'] > (weight-1)) if all_G[1].has_edge(v1, v2) else False
                unconnected_vertex_pairs_solution.append(temp_sol)

    #########
    # end of no filter data selection
    print_progress(f'Data of year: {year_start}',1,VERBOSE)
    print_progress(('Number of unconnected vertex pairs for prediction: ', len(unconnected_vertex_pairs_solution)) ,1, VERBOSE)
    print_progress(('Number of vertex pairs that will be connected: ' , sum(unconnected_vertex_pairs_solution)) ,1, VERBOSE)
    # NOTE(review): divides by zero when no pair was collected.
    print_progress(('Ratio of vertex pairs that will be connected: ' , sum(unconnected_vertex_pairs_solution)/len(unconnected_vertex_pairs_solution)),1, VERBOSE)
    
    unconnected_vertex_pairs=np.array(unconnected_vertex_pairs, dtype= 'int32')
    unconnected_vertex_pairs_solution=np.array(list(map(int, unconnected_vertex_pairs_solution)))
    # Only the edge list of the first snapshot is returned.
    all_edge_lists=all_edge_lists[0]
    
    if return_graphs:
        return all_edge_lists, unconnected_vertex_pairs, unconnected_vertex_pairs_solution, all_G
    else:
        return all_edge_lists, unconnected_vertex_pairs, unconnected_vertex_pairs_solution

<a id='features'></a>
## 4.2 Creating features

# Matrix break down

In [None]:
def calculate_v_secs(sparse_mat, sorted_v, sec_number = 1000):
    '''
    Calculates the start and end indices of each section of v_list that
    corresponds to a section of matrix in the divided matrix.

    The matrix rows are split into ``sec_number`` sections of
    ``shape[0] // sec_number`` rows each, plus one extra trailing section when
    the row count is not a multiple of that width. The last section always
    extends to the final matrix row, which is how ``matrix_sec`` slices the
    matrix.

    Parameters
    ----------
    sparse_mat : matrix; only ``shape[0]`` is read.
    sorted_v : 2-D integer array of vertex pairs. It MUST be sorted in
        ascending order of column 0 (binary search is used).
    sec_number : number of full-width sections; must not exceed the number of
        matrix rows (a zero section width raises ZeroDivisionError).

    Returns
    -------
    Integer array of shape (number of sections, 2). Row k holds the half-open
    slice ``[start, end)`` of ``sorted_v`` whose first vertex lies in section
    k, or ``[-1, -1]`` when no pair falls in that section (including when
    ``sorted_v`` is empty).
    '''
    mat_size = sparse_mat.shape[0]
    sec_width = mat_size//sec_number

    # Same section-count rule as matrix_sec, so both index the same rows.
    has_extra_sec = mat_size % sec_width != 0
    total_secs = sec_number + 1 if has_extra_sec else sec_number

    # Row range [start, end) covered by every section.
    sec_starts = np.arange(total_secs) * sec_width
    sec_ends = sec_starts + sec_width
    # The final section runs to the last row, so no trailing rows are dropped
    # even when sec_number*sec_width < mat_size without an extra section.
    sec_ends[-1] = mat_size

    # sorted_v is ordered by its first column, so each section's pairs form a
    # contiguous slice that two binary searches locate without rescanning.
    first_vertices = np.asarray(sorted_v)[:, 0]
    lower = np.searchsorted(first_vertices, sec_starts, side='left')
    upper = np.searchsorted(first_vertices, sec_ends, side='left')

    list_lims = np.full((total_secs, 2), -1, dtype=int)
    occupied = upper > lower
    list_lims[occupied, 0] = lower[occupied]
    # End index is exclusive, as in v[a:b].
    list_lims[occupied, 1] = upper[occupied]

    print_progress(('Vlist limits calculated.') ,2, VERBOSE)
    return list_lims

In [None]:
# breaks matrix to row sections

def matrix_sec(sparse_mat, vlist, sec_number=1000):
    """
    Compute FR, FF and MF for ``sparse_mat`` by processing it in row sections.

    The product ``sparse_mat * sparse_mat`` is only ever formed for one block
    of rows at a time (assumes ``sparse_mat`` is a scipy sparse *matrix*, so
    ``*`` is the matrix product -- TODO confirm).

    Returns
    -------
    FR : column sums of ``sparse_mat``.
    FF : column sums of the product, accumulated over all sections.
    MF : for every pair in ``vlist`` (original order), the product's entry at
        (pair[0], pair[1]).

    There might be a small remaining section at the end, so the total number
    of sections can be ``sec_number + 1``.
    """
    # Sort pairs by first vertex so each section maps to a contiguous slice;
    # `restore` undoes that ordering at the end.
    order = np.argsort(vlist[:,0])
    sorted_v = vlist[order]
    restore = np.argsort(order)

    v_list_lims = calculate_v_secs(sparse_mat, sorted_v, sec_number)

    n_rows = sparse_mat.shape[0]
    FR = np.array(sparse_mat.sum(0))[0]

    sec_width = n_rows//sec_number
    total_sec_number = sec_number + 1 if n_rows % sec_width > 0 else sec_number
    last_sec = total_sec_number - 1

    section_col_sums = []
    pair_values = []
    for sec_index in range(total_sec_number):
        first_row = sec_index*sec_width
        # The final section always runs to the last matrix row.
        stop_row = n_rows if sec_index == last_sec else first_row + sec_width

        squared_rows = sparse_mat[first_row:stop_row]*sparse_mat
        section_col_sums.append(squared_rows.sum(0))

        # A start index of -1 marks a section with no pairs in vlist.
        lo = v_list_lims[sec_index,0]
        if lo > -1:
            hi = v_list_lims[sec_index,1]
            for pair in sorted_v[lo:hi]:
                # Row index is relative to the start of this section.
                pair_values.append(squared_rows[pair[0]-first_row,pair[1]])
        del squared_rows

    # Stack the per-section column sums and add them up.
    FF = np.vstack([np.asarray(sec) for sec in section_col_sums]).sum(axis=0)
    FF = FF.reshape(-1)

    # Sorting MF back to original order
    MF = np.array(pair_values)[restore]
    print_progress(('FR, FF, MF calculated.') ,2, VERBOSE)
    return(FR, FF, MF)

In [None]:
# Calculating graph features - matrix break-down is used due to memory usage limits
def compute_all_properties(all_sparse,FR,FF,CLE,MF,ii,v1,v2):
    """
    Collect the features of one vertex pair across all snapshots.

    For every snapshot index s (one per entry of ``FR``) five values are
    emitted in this order: FR[s][v1], FR[s][v2], FF[s][v1], FF[s][v2],
    MF[s][ii], where ``ii`` is the position of the pair in the pair list.
    ``all_sparse`` and ``CLE`` are accepted but not read.
    """
    features = []
    for snapshot in range(len(FR)):
        features.extend((
            FR[snapshot][v1],
            FR[snapshot][v2],
            FF[snapshot][v1],
            FF[snapshot][v2],
            MF[snapshot][ii],
        ))
    return features

def _scale_by_max(values):
    """Divide ``values`` by its maximum; empty or non-positive-max input is returned unchanged."""
    if values.size == 0:
        return values
    peak = values.max()
    return values/peak if peak > 0 else values


def compute_all_properties_of_list(all_sparse,vlist, minmaxscale=True):
    """
    Computes hand-crafted properties for all vertex pairs in vlist.

    For every snapshot in ``all_sparse`` the matrix is symmetrised
    (matrix + transpose) and ``matrix_sec`` provides FR, FF and MF. With
    ``minmaxscale`` each of them is divided by its own maximum.

    Parameters
    ----------
    all_sparse : sequence of sparse adjacency matrices, one per snapshot.
    vlist : 2-D integer array of vertex pairs.
    minmaxscale : scale every feature array by its maximum when True.

    Returns
    -------
    float32 array of shape (len(vlist), 5 * len(all_sparse)); row ii holds the
    values produced by ``compute_all_properties`` for pair ii.
    """
    # compute_all_properties emits this many values per snapshot.
    features_per_snapshot = 5

    time_start=time.time()
    FR = []
    FF = []
    CLE = []
    MF = []

    for index, mat_snapshot in enumerate(all_sparse):
        mat = mat_snapshot + mat_snapshot.T

        # Placeholder passed on to compute_all_properties, which does not read it.
        CLE.append(np.zeros(NUM_OF_VERTICES))

        FR_temp, FF_temp, MF_temp = matrix_sec(mat, vlist, sec_number = 10)

        if minmaxscale:
            if FR_temp.size and FR_temp.max()>0:
                print(f'FR_max_{index}: {FR_temp.max()}')
            FR_temp = _scale_by_max(FR_temp)
            FF_temp = _scale_by_max(FF_temp)
            MF_temp = _scale_by_max(MF_temp)

        FR.append(FR_temp)   # Friends (degrees)
        FF.append(FF_temp)   # Friends of friends
        MF.append(MF_temp)   # Per-pair entry of the squared matrix

    print_progress(('    Computed all matrix squares, ready to ruuuumbleeee...'),2,VERBOSE)

    all_properties = np.zeros([len(vlist), len(all_sparse)*features_per_snapshot], dtype='float32')
    for ii, pair in enumerate(vlist):
        all_properties[ii] = compute_all_properties(all_sparse,
                                                    FR,
                                                    FF,
                                                    CLE,
                                                    MF,
                                                    ii,
                                                    pair[0],
                                                    pair[1])
        if ii%10**5==0:
            # Pass VERBOSE explicitly: the default of print_progress is frozen at
            # its definition-time value, which silenced this message.
            print_progress(('compute features: (',time.time()-time_start,'sec) ',ii/10**6,'M/',len(vlist)/10**6,'M'),2,VERBOSE)
            time_start=time.time()
    print_progress('Feature generation completed.',1,VERBOSE)
    return all_properties

In [None]:
# Generate training batches

def data_gen(data_train0, data_train1, batch_size=1000000):
    """
    Draw a class-balanced batch by sampling with replacement.

    ``batch_size // 2`` rounds are performed; each round draws one random row
    from ``data_train0`` (label 0) and then one from ``data_train1`` (label 1).

    Returns ``(x_batch, y_batch)`` as numpy arrays.
    """
    pools = (np.array(data_train0), np.array(data_train1))
    samples, labels = [], []
    for _ in range(batch_size // 2):
        for label in (0, 1):
            pool = pools[label]
            pick = np.random.randint(0, len(pool), 1)
            samples.append(pool[pick][0])
            labels.append(label)
    return np.array(samples), np.array(labels)

def data_gen_rnd(data_train0, data_train1, batch_size=1000000, ratio = 0.5):
    """
    Draw ``batch_size`` samples with replacement, picking the class at random.

    For each sample ``random.random()`` is drawn: a value greater than
    ``ratio`` selects ``data_train0`` (label 0), otherwise ``data_train1``
    (label 1). One random row of the chosen set is then appended.

    Returns ``(x_batch, y_batch)`` as numpy arrays.
    """
    pools = (np.array(data_train0), np.array(data_train1))
    samples, labels = [], []
    for _ in range(batch_size):
        label = 0 if random.random() > ratio else 1
        pool = pools[label]
        pick = np.random.randint(0, len(pool), 1)
        samples.append(pool[pick][0])
        labels.append(label)
    return np.array(samples), np.array(labels)
    

In [1]:
def preprocess_data(test_calculated = False, minmaxscale = True, years_delta = 3, weight=3):
    """
    Build the training batch (and a small validation slice) for the dataset
    currently loaded into the module-level variables.

    Reads the module-level names full_dynamic_graph_sparse_2017,
    YEAR_START_TRAIN, CUTOFF, EDGES_USED, raw_data_connected_ratio,
    number_of_years, number_of_months, time_snapshots, BATCH_SIZE, ratio,
    NUM_OF_VERTICES and VERBOSE.

    Parameters
    ----------
    test_calculated : accepted but not read.
    minmaxscale : forwarded to ``compute_all_properties_of_list``.
    years_delta : prediction horizon in years; features are taken from
        ``YEAR_START_TRAIN - years_delta`` and earlier, labels from
        ``YEAR_START_TRAIN``.
    weight : forwarded to ``create_training_data``.

    Returns
    -------
    (xtrain, ytrain, xtest, ytest), where xtrain has shape
    (BATCH_SIZE, time_snapshots, 5) and xtest/ytest are the first 1000
    entries of xtrain/ytrain.
    """
    print_progress(f'Cutoff degree: {CUTOFF}', 1, VERBOSE)
    print_progress(f'years_delta: {years_delta}', 1, VERBOSE)

    # Labelled pairs: unconnected at (YEAR_START_TRAIN - years_delta),
    # labelled by their state at YEAR_START_TRAIN.
    train_dynamic_graph_sparse,data_edges_train,solution_train = create_training_data(
        full_dynamic_graph_sparse_2017,
        YEAR_START_TRAIN-years_delta,
        years_delta,
        edges_used=EDGES_USED,
        vertex_degree_cutoff=CUTOFF,
        filter_data=True,
        connected_ratio = raw_data_connected_ratio,
        weight = weight
    )

    # Adjacency matrices of the historic snapshots, most recent first.
    year_start = YEAR_START_TRAIN
    day_origin = date(1990,1,1)
    years=[(year_start-years_delta) - i for i in range(number_of_years)]

    train_sparse_mat=[]
    for yy in years:
        for month in [12,6][0:number_of_months]:
            if month == 12:
                day_curr=date(yy,month,31)
            if month == 6:
                day_curr=date(yy,month,30)
            train_edges_curr=train_dynamic_graph_sparse[train_dynamic_graph_sparse[:,2]<(day_curr-day_origin).days]

            adj_mat_sparse_curr = sparse.csr_matrix((np.ones(len(train_edges_curr)), (train_edges_curr[:,0], train_edges_curr[:,1])), shape=(NUM_OF_VERTICES,NUM_OF_VERTICES))
            train_sparse_mat.append(adj_mat_sparse_curr)
            print_progress((f'    Created Graph Matrix for {yy}-{month} with {adj_mat_sparse_curr.size} edges'),2,VERBOSE)

    print_progress('    Shuffle train data...',2,VERBOSE)
    x = np.arange(len(data_edges_train))
    random.shuffle(x)

    data_edges_train = data_edges_train[x]
    solution_train = solution_train[x]

    print_progress(('Training, connected  : ',sum(solution_train==1)),1,VERBOSE)
    # Fixed: the rank and verbosity used to sit inside the message tuple, so
    # this line was never printed at the intended level.
    print_progress(('Training, unconnected: ',sum(solution_train==0)),1,VERBOSE)

    # Feature matrix: one row per pair, 5 values per snapshot.
    data_train=compute_all_properties_of_list(train_sparse_mat,data_edges_train, minmaxscale=minmaxscale)
    print_progress(f'Train data features of {YEAR_START_TRAIN-years_delta} and train solutions of {YEAR_START_TRAIN} computed.',1,VERBOSE)

    # Split rows by label with boolean masks (anything not labelled 1 is class 0).
    positive_mask = solution_train == 1
    data_train1 = np.asarray(data_train[positive_mask], dtype='float32')
    data_train0 = np.asarray(data_train[~positive_mask], dtype='float32')

    print(f'Number of features: {data_train.shape[1]}')
    print_progress('All the train features for the ',2,VERBOSE)

    # Reshape the flat feature rows to (samples, snapshots, features per snapshot).
    NUM_OF_FEATURES = 5
    data_train0_reshaped = data_train0.reshape(data_train0.shape[0], time_snapshots, NUM_OF_FEATURES)
    data_train1_reshaped = data_train1.reshape(data_train1.shape[0], time_snapshots, NUM_OF_FEATURES)

    xtrain,ytrain = data_gen_rnd(data_train0_reshaped, data_train1_reshaped, batch_size=BATCH_SIZE, ratio = ratio)
    # NOTE(review): the validation slice is taken from the training batch itself,
    # so validation metrics are not measured on held-out data.
    xtest,ytest = xtrain[0:1000],ytrain[0:1000]

    return (xtrain,ytrain,xtest,ytest)

## Model consistency experiment

In [2]:
# LSTM Model
def train_nn():
    """
    Build and train a two-layer LSTM binary classifier.

    Reads the module-level names xtrain, ytrain, xtest, ytest, FMASK,
    NUM_OF_FEATURES, EPOCHS, NN_BATCHSIZE and VERBOSE. Only the feature
    columns listed in FMASK are fed to the network.

    Returns
    -------
    (history, model): the object returned by ``model.fit`` and the trained
    model.
    """
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.layers import LSTM
    from keras.metrics import AUC

    model = Sequential()
    model.add(LSTM(units=400,
                  return_sequences=True,
                  input_shape=(None, NUM_OF_FEATURES,)))
    model.add(LSTM(units=400,
                  return_sequences=False,
                  input_shape=(None, NUM_OF_FEATURES,)))
    model.add(Dense(1, activation='sigmoid'))

    # Pin the metric name: callers read history['auc'] / history['val_auc'],
    # and some Keras versions auto-suffix default metric names (auc_1, ...)
    # when several models are built in one session.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[AUC(name='auc')])
    history = model.fit(xtrain[:,:,FMASK], ytrain, epochs=EPOCHS, batch_size=NN_BATCHSIZE,
                  validation_data=(xtest[:,:,FMASK], ytest), verbose=VERBOSE)
    return history, model


In [None]:
# Trains and evaluates the LSTM model on each of the 18 datasets.
# For every file: load the pickled data, build training features, train the
# network three times, evaluate the last trained model on the file's own
# vertex pairs, and append the results to logs_<n>.txt.
for current_experiment in range(18):

    # NOTE(review): pickle.load executes arbitrary code from the file -- only
    # use dataset files from a trusted source.
    with open(file_names[current_experiment], 'rb') as f:
        edges = pickle.load(f)
    print(file_names[current_experiment])
    # Each pickle holds a 7-item sequence, unpacked into module-level names that
    # preprocess_data() reads (full_dynamic_graph_sparse_2017, CUTOFF).
    full_dynamic_graph_sparse_2017, \
    unconnected_vertex_pairs_2017, \
    unconnected_vertex_pairs_solution, \
    YEAR_START_FILE, \
    YEARS_DELTA, \
    CUTOFF, \
    MIN_WEIGHT = edges


    # day_curr=date(2017,12,31)
    day_origin=date(1990,1,1)


    # Horizon size experiment
    history_list = []
    # From here on every print_progress(..., VERBOSE) call prints everything.
    VERBOSE = 2

    history_list = []

    connected_edges_stats = []
    # Feature columns fed to the network by train_nn(); here all five.
    FMASK = [0,1,2,3,4]

    YEAR_START_TEST = YEAR_START_FILE + YEARS_DELTA
    # preprocess_data() reads YEAR_START_TRAIN as the label year of the training data.
    YEAR_START_TRAIN = YEAR_START_FILE
    TEST_CALCULATED = False
    xtrain,ytrain,xtest,ytest = preprocess_data(test_calculated = False, minmaxscale = True, years_delta=YEARS_DELTA, weight=MIN_WEIGHT)

    NUM_OF_FEATURES = 5
    # Train three independent models; `model` keeps the last one.
    for train_num in range(3):
        hist, model = train_nn()
        history_list.append([hist])

    # Best training / validation AUC of each run, then the best over all runs.
    hist_array = np.array([[max(history_list[i][0].history['auc']),max(history_list[i][0].history['val_auc'])] for i in range(len(history_list))])
    hist_max = hist_array.max(axis=0)

    # Create properties for evaluation
    year_start = YEAR_START_FILE
    full_dynamic_graph_sparse = full_dynamic_graph_sparse_2017
    unconnected_vertex_pairs = unconnected_vertex_pairs_2017

    print('2) Makes predictions for '+str(year_start)+' -> '+str(year_start+YEARS_DELTA)+' data.')
    # Three yearly snapshots, most recent first.
    years=[year_start,year_start-1,year_start-2]#,year_start-4,year_start-7,year_start-12]

    print('2.1) Computes the 15 properties for the '+str(year_start)+' data.')
    eval_sparse=[]
    for yy in years:
        print('    Create Graph for ', yy)
        day_curr=date(yy,12,31)
        # Keep the edges dated before Dec 31 of the snapshot year.
        eval_edges_curr=full_dynamic_graph_sparse[full_dynamic_graph_sparse[:,2]<(day_curr-day_origin).days]
        adj_mat_sparse_curr = sparse.csr_matrix(
                                                (np.ones(len(eval_edges_curr)), (eval_edges_curr[:,0], eval_edges_curr[:,1])),
                                                shape=(NUM_OF_VERTICES,NUM_OF_VERTICES)
                                              )

        eval_sparse.append(adj_mat_sparse_curr)

    print('    compute all properties for evaluation')
    eval_examples=compute_all_properties_of_list(eval_sparse,unconnected_vertex_pairs)
    eval_examples=np.array(eval_examples)
    eval_examples_org=eval_examples 

    # (samples, 3 snapshots, 5 features per snapshot)
    eval_reshaped = eval_examples_org.reshape(eval_examples_org.shape[0], 3, 5)

    # Evaluates the model from the last training run only.
    # Presumably scores is [loss, AUC] -- confirm against the Keras version in use.
    scores = model.evaluate(eval_reshaped, unconnected_vertex_pairs_solution, verbose=0)

    # Results are appended, so re-running adds another record to the same file.
    with open(f'logs_{current_experiment+1}.txt', 'a') as myfile:
        myfile.write(f'File name: {file_names[current_experiment]}' + '\n')
        myfile.write(f'Validation AUC {scores[1]}' + '\n')
        myfile.write(f'Validatin Size {len(unconnected_vertex_pairs_solution)}' + '\n')
        myfile.write(f'Validation positives: {unconnected_vertex_pairs_solution.sum()}' + '\n')
        myfile.write(f'YEAR_START: {YEAR_START_FILE}' + '\n')
        myfile.write(f'YEARS_DELTA: {YEARS_DELTA}' + '\n')
        myfile.write(f'CUTOFF: {CUTOFF}' + '\n')
        myfile.write(f'MIN_WEIGHT {MIN_WEIGHT}' + '\n')
        myfile.write(f'Train AUC {hist_max[0]}' + '\n')
        myfile.write(f'Test AUC {hist_max[1]}' + '\n')
        myfile.write(f'Train Size {len(ytrain)}' + '\n')
        myfile.write(f'Train positives {ytrain.sum()}' + '\n')  