https://github.com/dayuyang1999/LDG

In [3]:
import os
import pickle

In [4]:
# Location of the LDG checkout. Set the LDG_LOCATION environment variable to run the
# notebook on another machine without editing this cell; the default is the original path.
LDG_location = os.environ.get('LDG_LOCATION', '/home/dalab5/Projects/MA/LDG')

In [5]:
# Work from the LDG checkout so that the repo-local imports used below (data_loader,
# social_data_loader) and the relative data path './SocialEvolution' resolve.
os.chdir(LDG_location)

**Training log of "Running the baseline DyRep model [1] on Social Evolution:"**


```bash
~~~~~ Script arguments ~~~~~
data_dir ./SocialEvolution/
dataset social
prob 0.8
batch_size 200
n_hid 32
epochs 2
seed 1111
lr 0.0002
lr_decay_step [10]
weight 1
wdecay 0
model dyrep
bilinear False
bilinear_enc False
encoder None
sparse False
n_rel 2
device cuda
association CloseFriend
resume 
log_interval 300
results results
soft_attn False
freq False
verbose False
torch 1.8.0
start time: 2021-09-12 16:43:53.505488
experiment_ID:  buec-xiaoflambda_505488
gitcommit 2036957 

loading data from ./SocialEvolution/data_prob0.8.pkl
TRAIN
Event type=SMS, k=1, number of events=4319
Event type=Proximity, k=2, number of events=31011
Event type=Calls, k=3, number of events=8187
Event type=CloseFriend, k=0, number of events=365
TEST
Event type=SMS, k=1, number of events=288
Event type=Proximity, k=2, number of events=9094
Event type=Calls, k=3, number of events=1080
Event type=CloseFriend, k=0, number of events=73

number of training parameters: 3460

Starting training...

TRAIN epoch=1/2, batch=220/220, sec/iter: 2.9215, loss=0.287, loss components: [16.929515838623047, 40.473880767822266]
the model is saved to results/checkpoints/checkpoint_dygraphs_buec-xiaoflambda_505488_epoch1_batch220.pth.tar

TEST batch=53/53, loss=347.308, psi=[0.4648135006427765, 0.45860326290130615], loss1 min/max=0.0195/0.7698, loss2 min/max=0.0036/0.1480, integral time stamps=5000, sec/iter=3.0337
----------------------------------------------------------------------------------------------------
Epoch 1: results per event type for all test time slots: 
====== CloseFriend       (73      events):      MAR=36.79+-16.89         HITS_10=0.212+-0.261
====== SMS               (288     events):      MAR=19.54+-14.77         HITS_10=0.292+-0.423
====== Proximity         (9094    events):      MAR=27.69+-10.18         HITS_10=0.037+-0.164
====== Calls             (1080    events):      MAR=28.19+-16.80         HITS_10=0.198+-0.344
====== Com               (10462   events):      MAR=27.52+-11.27         HITS_10=0.061+-0.211
----------------------------------------------------------------------------------------------------

TRAIN epoch=2/2, batch=220/220, sec/iter: 2.9055, loss=0.275, loss components: [19.30032730102539, 35.67718505859375]
the model is saved to results/checkpoints/checkpoint_dygraphs_buec-xiaoflambda_505488_epoch2_batch220.pth.tar

TEST batch=53/53, loss=333.265, psi=[0.44680055975914, 0.4191657602787018], loss1 min/max=0.0075/0.6281, loss2 min/max=0.0014/0.1224, integral time stamps=5000, sec/iter=3.1022
----------------------------------------------------------------------------------------------------
Epoch 2: results per event type for all test time slots: 
====== CloseFriend       (73      events):      MAR=35.77+-16.61         HITS_10=0.260+-0.288
====== SMS               (288     events):      MAR=9.52+-12.56  HITS_10=0.700+-0.434
====== Proximity         (9094    events):      MAR=13.24+-11.03         HITS_10=0.443+-0.460
====== Calls             (1080    events):      MAR=21.27+-14.90         HITS_10=0.328+-0.403
====== Com               (10462   events):      MAR=13.97+-11.81         HITS_10=0.438+-0.457
----------------------------------------------------------------------------------------------------
end time: 2021-09-12 17:11:25.432984


```

**Running our latent dynamic graph (LDG) model with a learned graph, sparse prior and bilinear interactions:**

```bash

~~~~~ Script arguments ~~~~~
data_dir ./SocialEvolution/
dataset social
prob 0.8
batch_size 200
n_hid 32
epochs 2
seed 1111
lr 0.0002
lr_decay_step [10]
weight 1
wdecay 0
model dyrep
bilinear True
bilinear_enc True
encoder mlp
sparse True
n_rel 2
device cuda
association CloseFriend
resume 
log_interval 300
results results
soft_attn True
freq False
verbose False
torch 1.8.0
start time: 2021-09-12 19:04:08.375378
experiment_ID:  buec-xiaoflambda_375378
gitcommit 2036957 

loading data from ./SocialEvolution/data_prob0.8.pkl
TRAIN
Event type=SMS, k=1, number of events=4319
Event type=Proximity, k=2, number of events=31011
Event type=Calls, k=3, number of events=8187
Event type=CloseFriend, k=0, number of events=365
TEST
Event type=SMS, k=1, number of events=288
Event type=Proximity, k=2, number of events=9094
Event type=Calls, k=3, number of events=1080
Event type=CloseFriend, k=0, number of events=73
/home/dalab5/Projects/MA/LDG/encoder.py:81: UserWarning: nn.init.xavier_normal is now deprecated in favor of nn.init.xavier_normal_.
  nn.init.xavier_normal(m.weight.data)
Using factor graph MLP encoder.

number of training parameters: 176935

```

In [15]:
import matplotlib.pyplot as plt
import os
from os.path import join as pjoin
import numpy as np
import datetime
import pickle
import pandas
import itertools
import torch
import torch.utils
from torch.utils.data import DataLoader


In [11]:
from data_loader import EventsDataset

In [12]:
class SocialEvolutionDataset(EventsDataset):
    '''
    Event dataset for the Social Evolution data, used to load batches for training and testing.

    Communication events (SMS, Proximity, Calls) are read from ``data.EVENT_TYPES`` and are
    assigned event-type ids k >= 1. Association events for ``MainAssociation`` are derived from
    changes between consecutive adjacency snapshots in ``data.Adj`` and are assigned id k = 0.
    All events are stored in ``self.all_events`` as ``(u, v, relation, datetime)`` tuples,
    sorted by timestamp.
    '''

    FIRST_DATE = datetime.datetime(2008, 9, 11)  # consider events starting from this time
    EVENT_TYPES = ['SMS', 'Proximity', 'Calls']

    def __init__(self,
                 subj_features,
                 data,
                 MainAssociation,
                 data_train=None,
                 verbose=False):
        '''
        Build the sorted event list and the pairwise event-count matrix ``H_train``.

        Args:
            subj_features: array with a ``.shape``; its number of rows is stored as ``N_nodes``.
            data: split object exposing ``.split`` (e.g. 'train'), ``.EVENT_TYPES[t].tuples``
                and ``.Adj`` (mapping snapshot date -> relation name -> adjacency matrix).
            MainAssociation: name of the association relation (e.g. 'CloseFriend').
            data_train: optional training split; when given, its last adjacency snapshot is the
                reference against which new associations in ``data`` are detected.
            verbose: print extra statistics while building the dataset.

        Raises:
            ValueError: if ``MainAssociation`` is not among the relations of the adjacency snapshots.
        '''
        super(SocialEvolutionDataset, self).__init__()

        self.subj_features = subj_features
        self.data = data
        self.verbose = verbose
        self.all_events = []
        self.event_types_num = {}
        # NOTE(review): left as None here; presumably the caller must assign it before the
        # dataset is indexed -- confirm against EventsDataset.__getitem__.
        self.time_bar = None
        self.MainAssociation = MainAssociation
        self.TEST_TIMESLOTS = [datetime.datetime(2009, 5, 10), datetime.datetime(2009, 5, 20), datetime.datetime(2009, 5, 31),
                               datetime.datetime(2009, 6, 10), datetime.datetime(2009, 6, 20), datetime.datetime(2009, 6, 30)]
        self.FIRST_DATE = SocialEvolutionDataset.FIRST_DATE
        self.event_types = SocialEvolutionDataset.EVENT_TYPES

        first_day = self.FIRST_DATE.toordinal()

        k = 1  # k >= 1 for communication events
        print(data.split.upper())
        for t in self.event_types:
            tuples = data.EVENT_TYPES[t].tuples
            # The printed count is the raw one, i.e. before the FIRST_DATE filter below.
            print('Event type={}, k={}, number of events={}'.format(t, k, len(tuples)))

            # Keep only events that happen on or after FIRST_DATE (day granularity).
            self.all_events.extend(e for e in tuples if e[3].toordinal() >= first_day)
            self.event_types_num[t] = k
            k += 1

        n = len(self.all_events)
        self.N_nodes = subj_features.shape[0]
        # Default size of H_train; replaced by the adjacency size when snapshots are available.
        # Without this default, N would be unbound (NameError) for data with no snapshots.
        N = self.N_nodes

        adj_dates = sorted(list(data.Adj.keys()))  # snapshot dates, oldest first

        # The initial/final adjacency is only needed for the verbose report, so it is not
        # computed otherwise (get_Adjacency() would also fail when there are no snapshots).
        if data.split == 'train' and self.verbose and len(adj_dates) > 0:
            Adj_all, _, Adj_all_last = self.get_Adjacency()
            print('initial and final associations', self.MainAssociation, Adj_all.sum(), Adj_all_last.sum(),
                  np.allclose(Adj_all, Adj_all_last))

        # Initial topology
        if len(adj_dates) > 0:

            keys = sorted(list(data.Adj[list(data.Adj.keys())[0]].keys()))  # relation keys
            keys.remove(MainAssociation)
            keys = [MainAssociation] + keys  # to make sure CloseFriend goes first

            k = 0  # k <= 0 for association events
            for rel in keys:

                # Only the main association is turned into events; other relations are skipped.
                if rel != MainAssociation:
                    continue
                if data_train is None:
                    date = adj_dates[0]  # first date
                    Adj_prev = data.Adj[date][rel]
                else:
                    date = sorted(list(data_train.Adj.keys()))[-1]  # last date of the training set
                    Adj_prev = data_train.Adj[date][rel]
                self.event_types_num[rel] = k

                N = Adj_prev.shape[0]

                # Associative events: compare every snapshot with the previous one
                for date in adj_dates:
                    if date.toordinal() >= first_day:
                        assert data.Adj[date][rel].shape[0] == N
                        # only the upper triangle (u < v) is scanned, so each pair is added once
                        for u in range(N):
                            for v in range(u + 1, N):
                                # if two nodes become friends, add the event
                                if data.Adj[date][rel][u, v] > 0 and Adj_prev[u, v] == 0:
                                    assert u != v, (u, v, k)
                                    self.all_events.append((u, v, rel, date))

                    # the reference snapshot advances even for dates before FIRST_DATE
                    Adj_prev = data.Adj[date][rel]

                print('Event type={}, k={}, number of events={}'.format(rel, k, len(self.all_events) - n))
                n = len(self.all_events)
                k -= 1

        # Chronological order, compared at whole-second resolution (stable for ties).
        self.all_events = sorted(self.all_events, key=lambda x: int(x[3].timestamp()))

        if self.verbose:
            print('%d events' % len(self.all_events))
            print('last 10 events:')
            for event in self.all_events[-10:]:
                print(event)

        self.n_events = len(self.all_events)

        # Symmetric matrix counting how many events (of any type) each node pair has.
        H_train = np.zeros((N, N))
        for e in self.all_events:
            H_train[e[0], e[1]] += 1
            H_train[e[1], e[0]] += 1
        if self.verbose:
            print('H_train', self.n_events, H_train.max(), H_train.min(), H_train.std())
        self.H_train = H_train


    @staticmethod
    def load_data(data_dir, prob, dump=True):
        '''
        Load the preprocessed data dictionary, building (and optionally caching) it if needed.

        Args:
            data_dir: directory holding the cache file and the raw CSV files.
            prob: value used in the cache file name ('data_prob<prob>.pkl') and passed as
                ``MIN_EVENT_PROB`` when the data has to be generated.
            dump: when the data is generated, save it to the cache file.

        Returns:
            dict with keys 'initial_embeddings', 'train' and 'test' (when generated here;
            a cached file is returned as stored).
        '''
        data_file = pjoin(data_dir, 'data_prob%s.pkl' % prob)
        if os.path.isfile(data_file):
            print('loading data from %s' % data_file)
            # SECURITY: unpickling can execute arbitrary code -- only load cache files you created.
            with open(data_file, 'rb') as f:
                data = pickle.load(f)
        else:
            # SubjectsReader and SocialEvolution are looked up at call time, so they must be
            # available in the namespace before this branch runs.
            data = {'initial_embeddings': SubjectsReader(pjoin(data_dir, 'Subjects.csv')).features_onehot}
            for split in ['train', 'test']:
                data.update(
                    {split: SocialEvolution(data_dir, split=split, MIN_EVENT_PROB=prob)})
            if dump:
                # dump data files to avoid their generation again
                print('saving data to %s' % data_file)
                with open(data_file, 'wb') as f:
                    pickle.dump(data, f, protocol=2)  # for compatibility
        return data

    def get_Adjacency(self, multirelations=False):
        '''
        Return copies of the adjacency at the first and last snapshot dates of ``self.data``.

        Args:
            multirelations: if True, stack all relations along a third axis with the main
                association first; otherwise return only the main association.

        Returns:
            (Adj_first, keys, Adj_last) where ``keys`` lists the relation names covered.
        '''
        dates = sorted(list(self.data.Adj.keys()))
        Adj_all = self.data.Adj[dates[0]]
        Adj_all_last = self.data.Adj[dates[-1]]
        if multirelations:
            keys = sorted(list(Adj_all.keys()))
            keys.remove(self.MainAssociation)
            keys = [self.MainAssociation] + keys  # to make sure CloseFriend goes first
            Adj_all = np.stack([Adj_all[rel].copy() for rel in keys], axis=2)
            Adj_all_last = np.stack([Adj_all_last[rel].copy() for rel in keys], axis=2)
        else:
            keys = [self.MainAssociation]
            Adj_all = Adj_all[self.MainAssociation].copy()
            Adj_all_last = Adj_all_last[self.MainAssociation].copy()

        return Adj_all, keys, Adj_all_last


    def time_to_onehot(self, d):
        '''
        Encode a datetime as concatenated one-hot vectors for weekday (7), hour (24),
        minute (60) and second (60), i.e. a vector of length 151.
        '''
        x = []
        for t, max_t in [(d.weekday(), 7), (d.hour, 24), (d.minute, 60), (d.second, 60)]:
            x_t = np.zeros(max_t)
            x_t[t] = 1
            x.append(x_t)
        return np.concatenate(x)

In [13]:
from social_data_loader import SubjectsReader, SocialEvolution, CSVReader

In [16]:
# Load the cached/preprocessed Social Evolution data (events with probability >= 0.8 file).
data_dir = './SocialEvolution'
data = SocialEvolutionDataset.load_data(data_dir, prob=0.8)

# Train split first, then test split; the test split uses the training data as the
# reference for detecting new 'CloseFriend' associations.
train_set = SocialEvolutionDataset(
    subj_features=data['initial_embeddings'],
    data=data['train'],
    MainAssociation='CloseFriend',
    verbose=False,
)
test_set = SocialEvolutionDataset(
    subj_features=data['initial_embeddings'],
    data=data['test'],
    MainAssociation='CloseFriend',
    data_train=data['train'],
    verbose=False,
)

# Copy of the node features and the main-association adjacency at the first training snapshot.
initial_embeddings = data['initial_embeddings'].copy()
A_initial = train_set.get_Adjacency(multirelations=False)[0]

# Events are time-ordered, so the loaders must not shuffle.
train_loader = DataLoader(dataset=train_set, batch_size=200, shuffle=False)
test_loader = DataLoader(dataset=test_set, batch_size=200, shuffle=False)

loading data from ./SocialEvolution/data_prob0.8.pkl
TRAIN
Event type=SMS, k=1, number of events=4319
Event type=Proximity, k=2, number of events=31011
Event type=Calls, k=3, number of events=8187
Event type=CloseFriend, k=0, number of events=365
TEST
Event type=SMS, k=1, number of events=288
Event type=Proximity, k=2, number of events=9094
Event type=Calls, k=3, number of events=1080
Event type=CloseFriend, k=0, number of events=73


##### Original data description in the paper

Size:
- 83 nodes
- millions of events $o^{t}=(u, v, \tau, k)$
 


Interaction:
- communication: [SMS, Proximity, Calls]
- association: CloseFriend


Training
- Sep 2008 ~ April 2009, 43000 communication events

Test
- May 2009 ~ June 2009, 10000  communication events


![](https://cdn.mathpix.com/snip/images/AVRtAbYF-MQlnuYgnVaVIHh6PqkFjCiDgI5j8iaTNOc.original.fullsize.png)

##### Exploring the data

In [17]:
data['train'] == train_set.data # train_set.data is the split object that was passed to the constructor

True

In [19]:
data['train'].EVENT_TYPES['SMS'].tuples[:10] # first 10 raw SMS events, each a tuple (u, v, event type, datetime)

[(60, 0, 'SMS', datetime.datetime(2008, 1, 1, 15, 0, 25)),
 (60, 0, 'SMS', datetime.datetime(2008, 1, 6, 12, 1, 36)),
 (60, 0, 'SMS', datetime.datetime(2008, 3, 31, 19, 49, 18)),
 (0, 60, 'SMS', datetime.datetime(2008, 4, 26, 18, 35, 34)),
 (0, 60, 'SMS', datetime.datetime(2008, 4, 26, 19, 35, 34)),
 (0, 60, 'SMS', datetime.datetime(2008, 4, 26, 20, 35, 34)),
 (0, 60, 'SMS', datetime.datetime(2008, 6, 20, 9, 30, 44)),
 (0, 60, 'SMS', datetime.datetime(2008, 6, 20, 10, 30, 44)),
 (0, 60, 'SMS', datetime.datetime(2008, 6, 20, 11, 30, 44)),
 (0, 60, 'SMS', datetime.datetime(2008, 7, 8, 3, 52, 20))]

In [20]:
# Raw (unfiltered) number of events per communication type in the training split;
# these are the same counts that are printed while train_set is constructed.
for t in train_set.event_types:
    print(len(data['train'].EVENT_TYPES[t].tuples))

4319
31011
8187


In [21]:
# Main-association adjacency at the first and last snapshot dates of the training split,
# plus the list of relation names those matrices cover.
Adj_all_start, keys, Adj_all_last = train_set.get_Adjacency()

In [22]:
# Adjacency of the main association at the first snapshot date.
Adj_all_start

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [23]:
# Adjacency of the main association at the last snapshot date.
Adj_all_last

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [24]:
keys # relation names covered by the adjacency matrices (only the main association by default)

['CloseFriend']

In [25]:
# NOTE(review): this cell currently fails with
#   AttributeError: 'NoneType' object has no attribute 'copy'
# Presumably EventsDataset.__getitem__ copies an attribute that the constructor leaves as
# None (self.time_bar is set to None in SocialEvolutionDataset.__init__) -- confirm in
# data_loader.py and initialize it before indexing the dataset.
train_set[0] # call __getitem__

AttributeError: 'NoneType' object has no attribute 'copy'