In [1]:
import os
# Directory that holds the dataset files. The path is relative, so it is
# resolved against the kernel's current working directory.
path_to_files = 'data/simulated_Hawkes/simulated_Hawkes/tmp_trunc_K4_C5'

# Raw directory listing. get_dataset() performs its own os.listdir() call, and
# no other visible cell reads `files` afterwards.
files = os.listdir(path_to_files)

In [2]:
import pandas as pd

In [3]:
import torch
def cmp_to_key(mycmp):
    """Convert a cmp= function into a key= function.

    Args:
        mycmp: two-argument callable returning a negative number, zero or a
            positive number when its first argument sorts before, equal to or
            after its second argument.

    Returns:
        A callable suitable for the ``key=`` argument of ``sorted``/``list.sort``;
        the wrapper objects it produces support all six rich comparisons.
    """
    # The standard library already ships this adapter; delegating to it avoids
    # maintaining a hand-written copy. The import is local so that this cell
    # does not depend on any other cell having imported functools.
    from functools import cmp_to_key as _stdlib_cmp_to_key
    return _stdlib_cmp_to_key(mycmp)

def compare(a,b):
    """Three-way comparison of two file names by their integer stem.

    The extension is stripped with ``os.path.splitext`` rather than by cutting
    a fixed four characters, so names such as ``'12.json'`` or ``'5.h5'`` are
    handled as well as ``'12.csv'``.

    Args:
        a: file name whose part before the extension is an integer.
        b: file name of the same form.

    Returns:
        ``int(stem(a)) - int(stem(b))``: negative, zero or positive.

    Raises:
        ValueError: if either stem cannot be parsed by ``int``.
    """
    stem_a = int(os.path.splitext(a)[0])
    stem_b = int(os.path.splitext(b)[0])
    return stem_a - stem_b

def get_partition(df, num_of_steps, num_of_classes, end_time = None):
    """Bin one event sequence into per-step, per-class event counts.

    Args:
        df: DataFrame with a ``'time'`` column and an ``'event'`` column; each
            ``'event'`` value is converted with ``int`` and used as a class index.
        num_of_steps: number of equal-width time bins covering ``[0, end_time]``.
        num_of_classes: number of event classes (count columns in the result).
        end_time: right edge of the last bin. When ``None``, the ``'time'``
            value of the last row of ``df`` is used.

    Returns:
        A ``torch`` tensor of shape ``(num_of_steps, num_of_classes + 1)``.
        Column 0 holds the bin width in every row; column ``c + 1`` holds the
        number of events of class ``c`` that fall into each bin.
    """
    # Read the columns positionally. Label-based lookups such as df['time'][i]
    # raise KeyError whenever the frame's index is not exactly 0..n-1
    # (for example after filtering rows).
    times = df['time'].to_numpy()
    events = df['event'].to_numpy()
    if end_time is None:
        end_time = times[-1]
    res = torch.zeros(num_of_steps, num_of_classes + 1)
    dt = end_time/num_of_steps
    res[:,0] = dt
    for t, e in zip(times, events):
        k = int(t/dt)
        # An event exactly at end_time lands on index num_of_steps; fold it
        # into the last bin. Larger indices are not handled here.
        if k == num_of_steps:
            k -= 1
        res[k, int(e)+1] += 1
    return res
def get_dataset(path_to_files, n_classes, n_steps):
    """Load every sequence file in a directory into one binned tensor.

    Args:
        path_to_files: directory containing one CSV per sequence, named by an
            integer, plus an optional ``clusters.csv``.
        n_classes: number of event classes, forwarded to ``get_partition``.
        n_steps: number of time bins, forwarded to ``get_partition``.

    Returns:
        ``(data, target)``. ``data`` has shape
        ``(number_of_sequence_files, n_steps, n_classes + 1)`` and is filled
        in ascending numeric file-name order. ``target`` is a tensor built
        from the ``'cluster_id'`` column of ``clusters.csv``, or ``None`` when
        that file is absent.
    """
    entries = os.listdir(path_to_files)
    if 'clusters.csv' in entries:
        # The label file is not a sequence; take it out before sorting.
        entries.remove('clusters.csv')
        cluster_frame = pd.read_csv(path_to_files+'/clusters.csv')
        target = torch.Tensor(cluster_frame['cluster_id'])
    else:
        target = None
    # Numeric ordering, so that '10.csv' comes after '2.csv'.
    ordered = sorted(entries, key = cmp_to_key(compare))
    data = torch.zeros(len(ordered), n_steps, n_classes + 1)
    for row, name in enumerate(ordered):
        frame = pd.read_csv(path_to_files+'/'+name)
        data[row] = get_partition(frame, n_steps, n_classes)
    return data, target


In [4]:
# Load the dataset: 5 event classes, each sequence binned into 1024 time steps.
data, target = get_dataset(path_to_files, n_classes=5, n_steps=1024)

In [5]:
import torch
from models.models import LSTM_cluster_point_processes
from utils.trainers import Trainer_clusterwise

# Hard-coded to the second GPU; this cell fails on a machine without 'cuda:1'.
device = 'cuda:1'

# NOTE(review): the positional arguments are opaque from this notebook. 1024
# matches the n_steps used to build `data`; confirm the meaning of the others
# against LSTM_cluster_point_processes.
model = LSTM_cluster_point_processes(6, 128, 3, 5, 4, 1024, dropout=0.3).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)

trainer_options = dict(
    l=1,
    max_epochs=100,
    max_m_step_epochs=30,
    lr_update_tol=15,
    lr_update_param=0.9,
    batch_size=150,
)
# `data` and `target` are each passed twice: presumably once as the training
# split and once as the validation split (TODO confirm against
# Trainer_clusterwise).
trainer = Trainer_clusterwise(
    model, optimizer, device, data, data, target, target, 4,
    **trainer_options
)

In [6]:
# Run the training loop.
# NOTE(review): the output saved with this cell ends in
# "ValueError: too many values to unpack (expected 3)". The traceback is cut
# off, so it is not visible whether the error comes from this three-name
# unpacking or from inside train(); confirm how many values train() returns.
losses, purs, purs_val = trainer.train()

Beginning e-step
tensor(1600., device='cuda:1')
Cluster partition
Cluster 0 :  0.0475  with pi =  tensor(0.2500, device='cuda:1')
Cluster 1 :  0.085  with pi =  tensor(0.2500, device='cuda:1')
Cluster 2 :  0.848125  with pi =  tensor(0.2500, device='cuda:1')
Cluster 3 :  0.019375  with pi =  tensor(0.2500, device='cuda:1')
Purity for random model: 0.28625
Beginning m-step
Loss on sub_epoch 1/30: 66162.021875
Loss on sub_epoch 11/30: 63070.371875
Loss on sub_epoch 21/30: 63023.815625
Cluster partition
Cluster 0 :  0.0225  with pi =  tensor(0.2500, device='cuda:1')
Cluster 1 :  0.548125  with pi =  tensor(0.2500, device='cuda:1')
Cluster 2 :  0.08875  with pi =  tensor(0.2500, device='cuda:1')
Cluster 3 :  0.340625  with pi =  tensor(0.2500, device='cuda:1')
On epoch 1/100 average loss = 63224.22875, purity = 0.42875
Validation loss = 671797.125, purity = 0.42875
Beginning e-step
tensor(1600., device='cuda:1')
Beginning m-step
Updating lr
Loss on sub_epoch 1/30: 63091.260546875
Loss on s

ValueError: too many values to unpack (expected 3)

In [None]:
# losses, purs = trainer.train()

In [26]:
# Run the training loop; the saved output of this run ends in KeyboardInterrupt.
# NOTE(review): this unpacks two values, whereas the earlier training cell
# unpacks three. Confirm the current return signature of train().
losses, purs = trainer.train()

Beginning e-step
tensor(800., device='cuda:0')
Purity for random model: 0.51375
Beginning m-step
Loss on sub_epoch 1/30: 15105.7927734375
Loss on sub_epoch 11/30: 11890.847265625
Loss on sub_epoch 21/30: 11710.6833984375
On epoch 1/100 average loss = 12044.801451822917, purity = 0.68125
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 11489.5048828125
Loss on sub_epoch 11/30: 11243.4041015625
Updating lr
Loss on sub_epoch 21/30: 11540.073046875
On epoch 2/100 average loss = 11344.5803125, purity = 0.7075
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 11333.2453125
Loss on sub_epoch 11/30: 11256.861328125
Updating lr
Loss on sub_epoch 21/30: 11221.6671875
On epoch 3/100 average loss = 11262.53220703125, purity = 0.76625
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 11271.055859375
Loss on sub_epoch 11/30: 11238.580078125
Loss on sub_epoch 21/30: 11267.84296875
Updati

KeyboardInterrupt: 

In [None]:
import numpy as np
# Sweep over the number of time bins (`div`). For each setting, train until
# five runs have returned a non-empty purity list, then record the mean and
# standard deviation of the best purity of those runs.
# NOTE(review): Trainer_clusterwise is called here with (data, target, 2),
# while the training cell near the top of the notebook passes
# (data, data, target, target, 4). Confirm which signature is current.
n_tries = 0
purs, stds = [], []
divs = [16, 32, 64, 128, 256, 512, 512 + 256, 1024]
for div in divs:
    res = []
    while n_tries < 5:
        data, target = get_dataset(path_to_files, 5, div)
        device = 'cuda:0'

        model = LSTM_cluster_point_processes(6, 128, 3, 5, 2, div, dropout=0.3).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
        trainer = Trainer_clusterwise(
            model, optimizer, device, data, target, 2,
            max_epochs=10, max_m_step_epochs=30,
            lr_update_tol=15, lr_update_param=0.1,
            batch_size=100,
        )
        losses, purys = trainer.train()
        if not purys:
            # A run with no purities does not count as a try; repeat it.
            continue
        res.append(max(purys))
        n_tries += 1
    mean_purity = np.mean(res)
    std_purity = np.std(res)
    print('div:', div, mean_purity, '+-', std_purity)
    purs.append(mean_purity)
    stds.append(std_purity)
    # Reset the counter for the next setting.
    n_tries = 0
    

Beginning e-step
tensor(800., device='cuda:0')
Purity for random model: 0.56125
Beginning m-step
Loss on sub_epoch 1/30: 8691.068150111607
Loss on sub_epoch 11/30: 6911.942592075893
Loss on sub_epoch 21/30: 6719.337193080357
On epoch 1/10 average loss = 7009.695805431547, purity = 0.5925
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6609.807826450893
Updating lr
Loss on sub_epoch 11/30: 6519.581473214285
Loss on sub_epoch 21/30: 6435.210518973215
On epoch 2/10 average loss = 6488.663325427828, purity = 0.61375
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6468.265276227678
Loss on sub_epoch 11/30: 6451.822823660715
Updating lr
Loss on sub_epoch 21/30: 6448.0205078125
On epoch 3/10 average loss = 6415.738337053572, purity = 0.5925
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6412.758021763393
Updating lr
Loss on sub_epoch 11/30: 6384.948102678572
Loss on sub_epo

Loss on sub_epoch 1/30: 8582.82972935268
Loss on sub_epoch 11/30: 7048.546526227678
Loss on sub_epoch 21/30: 6712.087123325893
On epoch 1/10 average loss = 6992.533510044643, purity = 0.51875
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6626.097028459822
Loss on sub_epoch 11/30: 6424.03173828125
Updating lr
Loss on sub_epoch 21/30: 6452.72509765625
On epoch 2/10 average loss = 6458.073567708333, purity = 0.58875
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6399.85302734375
Loss on sub_epoch 11/30: 6357.452218191965
Updating lr
Loss on sub_epoch 21/30: 6314.155343191965
On epoch 3/10 average loss = 6360.032603236607, purity = 0.59
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6265.348284040178
Loss on sub_epoch 11/30: 6302.521344866072
Updating lr
Loss on sub_epoch 21/30: 6272.2412109375
On epoch 4/10 average loss = 6320.71144438244, purity = 0.5975
Beginning e

Loss on sub_epoch 11/30: 6783.529924665178
Loss on sub_epoch 21/30: 6403.173688616072
On epoch 1/10 average loss = 6872.657786923363, purity = 0.57875
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6219.632463727678
Loss on sub_epoch 11/30: 6182.254534040178
Loss on sub_epoch 21/30: 6005.264369419643
Updating lr
On epoch 2/10 average loss = 6080.505022321428, purity = 0.59375
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5995.279296875
Loss on sub_epoch 11/30: 5939.613490513393
Loss on sub_epoch 21/30: 5966.826939174107
Updating lr
On epoch 3/10 average loss = 5921.8537155877975, purity = 0.60875
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5918.416085379465
Loss on sub_epoch 11/30: 5910.93359375
Updating lr
Loss on sub_epoch 21/30: 5896.714913504465
On epoch 4/10 average loss = 5911.487041945685, purity = 0.61875
Beginning e-step
tensor(800., device='cuda:0')
B

On epoch 1/10 average loss = 6888.003896949404, purity = 0.52125
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6232.47021484375
Loss on sub_epoch 11/30: 6137.925362723215
Updating lr
Loss on sub_epoch 21/30: 6035.224190848215
On epoch 2/10 average loss = 6113.277580915179, purity = 0.57375
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6024.247488839285
Loss on sub_epoch 11/30: 6096.5078125
Updating lr
Loss on sub_epoch 21/30: 6027.856515066965
On epoch 3/10 average loss = 6009.408956473214, purity = 0.59125
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6035.887834821428
Loss on sub_epoch 11/30: 5938.136579241072
Updating lr
Loss on sub_epoch 21/30: 5931.856863839285
On epoch 4/10 average loss = 5989.890741257441, purity = 0.6100000000000001
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 6011.755859375
Loss on sub_epoch 11

tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5864.031808035715
Loss on sub_epoch 11/30: 5734.88427734375
Loss on sub_epoch 21/30: 5591.860770089285
Updating lr
On epoch 2/10 average loss = 5671.284709821429, purity = 0.6812499999999999
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5492.6640625
Loss on sub_epoch 11/30: 5532.079520089285
Loss on sub_epoch 21/30: 5457.401227678572
Updating lr
On epoch 3/10 average loss = 5512.645135788691, purity = 0.70625
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5450.796107700893
Loss on sub_epoch 11/30: 5466.924037388393
Loss on sub_epoch 21/30: 5503.318777901785
Updating lr
On epoch 4/10 average loss = 5505.459349423363, purity = 0.7375
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5466.022739955357
Loss on sub_epoch 11/30: 5517.821010044643
Loss on sub_epoch 21/30: 5466.431710379465
Updating lr
On

Loss on sub_epoch 1/30: 5486.808942522322
Loss on sub_epoch 11/30: 5329.388532366072
Loss on sub_epoch 21/30: 5241.965471540178
Updating lr
On epoch 2/10 average loss = 5302.32456984747, purity = 0.64125
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5165.001046316965
Loss on sub_epoch 11/30: 5224.572126116072
Loss on sub_epoch 21/30: 5174.5927734375
Updating lr
On epoch 3/10 average loss = 5162.987897600447, purity = 0.64625
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5138.61572265625
Loss on sub_epoch 11/30: 5159.974748883928
Loss on sub_epoch 21/30: 5145.529854910715
Updating lr
On epoch 4/10 average loss = 5157.410065569196, purity = 0.6775
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5185.329171316965
Loss on sub_epoch 11/30: 5033.819893973215
Updating lr
Loss on sub_epoch 21/30: 5214.6640625
On epoch 5/10 average loss = 5154.5782435825895, purity = 0.687

Loss on sub_epoch 11/30: 5263.513462611607
Loss on sub_epoch 21/30: 5281.362095424107
On epoch 2/10 average loss = 5305.195021856399, purity = 0.67
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Updating lr
Loss on sub_epoch 1/30: 5187.579031808035
Loss on sub_epoch 11/30: 5116.4921875
Loss on sub_epoch 21/30: 5153.863071986607
On epoch 3/10 average loss = 5139.662939453125, purity = 0.66875
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5100.956891741072
Updating lr
Loss on sub_epoch 11/30: 5238.364536830357
Loss on sub_epoch 21/30: 5088.0791015625
On epoch 4/10 average loss = 5123.926376488095, purity = 0.7025
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5054.004813058035
Updating lr
Loss on sub_epoch 11/30: 5159.25830078125
Loss on sub_epoch 21/30: 5134.211565290178
Updating lr
On epoch 5/10 average loss = 5125.429268973215, purity = 0.71
Beginning e-step
tensor(800., device='cuda:0'

tensor(800., device='cuda:0')
Purity for random model: 0.51
Beginning m-step
Loss on sub_epoch 1/30: 9033.69614955357
Loss on sub_epoch 11/30: 6273.26318359375
Loss on sub_epoch 21/30: 5784.116280691965
On epoch 1/10 average loss = 6420.246254185268, purity = 0.8674999999999999
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5434.354282924107
Loss on sub_epoch 11/30: 5198.698939732143
Loss on sub_epoch 21/30: 4970.623046875
On epoch 2/10 average loss = 5140.531763857887, purity = 0.7375
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 4895.596330915178
Updating lr
Loss on sub_epoch 11/30: 4844.176827566965
Loss on sub_epoch 21/30: 4851.385811941965
On epoch 3/10 average loss = 4860.1602074032735, purity = 0.83875
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 4817.839146205357
Updating lr
Loss on sub_epoch 11/30: 4767.688197544643
Loss on sub_epoch 21/30: 4901.6895228

Loss on sub_epoch 1/30: 8975.962193080357
Loss on sub_epoch 11/30: 6315.005440848215
Loss on sub_epoch 21/30: 5621.342354910715
On epoch 1/10 average loss = 6288.029450334821, purity = 0.6637500000000001
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 5371.574916294643
Loss on sub_epoch 11/30: 5195.980817522322
Loss on sub_epoch 21/30: 5056.26806640625
Updating lr
On epoch 2/10 average loss = 5111.010102771577, purity = 0.615
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 4910.202497209822
Loss on sub_epoch 11/30: 4916.176130022322
Loss on sub_epoch 21/30: 4822.384695870535
Updating lr
On epoch 3/10 average loss = 4899.117656017485, purity = 0.5375000000000001
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 4896.769112723215
Loss on sub_epoch 11/30: 4947.499930245535
Loss on sub_epoch 21/30: 4885.304547991072
Updating lr
On epoch 4/10 average loss = 4873.541473679315

In [19]:
# Prints a hard-coded result string; nothing in this cell computes the numbers
# (presumably copied by hand from an earlier sweep run; TODO confirm).
print('div 16 0.6295 +- 0.09964813094')

div 16 0.6295 +- 0.09964813094


In [52]:
# Show the mean purities accumulated in `purs`.
# NOTE(review): the saved output lists 11 values, while the `stds` output
# lists 7 and the `divs` lists in the sweep cells have 8 and 5 entries. The
# displayed state does not come from a single clean run of the visible code.
purs

[0.50125,
 0.7875000000000001,
 0.9625,
 0.99,
 0.9924999999999999,
 0.9924999999999999,
 0.9924999999999999,
 0.9924999999999999,
 0.9924999999999999,
 0.9924999999999999,
 0.784]

In [54]:
# Show the standard deviations accumulated in `stds` (the saved output has 7
# values, which does not match the 11 values shown for `purs`).
stds

[0.030335622624235017,
 0.0650163440990033,
 0.06638147331899162,
 0.13284012947900947,
 0.16613887263370963,
 0.19262560837022688,
 0.17143037945475123]

In [5]:
import numpy as np
# Sweep over the second positional argument of the model (`div`), reported as
# 'hidden_size' in the printout, with the number of time bins fixed at 512.
# For each setting, train until five runs have returned a non-empty purity
# list, then record the mean and standard deviation of the best purities.
# NOTE(review): get_dataset is called with the same arguments on every
# iteration, and the model is built without the trailing step-count argument
# that the other cells pass. Confirm both are intended.
n_tries = 0
purs, stds = [], []
divs = [16, 32, 64, 128, 256]
for div in divs:
    res = []
    while n_tries < 5:
        data, target = get_dataset(path_to_files, 5, 512)
        device = 'cuda:0'

        model = LSTM_cluster_point_processes(6, div, 3, 5, 2, dropout=0.1).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        trainer = Trainer_clusterwise(
            model, optimizer, device, data, target, 2,
            max_epochs=10, max_m_step_epochs=30,
            lr_update_tol=15, lr_update_param=0.1,
            batch_size=100,
        )
        losses, purys = trainer.train()
        if not purys:
            # A run with no purities does not count as a try; repeat it.
            continue
        res.append(max(purys))
        n_tries += 1
    mean_purity = np.mean(res)
    std_purity = np.std(res)
    print('hidden_size:', div, mean_purity, '+-', std_purity)
    purs.append(mean_purity)
    stds.append(std_purity)
    # Reset the counter for the next setting.
    n_tries = 0
    

Beginning e-step
tensor(800., device='cuda:0')
Purity for random model: 0.50625
Beginning m-step
Loss on sub_epoch 1/30: 25428.183035714286
Loss on sub_epoch 11/30: 24460.67159598214
Loss on sub_epoch 21/30: 23034.750837053572
On epoch 1/10 average loss = 23730.872023809523, purity = 0.56625
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 22385.310546875
Loss on sub_epoch 11/30: 22205.417131696428
Loss on sub_epoch 21/30: 22005.378069196428
On epoch 2/10 average loss = 22218.565578497022, purity = 0.5825
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 22109.06640625
Loss on sub_epoch 11/30: 21886.440011160714
Updating lr
Loss on sub_epoch 21/30: 21831.343191964286
On epoch 3/10 average loss = 21926.311002604165, purity = 0.77
Beginning e-step
tensor(800., device='cuda:0')
Beginning m-step
Loss on sub_epoch 1/30: 21782.83565848214
Loss on sub_epoch 11/30: 21838.365234375
Loss on sub_epoch 21/30: 21804.295758

KeyboardInterrupt: 

In [1]:
import torch
import pickle
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load 'success.pkl' if it was produced by a trusted source.
# The path is relative to the kernel's working directory.
with open('success.pkl', 'rb') as f:
    a = pickle.load(f)

In [2]:
# Display the object loaded from 'success.pkl' (an empty list in the saved output).
a

[]