In [1]:
import numpy as np
from scipy.stats import rayleigh
from scipy.special import digamma,erf
import scipy.io as sio
import pandas as pd

In [2]:
def parseMat(data):
    """Split a loaded .mat dictionary into per-field arrays.

    Parameters
    ----------
    data : mapping
        Object with 'Seqs' and 'Stats' entries, each a NumPy structured
        array (e.g. the result of ``scipy.io.loadmat`` on a file holding
        two MATLAB struct arrays).

    Returns
    -------
    (seqs, stats) : tuple of dict
        For each struct, a dict mapping every field name to that field's
        values flattened to one dimension.
    """
    def _fields_to_dict(struct_arr):
        # One flattened array per named field of the structured array.
        return {name: struct_arr[name].flatten() for name in struct_arr.dtype.names}

    # Read from the argument (not a notebook global) so the function works
    # for any dataset, and read 'Stats' from its own struct.
    seqs = _fields_to_dict(data['Seqs'])
    stats = _fields_to_dict(data['Stats'])
    return seqs, stats

In [3]:
class Model:
    """Initial state of a K-cluster mixture model built from event sequences.

    Attributes set by ``__init__``:
      N        -- number of sequences (length of ``seqs['Time']``)
      K        -- number of clusters
      D        -- largest mark value found in any sequence (int)
      w        -- kernel bandwidth: mean over sequences of
                  (4 * std(time)**5 / (3 * n))**0.2
      landmark -- kernel centres ``w * [0, 1, ..., ceil(mean_Tmax / w) - 1]``
      alpha    -- array of K ones
      beta     -- array (D, M, K, D) filled with 1 / (M * D**2), M = len(landmark)
      b        -- array (D, K) filled with 1 / D
      r        -- (N, K) one-hot matrix of random initial cluster labels
      kernel   -- kernel name, initialised to "gauss"
    """

    def __init__(self,seqs,K):
        """Build the initial model.

        Parameters
        ----------
        seqs : mapping
            Must provide 'Time' and 'Mark'; entry ``i`` of each is the array
            of timestamps / marks of sequence ``i`` (any shape; flattened here).
        K : int
            Number of clusters.
        """
        self.N = len(seqs['Time'])
        self.K = K
        D = np.zeros(self.N)
        sigma = np.zeros(self.N)
        Tmax = np.zeros(self.N)
        for i in range(self.N):
            # Flatten so (n, 1) column vectors and (1, n) row vectors are
            # handled identically when counting events / taking the last time.
            t = np.asarray(seqs['Time'][i]).flatten()
            D[i] = np.max(seqs['Mark'][i])
            sigma[i] = (4 * (np.std(t) ** 5) / (3 * len(t))) ** 0.2
            Tmax[i] = t[-1]
        # Builtin int: the ``np.int`` alias was removed from NumPy.
        self.D = int(np.max(D))
        Tmax = np.mean(Tmax)
        self.w = np.mean(sigma)
        # np.arange (not range) so the bandwidth scaling is an array product.
        self.landmark = self.w * np.arange(int(np.ceil(Tmax / self.w)))
        self.alpha = np.ones(K)
        M = len(self.landmark)
        self.beta = np.ones((self.D, M, self.K, self.D)) / (M * (self.D ** 2))
        self.b = np.ones((self.D, K)) / self.D
        # Uniform labels in {0, ..., K-1}; rounding K*rand() could yield K,
        # which left the corresponding row of r all zeros.
        label = np.random.randint(K, size=self.N)
        self.r = np.zeros((self.N, K))
        for k in range(K):
            self.r[label == k, k] = 1
        self.kernel = "gauss"
class Alg:
    """Container for algorithm settings.

    Attributes
    ----------
    outer, inner : int
        Presumably outer / inner iteration counts -- TODO confirm against the
        training loop, which is not visible in this part of the notebook.
    rho : float
        Presumably a step-size or regularisation weight -- TODO confirm.
    thres : float
        Presumably a convergence threshold -- TODO confirm.
    Tmax : scalar or empty list
        When empty/falsy, Expectation_DMHP uses each sequence's own 'Stop'
        time; otherwise events at or after Tmax are dropped and Tmax is used
        as the stop time.
    """

    def __init__ (self, outer = 8,rho = 0.1,inner = 5,thres =  1e-5,Tmax = None):
        self.outer = outer
        self.rho = rho
        self.inner = inner
        self.thres = thres
        # Fresh list per instance: a literal [] default would be one shared
        # object across every Alg() created without an explicit Tmax.
        self.Tmax = [] if Tmax is None else Tmax

In [4]:
# Load the LinkedIn and IPTV datasets from MATLAB .mat files; the paths are
# relative to the notebook's working directory.
linkedInData = sio.loadmat('Data/LinkedinData.mat')
IPTVData = sio.loadmat('Data/IPTVData.mat')
# Pass each loaded dictionary to parseMat, which returns a (seqs, stats) pair.
IPTVSeqs,IPTVStats = parseMat(IPTVData)
# NOTE(review): `linedInStats` looks like a typo for `linkedInStats`; left
# unchanged here because later cells may reference the current name.
linkedInSeqs,linedInStats = parseMat(linkedInData)

In [5]:
def kernel_int(dt,model):
    """Evaluate the integrated kernel for every (landmark, time-gap) pair.

    Parameters
    ----------
    dt : array-like
        Time gaps; flattened to one dimension (integer input is accepted).
    model : object
        Must provide ``landmark`` (1-D kernel centres), ``w`` (bandwidth)
        and ``kernel`` ('gauss' or 'exp').

    Returns
    -------
    numpy.ndarray of shape (len(model.landmark), len(dt)), or the scalar 0
    when ``model.kernel`` is neither 'gauss' nor 'exp'.
    """
    # Cast to float so integer-valued gaps do not break the subtraction.
    gaps = np.asarray(dt, dtype=float).flatten()
    centers = np.asarray(model.landmark, dtype=float).flatten()
    landmark = np.tile(centers, (len(gaps), 1)).T           # (M, n)
    distance = np.tile(gaps, (len(centers), 1)) - landmark  # (M, n)
    G = 0
    if model.kernel == 'gauss':
        # Gaussian CDF difference: the argument of erf is x / (sqrt(2) * w).
        # The bandwidth belongs in the denominator, not as a multiplier.
        scale = np.sqrt(2) * model.w
        G = 0.5 * (erf(distance / scale) + erf(landmark / scale))
    elif model.kernel == 'exp':
        # NOTE(review): `distance` already has `landmark` subtracted, so the
        # landmark is removed twice here; kept as in the original -- confirm.
        G = 1 - np.exp(-model.w * (distance - landmark))
        G[G < 0] = 0
    return G

In [6]:
def kernel(dt,model):
    """Evaluate the kernel for every (landmark, time-gap) pair.

    Parameters
    ----------
    dt : array-like
        Time gaps; flattened to one dimension (integer input is accepted).
    model : object
        Must provide ``landmark`` (1-D kernel centres), ``w`` (bandwidth)
        and ``kernel`` ('gauss' or 'exp').

    Returns
    -------
    numpy.ndarray of shape (len(model.landmark), len(dt)), or the scalar 0
    when ``model.kernel`` is neither 'gauss' nor 'exp'.
    """
    # Cast to float so integer-valued gaps do not break the subtraction.
    gaps = np.asarray(dt, dtype=float).flatten()
    centers = np.asarray(model.landmark, dtype=float).flatten()
    distance = np.tile(gaps, (len(centers), 1)) - centers[:, None]  # (M, n)
    g = 0
    if model.kernel == 'gauss':
        # Normal density: exp(-d^2 / (2 w^2)) / (sqrt(2 pi) * w). The
        # normalising constant divides the exponential, it is not part of
        # the exponent.
        g = np.exp(-(distance ** 2) / (2 * (model.w ** 2))) / (np.sqrt(2 * np.pi) * model.w)
    elif model.kernel == 'exp':
        g = model.w * np.exp(-model.w * distance)
        # Values above 1 are zeroed, as in the original implementation.
        g[g > 1] = 0
    return g

In [7]:
def E_log_pi(alpha):
    """Return digamma(alpha) - digamma(sum(alpha)), element-wise.

    For a Dirichlet distribution with parameter vector ``alpha`` this is the
    expected value of the log of each component.
    """
    alpha_total = np.sum(alpha)
    return digamma(alpha) - digamma(alpha_total)

In [8]:
def Expectation_DMHP(Seqs,model,alg):
    """E-step: recompute the cluster responsibilities ``model.r``.

    For every sequence, an approximate expected log-likelihood is evaluated
    under each of the ``model.K`` clusters and turned into responsibilities
    with a numerically stable softmax.

    Parameters
    ----------
    Seqs : mapping
        Provides 'Time', 'Mark', 'Start' and 'Stop'; entry ``c`` of each
        belongs to sequence ``c``. Marks are treated as 1-based type indices.
    model : object
        Provides N, K, r, alpha, b (D, K), beta (D, M, K, D) plus whatever
        ``kernel`` / ``kernel_int`` read (landmark, w, kernel).
    alg : object
        Provides ``Tmax``. When falsy, each sequence's own 'Stop' is used;
        otherwise events at or after Tmax are dropped and Tmax is the stop
        time.

    Returns
    -------
    None. ``model.r`` is replaced by the new (N, K) responsibility matrix.
    """
    Nk = np.sum(model.r, 0)
    alpha = model.alpha + Nk
    prior_term = E_log_pi(alpha)
    EX = np.zeros((model.N, model.K))
    # Integral of the base intensity per unit time, one value per cluster.
    base_rate = np.sqrt(np.pi / 2) * np.sum(model.b, axis=0)
    for c in range(model.N):
        time = Seqs['Time'][c].flatten()
        # Marks are used as indices, so force an integer dtype.
        event = Seqs['Mark'][c].flatten().astype(int)
        Tstart = Seqs['Start'][c].flatten()
        if not alg.Tmax:
            Tstop = Seqs['Stop'][c].flatten()
        else:
            Tstop = alg.Tmax
            indt = time < alg.Tmax
            time = time[indt]
            event = event[indt]
        N = len(time)
        G = kernel_int(Tstop - time, model)  # (M, N)

        # Start every sequence from the prior term; a copy keeps the
        # in-place updates below from leaking into the next sequence.
        LL = prior_term.copy()
        for i in range(N):
            ui = event[i]
            ti = time[i]

            # Mean and variance of a Rayleigh variable with scale b:
            # sqrt(pi/2) * b and (2 - pi/2) * b**2.
            b_ui = model.b[ui - 1, :].flatten()
            E_lambda_i = np.sqrt(np.pi / 2) * b_ui
            V_lambda_i = (2 - np.pi / 2) * (b_ui ** 2)
            if i > 0:
                uj = event[0:i]
                tj = time[0:i]
                gij = kernel(ti - tj, model)              # (M, i)
                # Coefficients from each past event type uj to the current
                # type ui; result has shape (i, M, K).
                auiuj = model.beta[uj - 1, :, :, ui - 1]
                pij = gij.T[:, :, None] * auiuj           # (i, M, K)

                E_lambda_i = E_lambda_i + np.sum(pij, axis=(0, 1))
                V_lambda_i = V_lambda_i + np.sum(pij ** 2, axis=(0, 1))
            LL += np.log(E_lambda_i) - (V_lambda_i / (2 * (E_lambda_i ** 2)))
        LL -= (Tstop - Tstart).flatten() * base_rate
        # Integrated excitation: G broadcast over clusters, beta summed over
        # its last axis for each event's type.
        excitation = G.T[:, :, None] * np.sum(model.beta[event - 1, :, :, :], axis=3)
        LL -= np.sum(excitation, axis=(0, 1))
        # Softmax with the maximum subtracted for numerical stability.
        XX = LL - np.max(LL)
        EX[c, :] = np.exp(XX) / np.sum(np.exp(XX))
    model.r = EX