In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
layers = keras.layers
act = keras.activations
import pickle
import tensorflow_probability as tfp
tfd = tfp.distributions
import matplotlib.pyplot as plt

## Entraînement avec la loi Gamma

In [3]:
class Model:
    def __init__(self, context_size=32, dist=tfd.Gamma):
        self.context_size = context_size
        self.encoder = keras.layers.GRU(context_size, return_sequences=True)
        self.decoder = keras.layers.Dense(2, activation="relu")
        self.optimizer = keras.optimizers.Adam(learning_rate=0.001)
        self.dist = dist
        self.dist_params = {}


    def get_context(self, inter_times):
        tau = tf.expand_dims(inter_times, axis=-1)
        log_tau = tf.math.log(tf.clip_by_value(tau, 1e-8, tf.reduce_max(tau)))
        input = tf.concat([tau, log_tau], axis=-1)
        output = self.encoder(input)
        context = tf.pad(output[:, :-1, :], [[0, 0], [1, 0], [0, 0]])
        return context


    def get_inter_times_distribution(self, context):
        params = self.decoder(context)
        p1 = params[..., 0]
        p2 = params[..., 1]
        self.dist_params.setdefault("p1", []).append(p1)
        self.dist_params.setdefault("p2", []).append(p2)
        # print("P1~~~~~~\n", p1, "\n")
        # print("P2~~~~~~\n", p2, "\n")
        return self.dist(concentration=p1, rate=p2)


    def nll_loss(self, inter_times, seq_lengths):
        context = self.get_context(inter_times)
        inter_times_dist = self.get_inter_times_distribution(context)

        log_pdf = inter_times_dist.log_prob(inter_times)
        log_surv = inter_times_dist.log_survival_function(inter_times)

        # construit un masque pour ne sélectionner que les éléments
        # nécessaires dans chaque liste
        mask = np.cumsum(np.ones_like(log_pdf), axis=-1) \
            <= np.expand_dims(seq_lengths, axis=-1)
        log_like = tf.reduce_sum(log_pdf * mask, axis=-1)
        
        # idx est une liste de la forme [(a1, b1), (a2, b2), ...]
        # gather_nd sélectionne les éléments correspondant à ces indices
        # (ligne et colonne)
        idx = list(zip(range(len(seq_lengths)), seq_lengths))
        log_surv_last = tf.gather_nd(log_surv, idx)
        log_like += log_surv_last

        print(f"LOG_PDF~~~~~~ : {np.sum(np.isnan(log_pdf))}\n", log_pdf, "\n")
        print(f"LOG_SURV~~~~~~ : {np.sum(np.isnan(log_surv))}\n", log_surv, "\n")

        return -log_like
    

    @property
    def weights(self):
        return self.encoder.trainable_weights + self.decoder.trainable_weights
        
    
    def fit(self, epochs, inter_times, seq_lengths, t_end):
        for epoch in range(epochs + 1):
            with tf.GradientTape() as tape:
                loss = tf.reduce_mean(self.nll_loss(inter_times,
                                                    seq_lengths)) / t_end
            grads = tape.gradient(loss, self.weights)
            self.optimizer.apply_gradients(zip(grads, self.weights))

            # if epoch % 10 == 0:
            #     print(f"Loss at epoch {epoch}: {loss:.2f}")

            print(f"Loss at epoch {epoch}: {loss:.2f}")
            sum_nan = [np.sum(np.isnan(grad)) for grad in grads]
            print(f"GRADS~~~~~~ : {sum_nan}\n", grads, "\n")


    def sample(self, batch_size, t_end):
        inter_times = np.empty((batch_size, 0))
        next_context = tf.zeros(shape=(batch_size, 1, 32))
        generated = False

        while not generated:
            dist = self.get_inter_times_distribution(next_context)
            next_inter_times = dist.sample()
            inter_times = tf.concat([inter_times, next_inter_times], axis=-1)
            tau = tf.expand_dims(next_inter_times, axis=-1)
            log_tau = tf.math.log(
                tf.clip_by_value(tau, 1e-8, tf.reduce_max(tau)))
            input = tf.concat([tau, log_tau], axis=-1)
            next_context = self.encoder(input)

            generated = np.sum(inter_times, axis=-1).min() >= t_end

        return np.cumsum(inter_times, axis=-1)
    

    def next(self, inter_times, num_preds=1):
        inter_time = inter_times[-1]
        preds = []
        for _ in range(num_preds):
            last = inter_time
            tau = tf.expand_dims(last, axis=-1)
            log_tau = tf.math.log(
                tf.clip_by_value(tau, 1e-8, tf.reduce_max(tau)))
            input = tf.concat([tau, log_tau], axis=-1)
            context = self.encoder(input)
            dist = self.get_inter_times_distribution(context)
            inter_time = dist.sample(1)
            preds.append(inter_time)
        return inter_times[-1] + np.cumsum(preds)
    
file = open("data/shchur.pkl", "rb")
data = pickle.load(file)
t_end = data["t_end"]
arrival_times = data["arrival_times"]
seq_lengths = [len(times) for times in arrival_times]
inter_times_list = [np.diff(times, prepend=0, append=t_end)
                    for times in arrival_times]
inter_times = np.asarray([np.pad(inter_times, (0, np.max(seq_lengths) - size))
        for size, inter_times in zip(seq_lengths, inter_times_list)])
inter_times = tf.Variable(inter_times, dtype=tf.float32)
inter_times = tf.clip_by_value(inter_times, 1e-8, tf.reduce_max(inter_times))

model = Model(context_size=32)
model.fit(40, inter_times, seq_lengths, t_end)

LOG_PDF~~~~~~ : 1000
 tf.Tensor(
[[        nan -1.1158078  -0.3449545  ...        -inf        -inf
         -inf]
 [        nan -2.2312498  -0.68151593 ...        -inf        -inf
         -inf]
 [        nan -1.0404117  -2.7347007  ...        -inf        -inf
         -inf]
 ...
 [        nan -2.4676666  -0.5064529  ...        -inf        -inf
         -inf]
 [        nan -1.0413579  -1.6412879  ... -5.9676466         -inf
         -inf]
 [        nan  0.65008783 -0.14993227 ...        -inf        -inf
         -inf]], shape=(1000, 135), dtype=float32) 

LOG_SURV~~~~~~ : 0
 tf.Tensor(
[[-0.0000000e+00 -5.6608611e-01 -5.8571255e-01 ... -0.0000000e+00
  -0.0000000e+00 -0.0000000e+00]
 [-0.0000000e+00 -1.1754100e+00 -4.8082647e-01 ... -0.0000000e+00
  -0.0000000e+00 -0.0000000e+00]
 [-0.0000000e+00 -9.1536570e-01 -1.4509006e+00 ... -0.0000000e+00
  -0.0000000e+00 -0.0000000e+00]
 ...
 [-0.0000000e+00 -1.2951281e+00 -4.4745243e-01 ... -0.0000000e+00
  -0.0000000e+00 -0.0000000e+00]
 [-0.0

KeyboardInterrupt: 

## Entraînement basé sur un mélange

In [95]:
class Model:
    def __init__(self, context_size=32, R=1, dist=tfd.Weibull):
        self.context_size = context_size
        self.encoder = keras.layers.GRU(context_size, return_sequences=True)
        self.decoder = keras.layers.Dense(R * 3)
        self.optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
        self.R = R
        self.dist = dist
        self.dist_params = {}
        self.eps = 1e-8


    def get_context(self, inter_times):
        tau = tf.expand_dims(inter_times, axis=-1)
        log_tau = tf.math.log(tf.clip_by_value(tau, 1e-8, tf.reduce_max(tau)))
        input = tf.concat([tau, log_tau], axis=-1)
        output = self.encoder(input)
        context = tf.pad(output[:, :-1, :], [[0, 0], [1, 0], [0, 0]])
        return context


    def get_inter_times_distribution(self, context):
        params = self.decoder(context)
        w = tfd.Categorical(logits=tf.math.log(act.softmax(params[..., :self.R])))
        p1 = act.softplus(params[..., self.R:2*self.R])
        p2 = act.softplus(params[..., 2*self.R:])
        # print("W~~~~~~\n", w, "\n")
        # print("P1~~~~~~\n", p1, "\n")
        # print("P2~~~~~~\n", p2, "\n")

        self.dist_params.setdefault("w", []).append(w)
        self.dist_params.setdefault("p1", []).append(p1)
        self.dist_params.setdefault("p2", []).append(p2)
        return tfd.MixtureSameFamily(
            mixture_distribution=w,
            components_distribution=self.dist(p1, p2))


    def nll_loss(self, inter_times, seq_lengths):
        context = self.get_context(inter_times)
        inter_times_dist = self.get_inter_times_distribution(context)

        inter_times = tf.clip_by_value(inter_times, self.eps, tf.reduce_max(inter_times))
        log_pdf = inter_times_dist.log_prob(inter_times)
        log_surv = inter_times_dist.log_survival_function(inter_times)

        # construit un masque pour ne sélectionner que les éléments
        # nécessaires dans chaque liste
        mask = np.cumsum(np.ones_like(log_pdf), axis=-1) \
            <= np.expand_dims(seq_lengths, axis=-1)
        log_like = tf.reduce_sum(log_pdf * mask, axis=-1)
        
        # idx est une liste de la forme [(a1, b1), (a2, b2), ...]
        # gather_nd sélectionne les éléments correspondant à ces indices
        # (ligne et colonne)
        idx = list(zip(range(len(seq_lengths)), seq_lengths))
        log_surv_last = tf.gather_nd(log_surv, idx)
        log_like += log_surv_last

        # print(f"LOG_PDF~~~~~~ : {np.sum(np.isnan(log_pdf))}\n", log_pdf, "\n")
        # print(f"LOG_SURV~~~~~~ : {np.sum(np.isnan(log_surv))}\n", log_surv, "\n")

        return -log_like
    

    @property
    def weights(self):
        return self.encoder.trainable_weights + self.decoder.trainable_weights
        
    
    def fit(self, epochs, inter_times, seq_lengths, t_end):
        for epoch in range(epochs + 1):
            with tf.GradientTape() as tape:
                loss = tf.reduce_mean(self.nll_loss(inter_times,
                                                    seq_lengths)) / t_end
            grads = tape.gradient(loss, self.weights)
            self.optimizer.apply_gradients(zip(grads, self.weights))

            # if epoch % 10 == 0:
            #     print(f"Loss at epoch {epoch}: {loss:.2f}")
            print(f"Loss at epoch {epoch}: {loss:.2f}")
            # sum_nan = [np.sum(np.isnan(grad)) for grad in grads]
            # print(f"GRADS~~~~~~ : {sum_nan}\n", grads, "\n")
            
            # break  # debug


    def sample(self, batch_size, t_end):
        inter_times = np.empty((batch_size, 0))
        next_context = tf.zeros(shape=(batch_size, 1, 32))
        generated = False

        while not generated:
            dist = self.get_inter_times_distribution(next_context)
            next_inter_times = dist.sample()
            inter_times = tf.concat([inter_times, next_inter_times], axis=-1)
            tau = tf.expand_dims(next_inter_times, axis=-1)
            log_tau = tf.math.log(
                tf.clip_by_value(tau, 1e-8, tf.reduce_max(tau)))
            input = tf.concat([tau, log_tau], axis=-1)
            next_context = self.encoder(input)

            generated = np.sum(inter_times, axis=-1).min() >= t_end

        return np.cumsum(inter_times, axis=-1)
    

    def next(self, inter_times, num_preds=1):
        inter_time = inter_times[-1]
        preds = []
        for _ in range(num_preds):
            last = inter_time
            tau = tf.expand_dims(last, axis=-1)
            log_tau = tf.math.log(
                tf.clip_by_value(tau, 1e-8, tf.reduce_max(tau)))
            input = tf.concat([tau, log_tau], axis=-1)
            context = self.encoder(input)
            dist = self.get_inter_times_distribution(context)
            inter_time = dist.sample(1)
            preds.append(inter_time)
        return inter_times[-1] + np.cumsum(preds)


file = open("data/shchur.pkl", "rb")
data = pickle.load(file)
t_end = data["t_end"]
arrival_times = data["arrival_times"]
seq_lengths = [len(times) for times in arrival_times]
inter_times_list = [np.diff(times, prepend=0, append=t_end)
                    for times in arrival_times]
inter_times = np.asarray([np.pad(inter_times, (0, np.max(seq_lengths) - size))
        for size, inter_times in zip(seq_lengths, inter_times_list)])
inter_times = tf.Variable(inter_times, dtype=tf.float32)
inter_times = tf.clip_by_value(inter_times, 1e-8, tf.reduce_max(inter_times))

model = Model(context_size=32, R=5, dist=tfd.Weibull)
model.fit(30, inter_times, seq_lengths, t_end)

Loss at epoch 0: 0.91
Loss at epoch 1: 0.90
Loss at epoch 2: 0.89
Loss at epoch 3: 0.88
Loss at epoch 4: 0.88
Loss at epoch 5: 0.87
Loss at epoch 6: 0.87
Loss at epoch 7: 0.86
Loss at epoch 8: 0.86
Loss at epoch 9: 0.85
Loss at epoch 10: 0.85
Loss at epoch 11: 0.85
Loss at epoch 12: 0.84
Loss at epoch 13: nan
Loss at epoch 14: nan
Loss at epoch 15: nan
Loss at epoch 16: nan
Loss at epoch 17: nan
Loss at epoch 18: nan
Loss at epoch 19: nan
Loss at epoch 20: nan
Loss at epoch 21: nan
Loss at epoch 22: nan
Loss at epoch 23: nan
Loss at epoch 24: nan
Loss at epoch 25: nan
Loss at epoch 26: nan
Loss at epoch 27: nan
Loss at epoch 28: nan
Loss at epoch 29: nan
Loss at epoch 30: nan


In [1]:
model = Model(context_size=32, R=1, dist=tfd.Weibull)
model.fit(30, inter_times, seq_lengths, t_end)

NameError: name 'Model' is not defined

In [89]:
import pandas as pd
prob = tfd.Categorical(probs=[0.1, 0.3, 0.2])
samples = prob.sample(10000)
pd.Series(samples).value_counts().sort_index() / samples.shape[0]

0    0.1667
1    0.4969
2    0.3364
Name: count, dtype: float64

In [90]:
tfd.Categorical?

[1;31mInit signature:[0m
[0mtfd[0m[1;33m.[0m[0mCategorical[0m[1;33m([0m[1;33m
[0m    [0mlogits[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mprobs[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdtype[0m[1;33m=[0m[0mtf[0m[1;33m.[0m[0mint32[0m[1;33m,[0m[1;33m
[0m    [0mforce_probs_to_zero_outside_support[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mvalidate_args[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mallow_nan_stats[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mname[0m[1;33m=[0m[1;34m'Categorical'[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m     
Categorical distribution over integers.

The Categorical distribution is parameterized by either probabilities or
log-probabilities of a set of `K` classes. It is defined over the integers
`{0, 1, ..., K-1}`.

The Categorical distribution is closely related to the `OneHotCategorical` a