# À propos de ce livret

Recherches liées au développement de cette méthode : 
>Mehmet Gonen, Samuel Kaski (2014). **Kernelized Bayesian Matrix Factorization**.  
>Mehmet Gonen, Suleiman A. Khan, Samuel Kaski (2013). **Kernelized Bayesian Matrix Factorization**.


La méthode implantée provient de la source suivante : 
>[**Dépôt 1**](https://github.com/mehmetgonen/kbmf) | *Version en R et MATLAB*



**Notes sur le cahier**
- Revoir la décomposition (génération des facteurs latents pour la création des noyaux)
- Améliorer la performance du code (Rcpp ?)

## Préparation pour l'utilisation

In [None]:
# Chargement des données
library(R.matlab)

# Barre de progression
library(progress)

In [None]:
# NOTE(review): absolute, machine-specific path. The relative "data/..." paths
# used later in this notebook to read the .mat files are resolved against this
# working directory, so it must be adapted before running on another machine.
setwd("C:/Users/amass/OneDrive/02_Education/02_Maitrise/Cours/PROJET_MAITRISE/application")

# Données

## Chargement des données

In [None]:
# Data tensor: read the .mat file and keep only the array stored under "tensor"
tensor <- readMat("data/Guangzhou-data-set/tensor.mat")$tensor
dim(tensor)

# Random matrix: the element is extracted under the name "random.matrix"
random_matrix <- readMat("data/Guangzhou-data-set/random_matrix.mat")$random.matrix
dim(random_matrix)

# Random tensor: the element is extracted under the name "random.tensor"
random_tensor <- readMat("data/Guangzhou-data-set/random_tensor.mat")$random.tensor
dim(random_tensor)

## Format des données

In [None]:
# Whether to work on the truncated data set (TRUE) or on the full one (FALSE)
petit_donnees <- TRUE
# Whether to unfold the tensor into a matrix (TRUE) or keep the tensor only (FALSE)
matrice <- TRUE

# Extents kept along the first two axes when the data set is truncated
capteurs <- 50
jours <- 10
sequences <- dim(tensor)[3]  # the third axis is always kept whole

# Truncated vs. full data: both tensors are cut the same way so that they keep
# identical dimensions; drop = FALSE keeps three dimensions even for extent 1.
if (petit_donnees) {
    tensor <- tensor[seq_len(capteurs), seq_len(jours), seq_len(sequences), drop = FALSE]
    random_tensor <- random_tensor[seq_len(capteurs), seq_len(jours), seq_len(sequences),
        drop = FALSE]
}

# Matrix vs. tensor: unfold to (axis 1) x (axis 2 * axis 3).
# dim(tensor) is read directly: tensor[, , ] would drop any axis of extent 1
# and make dim(...)[3] NA.
if (matrice) {
    mat_dense <- array(tensor, c(dim(tensor)[1], dim(tensor)[2] * dim(tensor)[3]))
}

dim(tensor)
# mat_dense only exists when the matrix form was requested
if (matrice) {
    dim(mat_dense)
}

## Scénarios de manquants

In [None]:
# Missing rate used as the rounding threshold, and choice of scenario
tx_manquant <- 0.2
manquants_aleatoires <- TRUE

# Build a binary tensor with the same dimensions as the data: an entry is 0
# when the corresponding random value is below tx_manquant, 1 otherwise.
if (manquants_aleatoires) {
    # Random missing: one independent draw per tensor entry
    print("Manquants aléatoires")
    tens_binaire <- round(random_tensor + 0.5 - tx_manquant)
} else {
    # Non-random missing: one draw per (axis 1, axis 2) pair, repeated along
    # the whole third axis. array() recycles the values of the 2-D mask over
    # the third dimension, which is what the element-wise loop used to do.
    print("Manquants non-aléatoires")
    masque <- round(random_matrix[seq_len(dim(tensor)[1]), seq_len(dim(tensor)[2]),
        drop = FALSE] + 0.5 - tx_manquant)
    tens_binaire <- array(masque, dim(tensor))
}

# Unfold the mask like the data matrix. dim() is read on the full array:
# indexing with [, , ] would drop any axis of extent 1.
mat_binaire <- array(tens_binaire, c(dim(tens_binaire)[1], dim(tens_binaire)[2] *
    dim(tens_binaire)[3]))

# Element-wise product: masked entries become 0.
# NOTE(review): the semi-supervised training functions in this notebook detect
# missing entries with is.na(Y); confirm that these zeros are converted to NA
# before training.
mat_manq <- mat_dense * mat_binaire
head(mat_manq)

# Méthode

## Noyaux

Les « noyaux » *(Kernel en anglais)* permettent d’ajouter de l’information supplémentaire sur les lignes et les colonnes dans le modèle. Selon les auteurs de l’étude, cette manière de faire aide à faire des prédictions, notamment dans le cas d’un démarrage à froid. En outre, la prédiction de données manquantes peut se faire sur une colonne ou une ligne avec seulement des données manquantes.

Une des innovations soulignées est la prise en compte de multiples sources d’information au moyen de plusieurs « noyaux ». L’apprentissage par « noyaux » multiples *(multiple kernel learning en anglais)* combine plusieurs fonctions « noyaux » pour trouver une mesure de similarité. 

Une autre application de cette méthode est l’inférence de graphes bipartis *(bipartite graph inference en anglais)*.

**Réduction non linéaire de la dimensionnalité par « noyaux »**

$\{G_{x,m} = A_x^{\top} K_{x,m}\}_{m=1}^{P_x}$



# Modélisation

- Quatre scénarios peuvent se présenter puisque les fonctions noyaux multiples peuvent être placées soit sur U, soit sur V, soit sur les deux en même temps, soit sur aucun des deux (un seul noyau de chaque côté). Des stratégies différentes s'appliquent selon l'emplacement des noyaux.
- À ces quatre scénarios s'ajoutent les stratégies pour les matrices complètes ou avec des valeurs manquantes.



## Un noyau sur les deux facteurs latents U et V

In [None]:
#' Train the KBMF regression model with one kernel per side (complete target).
#'
#' Runs `parameters$iteration` sweeps of variational updates over, in order,
#' Lambda_x, A_x, G_x, Lambda_z, A_z and G_z. With `iteration = 0` no sweep is
#' run and the returned state holds the initial values.
#'
#' @param Kx Kernel matrix of the row side; its dimensions are read as Dx x Nx.
#' @param Kz Kernel matrix of the column side; its dimensions are read as Dz x Nz.
#' @param Y Target matrix, used as Nx x Nz. It enters the updates through plain
#'   matrix products, so NA entries would propagate into the estimates.
#' @param parameters List providing `seed`, `R`, `sigma_g`, `sigma_y`,
#'   `alpha_lambda`, `beta_lambda` and `iteration`.
#' @return Invisibly, a list with `Lambdax`, `Ax`, `Lambdaz`, `Az` and the
#'   `parameters` that were passed in.
kbmf1k1k_supervised_regression_variational_train <- function(Kx, Kz, Y, parameters) {
    set.seed(parameters$seed)

    # Problem sizes and hyper-parameters
    Dx <- dim(Kx)[1]
    Nx <- dim(Kx)[2]
    Dz <- dim(Kz)[1]
    Nz <- dim(Kz)[2]
    R <- parameters$R
    sigma_g <- parameters$sigma_g
    sigma_y <- parameters$sigma_y

    # Initial state. The random draws are made in the order Ax, Gx, Az, Gz;
    # this order determines the result for a given seed.
    Lambdax <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dx, R),
        beta = matrix(parameters$beta_lambda, Dx, R))
    Ax <- list(mu = matrix(rnorm(Dx * R), Dx, R),
        sigma = array(diag(1, Dx, Dx), c(Dx, Dx, R)))
    Gx <- list(mu = matrix(rnorm(R * Nx), R, Nx), sigma = diag(1, R, R))

    Lambdaz <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dz, R),
        beta = matrix(parameters$beta_lambda, Dz, R))
    Az <- list(mu = matrix(rnorm(Dz * R), Dz, R),
        sigma = array(diag(1, Dz, Dz), c(Dz, Dz, R)))
    Gz <- list(mu = matrix(rnorm(R * Nz), R, Nz), sigma = diag(1, R, R))

    # K %*% t(K) does not change across sweeps: compute it once
    KxKx <- tcrossprod(Kx, Kx)
    KzKz <- tcrossprod(Kz, Kz)

    # seq_len() so that iteration = 0 runs no sweep (1:0 would run two)
    for (iter in seq_len(parameters$iteration)) {
        # update Lambdax
        for (s in seq_len(R)) {
            Lambdax$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Ax$mu[, s]^2 +
                diag(Ax$sigma[, , s])))
        }
        # update Ax (one covariance per latent component)
        for (s in seq_len(R)) {
            Ax$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdax$alpha[, s] *
                Lambdax$beta[, s]), Dx, Dx) + KxKx/sigma_g^2))
            Ax$mu[, s] <- Ax$sigma[, , s] %*% (tcrossprod(Kx, Gx$mu[s, , drop = FALSE])/sigma_g^2)
        }
        # update Gx (a single covariance shared by all columns)
        Gx$sigma <- chol2inv(chol(diag(1/sigma_g^2, R, R) + (tcrossprod(Gz$mu, Gz$mu) +
            Nz * Gz$sigma)/sigma_y^2))
        Gx$mu <- Gx$sigma %*% (crossprod(Ax$mu, Kx)/sigma_g^2 + tcrossprod(Gz$mu,
            Y)/sigma_y^2)

        # update Lambdaz
        for (s in seq_len(R)) {
            Lambdaz$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Az$mu[, s]^2 +
                diag(Az$sigma[, , s])))
        }
        # update Az
        for (s in seq_len(R)) {
            Az$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdaz$alpha[, s] *
                Lambdaz$beta[, s]), Dz, Dz) + KzKz/sigma_g^2))
            Az$mu[, s] <- Az$sigma[, , s] %*% (tcrossprod(Kz, Gz$mu[s, , drop = FALSE])/sigma_g^2)
        }
        # update Gz (uses the Gx values refreshed above)
        Gz$sigma <- chol2inv(chol(diag(1/sigma_g^2, R, R) + (tcrossprod(Gx$mu, Gx$mu) +
            Nx * Gx$sigma)/sigma_y^2))
        Gz$mu <- Gz$sigma %*% (crossprod(Az$mu, Kz)/sigma_g^2 + Gx$mu %*% Y/sigma_y^2)
    }

    state <- list(Lambdax = Lambdax, Ax = Ax, Lambdaz = Lambdaz, Az = Az, parameters = parameters)
    invisible(state)
}

In [None]:
#' Train the KBMF regression model with one kernel per side (target with NA).
#'
#' Semi-supervised variant: missing entries of `Y` are marked with NA and each
#' column of G_x / G_z is updated from the observed entries of the matching
#' row / column of `Y` only. Runs `parameters$iteration` sweeps; with
#' `iteration = 0` no sweep is run and the initial values are returned.
#'
#' @param Kx Kernel matrix of the row side; its dimensions are read as Dx x Nx.
#' @param Kz Kernel matrix of the column side; its dimensions are read as Dz x Nz.
#' @param Y Target matrix indexed as `Y[i, j]` with i in 1..Nx and j in 1..Nz;
#'   NA marks a missing entry.
#' @param parameters List providing `seed`, `R`, `sigma_g`, `sigma_y`,
#'   `alpha_lambda`, `beta_lambda` and `iteration`.
#' @return Invisibly, a list with `Lambdax`, `Ax`, `Lambdaz`, `Az` and the
#'   `parameters` that were passed in.
kbmf1k1k_semisupervised_regression_variational_train <- function(Kx, Kz, Y, parameters) {
    set.seed(parameters$seed)

    Dx <- dim(Kx)[1]
    Nx <- dim(Kx)[2]
    Dz <- dim(Kz)[1]
    Nz <- dim(Kz)[2]
    R <- parameters$R
    sigma_g <- parameters$sigma_g
    sigma_y <- parameters$sigma_y

    # Initial state; random draws are made in the order Ax, Gx, Az, Gz.
    # Unlike the supervised variant, G keeps one covariance per column
    # because each column sees a different set of observed entries.
    Lambdax <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dx, R),
        beta = matrix(parameters$beta_lambda, Dx, R))
    Ax <- list(mu = matrix(rnorm(Dx * R), Dx, R),
        sigma = array(diag(1, Dx, Dx), c(Dx, Dx, R)))
    Gx <- list(mu = matrix(rnorm(R * Nx), R, Nx),
        sigma = array(diag(1, R, R), c(R, R, Nx)))

    Lambdaz <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dz, R),
        beta = matrix(parameters$beta_lambda, Dz, R))
    Az <- list(mu = matrix(rnorm(Dz * R), Dz, R),
        sigma = array(diag(1, Dz, Dz), c(Dz, Dz, R)))
    Gz <- list(mu = matrix(rnorm(R * Nz), R, Nz),
        sigma = array(diag(1, R, R), c(R, R, Nz)))

    # K %*% t(K) does not change across sweeps: compute it once
    KxKx <- tcrossprod(Kx, Kx)
    KzKz <- tcrossprod(Kz, Kz)

    # seq_len() so that iteration = 0 runs no sweep (1:0 would run two)
    for (iter in seq_len(parameters$iteration)) {
        # update Lambdax
        for (s in seq_len(R)) {
            Lambdax$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Ax$mu[, s]^2 +
                diag(Ax$sigma[, , s])))
        }
        # update Ax
        for (s in seq_len(R)) {
            Ax$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdax$alpha[, s] *
                Lambdax$beta[, s]), Dx, Dx) + KxKx/sigma_g^2))
            Ax$mu[, s] <- Ax$sigma[, , s] %*% (tcrossprod(Kx, Gx$mu[s, , drop = FALSE])/sigma_g^2)
        }
        # update Gx: column i only uses the columns of Gz observed in row i of Y
        for (i in seq_len(Nx)) {
            indices <- which(!is.na(Y[i, ]))
            Gx$sigma[, , i] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + (tcrossprod(Gz$mu[,
                indices, drop = FALSE], Gz$mu[, indices, drop = FALSE]) + apply(Gz$sigma[,
                , indices, drop = FALSE], 1:2, sum))/sigma_y^2))
            Gx$mu[, i] <- Gx$sigma[, , i] %*% (crossprod(Ax$mu, Kx[, i])/sigma_g^2 +
                tcrossprod(Gz$mu[, indices, drop = FALSE], Y[i, indices, drop = FALSE])/sigma_y^2)
        }
        # update Lambdaz
        for (s in seq_len(R)) {
            Lambdaz$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Az$mu[, s]^2 +
                diag(Az$sigma[, , s])))
        }
        # update Az
        for (s in seq_len(R)) {
            Az$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdaz$alpha[, s] *
                Lambdaz$beta[, s]), Dz, Dz) + KzKz/sigma_g^2))
            Az$mu[, s] <- Az$sigma[, , s] %*% (tcrossprod(Kz, Gz$mu[s, , drop = FALSE])/sigma_g^2)
        }
        # update Gz: column j only uses the columns of Gx observed in column j of Y
        for (j in seq_len(Nz)) {
            indices <- which(!is.na(Y[, j]))
            Gz$sigma[, , j] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + (tcrossprod(Gx$mu[,
                indices, drop = FALSE], Gx$mu[, indices, drop = FALSE]) + apply(Gx$sigma[,
                , indices, drop = FALSE], 1:2, sum))/sigma_y^2))
            Gz$mu[, j] <- Gz$sigma[, , j] %*% (crossprod(Az$mu, Kz[, j])/sigma_g^2 +
                Gx$mu[, indices, drop = FALSE] %*% Y[indices, j, drop = FALSE]/sigma_y^2)
        }
    }

    state <- list(Lambdax = Lambdax, Ax = Ax, Lambdaz = Lambdaz, Az = Az, parameters = parameters)
    invisible(state)
}

### Fonctions de test du modèle

In [None]:
# Supervised mode: predict from a trained single-kernel state.
# Kx, Kz: kernel matrices between training objects (rows) and the objects to
# predict (columns); state: list holding Ax$mu and Az$mu.
# Returns (invisibly) list(Gx, Gz, Y), each a list with a `mu` element, where
# Y$mu = t(Gx$mu) %*% Gz$mu.
kbmf1k1k_supervised_regression_variational_test <- function(Kx, Kz, state) {
    latent_x <- crossprod(state$Ax$mu, Kx)
    latent_z <- crossprod(state$Az$mu, Kz)

    invisible(list(
        Gx = list(mu = latent_x),
        Gz = list(mu = latent_z),
        Y = list(mu = crossprod(latent_x, latent_z))
    ))
}

# Semi-supervised mode: predict from a trained single-kernel state.
# Projects each kernel matrix with the matching projection mean (Ax$mu, Az$mu)
# and multiplies the two projections to obtain the predicted target.
# Returns (invisibly) list(Gx, Gz, Y), each a list with a `mu` element.
kbmf1k1k_semisupervised_regression_variational_test <- function(Kx, Kz, state) {
    prediction <- list(
        Gx = list(mu = crossprod(state$Ax$mu, Kx)),
        Gz = list(mu = crossprod(state$Az$mu, Kz))
    )
    prediction$Y <- list(mu = crossprod(prediction$Gx$mu, prediction$Gz$mu))

    invisible(prediction)
}

## Noyaux d'apprentissage multiple sur U et V

In [None]:
#' Train the KBMF regression model with multiple kernels on both sides
#' (complete target).
#'
#' Supervised variant: `Y` enters the updates through plain matrix products.
#' Each sweep updates, in order, Lambda_x, A_x, G_x, eta_x, e_x, H_x and then
#' the same quantities on the z side. With `iteration = 0` no sweep is run and
#' the returned state holds the initial values.
#'
#' @param Kx Array of row-side kernels; its dimensions are read as Dx x Nx x Px.
#' @param Kz Array of column-side kernels; its dimensions are read as Dz x Nz x Pz.
#' @param Y Target matrix, used as Nx x Nz.
#' @param parameters List providing `seed`, `R`, `sigma_g`, `sigma_h`,
#'   `sigma_y`, `alpha_lambda`, `beta_lambda`, `alpha_eta`, `beta_eta` and
#'   `iteration`.
#' @return Invisibly, a list with `Lambdax`, `Ax`, `etax`, `ex`, `Lambdaz`,
#'   `Az`, `etaz`, `ez` and the `parameters` that were passed in.
kbmf1mkl1mkl_supervised_regression_variational_train <- function(Kx, Kz, Y, parameters) {
    set.seed(parameters$seed)

    # Problem sizes and hyper-parameters
    Dx <- dim(Kx)[1]
    Nx <- dim(Kx)[2]
    Px <- dim(Kx)[3]
    Dz <- dim(Kz)[1]
    Nz <- dim(Kz)[2]
    Pz <- dim(Kz)[3]
    R <- parameters$R
    sigma_g <- parameters$sigma_g
    sigma_h <- parameters$sigma_h
    sigma_y <- parameters$sigma_y

    # Initial state. Random draws are made in the order Ax, Gx, Hx, Az, Gz, Hz;
    # this order determines the result for a given seed.
    Lambdax <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dx, R),
        beta = matrix(parameters$beta_lambda, Dx, R))
    Ax <- list(mu = matrix(rnorm(Dx * R), Dx, R),
        sigma = array(diag(1, Dx, Dx), c(Dx, Dx, R)))
    Gx <- list(mu = array(rnorm(R * Nx * Px), c(R, Nx, Px)),
        sigma = array(diag(1, R, R), c(R, R, Px)))
    # Kernel weights e_x (all starting at 1) and their gamma prior eta_x
    etax <- list(alpha = matrix(parameters$alpha_eta + 0.5, Px, 1),
        beta = matrix(parameters$beta_eta, Px, 1))
    ex <- list(mu = matrix(1, Px, 1), sigma = diag(1, Px, Px))
    Hx <- list(mu = matrix(rnorm(R * Nx), R, Nx), sigma = diag(1, R, R))

    Lambdaz <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dz, R),
        beta = matrix(parameters$beta_lambda, Dz, R))
    Az <- list(mu = matrix(rnorm(Dz * R), Dz, R),
        sigma = array(diag(1, Dz, Dz), c(Dz, Dz, R)))
    Gz <- list(mu = array(rnorm(R * Nz * Pz), c(R, Nz, Pz)),
        sigma = array(diag(1, R, R), c(R, R, Pz)))
    etaz <- list(alpha = matrix(parameters$alpha_eta + 0.5, Pz, 1),
        beta = matrix(parameters$beta_eta, Pz, 1))
    ez <- list(mu = matrix(1, Pz, 1), sigma = diag(1, Pz, Pz))
    Hz <- list(mu = matrix(rnorm(R * Nz), R, Nz), sigma = diag(1, R, R))

    # Cache the sum over kernels of K_m %*% t(K_m), then flatten the kernel
    # array to Dx x (Nx * Px) so that kernel m occupies columns
    # ((m - 1) * Nx + 1):(m * Nx).
    KxKx <- matrix(0, Dx, Dx)
    for (m in seq_len(Px)) {
        KxKx <- KxKx + tcrossprod(Kx[, , m], Kx[, , m])
    }
    Kx <- matrix(Kx, Dx, Nx * Px)

    KzKz <- matrix(0, Dz, Dz)
    for (n in seq_len(Pz)) {
        KzKz <- KzKz + tcrossprod(Kz[, , n], Kz[, , n])
    }
    Kz <- matrix(Kz, Dz, Nz * Pz)

    # seq_len() so that iteration = 0 runs no sweep (1:0 would run two)
    for (iter in seq_len(parameters$iteration)) {
        # update Lambdax
        for (s in seq_len(R)) {
            Lambdax$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Ax$mu[, s]^2 +
                diag(Ax$sigma[, , s])))
        }
        # update Ax
        for (s in seq_len(R)) {
            Ax$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdax$alpha[, s] *
                Lambdax$beta[, s]), Dx, Dx) + KxKx/sigma_g^2))
            Ax$mu[, s] <- Ax$sigma[, , s] %*% (Kx %*% matrix(Gx$mu[s, , ], Nx * Px,
                1)/sigma_g^2)
        }
        # update Gx: kernel m is updated using the current values of the
        # other kernels, so the order of this loop matters
        for (m in seq_len(Px)) {
            Gx$sigma[, , m] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + diag((ex$mu[m] *
                ex$mu[m] + ex$sigma[m, m])/sigma_h^2, R, R)))
            Gx$mu[, , m] <- crossprod(Ax$mu, Kx[, ((m - 1) * Nx + 1):(m * Nx)])/sigma_g^2 +
                ex$mu[m] * Hx$mu/sigma_h^2
            for (o in setdiff(seq_len(Px), m)) {
                Gx$mu[, , m] <- Gx$mu[, , m] - (ex$mu[m] * ex$mu[o] + ex$sigma[m,
                  o]) * Gx$mu[, , o]/sigma_h^2
            }
            Gx$mu[, , m] <- Gx$sigma[, , m] %*% Gx$mu[, , m]
        }
        # update etax
        etax$beta <- 1/(1/parameters$beta_eta + 0.5 * (ex$mu^2 + diag(ex$sigma)))
        # update ex: build the precision matrix, invert it, then the mean
        ex$sigma <- diag(as.vector(etax$alpha * etax$beta))
        for (m in seq_len(Px)) {
            for (o in seq_len(Px)) {
                ex$sigma[m, o] <- ex$sigma[m, o] + (sum(Gx$mu[, , m] * Gx$mu[, ,
                  o]) + (m == o) * Nx * sum(diag(Gx$sigma[, , m])))/sigma_h^2
            }
        }
        ex$sigma <- chol2inv(chol(ex$sigma))
        for (m in seq_len(Px)) {
            ex$mu[m] <- sum(Gx$mu[, , m] * Hx$mu)/sigma_h^2
        }
        ex$mu <- ex$sigma %*% ex$mu
        # update Hx
        Hx$sigma <- chol2inv(chol(diag(1/sigma_h^2, R, R) + (tcrossprod(Hz$mu, Hz$mu) +
            Nz * Hz$sigma)/sigma_y^2))
        Hx$mu <- tcrossprod(Hz$mu, Y)/sigma_y^2
        for (m in seq_len(Px)) {
            Hx$mu <- Hx$mu + ex$mu[m] * Gx$mu[, , m]/sigma_h^2
        }
        Hx$mu <- Hx$sigma %*% Hx$mu

        # update Lambdaz
        for (s in seq_len(R)) {
            Lambdaz$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Az$mu[, s]^2 +
                diag(Az$sigma[, , s])))
        }
        # update Az
        for (s in seq_len(R)) {
            Az$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdaz$alpha[, s] *
                Lambdaz$beta[, s]), Dz, Dz) + KzKz/sigma_g^2))
            Az$mu[, s] <- Az$sigma[, , s] %*% (Kz %*% matrix(Gz$mu[s, , ], Nz * Pz,
                1)/sigma_g^2)
        }
        # update Gz
        for (n in seq_len(Pz)) {
            Gz$sigma[, , n] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + diag((ez$mu[n] *
                ez$mu[n] + ez$sigma[n, n])/sigma_h^2, R, R)))
            Gz$mu[, , n] <- crossprod(Az$mu, Kz[, ((n - 1) * Nz + 1):(n * Nz)])/sigma_g^2 +
                ez$mu[n] * Hz$mu/sigma_h^2
            for (p in setdiff(seq_len(Pz), n)) {
                Gz$mu[, , n] <- Gz$mu[, , n] - (ez$mu[n] * ez$mu[p] + ez$sigma[n,
                  p]) * Gz$mu[, , p]/sigma_h^2
            }
            Gz$mu[, , n] <- Gz$sigma[, , n] %*% Gz$mu[, , n]
        }
        # update etaz
        etaz$beta <- 1/(1/parameters$beta_eta + 0.5 * (ez$mu^2 + diag(ez$sigma)))
        # update ez
        ez$sigma <- diag(as.vector(etaz$alpha * etaz$beta))
        for (n in seq_len(Pz)) {
            for (p in seq_len(Pz)) {
                ez$sigma[n, p] <- ez$sigma[n, p] + (sum(Gz$mu[, , n] * Gz$mu[, ,
                  p]) + (n == p) * Nz * sum(diag(Gz$sigma[, , n])))/sigma_h^2
            }
        }
        ez$sigma <- chol2inv(chol(ez$sigma))
        for (n in seq_len(Pz)) {
            ez$mu[n] <- sum(Gz$mu[, , n] * Hz$mu)/sigma_h^2
        }
        ez$mu <- ez$sigma %*% ez$mu
        # update Hz (uses the Hx values refreshed above)
        Hz$sigma <- chol2inv(chol(diag(1/sigma_h^2, R, R) + (tcrossprod(Hx$mu, Hx$mu) +
            Nx * Hx$sigma)/sigma_y^2))
        Hz$mu <- Hx$mu %*% Y/sigma_y^2
        for (n in seq_len(Pz)) {
            Hz$mu <- Hz$mu + ez$mu[n] * Gz$mu[, , n]/sigma_h^2
        }
        Hz$mu <- Hz$sigma %*% Hz$mu
    }

    state <- list(Lambdax = Lambdax, Ax = Ax, etax = etax, ex = ex, Lambdaz = Lambdaz,
        Az = Az, etaz = etaz, ez = ez, parameters = parameters)
    invisible(state)
}


#' Train the KBMF regression model with multiple kernels on both sides
#' (target with NA).
#'
#' Semi-supervised variant: missing entries of `Y` are marked with NA and each
#' column of H_x / H_z is updated from the observed entries of the matching
#' row / column of `Y` only. With `iteration = 0` no sweep is run and the
#' returned state holds the initial values.
#'
#' @param Kx Array of row-side kernels; its dimensions are read as Dx x Nx x Px.
#' @param Kz Array of column-side kernels; its dimensions are read as Dz x Nz x Pz.
#' @param Y Target matrix indexed as `Y[i, j]` with i in 1..Nx and j in 1..Nz;
#'   NA marks a missing entry.
#' @param parameters List providing `seed`, `R`, `sigma_g`, `sigma_h`,
#'   `sigma_y`, `alpha_lambda`, `beta_lambda`, `alpha_eta`, `beta_eta` and
#'   `iteration`.
#' @return Invisibly, a list with `Lambdax`, `Ax`, `etax`, `ex`, `Lambdaz`,
#'   `Az`, `etaz`, `ez` and the `parameters` that were passed in.
kbmf1mkl1mkl_semisupervised_regression_variational_train <- function(Kx, Kz, Y, parameters) {
    set.seed(parameters$seed)

    # Problem sizes and hyper-parameters
    Dx <- dim(Kx)[1]
    Nx <- dim(Kx)[2]
    Px <- dim(Kx)[3]
    Dz <- dim(Kz)[1]
    Nz <- dim(Kz)[2]
    Pz <- dim(Kz)[3]
    R <- parameters$R
    sigma_g <- parameters$sigma_g
    sigma_h <- parameters$sigma_h
    sigma_y <- parameters$sigma_y

    # Initial state. Random draws are made in the order Ax, Gx, Hx, Az, Gz, Hz.
    # H keeps one covariance per column because each column sees a different
    # set of observed entries of Y.
    Lambdax <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dx, R),
        beta = matrix(parameters$beta_lambda, Dx, R))
    Ax <- list(mu = matrix(rnorm(Dx * R), Dx, R),
        sigma = array(diag(1, Dx, Dx), c(Dx, Dx, R)))
    Gx <- list(mu = array(rnorm(R * Nx * Px), c(R, Nx, Px)),
        sigma = array(diag(1, R, R), c(R, R, Px)))
    etax <- list(alpha = matrix(parameters$alpha_eta + 0.5, Px, 1),
        beta = matrix(parameters$beta_eta, Px, 1))
    ex <- list(mu = matrix(1, Px, 1), sigma = diag(1, Px, Px))
    Hx <- list(mu = matrix(rnorm(R * Nx), R, Nx),
        sigma = array(diag(1, R, R), c(R, R, Nx)))

    Lambdaz <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dz, R),
        beta = matrix(parameters$beta_lambda, Dz, R))
    Az <- list(mu = matrix(rnorm(Dz * R), Dz, R),
        sigma = array(diag(1, Dz, Dz), c(Dz, Dz, R)))
    Gz <- list(mu = array(rnorm(R * Nz * Pz), c(R, Nz, Pz)),
        sigma = array(diag(1, R, R), c(R, R, Pz)))
    etaz <- list(alpha = matrix(parameters$alpha_eta + 0.5, Pz, 1),
        beta = matrix(parameters$beta_eta, Pz, 1))
    ez <- list(mu = matrix(1, Pz, 1), sigma = diag(1, Pz, Pz))
    Hz <- list(mu = matrix(rnorm(R * Nz), R, Nz),
        sigma = array(diag(1, R, R), c(R, R, Nz)))

    # Cache the sum over kernels of K_m %*% t(K_m), then flatten the kernel
    # array to Dx x (Nx * Px) so that kernel m occupies columns
    # ((m - 1) * Nx + 1):(m * Nx).
    KxKx <- matrix(0, Dx, Dx)
    for (m in seq_len(Px)) {
        KxKx <- KxKx + tcrossprod(Kx[, , m], Kx[, , m])
    }
    Kx <- matrix(Kx, Dx, Nx * Px)

    KzKz <- matrix(0, Dz, Dz)
    for (n in seq_len(Pz)) {
        KzKz <- KzKz + tcrossprod(Kz[, , n], Kz[, , n])
    }
    Kz <- matrix(Kz, Dz, Nz * Pz)

    # seq_len() so that iteration = 0 runs no sweep (1:0 would run two)
    for (iter in seq_len(parameters$iteration)) {
        # update Lambdax
        for (s in seq_len(R)) {
            Lambdax$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Ax$mu[, s]^2 +
                diag(Ax$sigma[, , s])))
        }
        # update Ax
        for (s in seq_len(R)) {
            Ax$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdax$alpha[, s] *
                Lambdax$beta[, s]), Dx, Dx) + KxKx/sigma_g^2))
            Ax$mu[, s] <- Ax$sigma[, , s] %*% (Kx %*% matrix(Gx$mu[s, , ], Nx * Px,
                1)/sigma_g^2)
        }
        # update Gx: kernel m is updated using the current values of the
        # other kernels, so the order of this loop matters
        for (m in seq_len(Px)) {
            Gx$sigma[, , m] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + diag((ex$mu[m] *
                ex$mu[m] + ex$sigma[m, m])/sigma_h^2, R, R)))
            Gx$mu[, , m] <- crossprod(Ax$mu, Kx[, ((m - 1) * Nx + 1):(m * Nx)])/sigma_g^2 +
                ex$mu[m] * Hx$mu/sigma_h^2
            for (o in setdiff(seq_len(Px), m)) {
                Gx$mu[, , m] <- Gx$mu[, , m] - (ex$mu[m] * ex$mu[o] + ex$sigma[m,
                  o]) * Gx$mu[, , o]/sigma_h^2
            }
            Gx$mu[, , m] <- Gx$sigma[, , m] %*% Gx$mu[, , m]
        }
        # update etax
        etax$beta <- 1/(1/parameters$beta_eta + 0.5 * (ex$mu^2 + diag(ex$sigma)))
        # update ex: build the precision matrix, invert it, then the mean
        ex$sigma <- diag(as.vector(etax$alpha * etax$beta))
        for (m in seq_len(Px)) {
            for (o in seq_len(Px)) {
                ex$sigma[m, o] <- ex$sigma[m, o] + (sum(Gx$mu[, , m] * Gx$mu[, ,
                  o]) + (m == o) * Nx * sum(diag(Gx$sigma[, , m])))/sigma_h^2
            }
        }
        ex$sigma <- chol2inv(chol(ex$sigma))
        for (m in seq_len(Px)) {
            ex$mu[m] <- sum(Gx$mu[, , m] * Hx$mu)/sigma_h^2
        }
        ex$mu <- ex$sigma %*% ex$mu
        # update Hx: column i only uses the columns of Hz observed in row i of Y
        for (i in seq_len(Nx)) {
            indices <- which(!is.na(Y[i, ]))
            Hx$sigma[, , i] <- chol2inv(chol(diag(1/sigma_h^2, R, R) + (tcrossprod(Hz$mu[,
                indices, drop = FALSE], Hz$mu[, indices, drop = FALSE]) + apply(Hz$sigma[,
                , indices, drop = FALSE], 1:2, sum))/sigma_y^2))
            Hx$mu[, i] <- tcrossprod(Hz$mu[, indices, drop = FALSE], Y[i, indices,
                drop = FALSE])/sigma_y^2
            for (m in seq_len(Px)) {
                Hx$mu[, i] <- Hx$mu[, i] + ex$mu[m] * Gx$mu[, i, m]/sigma_h^2
            }
            Hx$mu[, i] <- Hx$sigma[, , i] %*% Hx$mu[, i]
        }

        # update Lambdaz
        for (s in seq_len(R)) {
            Lambdaz$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Az$mu[, s]^2 +
                diag(Az$sigma[, , s])))
        }
        # update Az
        for (s in seq_len(R)) {
            Az$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdaz$alpha[, s] *
                Lambdaz$beta[, s]), Dz, Dz) + KzKz/sigma_g^2))
            Az$mu[, s] <- Az$sigma[, , s] %*% (Kz %*% matrix(Gz$mu[s, , ], Nz * Pz,
                1)/sigma_g^2)
        }
        # update Gz
        for (n in seq_len(Pz)) {
            Gz$sigma[, , n] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + diag((ez$mu[n] *
                ez$mu[n] + ez$sigma[n, n])/sigma_h^2, R, R)))
            Gz$mu[, , n] <- crossprod(Az$mu, Kz[, ((n - 1) * Nz + 1):(n * Nz)])/sigma_g^2 +
                ez$mu[n] * Hz$mu/sigma_h^2
            for (p in setdiff(seq_len(Pz), n)) {
                Gz$mu[, , n] <- Gz$mu[, , n] - (ez$mu[n] * ez$mu[p] + ez$sigma[n,
                  p]) * Gz$mu[, , p]/sigma_h^2
            }
            Gz$mu[, , n] <- Gz$sigma[, , n] %*% Gz$mu[, , n]
        }
        # update etaz
        etaz$beta <- 1/(1/parameters$beta_eta + 0.5 * (ez$mu^2 + diag(ez$sigma)))
        # update ez
        ez$sigma <- diag(as.vector(etaz$alpha * etaz$beta))
        for (n in seq_len(Pz)) {
            for (p in seq_len(Pz)) {
                ez$sigma[n, p] <- ez$sigma[n, p] + (sum(Gz$mu[, , n] * Gz$mu[, ,
                  p]) + (n == p) * Nz * sum(diag(Gz$sigma[, , n])))/sigma_h^2
            }
        }
        ez$sigma <- chol2inv(chol(ez$sigma))
        for (n in seq_len(Pz)) {
            ez$mu[n] <- sum(Gz$mu[, , n] * Hz$mu)/sigma_h^2
        }
        ez$mu <- ez$sigma %*% ez$mu
        # update Hz: column j only uses the columns of Hx observed in column j of Y
        for (j in seq_len(Nz)) {
            indices <- which(!is.na(Y[, j]))
            Hz$sigma[, , j] <- chol2inv(chol(diag(1/sigma_h^2, R, R) + (tcrossprod(Hx$mu[,
                indices, drop = FALSE], Hx$mu[, indices, drop = FALSE]) + apply(Hx$sigma[,
                , indices, drop = FALSE], 1:2, sum))/sigma_y^2))
            Hz$mu[, j] <- Hx$mu[, indices, drop = FALSE] %*% Y[indices, j, drop = FALSE]/sigma_y^2
            for (n in seq_len(Pz)) {
                Hz$mu[, j] <- Hz$mu[, j] + ez$mu[n] * Gz$mu[, j, n]/sigma_h^2
            }
            Hz$mu[, j] <- Hz$sigma[, , j] %*% Hz$mu[, j]
        }
    }

    # Only the quantities read by the prediction function, plus the settings,
    # are returned; G and H are not part of the state.
    state <- list(Lambdax = Lambdax, Ax = Ax, etax = etax, ex = ex, Lambdaz = Lambdaz,
        Az = Az, etaz = etaz, ez = ez, parameters = parameters)
    invisible(state)
}

**Fonctions de test du modèle**

In [None]:
# Supervised mode: predict from a trained multiple-kernel state.
# Kx, Kz: 3-D kernel arrays (third axis = kernel index); state: list holding
# Ax$mu, Az$mu and the kernel weights ex$mu, ez$mu.
# Returns (invisibly) list(Gx, Hx, Gz, Hz, Y), each a list with a `mu` element,
# where Y$mu = t(Hx$mu) %*% Hz$mu.
kbmf1mkl1mkl_supervised_regression_variational_test <- function(Kx, Kz, state) {
    n_components <- dim(state$Ax$mu)[2]

    # For one side: project every kernel with the projection mean, and
    # accumulate the weighted sum of the projections.
    project_side <- function(proj_mean, kernels, weights) {
        n_objects <- dim(kernels)[2]
        n_kernels <- dim(kernels)[3]
        per_kernel <- array(0, c(n_components, n_objects, n_kernels))
        combined <- matrix(0, n_components, n_objects)
        for (k in 1:n_kernels) {
            per_kernel[, , k] <- crossprod(proj_mean, kernels[, , k])
            combined <- combined + weights[k] * per_kernel[, , k]
        }
        list(G = list(mu = per_kernel), H = list(mu = combined))
    }

    side_x <- project_side(state$Ax$mu, Kx, state$ex$mu)
    side_z <- project_side(state$Az$mu, Kz, state$ez$mu)

    invisible(list(
        Gx = side_x$G,
        Hx = side_x$H,
        Gz = side_z$G,
        Hz = side_z$H,
        Y = list(mu = crossprod(side_x$H$mu, side_z$H$mu))
    ))
}

In [None]:
# Semi-supervised mode: predict from a trained multiple-kernel state.
# Each kernel slice is projected with the matching projection mean (Ax$mu or
# Az$mu); the projections are then summed with the kernel weights (ex$mu or
# ez$mu) and the two weighted sums are multiplied to give the prediction.
# Returns (invisibly) list(Gx, Hx, Gz, Hz, Y), each a list with a `mu` element.
kbmf1mkl1mkl_semisupervised_regression_variational_test <- function(Kx, Kz, state) {
    size_x <- dim(Kx)
    size_z <- dim(Kz)
    rank <- dim(state$Ax$mu)[2]

    # Row side: per-kernel projections and their weighted sum, in one pass
    proj_x <- array(0, c(rank, size_x[2], size_x[3]))
    sum_x <- matrix(0, rank, size_x[2])
    for (k in 1:size_x[3]) {
        proj_x[, , k] <- crossprod(state$Ax$mu, Kx[, , k])
        sum_x <- sum_x + state$ex$mu[k] * proj_x[, , k]
    }

    # Column side: same computation with the z quantities
    proj_z <- array(0, c(rank, size_z[2], size_z[3]))
    sum_z <- matrix(0, rank, size_z[2])
    for (k in 1:size_z[3]) {
        proj_z[, , k] <- crossprod(state$Az$mu, Kz[, , k])
        sum_z <- sum_z + state$ez$mu[k] * proj_z[, , k]
    }

    invisible(list(
        Gx = list(mu = proj_x),
        Hx = list(mu = sum_x),
        Gz = list(mu = proj_z),
        Hz = list(mu = sum_z),
        Y = list(mu = crossprod(sum_x, sum_z))
    ))
}

## Multiples noyaux sur U, un noyau sur V

In [None]:
#' Train the KBMF regression model with multiple kernels on the row side and
#' a single kernel on the column side (complete target).
#'
#' Supervised variant: `Y` enters the updates through plain matrix products,
#' so it is expected to have no missing entry. Each sweep updates, in order,
#' Lambda_x, A_x, G_x, eta_x, e_x, H_x, Lambda_z, A_z and G_z. With
#' `iteration = 0` no sweep is run and the initial values are returned.
#'
#' @param Kx Array of row-side kernels; its dimensions are read as Dx x Nx x Px.
#' @param Kz Kernel matrix of the column side; its dimensions are read as Dz x Nz.
#' @param Y Target matrix, used as Nx x Nz.
#' @param parameters List providing `seed`, `R`, `sigma_g`, `sigma_h`,
#'   `sigma_y`, `alpha_lambda`, `beta_lambda`, `alpha_eta`, `beta_eta` and
#'   `iteration`.
#' @return Invisibly, a list with `Lambdax`, `Ax`, `etax`, `ex`, `Lambdaz`,
#'   `Az` and the `parameters` that were passed in.
kbmf1mkl1k_supervised_regression_variational_train <- function(Kx, Kz, Y, parameters) {
    set.seed(parameters$seed)

    # Problem sizes and hyper-parameters
    Dx <- dim(Kx)[1]
    Nx <- dim(Kx)[2]
    Px <- dim(Kx)[3]
    Dz <- dim(Kz)[1]
    Nz <- dim(Kz)[2]
    R <- parameters$R
    sigma_g <- parameters$sigma_g
    sigma_h <- parameters$sigma_h
    sigma_y <- parameters$sigma_y

    # Initial state. Random draws are made in the order Ax, Gx, Hx, Az, Gz;
    # this order determines the result for a given seed.
    Lambdax <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dx, R),
        beta = matrix(parameters$beta_lambda, Dx, R))
    Ax <- list(mu = matrix(rnorm(Dx * R), Dx, R),
        sigma = array(diag(1, Dx, Dx), c(Dx, Dx, R)))
    Gx <- list(mu = array(rnorm(R * Nx * Px), c(R, Nx, Px)),
        sigma = array(diag(1, R, R), c(R, R, Px)))
    # Kernel weights e_x (all starting at 1) and their gamma prior eta_x
    etax <- list(alpha = matrix(parameters$alpha_eta + 0.5, Px, 1),
        beta = matrix(parameters$beta_eta, Px, 1))
    ex <- list(mu = matrix(1, Px, 1), sigma = diag(1, Px, Px))
    Hx <- list(mu = matrix(rnorm(R * Nx), R, Nx), sigma = diag(1, R, R))

    Lambdaz <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dz, R),
        beta = matrix(parameters$beta_lambda, Dz, R))
    Az <- list(mu = matrix(rnorm(Dz * R), Dz, R),
        sigma = array(diag(1, Dz, Dz), c(Dz, Dz, R)))
    Gz <- list(mu = matrix(rnorm(R * Nz), R, Nz), sigma = diag(1, R, R))

    # Cache the kernel products. The row-side kernel array is then flattened
    # to Dx x (Nx * Px) so that kernel m occupies columns
    # ((m - 1) * Nx + 1):(m * Nx).
    KxKx <- matrix(0, Dx, Dx)
    for (m in seq_len(Px)) {
        KxKx <- KxKx + tcrossprod(Kx[, , m], Kx[, , m])
    }
    Kx <- matrix(Kx, Dx, Nx * Px)

    KzKz <- tcrossprod(Kz, Kz)

    # seq_len() so that iteration = 0 runs no sweep (1:0 would run two)
    for (iter in seq_len(parameters$iteration)) {
        # update Lambdax
        for (s in seq_len(R)) {
            Lambdax$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Ax$mu[, s]^2 +
                diag(Ax$sigma[, , s])))
        }
        # update Ax
        for (s in seq_len(R)) {
            Ax$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdax$alpha[, s] *
                Lambdax$beta[, s]), Dx, Dx) + KxKx/sigma_g^2))
            Ax$mu[, s] <- Ax$sigma[, , s] %*% (Kx %*% matrix(Gx$mu[s, , ], Nx * Px,
                1)/sigma_g^2)
        }
        # update Gx: kernel m is updated using the current values of the
        # other kernels, so the order of this loop matters
        for (m in seq_len(Px)) {
            Gx$sigma[, , m] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + diag((ex$mu[m] *
                ex$mu[m] + ex$sigma[m, m])/sigma_h^2, R, R)))
            Gx$mu[, , m] <- crossprod(Ax$mu, Kx[, ((m - 1) * Nx + 1):(m * Nx)])/sigma_g^2 +
                ex$mu[m] * Hx$mu/sigma_h^2
            for (o in setdiff(seq_len(Px), m)) {
                Gx$mu[, , m] <- Gx$mu[, , m] - (ex$mu[m] * ex$mu[o] + ex$sigma[m,
                  o]) * Gx$mu[, , o]/sigma_h^2
            }
            Gx$mu[, , m] <- Gx$sigma[, , m] %*% Gx$mu[, , m]
        }

        # update etax
        etax$beta <- 1/(1/parameters$beta_eta + 0.5 * (ex$mu^2 + diag(ex$sigma)))

        # update ex: build the precision matrix, invert it, then the mean
        ex$sigma <- diag(as.vector(etax$alpha * etax$beta))
        for (m in seq_len(Px)) {
            for (o in seq_len(Px)) {
                ex$sigma[m, o] <- ex$sigma[m, o] + (sum(Gx$mu[, , m] * Gx$mu[, ,
                  o]) + (m == o) * Nx * sum(diag(Gx$sigma[, , m])))/sigma_h^2
            }
        }
        ex$sigma <- chol2inv(chol(ex$sigma))
        for (m in seq_len(Px)) {
            ex$mu[m] <- sum(Gx$mu[, , m] * Hx$mu)/sigma_h^2
        }
        ex$mu <- ex$sigma %*% ex$mu

        # update Hx (the column side has a single kernel, so G_z plays the
        # role of the column-side latent components here)
        Hx$sigma <- chol2inv(chol(diag(1/sigma_h^2, R, R) + (tcrossprod(Gz$mu, Gz$mu) +
            Nz * Gz$sigma)/sigma_y^2))
        Hx$mu <- tcrossprod(Gz$mu, Y)/sigma_y^2
        for (m in seq_len(Px)) {
            Hx$mu <- Hx$mu + ex$mu[m] * Gx$mu[, , m]/sigma_h^2
        }
        Hx$mu <- Hx$sigma %*% Hx$mu

        # update Lambdaz
        for (s in seq_len(R)) {
            Lambdaz$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Az$mu[, s]^2 +
                diag(Az$sigma[, , s])))
        }

        # update Az
        for (s in seq_len(R)) {
            Az$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdaz$alpha[, s] *
                Lambdaz$beta[, s]), Dz, Dz) + KzKz/sigma_g^2))
            Az$mu[, s] <- Az$sigma[, , s] %*% (tcrossprod(Kz, Gz$mu[s, , drop = FALSE])/sigma_g^2)
        }

        # update Gz (uses the Hx values refreshed above)
        Gz$sigma <- chol2inv(chol(diag(1/sigma_g^2, R, R) + (tcrossprod(Hx$mu, Hx$mu) +
            Nx * Hx$sigma)/sigma_y^2))
        Gz$mu <- Gz$sigma %*% (crossprod(Az$mu, Kz)/sigma_g^2 + Hx$mu %*% Y/sigma_y^2)
    }

    state <- list(Lambdax = Lambdax, Ax = Ax, etax = etax, ex = ex, Lambdaz = Lambdaz,
        Az = Az, parameters = parameters)
    invisible(state)
}



In [None]:
# If entries of Y are missing from the matrix, the model runs in "semi-supervised" mode.
#
# Trainer for the "multiple kernels on X, single kernel on Z" regression model
# that leaves NA entries of Y out of the Hx and Gz updates.
#
# Arguments:
#   Kx         Dx x Nx x Px array; slice Kx[, , m] is the m-th kernel on X.
#   Kz         Dz x Nz matrix; the single kernel on Z.
#   Y          matrix indexed as Y[i, j] with i in 1:Nx and j in 1:Nz; an NA
#              entry is excluded from the updates.
#   parameters list read for: seed, R, sigma_g, sigma_h, sigma_y, alpha_lambda,
#              beta_lambda, alpha_eta, beta_eta and iteration.
#
# Returns (invisibly, as the value of the final assignment) a list holding
# Lambdax, Ax, etax, ex, Lambdaz, Az and the parameters list.
# Side effect: reseeds the global RNG through set.seed().
kbmf1mkl1k_semisupervised_regression_variational_train <- function(Kx, Kz, Y, parameters) {
    # Fix the seed so that the rnorm() initialisations below are reproducible
    set.seed(parameters$seed)
    # Parameter initialisation: sizes are read from the kernel dimensions
    Dx <- dim(Kx)[1]
    Nx <- dim(Kx)[2]
    Px <- dim(Kx)[3]
    Dz <- dim(Kz)[1]
    Nz <- dim(Kz)[2]
    R <- parameters$R
    sigma_g <- parameters$sigma_g
    sigma_h <- parameters$sigma_h
    sigma_y <- parameters$sigma_y
    
    # X side: Lambdax (Dx x R alpha/beta), Ax (Dx x R mu, one Dx x Dx sigma per
    # column), Gx (one R x Nx mu and one R x R sigma per kernel), etax/ex (one
    # entry per kernel, ex$mu starts at 1) and Hx (R x Nx mu, one R x R sigma
    # per X-side index)
    Lambdax <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dx, R), beta = matrix(parameters$beta_lambda, 
        Dx, R))
    Ax <- list(mu = matrix(rnorm(Dx * R), Dx, R), sigma = array(diag(1, Dx, Dx), 
        c(Dx, Dx, R)))
    Gx <- list(mu = array(rnorm(R * Nx * Px), c(R, Nx, Px)), sigma = array(diag(1, 
        R, R), c(R, R, Px)))
    etax <- list(alpha = matrix(parameters$alpha_eta + 0.5, Px, 1), beta = matrix(parameters$beta_eta, 
        Px, 1))
    ex <- list(mu = matrix(1, Px, 1), sigma = diag(1, Px, Px))
    Hx <- list(mu = matrix(rnorm(R * Nx), R, Nx), sigma = array(diag(1, R, R), c(R, 
        R, Nx)))
    
    # Z side: Lambdaz, Az and Gz (R x Nz mu, one R x R sigma per Z-side index)
    Lambdaz <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dz, R), beta = matrix(parameters$beta_lambda, 
        Dz, R))
    Az <- list(mu = matrix(rnorm(Dz * R), Dz, R), sigma = array(diag(1, Dz, Dz), 
        c(Dz, Dz, R)))
    Gz <- list(mu = matrix(rnorm(R * Nz), R, Nz), sigma = array(diag(1, R, R), c(R, 
        R, Nz)))
    
    # Kernel caching: KxKx accumulates Kx[, , m] %*% t(Kx[, , m]) over all kernels
    KxKx <- matrix(0, Dx, Dx)
    for (m in 1:Px) {
        KxKx <- KxKx + tcrossprod(Kx[, , m], Kx[, , m])
    }
    # Flatten Kx to Dx x (Nx * Px); kernel m then occupies columns
    # ((m - 1) * Nx + 1):(m * Nx), which is how it is sliced in the Gx update
    Kx <- matrix(Kx, Dx, Nx * Px)
    
    KzKz <- tcrossprod(Kz, Kz)
    
    # Parameter updates, repeated parameters$iteration times in a fixed order
    for (iter in 1:parameters$iteration) {
        # update Lambdax: beta is recomputed column by column from Ax$mu^2 and
        # the diagonal of the matching Ax$sigma slice
        for (s in 1:R) {
            Lambdax$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Ax$mu[, s]^2 + 
                diag(Ax$sigma[, , s])))
        }
        # update Ax: sigma is the inverse (through its Cholesky factor) of
        # diag(alpha * beta) + KxKx / sigma_g^2
        for (s in 1:R) {
            Ax$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdax$alpha[, s] * 
                Lambdax$beta[, s]), Dx, Dx) + KxKx/sigma_g^2))
            Ax$mu[, s] <- Ax$sigma[, , s] %*% (Kx %*% matrix(Gx$mu[s, , ], Nx * Px, 
                1)/sigma_g^2)
        }
        # update Gx: one pass per kernel m; the contribution of every other
        # kernel o != m is subtracted before multiplying by Gx$sigma[, , m]
        for (m in 1:Px) {
            Gx$sigma[, , m] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + diag((ex$mu[m] * 
                ex$mu[m] + ex$sigma[m, m])/sigma_h^2, R, R)))
            Gx$mu[, , m] <- crossprod(Ax$mu, Kx[, ((m - 1) * Nx + 1):(m * Nx)])/sigma_g^2 + 
                ex$mu[m] * Hx$mu/sigma_h^2
            for (o in setdiff(1:Px, m)) {
                Gx$mu[, , m] <- Gx$mu[, , m] - (ex$mu[m] * ex$mu[o] + ex$sigma[m, 
                  o]) * Gx$mu[, , o]/sigma_h^2
            }
            Gx$mu[, , m] <- Gx$sigma[, , m] %*% Gx$mu[, , m]
        }
        # update etax
        etax$beta <- 1/(1/parameters$beta_eta + 0.5 * (ex$mu^2 + diag(ex$sigma)))
        # update ex: build the Px x Px matrix entry by entry, invert it, then
        # apply it to the vector of sum(Gx$mu[, , m] * Hx$mu) / sigma_h^2
        ex$sigma <- diag(as.vector(etax$alpha * etax$beta))
        for (m in 1:Px) {
            for (o in 1:Px) {
                ex$sigma[m, o] <- ex$sigma[m, o] + (sum(Gx$mu[, , m] * Gx$mu[, , 
                  o]) + (m == o) * Nx * sum(diag(Gx$sigma[, , m])))/sigma_h^2
            }
        }
        ex$sigma <- chol2inv(chol(ex$sigma))
        for (m in 1:Px) {
            ex$mu[m] <- sum(Gx$mu[, , m] * Hx$mu)/sigma_h^2
        }
        ex$mu <- ex$sigma %*% ex$mu
        # update Hx: one column per X-side index i, using only the columns of
        # row i of Y that are not NA
        for (i in 1:Nx) {
            # positions j where Y[i, j] is observed
            indices <- which(is.na(Y[i, ]) == FALSE)
            Hx$sigma[, , i] <- chol2inv(chol(diag(1/sigma_h^2, R, R) + (tcrossprod(Gz$mu[, 
                indices, drop = FALSE], Gz$mu[, indices, drop = FALSE]) + apply(Gz$sigma[, 
                , indices, drop = FALSE], 1:2, sum))/sigma_y^2))
            Hx$mu[, i] <- tcrossprod(Gz$mu[, indices, drop = FALSE], Y[i, indices, 
                drop = FALSE])/sigma_y^2
            for (m in 1:Px) {
                Hx$mu[, i] <- Hx$mu[, i] + ex$mu[m] * Gx$mu[, i, m]/sigma_h^2
            }
            Hx$mu[, i] <- Hx$sigma[, , i] %*% Hx$mu[, i]
        }
        
        # update Lambdaz
        for (s in 1:R) {
            Lambdaz$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Az$mu[, s]^2 + 
                diag(Az$sigma[, , s])))
        }
        # update Az
        for (s in 1:R) {
            Az$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdaz$alpha[, s] * 
                Lambdaz$beta[, s]), Dz, Dz) + KzKz/sigma_g^2))
            Az$mu[, s] <- Az$sigma[, , s] %*% (tcrossprod(Kz, Gz$mu[s, , drop = FALSE])/sigma_g^2)
        }
        # update Gz: one column per Z-side index j, using only the rows of
        # column j of Y that are not NA
        for (j in 1:Nz) {
            # positions i where Y[i, j] is observed
            indices <- which(is.na(Y[, j]) == FALSE)
            Gz$sigma[, , j] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + (tcrossprod(Hx$mu[, 
                indices, drop = FALSE], Hx$mu[, indices, drop = FALSE]) + apply(Hx$sigma[, 
                , indices, drop = FALSE], 1:2, sum))/sigma_y^2))
            Gz$mu[, j] <- Gz$sigma[, , j] %*% (crossprod(Az$mu, Kz[, j])/sigma_g^2 + 
                Hx$mu[, indices, drop = FALSE] %*% Y[indices, j, drop = FALSE]/sigma_y^2)
        }
    }
    
    # Only these quantities are kept; Gx, Hx and Gz are not part of the state
    state <- list(Lambdax = Lambdax, Ax = Ax, etax = etax, ex = ex, Lambdaz = Lambdaz, 
        Az = Az, parameters = parameters)
}

**Fonction de test**

In [None]:
# Supervised mode
#
# Prediction for the "multiple kernels on X, single kernel on Z" model.
#   Kx     array whose third dimension indexes the kernels on X
#   Kz     kernel matrix on Z
#   state  list providing Ax$mu, ex$mu and Az$mu
# Returns (invisibly) list(Gx, Hx, Gz, Y), each a list with a `mu` element;
# Y$mu is crossprod(Hx$mu, Gz$mu).
kbmf1mkl1k_supervised_regression_variational_test <- function(Kx, Kz, state) {
    n_columns_x <- dim(Kx)[2]
    n_kernels_x <- dim(Kx)[3]
    n_components <- dim(state$Ax$mu)[2]

    # Project every kernel through Ax$mu and accumulate the ex$mu-weighted sum
    projected_x <- array(0, c(n_components, n_columns_x, n_kernels_x))
    combined_x <- matrix(0, n_components, n_columns_x)
    for (k in 1:n_kernels_x) {
        projected_x[, , k] <- crossprod(state$Ax$mu, Kx[, , k])
        combined_x <- combined_x + state$ex$mu[k] * projected_x[, , k]
    }

    projected_z <- crossprod(state$Az$mu, Kz)

    invisible(list(
        Gx = list(mu = projected_x),
        Hx = list(mu = combined_x),
        Gz = list(mu = projected_z),
        Y = list(mu = crossprod(combined_x, projected_z))
    ))
}



In [None]:
# Semi-supervised mode
#
# Prediction for the "multiple kernels on X, single kernel on Z" model.
#   Kx     array whose third dimension indexes the kernels on X
#   Kz     kernel matrix on Z
#   state  list providing Ax$mu, ex$mu and Az$mu
# Returns (invisibly) list(Gx, Hx, Gz, Y), each a list with a `mu` element;
# Y$mu is crossprod(Hx$mu, Gz$mu).
kbmf1mkl1k_semisupervised_regression_variational_test <- function(Kx, Kz, state) {
    n_columns_x <- dim(Kx)[2]
    n_kernels_x <- dim(Kx)[3]
    n_components <- dim(state$Ax$mu)[2]

    # Project every kernel through Ax$mu and accumulate the ex$mu-weighted sum
    projected_x <- array(0, c(n_components, n_columns_x, n_kernels_x))
    combined_x <- matrix(0, n_components, n_columns_x)
    for (k in 1:n_kernels_x) {
        projected_x[, , k] <- crossprod(state$Ax$mu, Kx[, , k])
        combined_x <- combined_x + state$ex$mu[k] * projected_x[, , k]
    }

    projected_z <- crossprod(state$Az$mu, Kz)

    invisible(list(
        Gx = list(mu = projected_x),
        Hx = list(mu = combined_x),
        Gz = list(mu = projected_z),
        Y = list(mu = crossprod(combined_x, projected_z))
    ))
}

## Un noyau sur U, multiples noyaux sur V

In [None]:
# Supervised
#
# Trainer for the "single kernel on X, multiple kernels on Z" regression model
# when every entry of Y enters the updates (no NA handling in this function).
#
# Arguments:
#   Kx         Dx x Nx matrix; the single kernel on X.
#   Kz         Dz x Nz x Pz array; slice Kz[, , n] is the n-th kernel on Z.
#   Y          Nx x Nz matrix (it is multiplied as Gx$mu %*% Y and as
#              tcrossprod(Hz$mu, Y)).
#   parameters list read for: seed, R, sigma_g, sigma_h, sigma_y, alpha_lambda,
#              beta_lambda, alpha_eta, beta_eta and iteration.
#
# Returns (invisibly, as the value of the final assignment) a list holding
# Lambdax, Ax, Lambdaz, Az, etaz, ez and the parameters list.
# Side effect: reseeds the global RNG through set.seed().
kbmf1k1mkl_supervised_regression_variational_train <- function(Kx, Kz, Y, parameters) {
    # Fix the seed so that the rnorm() initialisations below are reproducible
    set.seed(parameters$seed)
    # Parameter initialisation: sizes are read from the kernel dimensions
    Dx <- dim(Kx)[1]
    Nx <- dim(Kx)[2]
    Dz <- dim(Kz)[1]
    Nz <- dim(Kz)[2]
    Pz <- dim(Kz)[3]
    R <- parameters$R
    sigma_g <- parameters$sigma_g
    sigma_h <- parameters$sigma_h
    sigma_y <- parameters$sigma_y
    
    # X side: Lambdax, Ax and Gx (R x Nx mu with a single shared R x R sigma)
    Lambdax <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dx, R), beta = matrix(parameters$beta_lambda, 
        Dx, R))
    Ax <- list(mu = matrix(rnorm(Dx * R), Dx, R), sigma = array(diag(1, Dx, Dx), 
        c(Dx, Dx, R)))
    Gx <- list(mu = matrix(rnorm(R * Nx), R, Nx), sigma = diag(1, R, R))
    
    # Z side: Lambdaz, Az, Gz (one R x Nz mu and one R x R sigma per kernel),
    # etaz/ez (one entry per kernel, ez$mu starts at 1) and Hz (R x Nz mu with
    # a single shared R x R sigma)
    Lambdaz <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dz, R), beta = matrix(parameters$beta_lambda, 
        Dz, R))
    Az <- list(mu = matrix(rnorm(Dz * R), Dz, R), sigma = array(diag(1, Dz, Dz), 
        c(Dz, Dz, R)))
    Gz <- list(mu = array(rnorm(R * Nz * Pz), c(R, Nz, Pz)), sigma = array(diag(1, 
        R, R), c(R, R, Pz)))
    etaz <- list(alpha = matrix(parameters$alpha_eta + 0.5, Pz, 1), beta = matrix(parameters$beta_eta, 
        Pz, 1))
    ez <- list(mu = matrix(1, Pz, 1), sigma = diag(1, Pz, Pz))
    Hz <- list(mu = matrix(rnorm(R * Nz), R, Nz), sigma = diag(1, R, R))
    
    # Cached kernel products
    # X kernel: Kx %*% t(Kx)
    KxKx <- tcrossprod(Kx, Kx)
    # Z kernels: sum over n of Kz[, , n] %*% t(Kz[, , n])
    KzKz <- matrix(0, Dz, Dz)
    for (n in 1:Pz) {
        KzKz <- KzKz + tcrossprod(Kz[, , n], Kz[, , n])
    }
    # Flatten Kz to Dz x (Nz * Pz); kernel n then occupies columns
    # ((n - 1) * Nz + 1):(n * Nz), which is how it is sliced in the Gz update
    Kz <- matrix(Kz, Dz, Nz * Pz)
    
    # Parameter updates, repeated parameters$iteration times in a fixed order
    for (iter in 1:parameters$iteration) {
        # update Lambdax: beta is recomputed column by column from Ax$mu^2 and
        # the diagonal of the matching Ax$sigma slice
        for (s in 1:R) {
            Lambdax$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Ax$mu[, s]^2 + 
                diag(Ax$sigma[, , s])))
        }
        # update Ax: sigma is the inverse (through its Cholesky factor) of
        # diag(alpha * beta) + KxKx / sigma_g^2
        for (s in 1:R) {
            Ax$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdax$alpha[, s] * 
                Lambdax$beta[, s]), Dx, Dx) + KxKx/sigma_g^2))
            Ax$mu[, s] <- Ax$sigma[, , s] %*% (tcrossprod(Kx, Gx$mu[s, , drop = FALSE])/sigma_g^2)
        }
        # update Gx: a single R x R sigma is shared by all Nx columns
        Gx$sigma <- chol2inv(chol(diag(1/sigma_g^2, R, R) + (tcrossprod(Hz$mu, Hz$mu) + 
            Nz * Hz$sigma)/sigma_y^2))
        Gx$mu <- Gx$sigma %*% (crossprod(Ax$mu, Kx)/sigma_g^2 + tcrossprod(Hz$mu, 
            Y)/sigma_y^2)
        
        # update Lambdaz
        for (s in 1:R) {
            Lambdaz$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Az$mu[, s]^2 + 
                diag(Az$sigma[, , s])))
        }
        # update Az
        for (s in 1:R) {
            Az$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdaz$alpha[, s] * 
                Lambdaz$beta[, s]), Dz, Dz) + KzKz/sigma_g^2))
            Az$mu[, s] <- Az$sigma[, , s] %*% (Kz %*% matrix(Gz$mu[s, , ], Nz * Pz, 
                1)/sigma_g^2)
        }
        # update Gz: one pass per kernel n; the contribution of every other
        # kernel p != n is subtracted before multiplying by Gz$sigma[, , n]
        for (n in 1:Pz) {
            Gz$sigma[, , n] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + diag((ez$mu[n] * 
                ez$mu[n] + ez$sigma[n, n])/sigma_h^2, R, R)))
            Gz$mu[, , n] <- crossprod(Az$mu, Kz[, ((n - 1) * Nz + 1):(n * Nz)])/sigma_g^2 + 
                ez$mu[n] * Hz$mu/sigma_h^2
            for (p in setdiff(1:Pz, n)) {
                Gz$mu[, , n] <- Gz$mu[, , n] - (ez$mu[n] * ez$mu[p] + ez$sigma[n, 
                  p]) * Gz$mu[, , p]/sigma_h^2
            }
            Gz$mu[, , n] <- Gz$sigma[, , n] %*% Gz$mu[, , n]
        }
        # update etaz
        etaz$beta <- 1/(1/parameters$beta_eta + 0.5 * (ez$mu^2 + diag(ez$sigma)))
        # update ez: build the Pz x Pz matrix entry by entry, invert it, then
        # apply it to the vector of sum(Gz$mu[, , n] * Hz$mu) / sigma_h^2
        ez$sigma <- diag(as.vector(etaz$alpha * etaz$beta))
        for (n in 1:Pz) {
            for (p in 1:Pz) {
                ez$sigma[n, p] <- ez$sigma[n, p] + (sum(Gz$mu[, , n] * Gz$mu[, , 
                  p]) + (n == p) * Nz * sum(diag(Gz$sigma[, , n])))/sigma_h^2
            }
        }
        ez$sigma <- chol2inv(chol(ez$sigma))
        for (n in 1:Pz) {
            ez$mu[n] <- sum(Gz$mu[, , n] * Hz$mu)/sigma_h^2
        }
        ez$mu <- ez$sigma %*% ez$mu
        # update Hz: a single R x R sigma is shared by all Nz columns
        Hz$sigma <- chol2inv(chol(diag(1/sigma_h^2, R, R) + (tcrossprod(Gx$mu, Gx$mu) + 
            Nx * Gx$sigma)/sigma_y^2))
        Hz$mu <- Gx$mu %*% Y/sigma_y^2
        for (n in 1:Pz) {
            Hz$mu <- Hz$mu + ez$mu[n] * Gz$mu[, , n]/sigma_h^2
        }
        Hz$mu <- Hz$sigma %*% Hz$mu
    }
    
    # Only these quantities are kept; Gx, Gz and Hz are not part of the state
    state <- list(Lambdax = Lambdax, Ax = Ax, Lambdaz = Lambdaz, Az = Az, etaz = etaz, 
        ez = ez, parameters = parameters)
}



In [None]:
# Semi-supervised trainer for the "single kernel on X, multiple kernels on Z"
# regression model: NA entries of Y are left out of the Gx and Hz updates.
#
# Arguments:
#   Kx         Dx x Nx matrix; the single kernel on X.
#   Kz         Dz x Nz x Pz array; slice Kz[, , n] is the n-th kernel on Z.
#   Y          matrix indexed as Y[i, j] with i in 1:Nx and j in 1:Nz; an NA
#              entry is excluded from the updates.
#   parameters list read for: seed, R, sigma_g, sigma_h, sigma_y, alpha_lambda,
#              beta_lambda, alpha_eta, beta_eta and iteration.
#
# Returns (invisibly, as the value of the final assignment) a list holding
# Lambdax, Ax, Lambdaz, Az, etaz, ez and the parameters list.
# Side effect: reseeds the global RNG through set.seed().
kbmf1k1mkl_semisupervised_regression_variational_train <- function(Kx, Kz, Y, parameters) {
    # Fix the seed so that the rnorm() initialisations below are reproducible
    set.seed(parameters$seed)
    # Parameter initialisation: sizes are read from the kernel dimensions
    Dx <- dim(Kx)[1]
    Nx <- dim(Kx)[2]
    Dz <- dim(Kz)[1]
    Nz <- dim(Kz)[2]
    Pz <- dim(Kz)[3]
    R <- parameters$R
    sigma_g <- parameters$sigma_g
    sigma_h <- parameters$sigma_h
    sigma_y <- parameters$sigma_y
    
    # X side: Lambdax, Ax and Gx (R x Nx mu, one R x R sigma per X-side index)
    Lambdax <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dx, R), beta = matrix(parameters$beta_lambda, 
        Dx, R))
    Ax <- list(mu = matrix(rnorm(Dx * R), Dx, R), sigma = array(diag(1, Dx, Dx), 
        c(Dx, Dx, R)))
    Gx <- list(mu = matrix(rnorm(R * Nx), R, Nx), sigma = array(diag(1, R, R), c(R, 
        R, Nx)))
    
    # Z side: Lambdaz, Az, Gz (one R x Nz mu and one R x R sigma per kernel),
    # etaz/ez (one entry per kernel, ez$mu starts at 1) and Hz (R x Nz mu, one
    # R x R sigma per Z-side index)
    Lambdaz <- list(alpha = matrix(parameters$alpha_lambda + 0.5, Dz, R), beta = matrix(parameters$beta_lambda, 
        Dz, R))
    Az <- list(mu = matrix(rnorm(Dz * R), Dz, R), sigma = array(diag(1, Dz, Dz), 
        c(Dz, Dz, R)))
    Gz <- list(mu = array(rnorm(R * Nz * Pz), c(R, Nz, Pz)), sigma = array(diag(1, 
        R, R), c(R, R, Pz)))
    etaz <- list(alpha = matrix(parameters$alpha_eta + 0.5, Pz, 1), beta = matrix(parameters$beta_eta, 
        Pz, 1))
    ez <- list(mu = matrix(1, Pz, 1), sigma = diag(1, Pz, Pz))
    Hz <- list(mu = matrix(rnorm(R * Nz), R, Nz), sigma = array(diag(1, R, R), c(R, 
        R, Nz)))
    
    # Kernel caching: Kx %*% t(Kx), and the sum over n of Kz[, , n] %*% t(Kz[, , n])
    KxKx <- tcrossprod(Kx, Kx)
    
    KzKz <- matrix(0, Dz, Dz)
    for (n in 1:Pz) {
        KzKz <- KzKz + tcrossprod(Kz[, , n], Kz[, , n])
    }
    # Flatten Kz to Dz x (Nz * Pz); kernel n then occupies columns
    # ((n - 1) * Nz + 1):(n * Nz), which is how it is sliced in the Gz update
    Kz <- matrix(Kz, Dz, Nz * Pz)
    
    # Parameter updates, repeated parameters$iteration times in a fixed order
    for (iter in 1:parameters$iteration) {
        # update Lambdax: beta is recomputed column by column from Ax$mu^2 and
        # the diagonal of the matching Ax$sigma slice
        for (s in 1:R) {
            Lambdax$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Ax$mu[, s]^2 + 
                diag(Ax$sigma[, , s])))
        }
        # update Ax: sigma is the inverse (through its Cholesky factor) of
        # diag(alpha * beta) + KxKx / sigma_g^2
        for (s in 1:R) {
            Ax$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdax$alpha[, s] * 
                Lambdax$beta[, s]), Dx, Dx) + KxKx/sigma_g^2))
            Ax$mu[, s] <- Ax$sigma[, , s] %*% (tcrossprod(Kx, Gx$mu[s, , drop = FALSE])/sigma_g^2)
        }
        # update Gx: one column per X-side index i, using only the columns of
        # row i of Y that are not NA
        for (i in 1:Nx) {
            # positions j where Y[i, j] is observed
            indices <- which(is.na(Y[i, ]) == FALSE)
            Gx$sigma[, , i] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + (tcrossprod(Hz$mu[, 
                indices, drop = FALSE], Hz$mu[, indices, drop = FALSE]) + apply(Hz$sigma[, 
                , indices, drop = FALSE], 1:2, sum))/sigma_y^2))
            Gx$mu[, i] <- Gx$sigma[, , i] %*% (crossprod(Ax$mu, Kx[, i])/sigma_g^2 + 
                tcrossprod(Hz$mu[, indices, drop = FALSE], Y[i, indices, drop = FALSE])/sigma_y^2)
        }
        
        # update Lambdaz
        for (s in 1:R) {
            Lambdaz$beta[, s] <- 1/(1/parameters$beta_lambda + 0.5 * (Az$mu[, s]^2 + 
                diag(Az$sigma[, , s])))
        }
        # update Az
        for (s in 1:R) {
            Az$sigma[, , s] <- chol2inv(chol(diag(as.vector(Lambdaz$alpha[, s] * 
                Lambdaz$beta[, s]), Dz, Dz) + KzKz/sigma_g^2))
            Az$mu[, s] <- Az$sigma[, , s] %*% (Kz %*% matrix(Gz$mu[s, , ], Nz * Pz, 
                1)/sigma_g^2)
        }
        # update Gz: one pass per kernel n; the contribution of every other
        # kernel p != n is subtracted before multiplying by Gz$sigma[, , n]
        for (n in 1:Pz) {
            Gz$sigma[, , n] <- chol2inv(chol(diag(1/sigma_g^2, R, R) + diag((ez$mu[n] * 
                ez$mu[n] + ez$sigma[n, n])/sigma_h^2, R, R)))
            Gz$mu[, , n] <- crossprod(Az$mu, Kz[, ((n - 1) * Nz + 1):(n * Nz)])/sigma_g^2 + 
                ez$mu[n] * Hz$mu/sigma_h^2
            for (p in setdiff(1:Pz, n)) {
                Gz$mu[, , n] <- Gz$mu[, , n] - (ez$mu[n] * ez$mu[p] + ez$sigma[n, 
                  p]) * Gz$mu[, , p]/sigma_h^2
            }
            Gz$mu[, , n] <- Gz$sigma[, , n] %*% Gz$mu[, , n]
        }
        # update etaz
        etaz$beta <- 1/(1/parameters$beta_eta + 0.5 * (ez$mu^2 + diag(ez$sigma)))
        # update ez: build the Pz x Pz matrix entry by entry, invert it, then
        # apply it to the vector of sum(Gz$mu[, , n] * Hz$mu) / sigma_h^2
        ez$sigma <- diag(as.vector(etaz$alpha * etaz$beta))
        for (n in 1:Pz) {
            for (p in 1:Pz) {
                ez$sigma[n, p] <- ez$sigma[n, p] + (sum(Gz$mu[, , n] * Gz$mu[, , 
                  p]) + (n == p) * Nz * sum(diag(Gz$sigma[, , n])))/sigma_h^2
            }
        }
        ez$sigma <- chol2inv(chol(ez$sigma))
        for (n in 1:Pz) {
            ez$mu[n] <- sum(Gz$mu[, , n] * Hz$mu)/sigma_h^2
        }
        ez$mu <- ez$sigma %*% ez$mu
        # update Hz: one column per Z-side index j, using only the rows of
        # column j of Y that are not NA
        for (j in 1:Nz) {
            # positions i where Y[i, j] is observed
            indices <- which(is.na(Y[, j]) == FALSE)
            Hz$sigma[, , j] <- chol2inv(chol(diag(1/sigma_h^2, R, R) + (tcrossprod(Gx$mu[, 
                indices, drop = FALSE], Gx$mu[, indices, drop = FALSE]) + apply(Gx$sigma[, 
                , indices, drop = FALSE], 1:2, sum))/sigma_y^2))
            Hz$mu[, j] <- Gx$mu[, indices, drop = FALSE] %*% Y[indices, j, drop = FALSE]/sigma_y^2
            for (n in 1:Pz) {
                Hz$mu[, j] <- Hz$mu[, j] + ez$mu[n] * Gz$mu[, j, n]/sigma_h^2
            }
            Hz$mu[, j] <- Hz$sigma[, , j] %*% Hz$mu[, j]
        }
    }
    
    # Only these quantities are kept; Gx, Gz and Hz are not part of the state
    state <- list(Lambdax = Lambdax, Ax = Ax, Lambdaz = Lambdaz, Az = Az, etaz = etaz, 
        ez = ez, parameters = parameters)
}

In [None]:
# Supervised mode
#
# Prediction for the "single kernel on X, multiple kernels on Z" model.
#   Kx     kernel matrix on X
#   Kz     array whose third dimension indexes the kernels on Z
#   state  list providing Ax$mu, Az$mu and ez$mu
# Returns (invisibly) list(Gx, Gz, Hz, Y), each a list with a `mu` element;
# Y$mu is crossprod(Gx$mu, Hz$mu).
kbmf1k1mkl_supervised_regression_variational_test <- function(Kx, Kz, state) {
    n_columns_z <- dim(Kz)[2]
    n_kernels_z <- dim(Kz)[3]
    n_components <- dim(state$Ax$mu)[2]

    projected_x <- crossprod(state$Ax$mu, Kx)

    # Project every kernel through Az$mu and accumulate the ez$mu-weighted sum
    projected_z <- array(0, c(n_components, n_columns_z, n_kernels_z))
    combined_z <- matrix(0, n_components, n_columns_z)
    for (k in 1:n_kernels_z) {
        projected_z[, , k] <- crossprod(state$Az$mu, Kz[, , k])
        combined_z <- combined_z + state$ez$mu[k] * projected_z[, , k]
    }

    invisible(list(
        Gx = list(mu = projected_x),
        Gz = list(mu = projected_z),
        Hz = list(mu = combined_z),
        Y = list(mu = crossprod(projected_x, combined_z))
    ))
}



In [None]:
# Semi-supervised mode
#
# Prediction for the "single kernel on X, multiple kernels on Z" model.
#   Kx     kernel matrix on X
#   Kz     array whose third dimension indexes the kernels on Z
#   state  list providing Ax$mu, Az$mu and ez$mu
# Returns (invisibly) list(Gx, Gz, Hz, Y), each a list with a `mu` element;
# Y$mu is crossprod(Gx$mu, Hz$mu).
kbmf1k1mkl_semisupervised_regression_variational_test <- function(Kx, Kz, state) {
    n_columns_z <- dim(Kz)[2]
    n_kernels_z <- dim(Kz)[3]
    n_components <- dim(state$Ax$mu)[2]

    projected_x <- crossprod(state$Ax$mu, Kx)

    # Project every kernel through Az$mu and accumulate the ez$mu-weighted sum
    projected_z <- array(0, c(n_components, n_columns_z, n_kernels_z))
    combined_z <- matrix(0, n_components, n_columns_z)
    for (k in 1:n_kernels_z) {
        projected_z[, , k] <- crossprod(state$Az$mu, Kz[, , k])
        combined_z <- combined_z + state$ez$mu[k] * projected_z[, , k]
    }

    invisible(list(
        Gx = list(mu = projected_x),
        Gz = list(mu = projected_z),
        Hz = list(mu = combined_z),
        Y = list(mu = crossprod(projected_x, combined_z))
    ))
}

## Entrainement du modèle

*Le modèle de régression est ici utilisé*

In [None]:
kbmf_regression_train <- function(Kx, Kz, Y, R, iter, varargin) {
    
    # ====================================================
    # Model training entry point: builds the parameter list, picks the
    # trainer/predictor pair matching the kernel counts and the presence of
    # unobserved targets, then runs the trainer.
    #
    # Inputs
    # Kx        kernel(s) on the X side; the extent of the third dimension is
    #           the number of kernels (a plain matrix counts as one kernel)
    # Kz        kernel(s) on the Z side, same convention as Kx
    # Y         target matrix handed unchanged to the trainer
    # R         stored as parameters$R; the trainers use it as the number of
    #           columns of Ax$mu and Az$mu
    # iter      stored as parameters$iteration (number of update sweeps)
    # varargin  optional named list; each element overrides or adds the
    #           parameter of the same name
    #
    # Output
    # The state list returned by the selected trainer (returned invisibly).
    # parameters$train_function and parameters$test_function record the
    # selected pair.
    # ====================================================
    
    # A kernel supplied as a plain matrix has no third dimension: dim(K)[3]
    # would be NA and break every comparison below, so count it as one kernel.
    count_kernels <- function(K) {
        dims <- dim(K)
        if (length(dims) >= 3) dims[3] else 1
    }
    
    # Supervised only when Y has no NA and every entry is > 0. The anyNA()
    # guard matters: all(Y > 0) alone evaluates to NA when Y holds NA next to
    # positive values, which made the branch below fail on precisely the
    # inputs meant for the semi-supervised trainers.
    # NOTE(review): entries <= 0 also select the semi-supervised trainers
    # (original heuristic, kept as is); the trainers shown in this file only
    # skip NA entries - confirm how missing values are encoded in Y.
    is_supervised <- !anyNA(Y) && all(Y > 0)
    
    # Parameter initialisation
    Px <- count_kernels(Kx)
    Pz <- count_kernels(Kz)
    multiple_kernels <- Px > 1 || Pz > 1
    
    parameters <- list()
    
    parameters$alpha_lambda <- 1
    parameters$beta_lambda <- 1
    
    # The eta hyper-parameters are only defined when some side has several kernels
    if (multiple_kernels) {
        parameters$alpha_eta <- 1
        parameters$beta_eta <- 1
    }
    parameters$iteration <- iter
    # NOTE(review): not read by the trainers visible in this section of the file
    parameters$progress <- 1
    parameters$R <- R
    parameters$seed <- 1606
    # Enters the trainers as 1 / sigma_g^2 (presumably a standard deviation - confirm)
    parameters$sigma_g <- 0.1
    
    # sigma_h is only defined when some side has several kernels
    if (multiple_kernels) {
        parameters$sigma_h <- 0.1
    }
    parameters$sigma_y <- 1
    
    # Model choice (multiple kernel learning on X and/or Z, or a single kernel
    # on each side). Only the selected branch is evaluated, so only the chosen
    # pair of functions has to exist.
    if (is_supervised) {
        if (Px == 1 && Pz == 1) {
            train_function <- kbmf1k1k_supervised_regression_variational_train
            test_function <- kbmf1k1k_supervised_regression_variational_test
        } else if (Px > 1 && Pz == 1) {
            train_function <- kbmf1mkl1k_supervised_regression_variational_train
            test_function <- kbmf1mkl1k_supervised_regression_variational_test
        } else if (Px == 1 && Pz > 1) {
            train_function <- kbmf1k1mkl_supervised_regression_variational_train
            test_function <- kbmf1k1mkl_supervised_regression_variational_test
        } else if (Px > 1 && Pz > 1) {
            train_function <- kbmf1mkl1mkl_supervised_regression_variational_train
            test_function <- kbmf1mkl1mkl_supervised_regression_variational_test
        } else {
            stop("Kx and Kz must each contain at least one kernel", call. = FALSE)
        }
    } else {
        if (Px == 1 && Pz == 1) {
            train_function <- kbmf1k1k_semisupervised_regression_variational_train
            test_function <- kbmf1k1k_semisupervised_regression_variational_test
        } else if (Px > 1 && Pz == 1) {
            train_function <- kbmf1mkl1k_semisupervised_regression_variational_train
            test_function <- kbmf1mkl1k_semisupervised_regression_variational_test
        } else if (Px == 1 && Pz > 1) {
            train_function <- kbmf1k1mkl_semisupervised_regression_variational_train
            test_function <- kbmf1k1mkl_semisupervised_regression_variational_test
        } else if (Px > 1 && Pz > 1) {
            train_function <- kbmf1mkl1mkl_semisupervised_regression_variational_train
            test_function <- kbmf1mkl1mkl_semisupervised_regression_variational_test
        } else {
            stop("Kx and Kz must each contain at least one kernel", call. = FALSE)
        }
    }
    
    # Caller-supplied overrides win over the defaults set above
    if (!missing(varargin)) {
        for (name in names(varargin)) {
            parameters[[name]] <- varargin[[name]]
        }
    }
    parameters$train_function <- train_function
    parameters$test_function <- test_function
    
    # drop() removes the singleton kernel dimension so single-kernel trainers
    # receive a plain matrix
    state <- train_function(drop(Kx), drop(Kz), Y, parameters)
}

In [None]:
kbmf_regression_test <- function(Kx, Kz, state) {
    # ====================================================
    # Model prediction entry point
    # Inputs
    # Kx     kernel(s) on the X side
    # Kz     kernel(s) on the Z side
    # state  trained state; state$parameters$test_function is the predictor
    #        that gets called
    #
    # Output
    # Whatever that predictor returns (returned invisibly)
    # ====================================================
    predictor <- state$parameters$test_function
    # drop() strips singleton dimensions from both kernels before the call
    invisible(predictor(drop(Kx), drop(Kz), state))
}

In [None]:
set.seed(1606)

# The original cell assigned Nz twice and never defined Nx, which the next
# cell needs. The trainers index Y as Y[i, j] with i in 1:Nx and j in 1:Nz,
# so Nx is the row count and Nz the column count.
# NOTE(review): assumes sparse_mat (used as Y) has the same shape as
# mat_dense - confirm. The original comments labelled the two sizes
# "number of diseases" and "number of miRNA"; check those labels against the
# data set actually loaded.
Px <- 1 # Number of feature rows of X (one kernel is built per row)
Nx <- dim(mat_dense)[1] # Number of X-side objects (rows of the data matrix)
Pz <- 1 # Number of feature rows of Z (one kernel is built per row)
Nz <- dim(mat_dense)[2] # Number of Z-side objects (columns of the data matrix)

In [None]:
# Random feature matrices (one row per kernel) and the target matrix.
# X is drawn before Z so the random stream is consumed in the same order.
X <- matrix(rnorm(Px * Nx), nrow = Px, ncol = Nx)
Z <- matrix(rnorm(Pz * Nz), nrow = Pz, ncol = Nz)
Y <- sparse_mat

# One Nx x Nx kernel per row of X: the outer product of that row with itself
Kx <- array(0, dim = c(Nx, Nx, Px))
for (m in 1:Px) {
  Kx[, , m] <- crossprod(X[m, , drop = FALSE])
}
Kx
# One Nz x Nz kernel per row of Z, built the same way
Kz <- array(0, dim = c(Nz, Nz, Pz))
for (n in 1:Pz) {
  Kz[, , n] <- crossprod(Z[n, , drop = FALSE])
}
Kz

In [None]:
# Training: R = 5 is forwarded as parameters$R, iter = 20 as the number of update sweeps
state <- kbmf_regression_train(Kx = Kx, Kz = Kz, Y = Y, R = 5, iter = 20)

In [None]:
# Model prediction on the same kernels that were used for training
prediction <- kbmf_regression_test(Kx = Kx, Kz = Kz, state = state)

# Présentation des résultats

In [None]:
# RMSE between the predicted means and Y. na.rm = TRUE skips the NA entries
# of Y; without it a single missing target turns the whole score into NA.
print(sprintf("RMSE = %.4f", sqrt(mean((prediction$Y$mu - Y)^2, na.rm = TRUE))))

In [None]:
# ex$mu is only part of the state when several kernels are used on X;
# otherwise this prints NULL
print("kernel weights on X")
print(state[["ex"]][["mu"]])

In [None]:
# ez$mu is only part of the state when several kernels are used on Z;
# otherwise this prints NULL
print("kernel weights on Z")
print(state[["ez"]][["mu"]])