# À propos de ce livret



Recherches liées au développement de cette méthode : 
>Ruslan Salakhutdinov, Andriy Mnih (2008). **Probabilistic Matrix Factorization.**


La méthode implantés provient de ces sources : 
>[**Dépôt 1**](https://github.com/xinychen/transdim/blob/master/experiments/Imputation-PMF-Gdata.ipynb) *Version Python*  
>[**Dépôt 2**](https://github.com/kastnerkyle/School/blob/master/atpr/matrix_factorization.py) *Version Python*  
>[**Dépôt 3**](https://github.com/stxupengyu/Probabilistic-Matrix-Factorization) *Version R*  



**Note sur le cachier**
- Terminer le formatage du document
- Voir comment ajouter la modification des jeux de données dans la fonction pour le modèle

## Préparation de l'environnement pour l'utilisation

In [1]:
# Utilités
import os
import numpy as np
from numpy import linalg as LA

# Chargement des données
import scipy.io

# Barre de progression
from tqdm.auto import trange
from tqdm import tqdm_notebook, notebook

# Présentation des résultats
import matplotlib.pyplot as plot
import matplotlib.cm as cm

In [2]:
os.chdir("C:/Users/amass/OneDrive/02_Education/02_Maitrise/Cours/PROJET_MAITRISE/application")

# Données

## Chargement des données

In [3]:
# Chargement du tenseur de données
tensor = scipy.io.loadmat('data/Guangzhou-data-set/tensor.mat')
tensor = tensor['tensor']

# Chargement de la matrice aléatoire de données
random_matrix = scipy.io.loadmat('data/Guangzhou-data-set/random_matrix.mat')
random_matrix = random_matrix['random_matrix']

# Chargement du tenseur aléatoire de données
random_tensor = scipy.io.loadmat('data/Guangzhou-data-set/random_tensor.mat')
random_tensor = random_tensor['random_tensor']

print(
    "Tensor shape : {:} | Random matrix shape : {:} | Random tensor shape : {:}"
    .format(tensor.shape, random_matrix.shape, random_tensor.shape))

Tensor shape : (214, 61, 144) | Random matrix shape : (214, 61) | Random tensor shape : (214, 61, 144)


## Format des données

In [4]:
petit_donnees = True

# Paramètres du jeu de données tronqué
capteurs = 50
jours = 10
sequences = tensor.shape[2]

matrice = True

if petit_donnees == True:
    tensor = tensor[0:capteurs, 0:jours, 0:sequences]

    random_tensor = random_tensor[0:capteurs, 0:jours, 0:sequences]

print("The tensors shapes are : {:} | {:}".format(tensor.shape,
                                                  random_tensor.shape))

if matrice == True:
    dense_mat = tensor.reshape(
        [tensor.shape[0], tensor.shape[1] * tensor.shape[2]])

    print("The matrix shape is : {:}".format(dense_mat.shape))

The tensors shapes are : (50, 10, 144) | (50, 10, 144)
The matrix shape is : (50, 1440)


## Scénario de manquants

In [5]:
missing_rate = 0.4

random_missing = True

# Revoir le code pour que les deux choix puissent être respectés sur l'ensemble des matrices/ tenseurs
if random_missing == True:
    print("Random missing (RM) scenario")
    binary_mat = (np.round(random_tensor + 0.5 - missing_rate).reshape([
        random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]
    ]))
    print("\nBinary matrix shape : {:}".format(binary_mat.shape))
else:
    print("Non-random missing (NM) scenario")
    binary_tensor = np.zeros(tensor.shape)
    for i1 in range(tensor.shape[0]):
        for i2 in range(tensor.shape[1]):
            binary_tensor[i1, i2, :] = np.round(random_matrix[i1, i2] + 0.5 -
                                                missing_rate)
    binary_mat = binary_tensor.reshape([
        binary_tensor.shape[0], binary_tensor.shape[1] * binary_tensor.shape[2]
    ])
    print("\nBinary matrix shape : {:}".format(binary_mat.shape))

sparse_mat = np.multiply(dense_mat, binary_mat)

Random missing (RM) scenario

Binary matrix shape : (50, 1440)


# Modélisation

## Spécification du modèle

In [31]:
def PMF(sparse_mat, rang = 50, iterations = 2, learning_rate = .001, regularization_rate = .1):
    """
    Probabilistic matrix factorization
    
    Paramètres en entré
    
    Paramètres en sortie
    
    
    
    """
    
    mat_bin = np.asarray(sparse_mat > 0, dtype=np.int)
    
    sparse_mat_1d = np.ravel(sparse_mat)
    mat_moy = np.mean(sparse_mat_1d)
    mat_norm = sparse_mat - mat_moy
    
    N = mat_norm.shape[0]
    M = mat_norm.shape[1]
    U = np.random.randn(N, rang)
    V = np.random.randn(rang, M)
    
    #PMF using gradient descent as per paper
    #Probabilistic Matrix Factorization
    #R. Salakhutdinov, A. Minh
    
    for r in notebook.tqdm(range(iterations), desc = "Iterations"):
        for i in notebook.tqdm(range(N), desc = "Dimension N"):
            for j in notebook.tqdm(range(M), desc = "Dimension M"):
                if mat_bin[i, j] > 0:
                    e = mat_norm[i, j] - np.dot(U[i, :], V[:, j])
                    U[i, :] = U[i, :] + learning_rate * (e * V[:, j] - regularization_rate * U[i, :])
                    V[:, j] = V[:, j] + learning_rate * (e * U[i, :] - regularization_rate * V[:, j])
                    
    mat_imput = np.dot(U, V)
    mat_imput = mat_imput + mat_moy
    return mat_imput

## Entrainement du modèle

In [32]:
mat_imput = PMF(sparse_mat)

HBox(children=(FloatProgress(value=0.0, description='Iterations', max=2.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='Dimension N', max=50.0, style=ProgressStyle(description_w…

HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…





HBox(children=(FloatProgress(value=0.0, description='Dimension N', max=50.0, style=ProgressStyle(description_w…

HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Dimension M', max=1440.0, style=ProgressStyle(description…






# Présentation des résultats

In [33]:
def get_RMSE(A, A_):
    """
    A : Matrice originale
    A_ : Matrice imputée
    
    
    """
    A1d = np.ravel(A)
    A_1d = np.ravel(A_)
    e = np.mean((A1d - A_1d)**2)
    return np.sqrt(e)

In [34]:
def combine(A, A_):
    """
    Fonction qui combine le jeu de données avec des données manquantes et l'imputation 
    
    Paramètres en entrée
        A : Matrice avec des données manquantes
        A_ : Matrice avec des données imputées
    Paramètres en sortie
        out : Matrice complétée 
    
    """
    
    
    out = np.zeros(A.shape)
    N = A.shape[0]
    M = A.shape[1]
    for i in range(N):
        for j in range(M):
            if A[i, j] == 0:
                out[i, j] = A_[i, j]
            else:
                out[i, j] = A[i, j]
    return out

In [35]:
RMSE = get_RMSE(dense_mat, mat_imput)
RMSE

6.793514785243465

In [36]:
mat_combine = combine(sparse_mat, mat_imput)

get_RMSE(dense_mat, mat_combine)

5.193563772862957