# Intro

This implem

## Import matrix as dataframe

In [7]:
import pickle
import torch
import torch.nn as nn
import os
import numpy as np
import pandas as pd

## Model Implementation
I used pytorch because it has the same functionality while easily scalable and the capacity to run Nvidia GPUs(Thanks Daddy Jensen LOL).  
But this could be easily adopted into numpy.

This is PQ Factorization, an implementation of the matrix factorization algorithm using PyTorch. nn Module is used to define the model and the forward pass. `nn.Module` in pytorch is usually used to define a neural network model, but it can be used to define classical models as well. Usually, the main components of the model are forward pass, backward pass, and optimization.  
The forward pass is the computation of the output of the model given the input, and is implemented in the forward method of the nn.Module class. The backward pass is the computation of the gradients of the model parameters with respect to the loss, and is implemented in the backward method of the nn.Module class.
The optimization is the process of updating the model parameters using the gradients computed in the backward pass, and usually implemented using an optimizer such as SGD or Adam.

In [42]:
import torch
torch.manual_seed(0)

<torch._C.Generator at 0x10c226d50>

In [40]:
class PQFactorization(nn.Module):
	def __init__(self, target, latent_factors=2, processor="cpu"):
		super(PQFactorization, self).__init__()
		self.R = torch.tensor(target)
		self.latent_factors = latent_factors
		self.device = processor
		
	def forward(self, P, Q, processor="cpu"):
		if type(P) == np.ndarray:
			P = torch.tensor(P, device=processor)
		if type(Q) == np.ndarray:
			Q = torch.tensor(Q, device=processor)
			
		return torch.matmul(P, Q.T)
	


	def FrobeniousLoss(self,E,P, Q, beta):
		return 1/2* torch.norm(E - R, p='fro') + beta / 2 * torch.norm(P, p='fro') + torch.norm(Q, p='fro')
	
	def fit(self, epochs=1000, alpha=0.01, beta=0.02, patience=10, min_delta=0.001,  processor="cpu"):
		Pik = torch.randn(self.R.shape[0], self.latent_factors, device=self.device)
		Qkj = torch.randn(self.R.shape[1], self.latent_factors, device=self.device)
		best_loss = float('inf')
		counter = 0

		for epoch in range(epochs):
			error_matrix = self.forward(Pik, Qkj) - self.R
			loss = self.FrobeniousLoss(error_matrix, Pik, Qkj, beta)
			if loss < best_loss - min_delta:
				best_loss = loss
				counter = 0
			else:
				counter += 1

			if counter >= patience:
				print(f"Early stopping at epoch {epoch+1} with loss {loss.item():.4f}")
				break

			Pik -= alpha * (2 * error_matrix.matmul(Qkj) - beta * Pik)
			Qkj -= alpha * (2 * error_matrix.T.matmul(Pik) - beta * Qkj)
			if (epoch + 1) % 100 == 0:  # Print loss every 100 epochs
				print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

		return Pik.matmul(Qkj.T)


In [35]:
R = np.array([
     [4.5,2.3,0,1.2],
     [4.6,0,0,1.25],
     [1.0,1.2,0,5.6],
     [1.8,0,0,4.3],
     [0,1.4,5.32,4.3],
    ], dtype=np.float32)
K = 3

In [41]:
model = PQFactorization(R, latent_factors=K)
model.fit(epochs=10000, alpha=0.01, beta=0.02)

Early stopping at epoch 92 with loss 10.6888


tensor([[ 4.7594,  1.2884,  0.2733,  1.2455],
        [ 4.3465,  1.0454, -0.2805,  1.2030],
        [ 1.1452,  0.6191,  0.1547,  5.6327],
        [ 1.6536,  0.5889, -0.1621,  4.2799],
        [-0.0410,  1.5644,  5.2852,  4.2956]])

In [None]:
matrix = pd.read_parquet('matrix.parquet.gzip')
display(matrix)

In [None]:
R = matrix.to_numpy()
R