In [1]:
import numpy as np
import pandas as pd

In [7]:
PLATES = ['Beer', 'Whiskey', 'Vodka', 'Tequila', 'Gin' ]

user_likes = np.array([
    [5,3,4,4,0],
    [3,1,2,3,3],
    [4,3,4,3,5],
    [3,3,1,5,4],
    [1,5,5,2,1],
]) 

In [8]:
user_likes.shape

(5, 5)

In [9]:
FEATURES = 15

def build_initial_factors(m):
    rows, columns = m.shape
    f1 = np.random.rand(rows, FEATURES)
    f2 = np.random.rand(columns, FEATURES)
    return f1,f2

def factorize(m, f1, f2, steps = 10000, alpha=0.02, beta=0.02, error_trigger=0.001):
    f2 = f2.T
    rows, columns = m.shape 
    last_error = 0
    for step in range(steps):
        for i in range(rows):
            for j in range(columns):
                if m[i][j] > 0:
                    eij = m[i][j] - np.dot(f1[i,:],f2[:,j])
                    for k in range(FEATURES):
                        f1[i][k] = f1[i][k] + alpha * (2 * eij * f2[k][j] - beta * f1[i][k])
                        f2[k][j] = f2[k][j] + alpha * (2 * eij * f1[i][k] - beta * f2[k][j])
        e = 0
        for i in range(rows):
            for j in range(columns):
                if m[i][j] > 0:
                    e = e + pow(m[i][j] - np.dot(f1[i,:],f2[:,j]), 2)
                    for k in range(FEATURES):
                        e = e + (beta/2) * (pow(f1[i][k],2) + pow(f2[k][j],2))
        last_error = e
        if e < error_trigger:
            print(f'>> Achieved an error less than {error_trigger} in {step} calculation steps')
            break
    print(f'FINISH: Error = {last_error}')
    return f1, f2.T 
    

def extrapolate(m):
    f1,f2 = build_initial_factors(m)
    f1n, f2n = factorize(m, f1, f2)
    return np.round(np.matmul(f1n, f2n.T), 2)

In [10]:
extrapolated_user_likes = extrapolate(user_likes)

FINISH: Error = 2.4361295150958835


In [11]:
likes_df = pd.DataFrame(user_likes, columns = PLATES)
extrapolated_likes_df = pd.DataFrame(extrapolated_user_likes, columns = PLATES)

In [12]:
likes_df

Unnamed: 0,Beer,Whiskey,Vodka,Tequila,Gin
0,5,3,4,4,0
1,3,1,2,3,3
2,4,3,4,3,5
3,3,3,1,5,4
4,1,5,5,2,1


In [13]:
extrapolated_likes_df

Unnamed: 0,Beer,Whiskey,Vodka,Tequila,Gin
0,4.96,2.99,3.99,4.0,5.35
1,2.99,1.03,1.98,2.97,3.0
2,4.0,3.0,3.98,3.01,4.97
3,3.0,2.98,1.02,4.97,3.99
4,1.01,4.98,4.97,1.99,1.01
