In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from formatting_data import get_data_frame, normalise
from numba import njit

In [5]:
df, game_df = get_data_frame()

In [31]:
class Collaborative_filtering:
    def __init__(self):
        self.alpha = 61
        print('Model created')

    @staticmethod
    @njit
    def normaliser(x, alpha):
        return 1 - (1 / (1 + (x / alpha)))

    def load_data(self, df=None, game_df=None):
        if not (df is None) and df.shape[1] == 3 and not (game_df is None) and game_df.shape[1] == 2:
            try:
                self.df = df
                self.game_df = game_df

                #creating utility matrix
                self.utility_matrix = df.pivot_table(index=df.columns[0], columns=df.columns[1], values=df.columns[2], fill_value=0)
                self.utility_matrix = self.utility_matrix.to_numpy(dtype=np.float16)

                #mean normalisation
                self.mean_vector = self.utility_matrix.mean(axis=1)
                self.utility_matrix = (self.utility_matrix.T - self.mean_vector).T

                self.utility_shape =self.utility_matrix.shape
                
                print('Data loaded sucessfully')
            except Exception as e:
                print(f'Error loading data.\n{e}')
        elif df is None:
            print('No dataframe is provided')
        else:
            print('Data not in the right format')

    def initialise(self, num_features=100):
        if not self.utility_matrix is None:
            self.user_matrix = np.random.uniform(-1, 1, (self.utility_shape[0], num_features))
            self.item_matrix = np.random.uniform(-1, 1, (self.utility_shape[1], num_features))

            self.user_matrix = np.float16(self.user_matrix)
            self.item_matrix = np.float16(self.item_matrix)

            self.user_sum = np.sum(self.user_matrix)
            self.item_sum = np.sum(self.item_matrix)
            print('Model\'s varaibles initialised')
        else:
            print('Data not loaded')

    def train(self, X_train, y_train):
        print('Training model')

    def predict(self, X):
        prediction = self.user_matrix @ self.item_matrix.T
        print('Predicted')

    def __cost(self, y, yhat, normalise_func, chunk_size=500, _lambda=1):
        cost = 0
        normaliser = self.normaliser
        alpha = self.alpha
        
        for i in range(0, y.shape[0], chunk_size):
            yhat_chunk = normaliser(yhat[i:i+chunk_size], alpha)
            cost += np.sum(
                    np.square(
                    np.subtract(y[i:i+chunk_size], yhat_chunk)
                )
            )
            
        cost += self.user_sum + self.item_sum #regularisation
        return cost
    
    def get_cost(self):
        yhat = self.user_matrix @ self.item_matrix.T
        return self.__cost(self.utility_matrix, yhat, normalise)

In [32]:
model = Collaborative_filtering()

Model created


In [33]:
model.load_data(df, game_df)

Data loaded sucessfully


In [34]:
model.initialise()

Model's varaibles initialised


In [35]:
model.get_cost()

NotImplementedError: float16

In [16]:
utility_matrix = df.pivot_table(index=df.columns[0], columns=df.columns[1], values=df.columns[2], fill_value=0)
utility_matrix = utility_matrix.to_numpy(dtype=np.float32)

In [17]:
mean_vector = utility_matrix.mean(axis=1)

In [18]:
utility_matrix = (utility_matrix.T - mean_vector).T

In [19]:
user_matrix = np.random.uniform(-1, 1, (utility_matrix.shape[0], 100))
item_matrix = np.random.uniform(-1, 1, (utility_matrix.shape[1], 100))
user_matrix = user_matrix.astype(np.float32)
item_matrix = item_matrix.astype(np.float32)

In [20]:
prediction = user_matrix @ item_matrix.T

In [21]:
normaliser = np.vectorize(normalise)
normaliser(prediction[0])

array([ 0.0423893 ,  0.03624141, -0.06758709, ..., -0.02281161,
        0.00157553, -0.03177214], dtype=float32)

In [22]:
chunk = 1000
cost = 0
_lambda = 0.1
for i in range(0, utility_matrix.shape[0], chunk):
    prediction_chunk = normalise(prediction[i:i+chunk])
    cost += np.sum(
            np.square(
            np.subtract(utility_matrix[i:i+chunk], prediction_chunk)))

In [23]:
cost

np.float32(4093832.8)

In [24]:
_lambda = 0.1
cost += np.sum(user_matrix) + np.sum(item_matrix)
cost

np.float32(4094378.8)

In [15]:
model.cost(utility_matrix, prediction, normalise, user_matri=user_matrix, item_matri=item_matrix)

np.float32(4093996.2)