In [1]:
import pandas as pd 
import numpy as np 
from numpy.linalg import norm
from scipy.spatial.distance import pdist,squareform

def cosine_similarities(a,b) : 
    norm_a = np.sqrt(np.dot(a,a))
    norm_b = np.sqrt(np.dot(b,b))
    cosine_similarities = np.dot(a,b) / np.dot(norm_a,norm_b)
    return cosine_similarities

c:\Users\Fakhri Robi Aulia\AppData\Local\Programs\Python\Python38\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\Fakhri Robi Aulia\AppData\Local\Programs\Python\Python38\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
c:\Users\Fakhri Robi Aulia\AppData\Local\Programs\Python\Python38\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
c:\Users\Fakhri Robi Aulia\AppData\Local\Programs\Python\Python38\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


## Implementation of SLIM from Scratch

In [62]:
## implementation of SLIM 
from sklearn.linear_model import SGDRegressor
class SLIM : 
    """Implementation of SLIM User Based
    
    """
    def __init__(self,l1_reg=0.001, l2_reg=0.0001,top_k = 5,recommend_only_missing=True) -> None:
        self.alpha = l1_reg + l2_reg
        self.l1_ratio = l1_reg / self.alpha
        self.max_rating = 5
        self.top_k = 5
        self.recommend_only_missing=recommend_only_missing
    def impute_missing_values(self,user_rating_matrix) : 
        """
        Missing Values Imputation of User Rating Matrix using Mean of average item ratings ( Based on Charu C Aggarwal on Dimensionality Reduction Neighbourhood)
        However According to Original Paper it said that the missing values treated as true Negative or 0
        
        Args:
            user_rating_matrix (_type_): _description_

        Returns:
            _type_: _description_
        """
        #save missing location for prediction purpose 
        self.missing_location = np.argwhere(np.isnan(user_rating_matrix))
        for col in range(user_rating_matrix.shape[0]) : 
            mean = np.nanmean(user_rating_matrix[col])
            user_rating_matrix[col][np.isnan(user_rating_matrix[col])] = mean


        return user_rating_matrix
    def clip_rating(self,rating)->int : 
        """
        Clip predicted rating when predicting according to max value of raring

        Args:
            rating (int): Predicted Rating

        Returns:
            int: clipped rating
        """
        if rating > self.max_rating : 
            return self.max_rating 
        elif rating < 0  : 
            return 0 
        elif (rating <= self.max_rating) & (rating>=0) : 
            return rating
        
    def fit(self,user_rating_matrix) : 
        """Implementing Eq (1) of SLIM Paper Miminizing Least Squares Error of Ratings and 
        Predicted Ratings Also Implement Elastic Net Regularization 

        Args:
            user_rating_matrix (np.array type): User Rating Matrix that has shape m(user)xn(items)
        """
        self.m,self.n = user_rating_matrix.shape
        self.W =  np.zeros(shape=(self.n,self.n))
        
        #prepare data 
        self.user_rating_matrix = self.impute_missing_values(user_rating_matrix)
        
        for item_index in range(self.n) : 
            # 
            model = SGDRegressor(
            penalty='elasticnet',
            fit_intercept=False,
            alpha=self.alpha,
            l1_ratio=self.l1_ratio)
            
            #prepare data for training 
            #isolate item_index array make it as target 
            training_rating = self.user_rating_matrix[:,item_index].copy()

            #isolate item_idx as it becomes target 
            self.user_rating_matrix[:,item_index]=0 

            #equivalent of model.fit(X,y)
            model.fit(self.user_rating_matrix,training_rating.ravel())
            self.user_rating_matrix[:,item_index] = training_rating

            #get model coef  
            coef_ = model.coef_ 
            #SLIM restriction 
            coef_[coef_< 0] = 0

            #replace weight 
            for el in coef_.nonzero()[0]:
                self.W[(el, item_index)] = coef_[el]
                
                
    def predict_rating(self,user_idx,item_idx) : 
        """Implementation of Prediction Function Based of Charu C. Aggarwal Recommendation System TextBook Eq. (2.33)

        Args:
            user_idx (_type_): user_index to predict rating
            item_idx (_type_): item which rating to be predicted 
        """
        #get weight for item_idx 
        weight = self.W[item_idx]
        #get_each available rating based on user_idx
        rating = self.user_rating_matrix[user_idx]
        
        #dot product of weight of rating from all item from user = user_idx
        predicted_rating = self.clip_rating(np.dot(weight,rating.T))
        return predicted_rating
    

        
        
    def get_recommendation(self,user_idx,recommend_only_missing=True,top_k=5) : 
        """Recommend best top K item for user=user_idx
            Approach -> repredict missing rating only -> sort best on the highest-k (could be set)
        Args:
            user_idx (_type_): user_idx to recommend

        Returns:
            _type_: _description_
        """
        #predict only missing value of the data 
        #finding missing_value on spesific user_idx
        missing_item_idx = []
        #missing location has shape of mxn (similar of user rating matrix) m -> user_idx and n-> item_index
        for missing_loc in self.missing_location : 
            if missing_loc[0]==user_idx : 
                missing_item_idx.append(missing_loc[1])
            else : 
                continue 
        #call user_rating_matrix = 
        user_idx_rating_matrix = self.user_rating_matrix[user_idx]
        
        #refill again the missing ones 
        for idx in missing_item_idx : 
            user_idx_rating_matrix[idx]= self.predict_rating(user_idx=user_idx,item_idx=idx)
        #with assumption that rated item will not be recommended again 
        
        recommendation  = {}
        if recommend_only_missing : 
            missing_ratings = user_idx_rating_matrix[missing_item_idx]
            rank = np.argsort(missing_ratings)[::-1][:top_k]
            sorted_ratings = missing_ratings[rank]
        
        
            for x,y in zip(rank,sorted_ratings) : 
                recommendation[f'Item ID : {x} ']= y
        else : 
            rank = np.argsort(user_idx_rating_matrix)[::-1][:top_k]
            sorted_ratings = user_idx_rating_matrix[rank]
            for x,y in zip(rank,sorted_ratings) : 
                recommendation[f'Item ID : {x} ']= y
                
        return recommendation
    

In [63]:
example_URM = np.array([
                [1,np.nan,3,np.nan,np.nan,5,np.nan,np.nan,5,np.nan,4,np.nan],
                [np.nan,np.nan,5,4,np.nan,np.nan,4,np.nan,np.nan,2,1,3],
                [2,4,np.nan,1,2,np.nan,3,np.nan,4,3,5,np.nan],
                [np.nan,2,4,np.nan,5,np.nan,np.nan,4,np.nan,np.nan,2,np.nan],
                [np.nan,np.nan,4,3,4,2,np.nan,np.nan,np.nan,np.nan,2,5],
                [1,np.nan,3,np.nan,3,np.nan,np.nan,2,np.nan,np.nan,4,np.nan],
    
]).T




model = SLIM()
model.fit(user_rating_matrix=example_URM)
model.predict_rating(user_idx=0,item_idx=0)





1.8060091435691996

In [64]:
## Predict Whole User Rating Matrix 
all_prediction_result = np.dot(example_URM,model.W)

from sklearn.metrics import mean_squared_error 

mse = mean_squared_error(example_URM.reshape(-1,1),all_prediction_result.reshape(-1,1))
print('MSE from SLIM Method, using Mean Imputation : ',mse)


MSE from SLIM Method, using Mean Imputation :  0.7190607334008564


In [77]:
# User 6 Recommendation 
rec_user_6 = model.get_recommendation(user_idx=5,recommend_only_missing=False,top_k=None)
print('Recommendation to User 6 ,',rec_user_6)
#measure local mse 
predicted_rating_user_6 = list(rec_user_6.values())

true_rating_user_6 = example_URM[5]
true_rating_user_6

mse_6 = mean_squared_error(true_rating_user_6,predicted_rating_user_6)
print('MSE Ratings user 6 :',mse_6)

Recommendation to User 6 , {'Item ID : 5 ': 5.0, 'Item ID : 0 ': 5.0, 'Item ID : 3 ': 3.7194378107598394, 'Item ID : 2 ': 3.1246500611676122, 'Item ID : 1 ': 2.638662788490432, 'Item ID : 4 ': 2.0}
MSE Ratings user 6 : 2.6152247529986883


In [79]:
# User 8 Recommendation 
rec_user_8 = model.get_recommendation(user_idx=7,recommend_only_missing=False,top_k=None)
print('Recommendation to User 8 ,',rec_user_8)
predicted_rating_user_8 = list(rec_user_8.values())

true_rating_user_8 = example_URM[7]
true_rating_user_8

mse_8 = mean_squared_error(true_rating_user_8,predicted_rating_user_8)
print('MSE Ratings user 8 : ',mse_8)

Recommendation to User 8 , {'Item ID : 3 ': 4.0, 'Item ID : 4 ': 3.3031562380182087, 'Item ID : 0 ': 2.9695801829213835, 'Item ID : 1 ': 2.678354344355851, 'Item ID : 5 ': 2.0, 'Item ID : 2 ': 1.8442035677024164}
MSE Ratings user 8 :  1.0313084799865957


In [None]:
## Which User is The Most Similar to User 4 
"""
In terms of Measuring Similarities, there are several way to do so 
1. Cosine Similarity 
2. Pearson Similarity 
3. Etc 
"""

# Cosine Approach 
# Create Similarity Matrix 
cosine_similarity_matrix = squareform(pdist(example_URM, metric='cosine'))
# pick user 4 
user_4_cosine_similarity = cosine_similarity_matrix[3]
# : 
print('5 most similar to user 4',np.argsort(user_4_cosine_similarity)[::-1][:5])
print('5 least similar to user 4',np.argsort(user_4_cosine_similarity)[:5])

5 most similar to user 4 [10  0  5  1  9]
5 least similar to user 4 [3 6 4 2 8]


In [None]:
## Pearson 
pearson_similarity_matrix = squareform(pdist(example_URM, metric='correlation'))
# pick user 4 
user_4_pearson_similarity = pearson_similarity_matrix[3]
# : 
print('5 most similar to user 4',np.argsort(user_4_pearson_similarity)[::-1][:5])
print('5 least similar to user 4',np.argsort(user_4_pearson_similarity)[:5])

5 most similar to user 4 [ 9 10  0  1 11]
5 least similar to user 4 [3 6 4 8 7]
