In [30]:
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np

from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from ast import literal_eval

In [31]:
def normalize(pred_ratings):
    '''
    Min-max scale the predicted ratings so that they fall in the range [0, 1].

    The minimum and maximum are taken over the whole input (not per row or
    per column), so every value is scaled with the same offset and range.

    params:
        pred_ratings (array-like) : The prediction ratings. Anything accepted by
                                    np.asarray, e.g. a numpy array or a list of lists.

    returns:
        numpy.ndarray of floats with the same shape as the input.
        - An empty input is returned as an empty float array.
        - If every value is identical (max == min) an array of zeros is
          returned instead of dividing by zero and producing NaNs.
    '''
    ratings = np.asarray(pred_ratings, dtype=float)

    # .min() / .max() raise on empty arrays, and there is nothing to scale anyway
    if ratings.size == 0:
        return ratings

    lowest = ratings.min()
    spread = ratings.max() - lowest

    # constant input: the scaling is undefined (0 / 0), map everything to 0
    if spread == 0:
        return np.zeros_like(ratings)

    return (ratings - lowest) / spread

In [32]:
def generate_prediction_df(mat, pt_df, n_factors):
    '''
    This function computes a truncated singular value decomposition of the input
    matrix with n_factors singular values, rebuilds the low-rank approximation
    from it and normalizes the result with `normalize`.

    params:
        mat (CSR Matrix) : scipy csr matrix corresponding to the pivot table (pt_df)
        pt_df (DataFrame) : pandas dataframe which is a pivot table; only its
                            index and columns are used, to label the result
        n_factors (Integer) : Number of singular values and vectors to compute.
                              Must be 1 <= n_factors < min(mat.shape).

    returns:
        DataFrame : the normalized predictions, transposed relative to pt_df,
                    i.e. rows are pt_df.columns and columns are pt_df.index.

    raises:
        ValueError : if n_factors is out of range, or if mat and pt_df do not
                     have the same shape.
    '''

    if not 1 <= n_factors < min(mat.shape):
        raise ValueError("Must be 1 <= n_factors < min(mat.shape)")

    # fail before the expensive decomposition rather than in the DataFrame constructor
    if tuple(mat.shape) != tuple(pt_df.shape):
        raise ValueError(
            "mat and pt_df must have the same shape, got {} and {}".format(
                tuple(mat.shape), tuple(pt_df.shape)
            )
        )

    # svds only accepts floating point (or complex) input, so promote e.g. integer ratings
    if not np.issubdtype(mat.dtype, np.inexact):
        mat = mat.astype(float)

    # matrix factorization; the third return value of svds is already V transposed
    u, s, vt = svds(mat, k = n_factors)

    # calculate pred ratings: scaling the columns of u by s is the same as
    # multiplying by diag(s), without building the dense k x k diagonal matrix
    pred_ratings = np.dot(u * s, vt)
    pred_ratings = normalize(pred_ratings)

    # convert to df
    pred_df = pd.DataFrame(
        pred_ratings,
        columns = pt_df.columns,
        index = list(pt_df.index)
    ).transpose()
    return pred_df

In [33]:
def recommend_items(pred_df, usr_id, n_recs):
    '''
    Given a usr_id and pred_df this function will recommend
    items to the user.

    params:
        pred_df (DataFrame) : generated from `generate_prediction_df` function;
                              must contain a column labelled usr_id
        usr_id (Integer) : The user you wish to get item recommendations for
        n_recs (Integer) : The number of recommendations you want for this user

    returns:
        DataFrame : the n_recs highest scoring rows of the usr_id column, best
                    first, with a fresh 0..n-1 index. The former index of pred_df
                    becomes the first column and the score column is named 'sim'.
                    Rows with equal scores keep the order they had in pred_df.

    raises:
        KeyError : if usr_id is not a column of pred_df.
    '''

    if usr_id not in pred_df.columns:
        raise KeyError("usr_id {!r} is not a column of pred_df".format(usr_id))

    # one stable sort is enough; mergesort keeps tied scores in their original order
    ranked = pred_df[usr_id].sort_values(ascending = False, kind = 'mergesort')

    # keep the top rows, then turn the index into a regular column
    rec_df = ranked.head(n_recs).reset_index().rename(columns = {usr_id : 'sim'})
    return rec_df

In [38]:
if __name__ == '__main__':
    # constants
    PATH = '/home/Machine Learning/projects/developia-movie-recommendation/dataset/data-book.csv'
    N_FACTORS = 500  # requested number of singular values for the decomposition
    USER_ID = 5      # reader to generate recommendations for
    N_RECS = 5       # number of books to recommend

    # import data
    df = pd.read_csv(PATH)
    print(df.shape)

    # generate a pivot table with readers on the index and books on the column and values being the ratings
    # (reader/book pairs without a rating are filled with 0)
    pt_df = df.pivot_table(
        columns = 'book_id',
        index = 'reader_id',
        values = 'book_rating'
    ).fillna(0)

    # convert to a csr matrix
    mat = csr_matrix(pt_df.values)

    # generate_prediction_df requires 1 <= n_factors < min(mat.shape), so cap the
    # requested value for datasets with fewer than N_FACTORS + 1 readers or books
    n_factors = min(N_FACTORS, min(mat.shape) - 1)
    pred_df = generate_prediction_df(mat, pt_df, n_factors)

    # generate recommendations
    print(recommend_items(pred_df, USER_ID, N_RECS))

(100000, 10)
(100000, 10)
   book_id       sim
0     2994  0.190227
1     1796  0.115211
2      120  0.106719
3      313  0.106164
4     1234  0.103631


In [35]:
pred_df.head()

Unnamed: 0_level_0,1,3,4,5,6,7,8,9,10,11,...,29990,29991,29992,29993,29994,29995,29996,29997,29998,29999
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.118258,0.12971,0.13108,0.130729,0.130594,0.129655,0.132052,0.13046,0.130621,0.130702,...,0.130954,0.131655,0.130717,0.129519,0.128122,0.128228,0.132394,0.143184,0.130736,0.131624
2,0.128676,0.131649,0.132473,0.130864,0.130831,0.130756,0.131007,0.131097,0.130927,0.13173,...,0.130791,0.130837,0.132091,0.131032,0.13206,0.131683,0.131256,0.133047,0.130796,0.130953
3,0.128903,0.131077,0.13269,0.131301,0.13074,0.130744,0.130449,0.130451,0.13086,0.130945,...,0.131221,0.131075,0.130473,0.131168,0.129487,0.131385,0.131502,0.134274,0.130842,0.131259
4,0.131477,0.131017,0.13171,0.130821,0.130761,0.130751,0.131474,0.13055,0.13066,0.131252,...,0.131195,0.130916,0.131264,0.130988,0.133237,0.130491,0.131401,0.132741,0.130741,0.131528
5,0.136733,0.130799,0.132886,0.131013,0.130842,0.131584,0.132057,0.130393,0.131004,0.132252,...,0.13191,0.130814,0.131181,0.131571,0.128638,0.132009,0.13164,0.129895,0.130951,0.130843
