## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from rsvd import RSVD
import pickle, gzip

## Import Books

In [2]:
# Read the book catalogue and trim surrounding whitespace from every title,
# so exact-match comparisons against the `title` column behave predictably.
books = pd.read_csv("./input/books.csv").assign(
    title=lambda frame: frame["title"].str.strip()
)
books

Unnamed: 0,id,entry_date,inventory_number,author,title,publisher,city,year,category_id,call_number_author,call_number_title,category_name,source,quantity,status,cover
0,1,19-4-2023,01/SMPN3 KDR/2023,RPUL BUANA RAYA,RANGKUMAN PENGETAHUAN UMUM,RPUL BUANA RAYA,JAKARTA,2014,000,BUA,RPA,KARYA UMUM,DAK203,1,BAIK,https://ik.imagekit.io/pohfq3xvx/d05a46ed-2921...
1,2,19-4-2023,02/SMPN3 KDR/2023,TIM CBM,MENJELAJAH SUNGAI NIL,NUANSA CITRA DRAFIKA,JAKARTA,2008,001,TIM,MEN,ILMU PENGETAHUAN UMUM,DAK203,1,BAIK,
2,3,19-4-2023,03/SMPN3 KDR/2023,HENDY HERMAWAN,GURU MENULIS,CITRA PRAYA,BANDUNG,2006,070,HEN,GUR,JURNALISM,DAK203,1,BAIK,https://ik.imagekit.io/pohfq3xvx/0b046a5a-2799...
3,4,19-4-2023,04-05/SMPN3 KDR/2023,SUHARNO WIDI NUGROHO ST,MENGENAL LEMBAR KERJA MS EKCEL,BSD MIPA,JAKARTA,2010,004,SUH,MEN,PENGOLAHAN DATA ILMU KOMPUTER,DAK203,2,BAIK,https://ik.imagekit.io/pohfq3xvx/0aa37d88-3140...
4,5,19-4-2023,06-07/SMPN3 KDR/2023,SANYO SAPUTRA,MENGGAMBAR DENGAN MICROSOFT PAINT,EXACT GANECA,JAKARTA,2007,004,SAN,MEN,PENGOLAHAN DATA ILMU KOMPUTER,DAK203,2,BAIK,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1314,1315,24-6-2023,2438-2440/SMPN3 KDR/2023,ALANNA FARDIANI,AYO MENGUKUR KELAJUAN,GRAHA ILMU MULIA,SURABAYA,2009,5169,ALA,AYO,ILMU UKUR DESKRIPTIF,DAK203,3,BAIK,
1315,1316,24-6-2023,2441-2442/SMPN3 KDR/2023,RUMIDA LUMBAN RAJA,BAWANG MERAH,PANCA ANUGRAH SAKTI,TANGERANG,2010,581,RUM,BAW,BOTANI,DAK203,2,BAIK,
1316,1317,24-6-2023,2443-2445/SMPN3 KDR/2023,ALANNA FARDIANI,AYO MENGUKUR BERAT,GRAHA ILMU MULIA,SURABAYA,2009,5169,ALA,AYO,ILMU UKUR DESKRIPTIF,DAK203,3,BAIK,
1317,1318,24-6-2023,2446-2448/SMPN3 KDR/2023,DIDIK DJUNAEDI,MENGENAL BANGUN DAN BELAJAR PECAHAN,GADING INTI PRIMA,JAKARTA,2010,513,DID,MEN,ILMU HITUNG,DAK203,3,BAIK,


## Import Model

In [3]:
# Load the gzip-compressed, pickled recommender model from disk.
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load model archives that come from a trusted source.
with gzip.open("rsvd_model.pkl.gz", "rb") as f:
    model = pickle.load(f)

## Get Recommendation

### Get Personal Recommendation

In [4]:
def get_user_recs(user_id, rated_book_ids, model=model, top_n: int = 10) -> pd.DataFrame:
    """Return the books with the highest estimated rating for one user.

    Every book in the module-level ``books`` frame is scored with
    ``model.estimate``. Books the model cannot score are skipped, books listed
    in ``rated_book_ids`` are removed, and the ``top_n`` best-scoring books
    are returned.

    Args:
        user_id: Raw user id; translated with ``model.trainset.to_inner_uid``.
        rated_book_ids: Book ids to exclude from the result (e.g. books the
            user has already rated). ``None`` is treated as "exclude nothing".
        model: Fitted model exposing ``trainset`` and ``estimate``.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        DataFrame with columns ``id``, ``title`` and ``est``, sorted by
        ``est`` in descending order.

    Raises:
        Whatever ``model.trainset.to_inner_uid`` raises for an unknown user
        (presumably ``ValueError`` if the trainset follows the Surprise
        ``Trainset`` API -- TODO confirm).
    """
    if rated_book_ids is None:
        rated_book_ids = []

    rec_ids = []
    rec_titles = []
    rec_ests = []

    # Resolve the user once; it does not change inside the loop.
    user_row_idx = model.trainset.to_inner_uid(user_id)

    # Walk the two needed columns directly instead of materialising a full
    # row Series per book with iterrows().
    for book_id, book_title in zip(books["id"], books["title"]):
        try:
            book_row_idx = model.trainset.to_inner_iid(book_id)
            est = model.estimate(user_row_idx, book_row_idx)
        except Exception:
            # Best effort: a book the model cannot score is left out.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
        rec_ids.append(book_id)
        rec_titles.append(book_title)
        rec_ests.append(est)

    recs = pd.DataFrame({"id": rec_ids, "title": rec_titles, "est": rec_ests})
    recs = recs[~recs["id"].isin(rated_book_ids)]
    return recs.sort_values(by="est", ascending=False).head(top_n)

In [12]:
# Example: recommendations for a single user, with no books excluded.
get_user_recs("51e49f25-397d-43a1-a807-005933626d2e", rated_book_ids=[])

Unnamed: 0,id,title,est
528,571,DINOSAURUS,3.74979
14,15,ALAM SEMESTA,3.727477
160,173,SEJARAH HUKUM DAN KONSTITUSI DI INDONESIA,3.698503
567,614,MENGENAL ALAM SEMESTA,3.695523
549,594,KEMAMPUAN BERFIKIR KRITIS MATEMATIKA,3.679157
680,732,MODEL TEKNOLOGI SEDERHANA,3.677216
353,382,LANGKAH SUKSES BELAJAR BAHASA INGGRIS 2,3.661169
724,778,BEKERJA SEBAGAI PSIKOLOG,3.640712
349,378,LEST UNDERSTAND ENGLISH,3.623342
424,463,MEMAHAMI TEKNOLOGI POPULER,3.620828


### Get Non-Personal Recommendation

#### Get Book Latent Features

In [6]:
def get_vector(raw_id: int, model=model) -> np.ndarray:
    """Return the latent feature vector of a book.

    Args:
        raw_id: Raw book id, as used for the keys of
            ``model.trainset._raw2inner_id_items``.
        model: Model exposing ``trainset._raw2inner_id_items`` and ``qi``.

    Returns:
        The entry of ``model.qi`` at the book's inner index.

    Raises:
        KeyError: If ``raw_id`` is not present in the item mapping
            (assuming the mapping is a plain dict -- TODO confirm).
    """
    # Translate the raw id into the model's inner index, then look up
    # that entry in the item-factor table.
    inner_idx = model.trainset._raw2inner_id_items[raw_id]
    return model.qi[inner_idx]

#### Get Book Recommendation

In [7]:
def cosine_similarity(u, v):
    """Return the cosine similarity between two vectors.

    Args:
        u: First vector (anything accepted by ``np.linalg.norm``/``np.dot``).
        v: Second vector, same length as ``u``.

    Returns:
        ``dot(u, v) / (||u|| * ||v||)`` as a float, or ``0.0`` when either
        vector has zero norm (the similarity is undefined in that case).
    """
    norm_u = np.linalg.norm(u)
    norm_v = np.linalg.norm(v)

    # A zero vector has no direction; return 0.0 rather than dividing by zero.
    if norm_u == 0 or norm_v == 0:
        return 0.0

    return np.dot(u, v) / (norm_u * norm_v)

In [8]:
def get_book_recs(ref_book_title: str, model=model, top_n: int = 25):
    """Return the books most similar to a reference book.

    The reference title is resolved to a book id through the module-level
    ``books`` frame. Its latent vector is then compared, by cosine
    similarity, with the vector of every other item in the model's training
    set.

    Args:
        ref_book_title: Exact title of the reference book as stored in
            ``books["title"]``.
        model: Model exposing ``trainset._raw2inner_id_items`` and ``qi``.
        top_n: Maximum number of similar books to return (default 25).

    Returns:
        DataFrame with columns ``vector cosine similarity``, ``title`` and
        ``id``, ordered from most to least similar. Returns ``None`` (after
        printing a message) when the title does not match exactly one book
        or a required id is missing from the model or the catalogue.
    """
    try:
        # `.item()` raises ValueError unless exactly one row matches the title.
        ref_raw_id = books.loc[books["title"] == ref_book_title, "id"].item()
        ref_vector = get_vector(ref_raw_id, model)

        # Build the id -> title lookup once instead of scanning the whole
        # catalogue for every candidate book.
        title_by_id = dict(zip(books["id"], books["title"]))

        similarity_table = []
        for other_raw_id in model.trainset._raw2inner_id_items:
            # The reference book is never recommended to itself.
            if other_raw_id == ref_raw_id:
                continue
            similarity_score = cosine_similarity(
                get_vector(other_raw_id, model), ref_vector
            )
            similarity_table.append(
                (similarity_score, title_by_id[other_raw_id], other_raw_id)
            )
    except (KeyError, ValueError):
        # Unknown or ambiguous title, or an id missing from the model/catalogue.
        print("Not enough info about book")
        return None

    # Ascending sort, so the best matches sit at the tail; take them and
    # reverse to list the most similar book first.
    recs = pd.DataFrame(
        sorted(similarity_table),
        columns=["vector cosine similarity", "title", "id"],
    )
    return recs.tail(top_n)[::-1]

In [9]:
# Example: books whose latent vectors are most similar to this title's vector.
get_book_recs("ASYIK BELAJAR MATEMATIKA")

Unnamed: 0,vector cosine similarity,title,id
884,0.624564,KAMUS ISTILAH BERGAMBAR MATEMATIKA,589
883,0.577345,FISIKA CERIA SERI MEKANIKA,557
882,0.543823,SAINS UNTUK PEMULA 5 MARI BERMAIN BUNYI,133
881,0.515059,MENGENAL BILANGAN,468
880,0.490345,FISIKA CERIA SERI KEMAGNETAN,647
879,0.488498,BEKERJA SEBAGAI PSIKOLOG,778
878,0.471385,FISIKA CERIA SERI OPTIK GEOMETRI,648
877,0.462928,MENGENAL PENGUKURAN,465
876,0.443325,KEBO TARUNANTAKA,50
875,0.442426,KEHIDUPAN MOLUSKA,514


In [10]:
# Example: the same similarity query for another reference title.
get_book_recs("ENGLISH EVERYWHERE")

Unnamed: 0,vector cosine similarity,title,id
884,0.744844,LEST UNDERSTAND ENGLISH,378
883,0.704884,BELAJAR BAHASA INGGRIS MELALUI PERMAINAN,385
882,0.627669,DON’T JUDGE A MAN BY HIS FACE,1075
881,0.614066,BERBAHASA INGGRIS YANG BAIK DAN BENAR,424
880,0.608698,ENGLISH COMPETENCY READING COMREHENSION,386
879,0.563764,LANGKAH SUKSES BELAJAR BAHASA INGGRIS 2,382
878,0.557111,SINONIM DAN ANTONIM DALAM BAHASA INGGRIS,380
877,0.533217,PANDUAN PRAKTIS BERBUSANA,855
876,0.510082,ENGLISH IS FUN,430
875,0.490782,KAMUS IDEAL,413


In [11]:
# Example: the same similarity query for a third reference title.
get_book_recs("LEGENDA DAN DONGENG NUSANTARA MALINKUNDANG")

Unnamed: 0,vector cosine similarity,title,id
884,0.642055,LEGENDA TIMUN MAS,315
883,0.600812,LEGENDA DONGENG NUSANTARA TELAGA BIRU,305
882,0.588145,LEGENDA DAN DONGENG NUSANTARA SULTAN DOMAS,218
881,0.536389,SERI DONGENG BINATANG KELELAWAR TAK BERPENDIRIAN,290
880,0.500542,SATUA BALI I BELOG,312
879,0.500161,LEGENDA DAN DONGENG NUSANTARA BATU MENANGIS,260
878,0.491997,ASAL MULA CANDI BOROBUDUR,219
877,0.483825,KECIL KECIL BERANI BERKIRIM SURAT,415
876,0.464705,KISAH LEGENDA MALIN KUNDANG SI ANAK DURHAKA,179
875,0.450841,DONGENG PUTRI CINDERLELLA,147
