### IMPORT LIBRARIES & DATA

In [1]:
import pandas as pd
import mysql.connector
import sqlalchemy as sa
import getpass
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def getconn():
    conn = mysql.connector.connect(
        host="localhost",
        user="root",
        password=getpass.getpass(),
        database="goodreads"
    )
    return conn

pool = sa.create_engine(
    "mysql+mysqlconnector://",
    creator=getconn,
)

with pool.connect() as db_conn:
    results = db_conn.execute(sa.text("SELECT NOW()")).fetchone()
    print("Current time: ", results[0])

Current time:  2023-05-10 15:19:12


In [4]:
books_query = sa.text(
    "SELECT * FROM books;"
)
books_df = pd.read_sql_query(books_query,con=pool.connect())

In [5]:
book_tags_query = sa.text(
    "SELECT * FROM new_book_tags;"
)
book_tags = pd.read_sql_query(book_tags_query, con=pool.connect())

### MODEL

In [38]:
class Tagbased_recommender:
    def __init__(self):
        self.pivot_table=None
        self.matrix=None
        self.indices=None
        self.cosine_sim=None
    
    def fit(self, data):
        self.pivot_table = pd.pivot_table(data, index='goodreads_book_id', columns='tag_name', aggfunc=len, fill_value=0)
        self.matrix = self.pivot_table.values
        indices_values = [num for num in list(range(len(self.pivot_table)))]
        self.indices = pd.Series(indices_values, index=self.pivot_table.index.astype(int))
        self.cosine_sim = cosine_similarity(self.matrix)

    def get_recommendations(self, id, num_recommends = 5):
        idx = self.indices[int(id)]
        sim_scores = list(enumerate(self.cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:num_recommends+1]
        book_indices = [i[0] for i in sim_scores]
        return book_indices

In [39]:
recommender = Tagbased_recommender()
recommender.fit(book_tags)

In [41]:
books_df.iloc[recommender.get_recommendations('3')]

Unnamed: 0,goodreads_book_id,work_id,books_count,isbn,isbn13,original_publication_year,original_title,title,language_code,average_rating,ratings_count,description
2162,13143,2471943,82,446608815,9780446608820.0,1999.0,Pop Goes the Weasel,"Pop Goes the Weasel (Alex Cross, #5)",eng,3.98,46820,Detective Alex Cross is back-and he's in love....
6739,1563413,1555975,2,307020916,9780307020920.0,1973.0,The Three Little Pigs (A Little Golden Book),The Three Little Pigs,eng,4.26,14779,Relates the adventures of three little pigs wh...
1608,14817,1527439,97,057507681X,9780575076820.0,1977.0,A Scanner Darkly,A Scanner Darkly,eng,4.04,54014,Substance D is not known as Death for nothing....
7280,790171,1127127,140,671726528,9780671726520.0,1948.0,Kon-Tiki Ekspedisjonen,Kon-Tiki: Across The Pacific In A Raft,eng,4.1,13541,Kon-Tiki is the record of an astonishing adven...
0,2767052,2792775,272,439023483,9780439023480.0,2008.0,The Hunger Games,"The Hunger Games (The Hunger Games, #1)",eng,4.34,4780653,"Could you survive on your own in the wild, wit..."


In [42]:
import pickle

with open('tagbased_recommender.pkl', 'wb') as f:
    pickle.dump(recommender, f)

In [43]:
with open('tagbased_recommender.pkl', 'rb') as f:
    model = pickle.load(f)

model.get_recommendations('3', 10)

[2162, 6739, 1608, 7280, 0, 4343, 4899, 3634, 247, 815]