In [1]:
import json
import os
from string import punctuation

import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sqlalchemy import create_engine

In [2]:
class RecommenderSystem:
    """Product recommender over a table loaded from SQL.

    Two strategies are offered on the loaded frame (``self.df``):

    * ``recommend_demo`` -- filter by keyword / price, keep only rows whose
      review count reaches a quantile cutoff, and rank them.
    * ``recommend_cb`` -- rank rows by TF-IDF cosine similarity of their
      ``content_col`` text to a chosen row.

    The methods read these columns from the frame: ``keywords``, ``price``,
    ``rate``, ``review``, ``name``, ``created_at`` and ``content_col``.
    """

    # Connection URL used when neither ``db_url`` nor the environment variable
    # is provided (kept for backward compatibility with existing callers).
    DEFAULT_DB_URL = "mysql+pymysql://username:password@localhost/mp_recomsys"
    # Environment variable that may carry the real connection URL so that
    # credentials do not have to live in the notebook.
    DB_URL_ENV_VAR = "RECSYS_DB_URL"

    def __init__(self, data, content_col='description', db_url=None):
        """Connect to the database and load the product table.

        Args:
            data: SQL query (or table name) passed to ``pandas.read_sql``.
            content_col: Name of the text column used for content-based
                similarity.
            db_url: Optional SQLAlchemy connection URL. When omitted, the
                ``RECSYS_DB_URL`` environment variable is used, and finally
                ``DEFAULT_DB_URL``.
        """
        if db_url is None:
            db_url = os.environ.get(self.DB_URL_ENV_VAR, self.DEFAULT_DB_URL)

        self.engine = create_engine(db_url)
        # The connection is kept open as a public attribute, as before.
        self.dbCon = self.engine.connect()
        self.df = pd.read_sql(data, self.dbCon)

        self.content_col = content_col
        self.encoder = None  # TfidfVectorizer, set by fit()
        self.bank = None     # TF-IDF matrix, one row per row of self.df, set by fit()

    def fit(self):
        """Fit the TF-IDF encoder on ``content_col`` and cache the matrix.

        Punctuation is removed from every document before vectorizing.
        Missing descriptions are treated as empty text instead of raising.
        """
        sw_indo = stopwords.words("indonesian")
        strip_punct = str.maketrans('', '', punctuation)

        text = self.df[self.content_col].fillna('').astype(str)
        text = [s.translate(strip_punct) for s in text]

        self.encoder = TfidfVectorizer(ngram_range=(1, 3), tokenizer=word_tokenize, stop_words=sw_indo)
        self.bank = self.encoder.fit_transform(text)

    def recommend_cb(self, idx, top=3):
        """Return the ``top`` rows most similar to row ``idx`` within its keyword.

        Args:
            idx: Index label of the query row in ``self.df``.
            top: Maximum number of recommendations to return.

        Returns:
            A list of dicts (JSON records) holding the columns from
            ``"index"`` through ``"price"``, most similar first. The query
            row itself is never included.

        Raises:
            RuntimeError: If ``fit()`` has not been called yet.
            KeyError: If ``idx`` is not a label of ``self.df``.
        """
        if self.bank is None:
            raise RuntimeError("fit() must be called before recommend_cb()")

        # Rows of self.bank are in the same order as rows of self.df, so work
        # with positions for the matrix and map back to labels at the end.
        # Assumes index labels are unique (true for the RangeIndex that
        # read_sql produces) -- TODO confirm if the index is ever replaced.
        pos = self.df.index.get_loc(idx)

        # Candidates: same keyword as the query row, excluding the query
        # itself. Ranking only these rows guarantees every recommended row
        # exists in the keyword group (the previous global ranking could pick
        # rows outside it and fail on lookup).
        keywords = self.df['keywords']
        candidates = np.flatnonzero((keywords == keywords.iloc[pos]).to_numpy())
        candidates = candidates[candidates != pos]

        top = max(int(top), 0)
        if top == 0 or candidates.size == 0:
            return []

        # Reuse the fitted vector of the query row so the query is encoded
        # exactly like the corpus (including punctuation stripping).
        sim = cosine_similarity(self.bank[[pos]], self.bank[candidates])[0]
        best = np.argsort(-sim, kind="stable")[:top]
        rec_idx = self.df.index[candidates[best]]

        df = self.df.reset_index().set_index('index', drop=False)
        result = df.loc[rec_idx, "index":"price"]
        # Round-trip through JSON to get plain Python types.
        result = result.to_json(orient="records")
        result = json.loads(result)

        return result

    def recommend_demo(self, keyword=None, prices=None, topk=20):
        """Return up to ``topk`` well-reviewed rows matching the filters.

        Args:
            keyword: If given, keep only rows whose ``keywords`` equals it.
            prices: If given, keep only rows whose ``price`` is strictly
                greater than it.
            topk: Maximum number of rows to return.

        Returns:
            A list of dicts (JSON records) holding the columns from
            ``"index"`` through ``"created_at"``, de-duplicated by ``name``
            and ordered by ``review`` then ``rate``, both descending. An
            empty list is returned when nothing matches.
        """
        df = self.df.copy()
        df = self.demographic_filter(df, keyword=keyword, prices=prices)
        df = self.compute_mp_score(df)

        df = df.reset_index().set_index('index', drop=False)
        result = df.loc[:, "index":"created_at"]
        result = result.drop_duplicates(subset=['name'])
        # NOTE(review): the "score" column computed above is neither used for
        # ranking nor returned; ordering is by review count, then rating.
        # Confirm whether ranking by score was intended.
        result = result.sort_values(by=["review", "rate"], ascending=False)
        result = result.head(topk)
        result = result.to_json(orient="records")
        result = json.loads(result)

        return result

    @staticmethod
    def demographic_filter(df, keyword=None, prices=None):
        """Return a filtered copy of ``df``; the input is not modified.

        Args:
            df: Frame with ``keywords`` and ``price`` columns.
            keyword: If given, keep rows whose ``keywords`` equals it.
            prices: If given, keep rows whose ``price`` is strictly greater
                than it.
        """
        df = df.copy()

        if keyword is not None:
            df = df[(df.keywords == keyword)]
        if prices is not None:
            df = df[df.price.gt(prices)]

        return df

    @staticmethod
    def compute_mp_score(df, q=0.9):
        """Keep rows with enough reviews and add a weighted-rating ``score``.

        With ``m`` the ``q``-quantile of ``review`` and ``C`` the
        review-weighted mean of ``rate``, the score of a row is
        ``(rate * review + C * m) / (review + m)``.

        Args:
            df: Frame with numeric ``rate`` and ``review`` columns.
            q: Quantile of ``review`` used as the minimum review count.

        Returns:
            A new frame containing only rows with ``review >= m`` plus a
            ``score`` column. The input frame is not modified.
        """
        m = df.review.quantile(q)

        total_reviews = df.review.sum()
        # Avoid 0/0 when there are no reviews at all (or no rows).
        C = (df.rate * df.review).sum() / total_reviews if total_reviews else 0.0

        # Explicit copy so the new column is not written onto a slice.
        qualified = df.loc[df.review >= m].copy()
        qualified["score"] = (qualified.rate * qualified.review + C * m) / (qualified.review + m)

        return qualified

In [3]:
# Load every row of the `product` table, then build the TF-IDF matrix over the
# default content column ('description').
recsys = RecommenderSystem(data="SELECT * FROM product")
recsys.fit()



# Run Demographic Filtering

In [4]:
# Up to 10 products tagged "gitar akustik", ordered by review count then rating.
# The result is a list of dicts (JSON records), not a DataFrame.
demographic = recsys.recommend_demo(keyword="gitar akustik", topk=10)

Get the index of the top demographic result to use as the query for content-based recommendations.

In [5]:
# Take the 'index' field of the first demographic recommendation as the query row.
choice = demographic[0]['index']
choice

1601

# Run Content-based Filtering

In [6]:
# Content-based recommendations for the chosen row (default top=3).
cb = recsys.recommend_cb(choice)

In [7]:
# Display the recommended records.
cb

[{'index': 578,
  'id_product': 1578,
  'id_store': 1678,
  'url': 'https://shopee.co.id/Senar-Gitar-Akustik-String-Daddario-EXP-Coated-Phosphor-Bronze-EXP16-EXP26-EXP15-i.20427669.1050675966',
  'image': 'https://cf.shopee.co.id/file/20a83885c60230234f5484df83edc534',
  'name': 'Senar Gitar Akustik String Daddario EXP Coated Phosphor Bronze EXP16 EXP26 EXP15',
  'price': 22500},
 {'index': 2087,
  'id_product': 35104,
  'id_store': 36104,
  'url': 'https://www.tokopedia.com/storetan/senar-string-gitar-akustik-elixir-nanoweb-polyweb-bebas-0bc5?whid=0',
  'image': 'https://ecs7.tokopedia.net/img/cache/500-square/product-1/2020/7/18/15832445/15832445_6665b8c2-273c-4df7-bbb3-bd157d4386e7_1024_1024.webp',
  'name': 'Senar String Gitar Akustik Elixir Nanoweb Polyweb - BEBAS',
  'price': 50000},
 {'index': 2089,
  'id_product': 35106,
  'id_store': 36106,
  'url': 'https://www.tokopedia.com/storetan/string-senar-gitar-akustik-daddario-bebas-ce3d?whid=0',
  'image': 'https://ecs7.tokopedia.ne

In [13]:
# NOTE(review): repeats the previous display cell; the saved output below has no
# 'index' field, so it appears to come from an earlier run -- re-run or remove.
cb

[{'id_product': 1578,
  'id_store': 1678,
  'url': 'https://shopee.co.id/Senar-Gitar-Akustik-String-Daddario-EXP-Coated-Phosphor-Bronze-EXP16-EXP26-EXP15-i.20427669.1050675966',
  'image': 'https://cf.shopee.co.id/file/20a83885c60230234f5484df83edc534',
  'name': 'Senar Gitar Akustik String Daddario EXP Coated Phosphor Bronze EXP16 EXP26 EXP15',
  'price': 22500},
 {'id_product': 35104,
  'id_store': 36104,
  'url': 'https://www.tokopedia.com/storetan/senar-string-gitar-akustik-elixir-nanoweb-polyweb-bebas-0bc5?whid=0',
  'image': 'https://ecs7.tokopedia.net/img/cache/500-square/product-1/2020/7/18/15832445/15832445_6665b8c2-273c-4df7-bbb3-bd157d4386e7_1024_1024.webp',
  'name': 'Senar String Gitar Akustik Elixir Nanoweb Polyweb - BEBAS',
  'price': 50000},
 {'id_product': 35106,
  'id_store': 36106,
  'url': 'https://www.tokopedia.com/storetan/string-senar-gitar-akustik-daddario-bebas-ce3d?whid=0',
  'image': 'https://ecs7.tokopedia.net/img/cache/500-square/product-1/2020/7/18/1583244

In [253]:
# Name of the most similar product.
# NOTE(review): execution counts of these last cells (253, 254, 252) are out of
# order; confirm the notebook reproduces with Restart & Run All.
cb[0]['name']

'Senar Gitar Akustik String Daddario EXP Coated Phosphor Bronze EXP16 EXP26 EXP15'

In [254]:
# Name of the second most similar product.
cb[1]['name']

'Senar String Gitar Akustik Elixir Nanoweb Polyweb - BEBAS'

In [252]:
# Name of the third most similar product.
cb[2]['name']

'String Senar Gitar Akustik Daddario - BEBAS'