### Popularity Recommender (Best Seller)
Recommend product based on purchase counts

In [1]:
import pandas as pd

class PopularityRecommender:
    MODEL_NAME = 'Popularity'
    
    def __init__(self,data):
        self.df=pd.read_csv(data)
    
    def recommend_items(self, items_to_ignore=[], topk=5):
        df = self.df.copy()
        product_df = self.product_dataframe(df)
        
        recommendations_df = product_df[~product_df['ORDER_ITEM_ID'].isin(items_to_ignore)] \
                               .sort_values('ORDER_ITEM_QUANTITY', ascending = False) \
                               .head(topk)
           
        return recommendations_df
    

    @staticmethod
    def product_dataframe(df):
        df_extract = df.drop_duplicates().reset_index(drop=True)
        df_extract['PRODUCT_COLOR'] = df_extract['PRODUCT_COLOR'].replace(['blue'],'Blue')
        purchase = df_extract.groupby('ORDER_ITEM_ID').agg({'ORDER_ITEM_QUANTITY':sum}).sort_values(by='ORDER_ITEM_QUANTITY',\
                                                                                                    ascending=False).reset_index()
        
        return purchase
        

In [2]:
popularity_model = PopularityRecommender(data = 'extractCSV_latest.csv') #data should be product list
popularity_model.recommend_items()

Unnamed: 0,ORDER_ITEM_ID,ORDER_ITEM_QUANTITY
0,301,319
1,289,312
2,280,302
3,277,300
4,294,292


In [3]:
popularity_model.recommend_items()['ORDER_ITEM_ID'].tolist()

[301, 289, 280, 277, 294]

### Product Features

In [10]:
df_extract = pd.read_csv('extractCSV_latest.csv')
df_extract = df_extract.drop_duplicates().reset_index(drop=True)
df_extract['PRODUCT_COLOR'] = df_extract['PRODUCT_COLOR'].replace(['blue'],'Blue')
df_products = df_extract[['ORDER_ITEM_ID','PRODUCT_CATEGORY','PRODUCT_SUBCATEGORY','PRODUCT_NAME','PRODUCT_SUMMARY','PRODUCT_PRICE','PRODUCT_TARGETED_GENDER','PRODUCT_TARGETED_AGE','PRODUCT_COLOR']].copy().drop_duplicates()
df_products = df_products.reset_index(drop=True)

### Purchase history

In [11]:
df_user_purchase = df_extract[['PROFILE_ID','ORDER_ITEM_ID','ORDER_ITEM_QUANTITY']].copy()
df_user_purchase = df_user_purchase.groupby(['PROFILE_ID','ORDER_ITEM_ID'])['ORDER_ITEM_QUANTITY'].sum().reset_index()

### Content based (Recommend Products based on product features)

In [12]:
import pandas as pd
import numpy as np
import scipy
import sklearn
from nltk.corpus import stopwords
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [13]:
#Ignoring stopwords (words with no semantics) from English 
stopwords_list = stopwords.words('english') 

#Trains a model whose vectors size is 500, composed by the main unigrams ,  bigrams and trigrams found in the corpus, ignoring stopwords
vectorizer = TfidfVectorizer(analyzer='word',
                     ngram_range=(1, 3),
                     min_df=0.003,
                     max_df=0.5,
                     max_features=500,
                     stop_words=stopwords_list)

item_ids = df_products['ORDER_ITEM_ID'].tolist()
tfidf_matrix = vectorizer.fit_transform(df_products['PRODUCT_CATEGORY']+""+df_products['PRODUCT_SUBCATEGORY']+""+df_products['PRODUCT_NAME']+""+df_products['PRODUCT_SUMMARY']+""+df_products['PRODUCT_COLOR'])
tfidf_feature_names = vectorizer.get_feature_names()
tfidf_matrix

<398x286 sparse matrix of type '<class 'numpy.float64'>'
	with 2227 stored elements in Compressed Sparse Row format>

In [14]:
def get_item_profile(item_id):
    idx = item_ids.index(item_id)
    item_profile = tfidf_matrix[idx:idx+1]
    return item_profile

def get_item_profiles(ids):
    item_profiles_list = [get_item_profile(x) for x in ids]
    item_profiles = scipy.sparse.vstack(item_profiles_list)
    return item_profiles

def build_users_profile(person_id, interactions_indexed_df):
    interactions_person_df = interactions_indexed_df.loc[person_id]
    user_item_profiles = get_item_profiles(interactions_person_df['ORDER_ITEM_ID'])
    
    user_item_strengths = np.array(interactions_person_df['ORDER_ITEM_QUANTITY']).reshape(-1,1)
    #Weighted average of item profiles by the interactions strength
    user_item_strengths_weighted_avg = np.sum(user_item_profiles.multiply(user_item_strengths), axis=0) / np.sum(user_item_strengths)
    user_profile_norm = sklearn.preprocessing.normalize(user_item_strengths_weighted_avg)
    return user_profile_norm

def build_users_profiles(): 
    interactions_indexed_df = df_user_purchase[df_user_purchase['ORDER_ITEM_ID'] \
                                                   .isin(df_products['ORDER_ITEM_ID'])].set_index('PROFILE_ID')
    user_profiles = {}
    for person_id in interactions_indexed_df.index.unique():
        user_profiles[person_id] = build_users_profile(person_id, interactions_indexed_df)
    return user_profiles

In [15]:
user_profiles = build_users_profiles()

In [16]:
class ContentBasedRecommender:
    
    MODEL_NAME = 'Content-Based'
    
    def __init__(self, items_df=None):
        self.item_ids = item_ids
        self.items_df = items_df
        
    def get_model_name(self):
        return self.MODEL_NAME
        
    def _get_similar_items_to_user_profile(self, person_id, topn=1000):
        #Computes the cosine similarity between the user profile and all item profiles
        cosine_similarities = cosine_similarity(user_profiles[person_id], tfidf_matrix)
        #Gets the top similar items
        similar_indices = cosine_similarities.argsort().flatten()[-topn:]
        #Sort the similar items by similarity
        similar_items = sorted([(item_ids[i], cosine_similarities[0,i]) for i in similar_indices], key=lambda x: -x[1])
        return similar_items
        
    def recommend_items(self, user_id=None, items_to_ignore=[], topn=10, verbose=False):
        similar_items = self._get_similar_items_to_user_profile(user_id)
        #Ignores items the user has already interacted
        similar_items_filtered = list(filter(lambda x: x[0] not in items_to_ignore, similar_items))
        
        recommendations_df = pd.DataFrame(similar_items_filtered, columns=['ORDER_ITEM_ID', 'recStrength']) \
                                    .head(topn)

        if verbose:
            if self.items_df is None:
                raise Exception('"items_df" is required in verbose mode')

            recommendations_df = recommendations_df.merge(self.items_df, how = 'left', 
                                                          left_on = 'ORDER_ITEM_ID', 
                                                          right_on = 'ORDER_ITEM_ID')[['recStrength', 'ORDER_ITEM_ID',  'PRODUCT_CATEGORY','PRODUCT_SUBCATEGORY','PRODUCT_NAME','PRODUCT_SUMMARY','PRODUCT_PRICE','PRODUCT_TARGETED_GENDER','PRODUCT_TARGETED_AGE','PRODUCT_COLOR']]


        return recommendations_df
    
content_based_recommender_model = ContentBasedRecommender(df_products)

In [17]:
user_id = 2
item_purchased_by_user = df_extract[df_extract['PROFILE_ID']==user_id].ORDER_ITEM_ID.unique().tolist()
content_based_recommender_model.recommend_items(user_id,item_purchased_by_user)

Unnamed: 0,ORDER_ITEM_ID,recStrength
0,243,0.684709
1,171,0.684709
2,302,0.684709
3,299,0.684709
4,177,0.684709
5,234,0.684709
6,172,0.684709
7,238,0.684709
8,169,0.564715
9,168,0.534402


In [18]:
content_based_recommender_model.recommend_items(user_id,item_purchased_by_user).ORDER_ITEM_ID.values.tolist()

[243, 171, 302, 299, 177, 234, 172, 238, 169, 168]

In [19]:
content_based_recommender_model.recommend_items(user_id,item_purchased_by_user,verbose=True)

Unnamed: 0,recStrength,ORDER_ITEM_ID,PRODUCT_CATEGORY,PRODUCT_SUBCATEGORY,PRODUCT_NAME,PRODUCT_SUMMARY,PRODUCT_PRICE,PRODUCT_TARGETED_GENDER,PRODUCT_TARGETED_AGE,PRODUCT_COLOR
0,0.684709,243,Mens Clothing,Shirt,Mikayla Deckow Shirt,Mikayla Deckow Casual Shirt,200.0,0,2,White
1,0.684709,171,Mens Clothing,Shirt,Lucious Feest Shirt,Lucious Feest Casual Shirt,750.0,0,1,White
2,0.684709,302,Mens Clothing,Shirt,Maryse Wiza Shirt,Maryse Wiza Casual Shirt,850.0,0,3,White
3,0.684709,299,Mens Clothing,Shirt,Kennith Haley Shirt,Kennith Haley Casual Shirt,700.0,0,3,White
4,0.684709,177,Mens Clothing,Shirt,Ona Hoppe Shirt,Ona Hoppe Casual Shirt,150.0,0,1,White
5,0.684709,234,Mens Clothing,Shirt,Carmela Schamberger Shirt,Carmela Schamberger Casual Shirt,650.0,0,2,White
6,0.684709,172,Mens Clothing,Shirt,Desiree Emmerich Shirt,Desiree Emmerich Casual Shirt,800.0,0,1,White
7,0.684709,238,Mens Clothing,Shirt,Celestine Lakin Shirt,Celestine Lakin Casual Shirt,850.0,0,2,White
8,0.564715,169,Mens Clothing,Shirt,Gilbert Monahan Shirt,Gilbert Monahan Casual Shirt,650.0,0,1,White
9,0.534402,168,Mens Clothing,Shirt,Winston Pacocha Shirt,Winston Pacocha formal Shirt,600.0,0,1,White


In [20]:
user_id = 100
item_purchased_by_user = df_extract[df_extract['PROFILE_ID']==user_id].ORDER_ITEM_ID.unique().tolist()
content_based_recommender_model.recommend_items(user_id,item_purchased_by_user,verbose=True)

Unnamed: 0,recStrength,ORDER_ITEM_ID,PRODUCT_CATEGORY,PRODUCT_SUBCATEGORY,PRODUCT_NAME,PRODUCT_SUMMARY,PRODUCT_PRICE,PRODUCT_TARGETED_GENDER,PRODUCT_TARGETED_AGE,PRODUCT_COLOR
0,0.784225,186,Womens Clothing,Shirt / T-Shirt,Gwen Rau Shirt,Gwen Rau Casual Shirt,450.0,1,1,Red
1,0.784225,184,Womens Clothing,Shirt / T-Shirt,Carroll Sipes Shirt,Carroll Sipes Casual Shirt,350.0,1,1,Red
2,0.784225,247,Womens Clothing,Shirt / T-Shirt,Alisa Gutmann Shirt,Alisa Gutmann Casual Shirt,250.0,1,2,Red
3,0.784225,245,Womens Clothing,Shirt / T-Shirt,Caesar OConnell Shirt,Caesar OConnell Casual Shirt,150.0,1,2,Red
4,0.784225,253,Womens Clothing,Shirt / T-Shirt,Abdiel Ebert Shirt,Abdiel Ebert Casual Shirt,550.0,1,2,Red
5,0.784225,376,Womens Clothing,Shirt / T-Shirt,Grayce Gleason Shirt,Grayce Gleason Casual Shirt,250.0,1,4,Red
6,0.784225,189,Womens Clothing,Shirt / T-Shirt,Dario Nienow Shirt,Dario Nienow Casual Shirt,600.0,1,1,Red
7,0.784225,181,Womens Clothing,Shirt / T-Shirt,Jordy Gaylord Shirt,Jordy Gaylord Casual Shirt,200.0,1,1,Red
8,0.705485,272,Womens Clothing,Shirt / T-Shirt,Ambrose Kreiger Shirt,Ambrose Kreiger Casual Shirt,900.0,1,2,White
9,0.705485,204,Womens Clothing,Shirt / T-Shirt,Tyrell Watsica Shirt,Tyrell Watsica Casual Shirt,750.0,1,1,White


### Collabarative Filtering (user based)

#### pre datasetup

In [21]:
#Creating a sparse pivot table with users in rows and items in columns
users_items_pivot_matrix_df = df_user_purchase.pivot_table(index='PROFILE_ID', 
                                                          columns='ORDER_ITEM_ID', 
                                                          values='ORDER_ITEM_QUANTITY').fillna(0)
users_items_pivot_matrix = users_items_pivot_matrix_df.as_matrix()
users_ids = list(users_items_pivot_matrix_df.index)
users_items_pivot_sparse_matrix = csr_matrix(users_items_pivot_matrix)

  """


In [22]:
from scipy.sparse.linalg import svds

NUMBER_OF_FACTORS_MF = 20
#Performs matrix factorization of the original user item matrix
U, sigma, Vt = svds(users_items_pivot_sparse_matrix, k = NUMBER_OF_FACTORS_MF)
sigma = np.diag(sigma)
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 
all_user_predicted_ratings_norm = (all_user_predicted_ratings - all_user_predicted_ratings.min()) / (all_user_predicted_ratings.max() - all_user_predicted_ratings.min())

#Converting the reconstructed matrix back to a Pandas dataframe
cf_preds_df = pd.DataFrame(all_user_predicted_ratings_norm, columns = users_items_pivot_matrix_df.columns, index=users_ids).transpose()

In [23]:
class CFRecommender:
    
    MODEL_NAME = 'Collaborative Filtering'
    
    def __init__(self, cf_predictions_df, items_df=None):
        self.cf_predictions_df = cf_predictions_df
        self.items_df = items_df
        
    def get_model_name(self):
        return self.MODEL_NAME
        
    def recommend_items(self, user_id, items_to_ignore=[], topn=5, verbose=False):
        # Get and sort the user's predictions
        sorted_user_predictions = self.cf_predictions_df[user_id].sort_values(ascending=False) \
                                    .reset_index().rename(columns={user_id: 'recStrength'})

        # Recommend the popular movies the user hasn't seen yet.
        recommendations_df = sorted_user_predictions[~sorted_user_predictions['ORDER_ITEM_ID'].isin(items_to_ignore)] \
                               .sort_values('recStrength', ascending = False) \
                               .head(topn)

        if verbose:
            if self.items_df is None:
                raise Exception('"items_df" is required in verbose mode')

            recommendations_df = recommendations_df.merge(self.items_df, how = 'left', 
                                                          left_on = 'ORDER_ITEM_ID', 
                                                          right_on = 'ORDER_ITEM_ID')[['recStrength', 'ORDER_ITEM_ID',  'PRODUCT_CATEGORY','PRODUCT_SUBCATEGORY','PRODUCT_NAME','PRODUCT_SUMMARY','PRODUCT_PRICE','PRODUCT_TARGETED_GENDER','PRODUCT_TARGETED_AGE','PRODUCT_COLOR']]


        return recommendations_df
    
cf_recommender_model = CFRecommender(cf_preds_df, df_products)

In [24]:
user_id = 100
item_purchased_by_user = df_extract[df_extract['PROFILE_ID']==user_id].ORDER_ITEM_ID.unique().tolist()
cf_recommender_model.recommend_items(user_id,item_purchased_by_user,verbose=True)

Unnamed: 0,recStrength,ORDER_ITEM_ID,PRODUCT_CATEGORY,PRODUCT_SUBCATEGORY,PRODUCT_NAME,PRODUCT_SUMMARY,PRODUCT_PRICE,PRODUCT_TARGETED_GENDER,PRODUCT_TARGETED_AGE,PRODUCT_COLOR
0,0.437892,325,Womens Clothing,Shirt / T-Shirt,Godfrey Dare Shirt,Godfrey Dare Casual Shirt,900.0,1,3,Blue
1,0.395775,332,Womens Clothing,Shirt / T-Shirt,Elyse McClure Shirt,Elyse McClure Casual Shirt,700.0,1,3,White
2,0.337618,319,Womens Clothing,Shirt / T-Shirt,Sylvester Jerde Shirt,Sylvester Jerde Casual Shirt,600.0,1,3,Blue
3,0.134677,263,Womens Clothing,Shirt / T-Shirt,Ramiro Reichert Shirt,Ramiro Reichert formal Shirt,100.0,1,2,Blue
4,0.134677,269,Womens Clothing,Shirt / T-Shirt,Pierce Shields Shirt,Pierce Shields Casual Shirt,750.0,1,2,White


In [37]:
cf_recommender_model.recommend_items(user_id,item_purchased_by_user)['ORDER_ITEM_ID'].tolist()

[325, 332, 319, 263, 269]

### Hybrid Recommender

In [25]:
class HybridRecommender:
    
    MODEL_NAME = 'Hybrid'
    
    def __init__(self, cb_rec_model, cf_rec_model, items_df, cb_ensemble_weight=1.0, cf_ensemble_weight=1.0):
        self.cb_rec_model = cb_rec_model
        self.cf_rec_model = cf_rec_model
        self.cb_ensemble_weight = cb_ensemble_weight
        self.cf_ensemble_weight = cf_ensemble_weight
        self.items_df = items_df
        
    def get_model_name(self):
        return self.MODEL_NAME
        
    def recommend_items(self, user_id, items_to_ignore=[], topn=5, verbose=False):
        #Getting the top-1000 Content-based filtering recommendations
        cb_recs_df = self.cb_rec_model.recommend_items(user_id, items_to_ignore=items_to_ignore, verbose=verbose,
                                                           topn=10).rename(columns={'recStrength': 'recStrengthCB'})
        
        #Getting the top-1000 Collaborative filtering recommendations
        cf_recs_df = self.cf_rec_model.recommend_items(user_id, items_to_ignore=items_to_ignore, verbose=verbose, 
                                                           topn=10).rename(columns={'recStrength': 'recStrengthCF'})
        
        #Combining the results by contentId
        recs_df = cb_recs_df.merge(cf_recs_df,
                                   how = 'outer', 
                                   left_on = 'ORDER_ITEM_ID', 
                                   right_on = 'ORDER_ITEM_ID').fillna(0.0)
        
        #Computing a hybrid recommendation score based on CF and CB scores
        recs_df['recStrengthHybrid'] = (recs_df['recStrengthCB'] * self.cb_ensemble_weight) \
                                     + (recs_df['recStrengthCF'] * self.cf_ensemble_weight)
        
        #Sorting recommendations by hybrid score
        recommendations_df = recs_df.sort_values('recStrengthHybrid', ascending=False).head(topn)

        if verbose:
            if self.items_df is None:
                raise Exception('"items_df" is required in verbose mode')

            recommendations_df = recommendations_df.merge(self.items_df, how = 'left', 
                                                          left_on = 'ORDER_ITEM_ID', 
                                                          right_on = 'ORDER_ITEM_ID')[['recStrengthHybrid', 'ORDER_ITEM_ID',  'PRODUCT_CATEGORY','PRODUCT_SUBCATEGORY','PRODUCT_NAME','PRODUCT_SUMMARY','PRODUCT_PRICE','PRODUCT_TARGETED_GENDER','PRODUCT_TARGETED_AGE','PRODUCT_COLOR']]


        return recommendations_df
    
hybrid_recommender_model = HybridRecommender(content_based_recommender_model, cf_recommender_model, df_products,
                                             cb_ensemble_weight=50.0, cf_ensemble_weight=50.0)

In [26]:
user_id = 100
item_purchased_by_user = df_extract[df_extract['PROFILE_ID']==user_id].ORDER_ITEM_ID.unique().tolist()
hybrid_recommender_model.recommend_items(user_id,verbose=True)

Unnamed: 0,recStrengthHybrid,ORDER_ITEM_ID,PRODUCT_CATEGORY,PRODUCT_SUBCATEGORY,PRODUCT_NAME,PRODUCT_SUMMARY,PRODUCT_PRICE,PRODUCT_TARGETED_GENDER,PRODUCT_TARGETED_AGE,PRODUCT_COLOR
0,60.698904,309,Womens Clothing,Shirt / T-Shirt,Mckenna Quigley Shirt,Mckenna Quigley Casual Shirt,150.0,1,3,Red
1,39.211261,186,Womens Clothing,Shirt / T-Shirt,Gwen Rau Shirt,Gwen Rau Casual Shirt,450.0,1,1,Red
2,39.211261,376,Womens Clothing,Shirt / T-Shirt,Grayce Gleason Shirt,Grayce Gleason Casual Shirt,250.0,1,4,Red
3,39.211261,184,Womens Clothing,Shirt / T-Shirt,Carroll Sipes Shirt,Carroll Sipes Casual Shirt,350.0,1,1,Red
4,39.211261,189,Womens Clothing,Shirt / T-Shirt,Dario Nienow Shirt,Dario Nienow Casual Shirt,600.0,1,1,Red


In [34]:
hybrid_recommender_model.recommend_items(user_id)['ORDER_ITEM_ID'].tolist()

[309, 186, 376, 184, 189]

### Item Collabarative Filtering (Customer who brought have also brought)

In [27]:
from sklearn.decomposition import TruncatedSVD
df_matrix = df_user_purchase.pivot_table(index='PROFILE_ID', columns='ORDER_ITEM_ID', values='ORDER_ITEM_QUANTITY').fillna(0)      
df_matrix_norm = (df_matrix-df_matrix.min())/(df_matrix.max()-df_matrix.min())
d = df_matrix_norm.reset_index() 
d.index.names = ['scaled_purchase_freq'] 
data_norm = pd.melt(d, id_vars=['PROFILE_ID'], value_name='scaled_purchase_freq').dropna()
product_matrix = pd.pivot_table(data_norm, values='scaled_purchase_freq', index='PROFILE_ID', columns='ORDER_ITEM_ID').fillna(0)

SVD = TruncatedSVD(n_components=12)
X = product_matrix.values.T
matrix = SVD.fit_transform(X)
corr = np.corrcoef(matrix)

In [28]:
class ItemCFRecommender:
    
    MODEL_NAME = 'Collaborative Filtering'
    
    def __init__(self, product_matrix, icf_predictions_df, items_df=None):
        self.icf_predictions_df = icf_predictions_df
        self.items_df = items_df
        self.product_matrix = product_matrix
        
    def get_model_name(self):
        return self.MODEL_NAME
        
    def recommend_items(self, item_id, items_to_ignore=[], topn=5, verbose=False):
        # Get and sort the user's predictions
        
        item = self.product_matrix.columns
        item_list = list(item)
        prod = item_list.index(item_id)
        
        recommendations_df = item[self.icf_predictions_df[prod]>=0.9][0:5].tolist()
        

        if verbose:
            if self.items_df is None:
                raise Exception('"items_df" is required in verbose mode')
            
            recommendations_df = pd.DataFrame(recommendations_df,columns=['ORDER_ITEM_ID'])
            recommendations_df = recommendations_df.merge(self.items_df, how = 'left', 
                                                          left_on = 'ORDER_ITEM_ID', 
                                                          right_on = 'ORDER_ITEM_ID')[['ORDER_ITEM_ID',  'PRODUCT_CATEGORY','PRODUCT_SUBCATEGORY','PRODUCT_NAME','PRODUCT_SUMMARY','PRODUCT_PRICE','PRODUCT_TARGETED_GENDER','PRODUCT_TARGETED_AGE','PRODUCT_COLOR']]


        return recommendations_df
    


In [29]:
icf_recommender_model = ItemCFRecommender(product_matrix, corr, df_products)
icf_recommender_model.recommend_items(401,verbose=True)

Unnamed: 0,ORDER_ITEM_ID,PRODUCT_CATEGORY,PRODUCT_SUBCATEGORY,PRODUCT_NAME,PRODUCT_SUMMARY,PRODUCT_PRICE,PRODUCT_TARGETED_GENDER,PRODUCT_TARGETED_AGE,PRODUCT_COLOR
0,373,Womens Clothing,Shirt / T-Shirt,Miss Raegan Schowalter Shirt,Miss Raegan Schowalter Casual Shirt,100.0,1,4,Red
1,374,Womens Clothing,Shirt / T-Shirt,Giuseppe Donnelly Shirt,Giuseppe Donnelly formal Shirt,150.0,1,4,Red
2,375,Womens Clothing,Shirt / T-Shirt,Tevin OKon Shirt,Tevin OKon Casual Shirt,200.0,1,4,Red
3,376,Womens Clothing,Shirt / T-Shirt,Grayce Gleason Shirt,Grayce Gleason Casual Shirt,250.0,1,4,Red
4,377,Womens Clothing,Shirt / T-Shirt,Heber Tromp Shirt,Heber Tromp Casual Shirt,300.0,1,4,Red


In [32]:
icf_recommender_model.recommend_items(401)

[373, 374, 375, 376, 377]

### Demographic Recommendation (Based on user profile)

In [30]:
import pandas as pd

class DemographicRecommenderSystem:
    def __init__(self,data):
        self.df=pd.read_csv(data)
        
    def recommend(self, user= None, age=None, gender=None, color=None, topk=5):
        df = self.df.copy()
        product_df = self.product_dataframe(df)
        prod_df = self.demographic_filter(product_df, age=age, gender=gender, color=color)        
        already_purchased = df[df['PROFILE_ID'].isin([user])]['ORDER_ITEM_ID']
        
        recom_prod = prod_df[~prod_df.index.isin(already_purchased)]


        result = recom_prod.head(topk)
        return result.ORDER_ITEM_ID.values.tolist()
    
    def popularity_recommend(self, topk=5):
        df = self.df.copy()
        product_df = self.product_dataframe(df)
        
        product_df['Rank'] = product_df['ORDER_ITEM_QUANTITY'].rank(ascending=0, method='first')
        #Get the top 10 recommendations
        popularity_recommendations = product_df.head(topk)
        
        return popularity_recommendations
    
    @staticmethod
    def demographic_filter(df, age=None, gender=None, color=None):
        df = df.copy()
        
        if age is not None:
            df = df[(df.PRODUCT_TARGETED_AGE == age)]
            
        if gender is not None:
            df = df[(df.PRODUCT_TARGETED_GENDER == gender)]
            
        if color is not None:
            df = df[(df.PRODUCT_COLOR == color)]
        return df    

    @staticmethod
    def product_dataframe(df):
        df_product_copy = df[['ORDER_ITEM_ID','PRODUCT_NAME','PRODUCT_CATEGORY','PRODUCT_COLOR','PRODUCT_TARGETED_AGE','PRODUCT_TARGETED_GENDER']].copy()
        
        df_product = df.groupby('ORDER_ITEM_ID').agg({'ORDER_ITEM_QUANTITY':sum}).sort_values(by='ORDER_ITEM_QUANTITY',\
                                                                                                    ascending=False).reset_index()
        df_product = df_product.merge(df_product_copy,on='ORDER_ITEM_ID').drop_duplicates().reset_index(drop=True)
        
        return df_product

In [31]:
recom = DemographicRecommenderSystem(data = 'extractCSV_latest.csv' )
recom.recommend(67, gender=1,age=0)

[62, 127, 71, 132, 93]