In [5]:
df = pd.read_csv(r"C:\Users\Abhishek\Desktop\chandansir\amazonnew\recommender-system-e-commerce\Data\Amazon_cleaned_df.csv")

In [49]:
df.head(100)

Unnamed: 0,item_id,title,brand,user_id,rating,timestamp,sub_cat,main_cat,age,gender,location
0,B00ENFVJJO,powera dualshock 4 charge station playstation 4,powera,A30E7ZUWMO2E8V,5.0,2017-01-05,Video_Games,Electronics and Technology,18-24,female,Canada
1,B00AEM2EGW,je206 man woman faux spinal gemstone ring silv...,quality brand,A4FQC6UQO6W84,5.0,2013-09-20,All_Beauty,Beauty and Fashion,45-54,female,Canada
2,B006GHA8EE,witcher 2 assassin king enhance edition,wb game,A3GPR3LKLWHSXS,2.0,2014-01-21,Video_Games,Electronics and Technology,35-44,female,Canada
3,B0001HAI8G,mvp baseball 2004 gamecube,electronic art,A1PQNBC3IG5DWD,1.0,2015-08-23,Video_Games,Electronics and Technology,25-34,male,UK
4,B000CBCVFE,resident evil deadly silence nintendo ds,capcom,A3FJKFJ61BU32J,5.0,2017-08-17,Video_Games,Electronics and Technology,18-24,female,USA
...,...,...,...,...,...,...,...,...,...,...,...
95,B005GWU1BA,ncis pc,ubisoft,A3RXYQ0JP44Y6B,1.0,2013-02-27,Video_Games,Electronics and Technology,18-24,female,Canada
96,B013R8BIWU,egelexy baby tie stripe vest formal wear weddi...,egelexy,AM7HZKJVCR39Z,5.0,2018-04-18,AMAZON_FASHION,Beauty and Fashion,18-24,female,India
97,B0194PD6O8,myobe woman long sleeve stripe shirt crew neck...,myobe,AMEXX4CNIWAWW,5.0,2016-04-19,AMAZON_FASHION,Beauty and Fashion,25-34,male,India
98,B000HKGD4A,rollercoaster tycoon 3 platinum pc,atari,A1HIZ0LKBD6X8B,5.0,2015-08-03,Video_Games,Electronics and Technology,18-24,female,Canada


In [5]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Content-Based Filtering
class ContentBasedRecommender:
    def __init__(self, df):
        self.df = df
        self.category_similar_matrix = self._get_similarity_matrix(self.df['sub_cat'])
        self.brand_similar_matrix = self._get_similarity_matrix(self.df['brand'])

    def _get_similarity_matrix(self, attribute_data):
        df_attribute = pd.DataFrame({'item_id': self.df['item_id'], 'attribute_data': attribute_data})
        df_attribute_unique = df_attribute.drop_duplicates(subset='item_id', keep='first')

        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        doc_term = tfidf_vectorizer.fit_transform(df_attribute_unique['attribute_data'])
        cos_similar_matrix = pd.DataFrame(
            cosine_similarity(doc_term), 
            columns=df_attribute_unique['item_id'],
            index=df_attribute_unique['item_id']
        )
        return cos_similar_matrix

    def get_recommendations(self, user_id, top_n=5):
        top_n_recommendations = []
        user_items = self.df[self.df['user_id'] == user_id]['item_id']
        combined_similar_matrix = (self.category_similar_matrix + self.brand_similar_matrix) / 2

        for item_id in user_items:
            if item_id in combined_similar_matrix.index:
                similar_items = combined_similar_matrix.loc[item_id]
                similar_items = similar_items.sort_values(ascending=False)
                similar_items = similar_items[~similar_items.index.isin(user_items)]
                top_n_recommendations.extend(similar_items.head(top_n).index.tolist())

        recommended_items = list(set(top_n_recommendations) - set(user_items))
        return self.df[self.df['item_id'].isin(recommended_items)][['item_id', 'title', 'brand', 'rating', 'sub_cat', 'main_cat']]

# Collaborative Filtering
class CollaborativeFilteringRecommender:
    def __init__(self, df):
        self.df = df
        self.user_item_matrix = self.df.pivot_table(index='user_id', columns='item_id', values='rating').fillna(0)
        self.user_similarity_matrix = self._get_user_similarity_matrix()

    def _get_user_similarity_matrix(self):
        return pd.DataFrame(
            cosine_similarity(self.user_item_matrix), 
            index=self.user_item_matrix.index,
            columns=self.user_item_matrix.index
        )

    def get_recommendations(self, user_id, top_n=10):
        if user_id not in self.user_similarity_matrix.index:
            raise ValueError(f"User ID {user_id} not found in the dataset.")

        user_similarity = self.user_similarity_matrix.loc[user_id]
        similar_users = user_similarity.sort_values(ascending=False).index[1:top_n+1]
        recommended_items = set()

        for sim_user in similar_users:
            items = self.df[self.df['user_id'] == sim_user]['item_id']
            recommended_items.update(items)

        user_items = set(self.df[self.df['user_id'] == user_id]['item_id'])
        recommended_items = list(recommended_items - user_items)[:top_n]
        return self.df[self.df['item_id'].isin(recommended_items)][['item_id', 'title', 'brand', 'rating', 'sub_cat', 'main_cat']]

# Hybrid Filtering
class HybridRecommender:
    def __init__(self, df, content_weight=0.5, collaborative_weight=0.5):
        self.df = df
        self.content_recommender = ContentBasedRecommender(df)
        self.collaborative_recommender = CollaborativeFilteringRecommender(df)
        self.content_weight = content_weight
        self.collaborative_weight = collaborative_weight

    def get_recommendations(self, user_id, top_n=10):
        content_recs = self.content_recommender.get_recommendations(user_id, top_n)['item_id'].tolist()
        collaborative_recs = self.collaborative_recommender.get_recommendations(user_id, top_n)['item_id'].tolist()

        hybrid_scores = {}
        for item in content_recs:
            hybrid_scores[item] = hybrid_scores.get(item, 0) + self.content_weight
        for item in collaborative_recs:
            hybrid_scores[item] = hybrid_scores.get(item, 0) + self.collaborative_weight

        sorted_recs = sorted(hybrid_scores.items(), key=lambda x: x[1], reverse=True)
        recommended_items = [item for item, score in sorted_recs[:top_n]]

        return self.df[self.df['item_id'].isin(recommended_items)][['item_id', 'title', 'brand', 'rating', 'sub_cat', 'main_cat']]

# Unified Recommendation System
class RecommendationSystem:
    def __init__(self, df):
        self.df = df
        self.content_recommender = ContentBasedRecommender(df)
        self.collaborative_recommender = CollaborativeFilteringRecommender(df)
        self.hybrid_recommender = HybridRecommender(df)

    def get_recommendations(self, user_id=None, method='content', top_n=5):
        if method == 'content':
            return self.content_recommender.get_recommendations(user_id, top_n)
        elif method == 'collaborative':
            return self.collaborative_recommender.get_recommendations(user_id, top_n)
        elif method == 'hybrid':
            return self.hybrid_recommender.get_recommendations(user_id, top_n)
        else:
            raise ValueError("Invalid recommendation method! Choose from 'content', 'collaborative', 'hybrid'.")

# Example Usage
df = pd.read_csv(r"C:\\Users\\Abhishek\\Desktop\\chandansir\\amazonnew\\recommender-system-e-commerce\\Data\\Amazon_cleaned_df.csv")
# Use only the first 2000 rows of the dataset
df = df.head(2000)

# Convert float64 columns to float32
float_columns = df.select_dtypes(include=['float64']).columns
df[float_columns] = df[float_columns].astype('float32')

recommender = RecommendationSystem(df)

# Prompt the user for inputs
user_id = input("Please enter the User ID: ")
method = input("Please choose the recommendation method (content, collaborative, hybrid): ")
top_n = 5  # Number of recommendations

try:
    recommendations = recommender.get_recommendations(user_id=user_id, method=method.lower(), top_n=top_n)
    print(f"Top {top_n} recommendations using {method} filtering:\n", recommendations)
except ValueError as e:
    print(f"Error: {e}")


Please enter the User ID:  A2MZW171GAE3DE
Please choose the recommendation method (content, collaborative, hybrid):  hybrid


Top 5 recommendations using hybrid filtering:
          item_id                                              title  \
185   B00RN7QTF8                                 sentey mouse group   
655   B00RNDC5DM  sentey&reg gaming headset red arrow analog 3.5...   
1117  B003EGULF6                           wii wii sport game white   
1132  B00003OPE7                          myth 2 soulblighter linux   
1619  B00ZPZQKIG                           mass effect andromeda pc   
1731  B00ZPZQKIG                           mass effect andromeda pc   

                            brand  rating      sub_cat  \
185                        sentey     5.0  Video_Games   
655                        sentey     5.0  Video_Games   
1117                     nintendo     1.0  Video_Games   
1132  loki entertainment software     5.0  Video_Games   
1619               electronic art     3.0  Video_Games   
1731               electronic art     5.0  Video_Games   

                        main_cat  
185   Electro

In [7]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Content-Based Filtering
class ContentBasedRecommender:
    def __init__(self, df):
        self.df = df
        self.category_similar_matrix = self._get_similarity_matrix(self.df['sub_cat'])
        self.brand_similar_matrix = self._get_similarity_matrix(self.df['brand'])

    def _get_similarity_matrix(self, attribute_data):
        df_attribute = pd.DataFrame({'item_id': self.df['item_id'], 'attribute_data': attribute_data})
        df_attribute_unique = df_attribute.drop_duplicates(subset='item_id', keep='first')

        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        doc_term = tfidf_vectorizer.fit_transform(df_attribute_unique['attribute_data'])
        cos_similar_matrix = pd.DataFrame(
            cosine_similarity(doc_term), 
            columns=df_attribute_unique['item_id'],
            index=df_attribute_unique['item_id']
        )
        return cos_similar_matrix

    def get_recommendations(self, user_id, top_n=5):
        top_n_recommendations = []
        user_items = self.df[self.df['user_id'] == user_id]['item_id']
        combined_similar_matrix = (self.category_similar_matrix + self.brand_similar_matrix) / 2

        for item_id in user_items:
            if item_id in combined_similar_matrix.index:
                similar_items = combined_similar_matrix.loc[item_id]
                similar_items = similar_items.sort_values(ascending=False)
                similar_items = similar_items[~similar_items.index.isin(user_items)]
                top_n_recommendations.extend(similar_items.head(top_n).index.tolist())

        recommended_items = list(set(top_n_recommendations) - set(user_items))
        return self.df[self.df['item_id'].isin(recommended_items)][['item_id', 'title', 'brand', 'rating', 'sub_cat', 'main_cat']]

# Collaborative Filtering
class CollaborativeFilteringRecommender:
    def __init__(self, df):
        self.df = df
        self.user_item_matrix = self.df.pivot_table(index='user_id', columns='item_id', values='rating').fillna(0)
        self.user_similarity_matrix = self._get_user_similarity_matrix()

    def _get_user_similarity_matrix(self):
        return pd.DataFrame(
            cosine_similarity(self.user_item_matrix), 
            index=self.user_item_matrix.index,
            columns=self.user_item_matrix.index
        )

    def get_recommendations(self, user_id, top_n=10):
        if user_id not in self.user_similarity_matrix.index:
            raise ValueError(f"User ID {user_id} not found in the dataset.")

        user_similarity = self.user_similarity_matrix.loc[user_id]
        similar_users = user_similarity.sort_values(ascending=False).index[1:top_n+1]
        recommended_items = set()

        for sim_user in similar_users:
            items = self.df[self.df['user_id'] == sim_user]['item_id']
            recommended_items.update(items)

        user_items = set(self.df[self.df['user_id'] == user_id]['item_id'])
        recommended_items = list(recommended_items - user_items)[:top_n]
        return self.df[self.df['item_id'].isin(recommended_items)][['item_id', 'title', 'brand', 'rating', 'sub_cat', 'main_cat']]

# Hybrid Filtering
class HybridRecommender:
    def __init__(self, df, content_weight=0.5, collaborative_weight=0.5):
        self.df = df
        self.content_recommender = ContentBasedRecommender(df)
        self.collaborative_recommender = CollaborativeFilteringRecommender(df)
        self.content_weight = content_weight
        self.collaborative_weight = collaborative_weight

    def get_recommendations(self, user_id, top_n=10):
        content_recs = self.content_recommender.get_recommendations(user_id, top_n)['item_id'].tolist()
        collaborative_recs = self.collaborative_recommender.get_recommendations(user_id, top_n)['item_id'].tolist()

        hybrid_scores = {}
        for item in content_recs:
            hybrid_scores[item] = hybrid_scores.get(item, 0) + self.content_weight
        for item in collaborative_recs:
            hybrid_scores[item] = hybrid_scores.get(item, 0) + self.collaborative_weight

        sorted_recs = sorted(hybrid_scores.items(), key=lambda x: x[1], reverse=True)
        recommended_items = [item for item, score in sorted_recs[:top_n]]

        return self.df[self.df['item_id'].isin(recommended_items)][['item_id', 'title', 'brand', 'rating', 'sub_cat', 'main_cat']]

# Unified Recommendation System
class RecommendationSystem:
    def __init__(self, df):
        self.df = df
        self.content_recommender = ContentBasedRecommender(df)
        self.collaborative_recommender = CollaborativeFilteringRecommender(df)
        self.hybrid_recommender = HybridRecommender(df)

    def get_recommendations(self, user_id=None, method='content', top_n=5):
        if method == 'content':
            return self.content_recommender.get_recommendations(user_id, top_n)
        elif method == 'collaborative':
            return self.collaborative_recommender.get_recommendations(user_id, top_n)
        elif method == 'hybrid':
            return self.hybrid_recommender.get_recommendations(user_id, top_n)
        else:
            raise ValueError("Invalid recommendation method! Choose from 'content', 'collaborative', 'hybrid'.")

# Example Usage
df = pd.read_csv(r"C:\\Users\\Abhishek\\Desktop\\chandansir\\amazonnew\\recommender-system-e-commerce\\Data\\Amazon_cleaned_df.csv")
# Use only the first 2000 rows of the dataset
df = df.head(2000)

# Convert float64 columns to float32
float_columns = df.select_dtypes(include=['float64']).columns
df[float_columns] = df[float_columns].astype('float32')

recommender = RecommendationSystem(df)

# Prompt the user for inputs
user_id = input("Please enter the User ID: ")
method = input("Please choose the recommendation method (content, collaborative, hybrid): ")
top_n = 5  # Number of recommendations

try:
    recommendations = recommender.get_recommendations(user_id=user_id, method=method.lower(), top_n=top_n)
    print(f"Top {top_n} recommendations using {method} filtering:\n", recommendations)
except ValueError as e:
    print(f"Error: {e}")


Please enter the User ID:  A1PQNBC3IG5DWD
Please choose the recommendation method (content, collaborative, hybrid):  hybrid


Top 5 recommendations using hybrid filtering:
          item_id                                       title           brand  \
76    B0025KZV7A    leave 4 dead game year edition -xbox 360  electronic art   
1206  B001NX6GBK  dante inferno divine edition playstation 3  electronic art   
1215  B002BCOC3G                        nba live 10 xbox 360  electronic art   
1315  B001NX6GBK  dante inferno divine edition playstation 3  electronic art   
1532  B00009KX7C               alien vs. predator extinction  electronic art   
1619  B00ZPZQKIG                    mass effect andromeda pc  electronic art   
1731  B00ZPZQKIG                    mass effect andromeda pc  electronic art   

      rating      sub_cat                    main_cat  
76       5.0  Video_Games  Electronics and Technology  
1206     5.0  Video_Games  Electronics and Technology  
1215     2.0  Video_Games  Electronics and Technology  
1315     3.0  Video_Games  Electronics and Technology  
1532     4.0  Video_Games  Elect