In [23]:
import pandas as pd
data = pd.read_csv('sample1_watch_data.csv')
data.head(20)

Unnamed: 0,user_id,Product ID,Product Name,Brand,Price,Category,Rating
0,1,P001,Classic Leather,Omega,300,Watch,4.6
1,1,P003,Explorer Chrono,Rolex,1500,Watch,4.8
2,2,P002,Digital Pro,Casio,120,Smartwatch,4.2
3,2,P008,Innovator,Xiaomi,100,Smartwatch,4.0
4,3,P001,Classic Leather,Omega,300,Watch,4.6
5,3,P007,Classic Sport,Seiko,350,Watch,4.4
6,4,P004,Diver 300,Tag Heuer,2500,Watch,4.9
7,4,P009,Ocean Explorer,Citizen,180,Watch,3.8
8,5,P005,Modern Quartz,Tissot,400,Watch,4.1
9,5,P010,Aviator,Hamilton,700,Watch,4.3


In [15]:
print(pd.__version__)

2.2.2


In [16]:
from surprise import Reader, Dataset, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [17]:
#content based filtering
content_df = data[['Product ID', 'Product Name', 'Brand', 'Price', 'Category']]
content_df['content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

# use tf-idf to vectorize the content into a matrix of matrix of tf-idf features
tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df['content'])

content_similarity = linear_kernel(content_matrix, content_matrix)

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data[['user_id', 'Product ID', 'Rating']], reader)

def get_content_based_recommendations(product_id, top_n):
    index = content_df[content_df['Product ID'] == product_id].index[0]
    similarity_scores = content_similarity[index]
    similar_indices = similarity_scores.argsort()[::-1][1:top_n+1] 
    recommendations = content_df.loc[similar_indices, 'Product ID'].values  
    return recommendations.tolist() 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  content_df['content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)


In [18]:
get_content_based_recommendations("P001", 5)

['P001', 'P001', 'P007', 'P007', 'P004']

In [19]:
# collaborative filtering
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

def get_collaborative_filtering_recommendations(userId, top_n):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == userId, testset)
    predictions = algo.test(testset)
    predictions.sort(key=lambda x: x.est, reverse=True)
    recommendations = [prediction.iid for prediction in predictions[:top_n]]
    return recommendations

In [20]:
get_collaborative_filtering_recommendations(1, 5)

['P004', 'P005', 'P011', 'P008', 'P007']

In [21]:
# hybrid approach
def hybrid_filtering(user_id, product_id, top_n):
    content_based_recommendations = get_content_based_recommendations(product_id, top_n)
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(user_id, top_n)
    hybrid_recommendations = list(set(content_based_recommendations + collaborative_filtering_recommendations))
    return hybrid_recommendations[:top_n]

In [22]:
user_id = 1
product_id = "P001"
top_n = 3
hybrid_recommendations = hybrid_filtering(user_id, product_id, top_n)
print(f"Hybrid recommendations for user {user_id} and product {product_id}:")
for i, recommendation in enumerate(hybrid_recommendations):
    print(f"{i+1}.Product ID: {recommendation}")

Hybrid recommendations for user 1 and product P001:
1.Product ID: P007
2.Product ID: P004
3.Product ID: P011
