In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from IPython.display import display, HTML

In [28]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the merged product dataset from disk
data = pd.read_csv('C:/Users/User/Downloads/merged_final_dataset.csv')

# Replace missing values in the 'text' column with empty strings
data = data.assign(text=data['text'].fillna(''))

# Configure the TF-IDF vectorizer (one option per line for readability)
tfv = TfidfVectorizer(
    analyzer='word',            # tokenize into words
    token_pattern=r'\w{1,}',    # a token is one or more word characters
    ngram_range=(1, 3),         # use unigrams, bigrams and trigrams
    stop_words='english',       # drop the built-in English stop words
    strip_accents='unicode',    # normalize accented characters
    min_df=5,                   # ignore terms found in fewer than 5 documents
    max_features=10000,         # cap the vocabulary size
)

# Learn the vocabulary from the 'text' column and build the document-term matrix
tfv_matrix = tfv.fit_transform(data['text'])

# Function to compute similarity in chunks
def chunk_similarity(matrix, ref_idx, chunk_size=1000):
    """Cosine similarity between one reference row and every row of ``matrix``.

    The rows of ``matrix`` are compared against the reference row
    ``chunk_size`` rows at a time, so each call to ``cosine_similarity``
    only produces a ``1 x chunk_size`` block of scores.

    Parameters
    ----------
    matrix : 2-D array or sparse matrix
        One sample per row. It must expose ``shape`` and support row slicing
        (``matrix[a:b]``).
    ref_idx : int
        Non-negative row position of the reference sample; the row is taken
        with the slice ``matrix[ref_idx:ref_idx + 1]``.
    chunk_size : int, default 1000
        Number of rows compared per step. Must be positive.

    Returns
    -------
    list
        One similarity score per row of ``matrix``, in row order. The entry at
        position ``ref_idx`` is the reference row compared with itself.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive. (A negative step would otherwise
        make the loop empty and silently return an empty list.)
    """
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    n_rows = matrix.shape[0]
    # The reference row is identical for every chunk, so slice it only once.
    ref_row = matrix[ref_idx:ref_idx + 1]

    sim_scores = []
    for chunk_start in range(0, n_rows, chunk_size):
        chunk_end = min(chunk_start + chunk_size, n_rows)
        # Result has shape (1, rows_in_chunk); flatten it into the running list.
        sim_chunk = cosine_similarity(ref_row, matrix[chunk_start:chunk_end])
        sim_scores.extend(sim_chunk.ravel())

    return sim_scores

# Example usage: recommend the 10 products most similar to a given title
title = 'JFC Dried Tomoshiraga Somen Noodles, 16-Ounce'

# Look up the ROW POSITION (not the index label) of the first product with this
# exact title: both the rows of tfv_matrix and data.iloc below are positional.
title_positions = (data['title_x'] == title).to_numpy().nonzero()[0]
if len(title_positions) == 0:
    raise ValueError(f"No product titled {title!r} found in data['title_x']")
idx = int(title_positions[0])

scores = chunk_similarity(tfv_matrix, idx)

# Rank every row by similarity (highest first), then drop the query row itself
# explicitly. Skipping "the first result" is not safe: an exact duplicate at a
# lower row ties at the top score and would push the query row into the list.
ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
top_indices = [i for i in ranked if i != idx][:10]

# Output the recommendations including price and rating
recommended_products = data.iloc[top_indices][['title_x', 'price', 'average_rating']]
print(recommended_products)


                                                  title_x  price  \
78228   Nongshim Kimchi Noodle Soup Bowl, 3.03 Ounce (...   7.38   
39279   A-Sha Healthy Ramen Noodles - Original Sauce P...  29.34   
105763  Thai Kitchen Gluten Free Stir Fry Rice Noodles...   4.18   
66199   Cadbury Curly Wurly Chocolate Chewy Bars | Tot...  19.91   
40668   Maruchan Ramen Creamy Chicken Flavor, 3 Oz, Pa...   5.98   
85551   Assi, packs Glass Noodles Korean Vermicelli Da...   9.99   
106560  Wide Thai Rice Stick Noodles Xl (1cm) Pack of ...  12.99   
8755    MAMA Noodles Pad Thai Instant Spicy Noodles w/...  19.99   
108821  Lean Cuisine Frozen Meal Chicken Fettuccine, C...   3.49   
91691   Apexy Organic Shirataki Konjac Noodle Fettucci...  14.99   

        average_rating  
78228              4.5  
39279              4.2  
105763             4.6  
66199              4.3  
40668              4.7  
85551              4.6  
106560             4.5  
8755               4.3  
108821             4.5  
9