In [None]:
Objective: Build a recommendation engine for products in the "All_Beauty" category.
Deliverable: A system recommending products based on user behavior and product metadata.

Link to download dataset:
    
https://amazon-reviews-2023.github.io/

https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/review_categories/All_Beauty.jsonl.gz
https://mcauleylab.ucsd.edu/public_datasets/data/amazon_2023/raw/meta_categories/meta_All_Beauty.jsonl.gz

In [None]:
#!pip install pandas numpy scipy scikit-learn matplotlib seaborn tensorflow flask


In [4]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from scipy.sparse.linalg import svds

In [8]:
# Data preprocessing: load the raw review and product-metadata files.
# Both files are read as JSON Lines (one JSON object per line) from the current
# working directory; the download links above point to .jsonl.gz archives, so
# they are expected to have been decompressed first.

reviews = pd.read_json('All_Beauty.jsonl', lines=True)
metadata = pd.read_json('meta_All_Beauty.jsonl', lines=True)



In [4]:
print(reviews.head())

   rating                                      title  \
0       5  Such a lovely scent but not overpowering.   
1       4     Works great but smells a little weird.   
2       5                                       Yes!   
3       1                          Synthetic feeling   
4       5                                         A+   

                                                text images        asin  \
0  This spray is really nice. It smells really go...     []  B00YQ6X8EO   
1  This product does what I need it to do, I just...     []  B081TJ8YS3   
2                          Smells good, feels great!     []  B07PNNCSP9   
3                                     Felt synthetic     []  B09JS339BZ   
4                                            Love it     []  B08BZ63GMJ   

  parent_asin                       user_id               timestamp  \
0  B00YQ6X8EO  AGKHLEW2SOWHNMFQIJGBECAF7INQ 2020-05-05 14:08:48.923   
1  B081TJ8YS3  AGKHLEW2SOWHNMFQIJGBECAF7INQ 2020-05-04 18:10:55.070   

In [4]:
# Keep only the review columns that are needed for product recommendation
reviews_selected_features = reviews[['user_id', 'asin', 'rating', 'helpful_vote', 'verified_purchase', 'parent_asin']]


In [None]:
# View number of columns and rows
reviews_selected_features.shape

In [5]:
# Inspect the number of missing values in each column 
print(reviews_selected_features[['user_id', 'asin', 'rating', 'helpful_vote', 'verified_purchase', 'parent_asin']].isna().sum())

user_id              0
asin                 0
rating               0
helpful_vote         0
verified_purchase    0
parent_asin          0
dtype: int64


Let's work on the metadata.

In [6]:
# Select the metadata features used by the recommendation system.
# 'categories' and 'bought_together' contained empty data in all the rows, so
# they are left out.
# .copy() makes this an independent DataFrame, so later column assignments
# (such as filling missing prices) modify this frame directly instead of
# triggering pandas' SettingWithCopyWarning on a selection taken from `metadata`.
metadata_selected_features = metadata[
    ['title', 'average_rating', 'rating_number', 'price', 'parent_asin', 'description']
].copy()




In [None]:
# Inspect the number of missing values in each column 
print(metadata_selected_features[['title', 'average_rating', 'rating_number', 'price', 'parent_asin', 'description']].isna().sum())

In [7]:
# Some rows are missing a price value.
# Fill the missing prices with the median price, assigning through .loc[].
metadata_selected_features.loc[:, 'price'] = metadata_selected_features['price'].fillna(metadata_selected_features['price'].median())


In [None]:
# Inspect the number of missing values in each column 
print(metadata_selected_features[['title', 'average_rating', 'rating_number', 'price', 'parent_asin', 'description']].isna().sum())

In [None]:
# Check whether any user has rated the same product (asin) more than once.
# Step 1: count the reviews that exist for every (user_id, asin) pair.
pair_sizes = reviews_selected_features.groupby(['user_id', 'asin']).size()
duplicates = pair_sizes.reset_index(name='count')

# Step 2: keep only the pairs that occur more than once.
has_repeat = duplicates['count'] > 1
duplicate_ratings = duplicates[has_repeat]

# Step 3: report how many such pairs exist and list them.
print(f"Number of duplicate entries: {len(duplicate_ratings)}")
print(duplicate_ratings)


When we try to create the user_item_matrix, we get a memory overflow error. The error indicates that the matrix being created is too large for the system to handle.
This happens because the pivot_table function attempts to generate a matrix with an enormous number of cells,
resulting in an overflow or memory issues.

How to fix it
1. Filter or sample the data
Reduce the size of the dataset by working with a subset of users or products.

In [8]:
# Keep only the 1000 users with the most reviews and the 1000 most-reviewed products
user_review_counts = reviews_selected_features['user_id'].value_counts()
product_review_counts = reviews_selected_features['asin'].value_counts()
top_users = user_review_counts.head(1000).index
top_products = product_review_counts.head(1000).index

# A review is kept only when both its user and its product are in the top lists
is_top_user = reviews_selected_features['user_id'].isin(top_users)
is_top_product = reviews_selected_features['asin'].isin(top_products)
filtered_data = reviews_selected_features[is_top_user & is_top_product]


In [10]:
print(top_products)

Index(['B007IAE5WY', 'B00EEN2HCS', 'B07C533XCW', 'B00R1TAN7I', 'B08L5KN7X4',
       'B019GBG0IE', 'B0719KWG8H', 'B0092MCQZ4', 'B0107QYW14', 'B0070Z7KME',
       ...
       'B00MQTR48E', 'B00RQ6RXPQ', 'B01CZY0LKW', 'B074FST3SZ', 'B00UB7D60I',
       'B08HP6FM6K', 'B07H23KW68', 'B012H5V5PE', 'B01BZVADRW', 'B000GCVVWI'],
      dtype='object', name='asin', length=1000)


In [None]:
# Create a user-item matrix: one row per user_id, one column per asin, with the
# rating as the cell value. No aggfunc is passed, so pivot_table's default
# aggregation is applied when a (user_id, asin) pair has several ratings.
# Pairs without any rating end up as missing values (filled with 0 later on).
user_item_matrix = filtered_data.pivot_table(index='user_id', columns='asin', values='rating')


In [None]:
print(user_item_matrix.head())

In [None]:
# Collaborative filtering example using truncated Singular Value Decomposition (SVD)
from scipy.sparse.linalg import svds

# Treat missing ratings as 0. Rebinding the name (instead of inplace=True) keeps
# this cell idempotent and leaves downstream cells with the same filled matrix.
user_item_matrix = user_item_matrix.fillna(0)
matrix = user_item_matrix.values

# svds needs 1 <= k < min(matrix.shape). Clamp the requested 50 latent factors
# so that a small filtered matrix does not make the call fail.
n_factors = min(50, min(matrix.shape) - 1)
if n_factors < 1:
    raise ValueError(
        f"user-item matrix of shape {matrix.shape} is too small for SVD"
    )

# Apply SVD
U, sigma, Vt = svds(matrix, k=n_factors)
sigma = np.diag(sigma)  # singular values as a diagonal matrix for the product below

# Reconstruct the dense matrix of predicted ratings (users x products)
predicted_ratings = U @ sigma @ Vt
predicted_ratings_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)

In [None]:
# Content-based filtering example
# Vectorize the product descriptions with TF-IDF.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def _description_to_text(description):
    """Return one 'description' cell as a single string for TF-IDF.

    A list/tuple of text fragments is joined with spaces. A plain string is
    returned unchanged (joining it would put a space between every character).
    Anything else, e.g. None/NaN for a missing description, becomes ''.
    """
    if isinstance(description, (list, tuple)):
        return ' '.join(str(part) for part in description)
    if isinstance(description, str):
        return description
    return ''


tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(metadata_selected_features['description'].apply(_description_to_text))

# Compute the similarity as a sparse matrix (dense_output=False) to reduce the
# data size and prevent errors caused by insufficient memory.
cosine_sim = cosine_similarity(tfidf_matrix, dense_output=False)

In [None]:
# To "train" this model, focus on hyperparameter tuning (e.g., the number of latent factors, k).
from sklearn.model_selection import train_test_split

# Split the rows (users) of the user-item matrix into train and test sets
train_data, test_data = train_test_split(user_item_matrix, test_size=0.2, random_state=42)

# Fill missing values in the train/test sets. Rebinding (instead of
# inplace=True) avoids modifying the frames returned by the split in place.
train_data = train_data.fillna(0)
test_data = test_data.fillna(0)

# svds needs 1 <= k < min(shape). Clamp the requested 50 latent factors so a
# small training matrix cannot make the call fail.
n_factors_train = min(50, min(train_data.shape) - 1)
if n_factors_train < 1:
    raise ValueError(
        f"training matrix of shape {train_data.shape} is too small for SVD"
    )

# Apply SVD on the training data
U_train, sigma_train, Vt_train = svds(train_data.values, k=n_factors_train)
sigma_train = np.diag(sigma_train)

# Reconstruct the train matrix
predicted_ratings_train = U_train @ sigma_train @ Vt_train
predicted_ratings_train_df = pd.DataFrame(predicted_ratings_train, index=train_data.index, columns=train_data.columns)


In [None]:
# Content-based filtering (using metadata):
# The TF-IDF cosine similarities are already available in cosine_sim; use them
# to find the top-N most similar items for a product.

# Example: get the top-10 items most similar to one specific item
item_idx = 0  # Example item index (row position, as used with metadata.iloc below)
similar_items = cosine_sim[item_idx].toarray().flatten()

# Mask out the query item so it is not returned as its own recommendation.
similar_items[item_idx] = -np.inf
top_similar_indices = similar_items.argsort()[-10:][::-1]

# Metadata rows of the top-10 similar items, most similar first
top_similar_items = metadata.iloc[top_similar_indices]

print(top_similar_items)


In [None]:
# Evaluate the model
# Offline metrics: Precision@K, Recall@K and NDCG@K on the test set.

# Precision@K and Recall@K:

def precision_recall_at_k(predicted, actual, k=10):
    """Compute Precision@K and Recall@K for a single user.

    Args:
        predicted: 1-D NumPy array of predicted scores, one entry per item.
        actual: 1-D NumPy array in the same item order; non-zero entries mark
            the relevant items.
        k: number of highest-scored items treated as recommended.

    Returns:
        A (precision, recall) tuple. Both are 0 when nothing is recommended
        (k <= 0 or no items); recall is 0 when there are no relevant items.
    """
    # Guard k <= 0 explicitly: the slice [-0:] would select *every* item.
    if k <= 0:
        return 0.0, 0.0

    # Indices of the k highest predicted scores, best first
    top_k_items = predicted.argsort()[-k:][::-1]

    relevant = set(actual.nonzero()[0])  # indices of the items with a non-zero actual value
    recommended = set(top_k_items)
    if not recommended:
        # No items at all: avoid dividing by zero
        return 0.0, 0.0

    hits = len(recommended & relevant)
    precision = hits / len(recommended)
    recall = hits / len(relevant) if relevant else 0

    return precision, recall

# Evaluate for a specific user
user_idx = 0  # Example: position of the user within the test set

# Look the user up by ID rather than by position. test_data is a shuffled
# subset of the users, so row `user_idx` of test_data and row `user_idx` of
# predicted_ratings_df are generally different users; indexing both by position
# would compare one user's predictions with another user's ratings.
eval_user_id = test_data.index[user_idx]
predicted_ratings_user = predicted_ratings_df.loc[eval_user_id].values
actual_ratings_user = test_data.iloc[user_idx].values

precision, recall = precision_recall_at_k(predicted_ratings_user, actual_ratings_user, k=10)
print(f"Precision@10: {precision:.4f}, Recall@10: {recall:.4f}")


In [None]:
# NDCG@K:
def ndcg_at_k(predicted, actual, k=10):
    """Compute the Normalized Discounted Cumulative Gain at K for one user.

    Args:
        predicted: 1-D NumPy array of predicted scores, one entry per item.
        actual: 1-D NumPy array in the same item order holding the true
            ratings (0 for unrated items); the rating is used as the gain.
        k: number of highest-scored items to evaluate.

    Returns:
        DCG of the predicted top-k divided by the DCG of the ideal top-k
        ordering, or 0 when k <= 0 or no item has a positive gain.
    """
    # Guard k <= 0 explicitly: the slice [-0:] would select *every* item.
    if k <= 0:
        return 0

    # Indices of the k highest predicted scores, best first
    top_k_items = predicted.argsort()[-k:][::-1]
    dcg = sum(actual[idx] / np.log2(rank + 2) for rank, idx in enumerate(top_k_items))

    # The ideal ranking must use the same graded gains as the DCG. Mixing
    # rating-valued gains with a binary ideal lets the ratio exceed 1.
    ideal_gains = np.sort(actual)[::-1][:k]
    idcg = sum(gain / np.log2(rank + 2) for rank, gain in enumerate(ideal_gains))

    return dcg / idcg if idcg > 0 else 0

# NDCG@10 for the example user's predicted and actual rating vectors
ndcg = ndcg_at_k(predicted_ratings_user, actual_ratings_user, k=10)
print(f"NDCG@10: {ndcg:.4f}")


In [None]:
# Model deployment
# Step 1: save the models
# joblib is used to persist the collaborative-filtering prediction matrix and
# the content-based similarity matrix.
# Because the dataset is large, saving can fail due to insufficient memory or disk space.

# To reduce the size, both objects are converted to a more compact dtype
# (float32 instead of float64) before they are saved.

import joblib
predicted_ratings_df = predicted_ratings_df.astype(np.float32)
cosine_sim = cosine_sim.astype(np.float32)
# Save the collaborative-filtering predicted ratings
joblib.dump(predicted_ratings_df, 'collaborative_model.pkl')
# Save the content-based similarity matrix
joblib.dump(cosine_sim, 'content_similarity_model.pkl')
#joblib.dump(cosine_sim, 'content_similarity_model.pkl', compress=('zlib', 3))



In [2]:
import joblib

#Testing
#After applying these optimizations, verify that the models load correctly:
model = joblib.load('collaborative_model.pkl')
#print(model.head())

In [13]:
#Here is we created code to generate random users from trained model
#We can generate product recomendation to each users using the model

import random

result = model.index.tolist()  
random_users = random.sample(result, 5)
#users =  jsonify({'users': random_users})

In [14]:
print(random_users)

['AHD5AUM4JPOAU4UYIULMT7PRNO5Q', 'AHT7TDFPRBZE3GH7RFKCPSCEAR5Q', 'AHC4WQGKGVP5UQ2FRGWY6TWAOETA', 'AFQ6MBCUFDVAEPXE5ME33KEPHPJQ', 'AH3BXW7KLIS2VAE56UXJS2NS7I5A']


In [16]:
import joblib

#Testing the second model
#After applying these optimizations, verify that the models load correctly:
model2 = joblib.load('content_similarity_model.pkl')
#print(model2)

In [21]:


# Extract product IDs
# Assuming model2 is a sparse matrix, use the `.tocoo()` method
coo_matrix = model2.tocoo()  # Convert to COO format to access row and column indices
product_ids = list(set(coo_matrix.row))  # Get unique product IDs from row indices

# Generate 5 random product IDs each time
random_ids = random.sample(product_ids, 5)

# Print the random product IDs
print("Random product IDs:", random_ids)


Random product IDs: [89842, 98485, 25973, 98706, 73880]


In [30]:
# Generate 5 random product IDs each time
random_ids = random.sample(product_ids, 5)

# Print the random product IDs
print("Random product IDs:", random_ids)

Random product IDs: [102593, 101152, 51089, 106165, 89411]


In [28]:
print(model2)

  (4, 4)	1.0
  (4, 5)	0.025714375
  (4, 14)	0.007455758
  (4, 16)	0.0053584795
  (4, 22)	0.003725506
  (4, 24)	0.008141328
  (4, 25)	0.028050074
  (4, 31)	0.0040644486
  (4, 32)	0.020614235
  (4, 56)	0.0126424255
  (4, 64)	0.008102727
  (4, 66)	0.017579852
  (4, 98)	0.060566526
  (4, 169)	0.035372905
  (4, 186)	0.0024434882
  (4, 196)	0.009983616
  (4, 199)	0.029806105
  (4, 201)	0.01699377
  (4, 215)	0.003272786
  (4, 221)	0.014466974
  (4, 225)	0.016566284
  (4, 227)	0.015423606
  (4, 229)	0.02498089
  (4, 231)	0.008248983
  (4, 241)	0.006958873
  :	:
  (112578, 107587)	0.02752059
  (112578, 107967)	0.03806353
  (112578, 108000)	0.055045705
  (112578, 108223)	0.08080182
  (112578, 108361)	0.025584362
  (112578, 108495)	0.01192527
  (112578, 108625)	0.029470121
  (112578, 108651)	0.03626448
  (112578, 109221)	0.025818396
  (112578, 109392)	0.028427133
  (112578, 109397)	0.0498002
  (112578, 109422)	0.06262538
  (112578, 109483)	0.04839668
  (112578, 109613)	0.026121575
  (112578, 1102

In [17]:
# NOTE(review): iterating over the sparse similarity matrix yields its rows
# (1 x N sparse matrices), not product identifiers, so this samples five
# similarity rows -- see the printed output of this cell.
products = list(model2)  # Adjust as per your model's structure
random_products = random.sample(products, 5)
#return jsonify({'products': random_products})
print(random_products)

[<1x112590 sparse matrix of type '<class 'numpy.float32'>'
	with 4636 stored elements in Compressed Sparse Row format>, <1x112590 sparse matrix of type '<class 'numpy.float32'>'
	with 0 stored elements in Compressed Sparse Row format>, <1x112590 sparse matrix of type '<class 'numpy.float32'>'
	with 0 stored elements in Compressed Sparse Row format>, <1x112590 sparse matrix of type '<class 'numpy.float32'>'
	with 4216 stored elements in Compressed Sparse Row format>, <1x112590 sparse matrix of type '<class 'numpy.float32'>'
	with 0 stored elements in Compressed Sparse Row format>]


In [None]:

#Runing this code will produce this error (OSError: [Errno 28] No space left on device)
# Model Deployment
#Step 1: Save the Model
#Use joblib or pickle to save the collaborative filtering matrix and the content-based similarity matrix.


import joblib

# Save the collaborative filtering predicted ratings
joblib.dump(predicted_ratings_df, 'collaborative_model.pkl')

# Save the content-based similarity matrix
joblib.dump(cosine_sim, 'content_similarity_model.pkl')


In [None]:
#Here we created web for testing the recommendation model

#Frontend Integration
#Step 1: We Develop a Simple UI
#Create a minimal HTML/CSS/JavaScript frontend to display recommendations.

#Example index.html:

#We placed index.html inside the templates folder of our Flask program

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Product Recommendation System</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f8f9fa;
            margin: 0;
            padding: 0;
        }
        .container {
            width: 50%;
            margin: auto;
            padding: 20px;
            background-color: #ffffff;
            border-radius: 8px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            margin-top: 50px;
        }
        h1 {
            text-align: center;
            color: #343a40;
        }
        form {
            display: flex;
            flex-direction: column;
        }
        label {
            margin-bottom: 10px;
            font-weight: bold;
        }
        input, select {
            margin-bottom: 20px;
            padding: 10px;
            font-size: 16px;
            border: 1px solid #ced4da;
            border-radius: 4px;
        }
        button {
            padding: 10px;
            font-size: 16px;
            color: #ffffff;
            background-color: #007bff;
            border: none;
            border-radius: 4px;
            cursor: pointer;
        }
        button:hover {
            background-color: #0056b3;
        }
        .result {
            margin-top: 20px;
            padding: 10px;
            background-color: #e9ecef;
            border-radius: 4px;
        }
        .extra-buttons {
            display: flex;
            justify-content: space-between;
            margin-bottom: 20px;
        }
        
        
    table {
        width: 100%;
        border-collapse: collapse;
        margin: 20px 0;
        font-size: 18px;
        text-align: left;
    }
    table th, table td {
        border: 1px solid #ddd;
        padding: 8px;
    }
    table th {
        background-color: #f4f4f4;
        color: #333;
        text-transform: uppercase;
    }
    table tr:nth-child(even) {
        background-color: #f9f9f9;
    }
    table tr:hover {
        background-color: #f1f1f1;
    }
    h2 {
        font-family: Arial, sans-serif;
        color: #333;
    }


    </style>
</head>
<body>
    <div class="container">
        <h1>Product Recommendation System</h1>
        <div class="extra-buttons">
            <button onclick="generateUsers()">Generate 5 Random Users</button>
            <button onclick="generateProducts()">Generate 5 Product IDs</button>
        </div>
        <form id="recommendation-form">
            <label for="method">Recommendation Method:</label>
            <select id="method" name="method" onchange="toggleFields()">
                <option value="collaborative">Collaborative</option>
                <option value="content">Content</option>
            </select>

            <label for="user_id" id="user_id_label">User ID (for Collaborative):</label>
            <input type="text" id="user_id" name="user_id">

            <label for="item_id" id="item_id_label">Item ID (for Content):</label>
            <input type="number" id="item_id" name="item_id">

            <button type="submit">Get Recommendations</button>
        </form>
        <div class="result" id="result"></div>
    
    
     <!-- New Section for Getting Titles -->       
        
      <div class="extra-actions">
        <label for="product-ids">Enter 5 Sparse Matrix Product IDs:</label>
        <input type="text" id="product-ids" placeholder="Comma-separated product IDs" style="width: 100%; margin-bottom: 10px;">
        <button onclick="fetchTitlesFromIds()" style="width: 100%; margin-bottom: 20px;">Get Titles from Product IDs</button>

        <label for="parent-asins">Enter 5 Parent ASINs:</label>
        <input type="text" id="parent-asins" placeholder="Comma-separated parent ASINs" style="width: 100%; margin-bottom: 10px;">
        <button onclick="fetchTitlesFromAsins()" style="width: 100%;">Get Titles from Parent ASINs</button>
      </div>


        <!-- Results Table -->
        <div id="result-table"></div>
    

    </div>

    <script>
        function toggleFields() {
            const method = document.getElementById('method').value;
            const user_id_label = document.getElementById('user_id_label');
            const user_id_input = document.getElementById('user_id');
            const item_id_label = document.getElementById('item_id_label');
            const item_id_input = document.getElementById('item_id');

            if (method === 'collaborative') {
                user_id_label.style.display = 'block';
                user_id_input.style.display = 'block';
                item_id_label.style.display = 'none';
                item_id_input.style.display = 'none';
            } else if (method === 'content') {
                user_id_label.style.display = 'none';
                user_id_input.style.display = 'none';
                item_id_label.style.display = 'block';
                item_id_input.style.display = 'block';
            }
        }

        function generateUsers() {
            fetch('/generate-random-users')
                .then(response => response.json())
                .then(data => {
                    const resultDiv = document.getElementById('result');
                    if (data.users) {
                        resultDiv.innerHTML = '<h2>Random Users:</h2><ul>' + data.users.map(user => `<li>${user}</li>`).join('') + '</ul>';
                    } else if (data.error) {
                        resultDiv.innerHTML = `<h2>Error:</h2><p>${data.error}</p>`;
                    }
                })
                .catch(error => {
                    const resultDiv = document.getElementById('result');
                    resultDiv.innerHTML = `<h2>Error:</h2><p>${error}</p>`;
                });
        }

        
  

        
        function generateProducts() {
            fetch('/generate-random-products')
                .then(response => response.json())
                .then(data => {
                    const resultDiv = document.getElementById('result');
                    if (data.products) {
                        resultDiv.innerHTML = '<h2>Random Product IDs:</h2><ul>' + data.products.map(product => `<li>${product}</li>`).join('') + '</ul>';
                    } else if (data.error) {
                        resultDiv.innerHTML = `<h2>Error:</h2><p>${data.error}</p>`;
                    }
                })
                .catch(error => {
                    const resultDiv = document.getElementById('result');
                    resultDiv.innerHTML = `<h2>Error:</h2><p>${error}</p>`;
                });
        }
        
        
        // Reads the comma-separated product IDs from the #product-ids input,
        // POSTs them to /get-titles-from-ids and renders the returned titles.
        function fetchTitlesFromIds() {
            // Parse with an explicit radix and drop entries that are not numbers;
            // a NaN would otherwise be serialized as null in the JSON body.
            const productIds = document.getElementById('product-ids').value
                .split(',')
                .map(id => parseInt(id.trim(), 10))
                .filter(id => !Number.isNaN(id));

            if (productIds.length === 0) {
                alert('Please enter at least one numeric product ID.');
                return;
            }

            fetch('/get-titles-from-ids', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ product_ids: productIds })
            })
            .then(response => {
                // Non-2xx statuses are turned into errors handled by catch()
                if (!response.ok) {
                    throw new Error('Network response was not ok ' + response.statusText);
                }
                return response.json();
            })
            .then(data => {
                if (!data.results) {
                    throw new Error('Invalid response structure');
                }
                displayResults_id(data.results, "Titles from Product IDs");
            })
            .catch(error => alert('Fetch error: ' + error.message));
        }

        
       
        
        function fetchTitlesFromAsins() {
    const parentAsins = document.getElementById('parent-asins').value.split(',').map(id => id.trim());
    fetch('/get-titles-from-asin', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ parent_asins: parentAsins })
    })
    .then(response => {
        if (!response.ok) {
            throw new Error('Network response was not ok ' + response.statusText);
        }
        return response.json();
    })
    .then(data => {
        if (!data.results) {
            throw new Error('Invalid response structure');
        }
        displayResults(data.results, "Titles from Parent ASINs");
    })
    .catch(error => alert('Fetch error: ' + error.message));
}

// Renders `results` (objects with `parent_asin` and `title`) as a two-column
// table under `heading` inside the #result-table element.
// All text is assigned through textContent instead of being concatenated into
// innerHTML, so titles containing markup characters are displayed literally
// rather than being parsed as HTML.
function displayResults(results, heading) {
    const container = document.getElementById('result-table');

    const headingEl = document.createElement('h2');
    headingEl.textContent = heading;

    const table = document.createElement('table');
    const headerRow = table.insertRow();
    ['Parent ASIN', 'Title'].forEach(label => {
        const th = document.createElement('th');
        th.textContent = label;
        headerRow.appendChild(th);
    });

    results.forEach(result => {
        const row = table.insertRow();
        row.insertCell().textContent = String(result.parent_asin);
        row.insertCell().textContent = String(result.title);
    });

    // Replace whatever was rendered previously
    container.innerHTML = '';
    container.appendChild(headingEl);
    container.appendChild(table);
}

        
        // Renders `results` (objects with `product_id` and `title`) as a two-column
        // table under `heading` inside the #result-table element.
        // All text is assigned through textContent instead of being concatenated
        // into innerHTML, so titles containing markup characters are displayed
        // literally rather than being parsed as HTML.
        function displayResults_id(results, heading) {
            const container = document.getElementById('result-table');

            const headingEl = document.createElement('h2');
            headingEl.textContent = heading;

            const table = document.createElement('table');
            const headerRow = table.insertRow();
            ['Product id', 'Title'].forEach(label => {
                const th = document.createElement('th');
                th.textContent = label;
                headerRow.appendChild(th);
            });

            results.forEach(result => {
                const row = table.insertRow();
                row.insertCell().textContent = String(result.product_id);
                row.insertCell().textContent = String(result.title);
            });

            // Replace whatever was rendered previously
            container.innerHTML = '';
            container.appendChild(headingEl);
            container.appendChild(table);
        }



        document.getElementById('recommendation-form').addEventListener('submit', function(event) {
            event.preventDefault();

            const method = document.getElementById('method').value;
            const user_id = document.getElementById('user_id').value;
            const item_id = document.getElementById('item_id').value;

            const params = new URLSearchParams({
                method: method,
                user_id: user_id,
                item_id: item_id
            }).toString();

            fetch(`/recommend?${params}`)
                .then(response => response.json())
                .then(data => {
                    const resultDiv = document.getElementById('result');
                    if (data.recommendations) {
                        
                        resultDiv.innerHTML = '<h2>Recommendations:</h2>[' + data.recommendations.map(item => `${item}`).join(', ') + ']';
                        //resultDiv.innerHTML = '<h2>Recommendations:</h2><ul>' + data.recommendations.map(item => `<li>${item}</li>`).join('') + '</ul>';
                    } else if (data.error) {
                        resultDiv.innerHTML = `<h2>Error:</h2><p>${data.error}</p>`;
                    }
                })
                .catch(error => {
                    const resultDiv = document.getElementById('result');
                    resultDiv.innerHTML = `<h2>Error:</h2><p>${error}</p>`;
                });
        });

        // Initial call to set the fields based on the default selection
        toggleFields();
    </script>
</body>
</html>



In [None]:
# Step2, We expose Recommendations via an API
#  you can Use Flask or FastAPI to create a REST API that provides recommendations.

#Example with Flask:

from flask import Flask, request, jsonify, render_template
import joblib
import numpy as np
import pandas as pd
import random
import gc

gc.collect()
# Load the saved models
collaborative_model = joblib.load('collaborative_model.pkl')  # Loaded as a pandas DataFrame
content_model = joblib.load('content_similarity_model.pkl')   # Assuming a sparse matrix or ndarray

# Load the product metadata; it is used to look up which product a
# recommended product ID refers to.
metadata = pd.read_json('meta_All_Beauty.jsonl', lines=True)

# Product IDs in the sparse similarity matrix correspond to the index of the
# `metadata` DataFrame, so map every index value to its ASIN (the dataset's
# product identifier).
product_id_to_asin = dict(zip(metadata.index, metadata['parent_asin']))

# User IDs known to the collaborative model
list_users_result = collaborative_model.index.tolist()  # Adjust as per your model's structure

# Product IDs: the distinct row indices that have stored entries in the
# similarity matrix (COO format exposes the row index of every stored entry)
coo_matrix = content_model.tocoo()
product_ids = list(set(map(int, coo_matrix.row)))

app = Flask(__name__)

@app.route('/generate-random-users', methods=['GET'])
def generate_random_users():
    """Return up to five randomly chosen user IDs from the collaborative model as JSON."""
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of available users.
    sample_size = min(5, len(list_users_result))
    random_users = random.sample(list_users_result, sample_size)
    return jsonify({'users': random_users})

@app.route('/generate-random-products', methods=['GET'])
def generate_random_products():
    """Return up to five randomly chosen product IDs (similarity-matrix row indices) as JSON."""
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of available product IDs.
    sample_size = min(5, len(product_ids))
    products = random.sample(product_ids, sample_size)
    return jsonify({'products': products})

def find_titles_from_ids(random_ids, id_to_asin_map, metadata):
    """Return the product titles for up to five product IDs.

    Args:
        random_ids: list of product IDs; only the first five are processed.
        id_to_asin_map: dict mapping a product ID to its parent ASIN.
        metadata: DataFrame with 'parent_asin' and 'title' columns.

    Returns:
        A list with one entry per processed ID: the title, "Title not found"
        when the ASIN has no metadata row, or "ASIN not found for product ID"
        when the ID is not in the map.
    """
    if len(random_ids) > 5:
        random_ids = random_ids[:5]

    titles = []
    for product_id in random_ids:
        asin = id_to_asin_map.get(product_id)
        if asin is None:
            # IDs that arrive as text (e.g. "42") never match integer keys;
            # retry with the integer form before giving up.
            try:
                asin = id_to_asin_map.get(int(product_id))
            except (TypeError, ValueError, OverflowError):
                asin = None

        if asin:
            title = metadata.loc[metadata['parent_asin'] == asin, 'title'].values
            titles.append(title[0] if len(title) > 0 else "Title not found")
        else:
            titles.append("ASIN not found for product ID")
    return titles

    
    
@app.route('/get-titles-from-ids', methods=['POST'])
def get_titles_from_ids():
    """Return the titles for the product IDs posted as JSON.

    Expects a JSON object with a 'product_ids' list. Responds with
    {'results': [{'product_id': ..., 'title': ...}, ...]} on success, with
    status 400 for a missing or malformed body, and with status 500 plus an
    'error' message when the lookup itself fails.
    """
    # silent=True returns None instead of raising for a missing/invalid JSON
    # body, so a client mistake is reported as 400 rather than as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    random_ids = data.get('product_ids', [])
    if not isinstance(random_ids, list):
        return jsonify({'error': "'product_ids' must be a list"}), 400

    try:
        # One result per requested ID, pairing the ID with its title
        results = [
            {
                'product_id': product_id,
                'title': find_titles_from_ids([product_id], product_id_to_asin, metadata)[0],
            }
            for product_id in random_ids
        ]
        return jsonify({'results': results})
    except Exception as e:
        # Log any errors
        print(f"Error: {e}")
        return jsonify({'error': str(e)}), 500




# Look up a product title by its parent_asin
def find_title_by_parent_asin(parent_asin, metadata):
    """Return the title of the first metadata row whose 'parent_asin' matches.

    A fixed "not found" message is returned when no row matches.
    """
    matching_titles = metadata['title'][metadata['parent_asin'] == parent_asin]
    if len(matching_titles) == 0:
        return "Title not found for the given parent_asin."
    return matching_titles.iloc[0]

# Function to process a list of product IDs and return their titles
def process_user_ids(product_ids, metadata):
    """Return the titles for a list of at most five product IDs.

    Each ID is looked up with find_title_by_parent_asin, i.e. it is matched
    against the 'parent_asin' column of `metadata`.

    Args:
        product_ids: list of product IDs to look up.
        metadata: DataFrame passed through to find_title_by_parent_asin.

    Returns:
        A list of titles in input order, or an empty list (after printing an
        error) when more than five IDs are given.
    """
    if len(product_ids) > 5:
        # The message previously said "user IDs", but the list holds product IDs.
        print("Error: The list should contain a maximum of 5 product IDs.")
        return []

    return [find_title_by_parent_asin(product_id, metadata) for product_id in product_ids]


@app.route('/get-titles-from-asin', methods=['POST'])
def get_titles_from_asin():
    """Return the titles for up to five parent ASINs posted as JSON.

    Expects a JSON object with a 'parent_asins' list. Responds with
    {'results': [{'parent_asin': ..., 'title': ...}, ...]}, or with status 400
    and an 'error' message for a missing or malformed body.
    """
    # silent=True returns None instead of raising for a missing/invalid JSON
    # body; without the check below, None.get(...) would surface as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    parent_asins = data.get('parent_asins', [])
    if not isinstance(parent_asins, list):
        return jsonify({'error': "'parent_asins' must be a list"}), 400

    # Only the first five ASINs are processed
    parent_asins = parent_asins[:5]

    # One result per ASIN, pairing it with its title
    results = [
        {'parent_asin': parent_asin, 'title': find_title_by_parent_asin(parent_asin, metadata)}
        for parent_asin in parent_asins
    ]

    return jsonify({'results': results})



def get_recommendations(method, user_id=None, item_id=None):
    """Return five recommendations, or a dict with an 'error' message.

    Args:
        method: 'collaborative' (needs user_id) or 'content' (needs item_id).
        user_id: row label in `collaborative_model`.
        item_id: non-negative integer row index in `content_model`.

    Returns:
        collaborative: the five column labels with the highest predicted
        rating for the user.
        content: the five row indices most similar to `item_id`, excluding
        `item_id` itself.
        On invalid input: {"error": "..."}.
    """
    if method == 'collaborative':
        if user_id is None:
            return {"error": "user_id is required for collaborative recommendations"}

        try:
            # Fetch the user's predicted ratings from the collaborative model
            user_ratings = collaborative_model.loc[user_id]
        except KeyError:
            return {"error": f"User ID '{user_id}' not found in the collaborative model"}
        return user_ratings.sort_values(ascending=False).head(5).index.tolist()

    if method == 'content':
        if item_id is None:
            return {"error": "item_id is required for content-based recommendations"}

        not_found = {"error": f"Item ID {item_id} not found in the content model"}
        # A negative index would silently address rows from the end of the
        # matrix, so reject it like any other unknown ID.
        if item_id < 0:
            return not_found

        try:
            # Fetch the item's similarities from the content model
            item_similarities = content_model[item_id].toarray().flatten()
        except IndexError:
            return not_found

        # Mask out the query item so it is not recommended to itself
        item_similarities[item_id] = -np.inf
        return np.argsort(item_similarities)[-5:][::-1].tolist()

    return {"error": "Invalid method"}

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/recommend', methods=['GET'])
def recommend():
    """Serve recommendations for the 'method', 'user_id' and 'item_id' query parameters.

    Responds with {"recommendations": [...]} on success, or with the error
    dict and status 400 when get_recommendations reports an error.
    """
    query = request.args
    user_id = query.get('user_id')  # kept as a string
    item_id = query.get('item_id', type=int)
    method = query.get('method', 'collaborative')  # collaborative is the default

    outcome = get_recommendations(method, user_id, item_id)

    failed = isinstance(outcome, dict) and "error" in outcome
    if not failed:
        return jsonify({"recommendations": outcome})
    return jsonify(outcome), 400
    
# Start the Flask server only when this file is executed as a script.
if __name__ == '__main__':
    import os
    # NOTE(review): "development" looks like a FLASK_ENV-style value, while
    # FLASK_DEBUG is presumably a boolean-like flag -- confirm against the Flask docs.
    os.environ["FLASK_DEBUG"] = "development"
    # Runs with debug mode enabled and the auto-reloader disabled.
    app.run(debug=True, use_reloader=False)



In [None]:
# Run the App
#Place index.html inside the templates folder located in the same directory as your Flask app.


# To Run the Flask app run your terminal:
python app.py

#Access the app in your browser at http://127.0.0.1:5000.




In [None]:
# to run and test the recomended model
#open your terminal or CMD and navigate to Recommendation folder
# To Run the Flask app type below on the termincal and press enter:
python app.py
#Access the app in your browser at http://127.0.0.1:5000.

The code below helps test the recommendation model in a Jupyter notebook.

In [5]:
from scipy.sparse import coo_matrix
import joblib
# Load the persisted content-similarity model from disk.
# NOTE(review): joblib.load deserializes a pickle-style file - only load artifacts you trust.
content_model = joblib.load('content_similarity_model.pkl')
# NOTE(review): this rebinds the name `coo_matrix`, hiding the scipy class
# imported at the top of this cell; a distinct variable name would avoid that.
coo_matrix = content_model.tocoo()  # Convert to COO format

In [11]:
# Load the product metadata file (JSON Lines: one JSON object per line) into a DataFrame.
metadata = pd.read_json('meta_All_Beauty.jsonl', lines=True)

In [12]:
# Map each metadata row label (metadata.index value) to that row's parent_asin.
# NOTE(review): presumably these row labels are the integer item IDs used by
# the content model - confirm against how the model was built.
product_id_to_asin = dict(zip(metadata.index, metadata['parent_asin']))

In [13]:
# Hard-coded sample of five integer product IDs (keys of product_id_to_asin) to look up.
random_ids = [79668, 18107, 72875, 56161, 13208]

In [None]:
# Prompt until the user supplies exactly 5 integer product IDs.
# The entries are converted to int because product_id_to_asin is keyed by
# metadata.index values (integers, like the hard-coded sample list); leaving
# them as strings would make every lookup miss.
while True:
    user_input = input("Enter 5 product IDs separated by commas: ").strip()
    # Split on commas and drop empty pieces such as those from a trailing comma.
    tokens = [item.strip() for item in user_input.split(",") if item.strip()]
    if len(tokens) != 5:
        print("Please enter exactly 5 product IDs.")
        continue
    try:
        random_ids = [int(token) for token in tokens]
    except ValueError:
        # At least one entry was not a whole number; ask again.
        print("Product IDs must be whole numbers.")
        continue
    break

In [14]:

# Look up product titles for a list of product IDs
def find_titles_from_ids(random_ids, id_to_asin_map, metadata):
    """Resolve each product ID to a product title.

    Args:
        random_ids: iterable of product IDs (keys of ``id_to_asin_map``).
        id_to_asin_map: dict mapping a product ID to its ``parent_asin``.
        metadata: DataFrame with ``parent_asin`` and ``title`` columns.

    Returns:
        A list with one entry per ID, in input order: the first matching
        title, ``"Title not found"`` when no metadata row has that
        ``parent_asin``, or ``"ASIN not found for product ID"`` when the ID
        has no (truthy) entry in the map.
    """
    def _title_for(product_id):
        asin = id_to_asin_map.get(product_id)
        if not asin:
            return "ASIN not found for product ID"
        matches = metadata.loc[metadata['parent_asin'] == asin, 'title'].values
        if len(matches) == 0:
            return "Title not found"
        # Several rows may share a parent_asin; the first one wins.
        return matches[0]

    return [_title_for(product_id) for product_id in random_ids]




In [15]:
# Resolve the titles for the product IDs currently held in random_ids
titles = find_titles_from_ids(random_ids, product_id_to_asin, metadata)

# Report one "ID | title" row per product, framed by separator lines
rule = "=" * 50
print("\nProduct Titles for Entered Product IDs:")
print(rule)
for pid, title in zip(random_ids, titles):
    print(f"Product ID: {pid:<10} | Title: {title}")
print(rule)


Product Titles for Entered Product IDs:
Product ID: 79668      | Title: Exfoliating shampoo for hair regrowth CRESCINA LABO TRANSDERMIC RE-GROWTH Shampoo for women 200 ml
Product ID: 18107      | Title: Yellow Beeswax Pellets sourced from a USDA and ISO 9001 Certified Organic Supplier
Product ID: 72875      | Title: LA COLORS Shimmer & Shine Metal Polish Set
Product ID: 56161      | Title: Goop glow Body Luminizer 2.7 Fl. Oz! Pale-Gold Candlelit Glow Body Shimmer! Instantly Blurs Imperfections! Leaves Flattering Highlights On All Skin Tones! W Bonus Face Scrub
Product ID: 13208      | Title: Village Naturals Muscle Aches & Pains Relief Lotion & Foaming Bath Oil Body Wash Relaxing Set


In [3]:
# Prints a single empty line. NOTE(review): looks like a leftover scratch cell - consider removing it.
print()




In [10]:
# Load the product metadata file (JSON Lines: one JSON object per line) into a DataFrame.
# NOTE(review): the same file is read with the same call in an earlier cell of this notebook.
metadata = pd.read_json('meta_All_Beauty.jsonl', lines=True)

In [16]:
# Hard-coded list of five parent_asin strings whose titles are looked up in the cells below.
parent_asin =  ['B007IAE5WY', 'B00EEN2HCS', 'B07C533XCW', 'B00R1TAN7I', 'B08L5KN7X4']

In [17]:
# Look up a single product title by its parent_asin
def find_title_by_parent_asin(parent_asin, metadata):
    """Return the title of the first metadata row matching ``parent_asin``.

    Args:
        parent_asin: the parent ASIN value to search for.
        metadata: DataFrame with ``parent_asin`` and ``title`` columns.

    Returns:
        The ``title`` of the first matching row, or the string
        ``"Title not found for the given parent_asin."`` when no row matches.
    """
    is_match = metadata['parent_asin'] == parent_asin
    matching_titles = metadata.loc[is_match, 'title']
    if matching_titles.empty:
        return "Title not found for the given parent_asin."
    return matching_titles.iloc[0]

# Function to process a list of product IDs and return titles
def process_user_ids(product_ids, metadata, max_ids=5):
    """Look up the title for each product ID (parent_asin) in ``product_ids``.

    Despite the function's name, the items are product IDs, not user IDs:
    each one is passed to ``find_title_by_parent_asin``.

    Args:
        product_ids: sequence of parent_asin values.
        metadata: DataFrame passed through to ``find_title_by_parent_asin``.
        max_ids: largest number of IDs accepted in one call (default 5).

    Returns:
        A list of lookup results, one per ID and in input order. If more than
        ``max_ids`` IDs are supplied, an error message is printed and an
        empty list is returned.
    """
    if len(product_ids) > max_ids:
        print(f"Error: The list should contain a maximum of {max_ids} product IDs.")
        return []

    return [find_title_by_parent_asin(product_id, metadata) for product_id in product_ids]




In [18]:
# Example usage with a list of product IDs (the parent_asin values defined above)
product_ids = parent_asin
titles = process_user_ids(product_ids, metadata)
print(titles)

['Title not found for the given parent_asin.', 'Title not found for the given parent_asin.', 'Segbeauty empty bottle 160083', 'GranNaturals Boar Bristle Smoothing Hair Brush for Women and Men - Medium/Soft Bristles - Natural Wooden Large Flat Square Paddle Hairbrush for Fine, Thin, Straight, Long, or Short Hair', 'Meeteasy Dental Cleaner Tool Kit - Dental Care for Adult - 100% Proven Safe']


In [51]:
# Report one "ID | title" row per parent_asin, framed by separator lines
rule = "=" * 50
print("\nProduct Titles for Entered Product IDs:")
print(rule)
for pid, title in zip(parent_asin, titles):
    print(f"Product ID: {pid:<10} | Title: {title}")
print(rule)


Product Titles for Entered Product IDs:
Product ID: B007IAE5WY | Title: Title not found for the given parent_asin.
Product ID: B00EEN2HCS | Title: Title not found for the given parent_asin.
Product ID: B07C533XCW | Title: {'Brand': 'Segbeauty', 'Material': 'Plastic', 'Capacity': '160 Milliliters', 'Recommended Uses For Product': 'Water,Cleansing,Hair Spray,Indoor', 'Number of Items': '1', 'Is Discontinued By Manufacturer': 'No', 'Product Dimensions': '8 x 2 x 2 inches; 3.52 Ounces', 'Item model number': 'AM-160083-A01-JPN', 'UPC': '612520575953'}
Product ID: B00R1TAN7I | Title: {'Brand': 'GranNaturals', 'Color': 'Brown, Black', 'Shape': 'Square', 'Handle Material': 'Wood', 'Product Benefits': 'Smoothing', 'Is Discontinued By Manufacturer': 'No', 'Package Dimensions': '10 x 3.43 x 1.57 inches; 4.23 Ounces', 'Item model number': '753807366748000', 'UPC': '753807366748 794168026971', 'Manufacturer': 'Gran Goods'}
Product ID: B08L5KN7X4 | Title: {'Is Discontinued By Manufacturer': 'No', '