Model page: https://huggingface.co/indonesian-nlp/gpt2

⚠️ If the generated code snippets do not work, please open an issue on the [model repo](https://huggingface.co/indonesian-nlp/gpt2) and/or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) 🙏

In [None]:
# Install necessary libraries
!pip install Flask Flask-CORS pyngrok requests numpy scikit-learn

import requests
from flask import Flask, request, jsonify
from flask_cors import CORS
import numpy as np
import json
from pyngrok import ngrok
import os
from threading import Thread

# --- IMPORTANT: Provide your actual Hugging Face API token ---
# You can get one from huggingface.co/settings/tokens
# The HF_TOKEN environment variable is used when it is set, so the secret does
# not have to be saved inside the notebook; otherwise the placeholder below is
# used and must be replaced by hand.
HF_TOKEN = os.environ.get("HF_TOKEN", "<HF TOKEN HERE>")

# --- IMPORTANT: Provide your actual ngrok Auth Token ---
# You can get one from dashboard.ngrok.com/get-started/your-authtoken
# Same lookup order as above: NGROK_AUTH_TOKEN environment variable first,
# then the placeholder.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "<NGROK AUTH TOKEN HERE>")


# --- Configuration for Hugging Face API ---
# Endpoint of the hosted model that is queried for similarity scores
API_URL = "https://api-inference.huggingface.co/models/LazarusNLP/all-indo-e5-small-v4"
# Bearer-token header sent with every request to API_URL
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# --- Sentence Similarity Function ---
def get_similarity_scores_from_api(source_sentence, candidate_sentences):
    """
    Gets similarity scores between a source sentence and a list of candidate
    sentences using the Hugging Face Inference API.

    A single POST request is sent to API_URL with the payload
    {"inputs": {"source_sentence": ..., "sentences": [...]}}.

    Args:
        source_sentence (str): The sentence every candidate is compared against.
        candidate_sentences (list of str): The sentences to score.

    Returns:
        The decoded JSON body of the response (presumably one score per
        candidate, in order -- confirm against the model's task docs), or
        None when an input is empty, the request fails, or the body is not
        valid JSON.
    """
    if not source_sentence or not candidate_sentences:
        print("Source sentence or candidate sentences cannot be empty for similarity calculation.")
        return None

    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": candidate_sentences
        }
    }

    print("\n--- Debugging get_similarity_scores_from_api ---")
    print(f"API URL: {API_URL}")
    print(f"Payload (JSON): {json.dumps(payload, indent=2)}")

    # Bound before the try block so the error handler can tell "no response
    # at all" (connection failure, timeout) apart from "error response".
    response = None
    try:
        # Without a timeout, requests waits forever on a stalled connection.
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=30)
        response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)

        print(f"Response Status Code: {response.status_code}")
        print(f"Response Content: {response.text}")

        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching similarity scores: {e}")
        if response is not None:
            print(f"Response Status Code (on error): {response.status_code}")
            print(f"Response Content (on error): {response.text}")
        return None
    except ValueError as e:
        # Older requests versions raise a plain ValueError subclass when the
        # body is not valid JSON; treat it like any other failed call.
        print(f"Error fetching similarity scores: {e}")
        return None

# --- Recommendation Function ---
def recommend_products(user_query, product_database, top_n=3):
    """
    Recommends products based on user input by directly using the API's
    sentence similarity calculation.

    Each product contributes one candidate text: its 'search_vector' when
    that is set and not the string "null", otherwise its 'description',
    otherwise its 'name' (an empty string if even that is missing, so the
    candidate list stays aligned with product_database).

    Args:
        user_query (str): The user's description of their ideal product.
        product_database (list of dict): A list of product dictionaries.
                                         Each dict should have 'name' and 'description' or 'search_vector' (if not null).
        top_n (int): The number of top recommendations to return.

    Returns:
        list of dict: Shallow copies of the best-matching products, highest
                      score first, each with an added 'similarity_score'
                      float. Empty when the database is empty, the API call
                      fails, or the API result is not one numeric score per
                      product.
    """
    if not product_database:
        print("Product database is empty. Cannot make recommendations.")
        return []

    # Prepare candidate sentences: prioritize search_vector, then description, then name
    candidate_sentences = []
    for product in product_database:
        search_vector = product.get('search_vector')
        if search_vector and search_vector != "null": # "null" may arrive as a string instead of a real null
            candidate_sentences.append(search_vector)
        elif product.get('description'):
            candidate_sentences.append(product['description'])
        else:
            # Fallback to name; tolerate a missing/None name rather than raising KeyError
            candidate_sentences.append(product.get('name') or "")

    # Get similarity scores for the user query against all product descriptions/vectors
    similarity_scores = get_similarity_scores_from_api(user_query, candidate_sentences)

    if similarity_scores is None:
        print("Failed to get similarity scores for recommendations.")
        return []

    # The API result is untrusted: it must be a flat numeric sequence with
    # exactly one entry per product, or the indices below would be wrong.
    try:
        scores = np.asarray(similarity_scores, dtype=float)
    except (TypeError, ValueError):
        scores = None
    if scores is None or scores.ndim != 1 or scores.shape[0] != len(product_database):
        print("Unexpected similarity response format. Cannot make recommendations.")
        return []

    # Rank products from highest to lowest score
    ranked_indices = np.argsort(scores)[::-1]

    recommendations = []
    # max(..., 0) keeps a negative top_n meaning "no results" instead of a tail slice
    for product_index in ranked_indices[:max(top_n, 0)]:
        recommended_product = product_database[int(product_index)].copy()
        recommended_product['similarity_score'] = float(scores[product_index]) # Convert numpy float to Python float
        recommendations.append(recommended_product)

    return recommendations

# --- Flask App Setup ---
# Application object that the route decorators below register against
app = Flask(__name__)
CORS(app) # Enable CORS for all routes


@app.route('/')
def home():
    """Root endpoint: reply with a fixed text confirming the server is up."""
    status_message = "AI Product Recommender API is running!"
    return status_message

@app.route('/recommend', methods=['POST'])
def get_recommendations():
    """
    Handle POST /recommend.

    Expects a JSON object with 'userQuery' (the search text) and 'products'
    (a list of product objects). Responds with
    {"recommendations": [...]} and status 200, or with {"error": ...} and
    status 400 when the body is not a JSON object or a field is missing or
    malformed.
    """
    # silent=True yields None instead of raising on a missing or invalid JSON
    # body, so every malformed request gets the same JSON 400 reply.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Missing userQuery or products data"}), 400

    user_query = data.get('userQuery')
    products = data.get('products') # This will be the list of all products from React

    if not user_query or not products:
        return jsonify({"error": "Missing userQuery or products data"}), 400

    # recommend_products calls .get() on every entry, so reject anything
    # that is not a list of JSON objects up front.
    if not isinstance(products, list) or not all(isinstance(p, dict) for p in products):
        return jsonify({"error": "products must be a list of objects"}), 400

    print(f"Received user query: {user_query}")
    print(f"Received {len(products)} products for recommendation.")

    recommended = recommend_products(user_query, products, top_n=3)

    return jsonify({"recommendations": recommended}), 200

# Blocking Flask entry point, meant to be handed to a worker thread so the
# Colab cell can carry on and open the ngrok tunnel
def run_flask():
    """Serve the app on all interfaces, port 5000, with debug mode and the reloader off."""
    server_options = {
        'host': '0.0.0.0',
        'port': 5000,
        'debug': False,
        'use_reloader': False,
    }
    app.run(**server_options)

# Launch the Flask server on a daemon thread so this cell is not blocked by it
flask_thread = Thread(target=run_flask, daemon=True)
flask_thread.start()

print("Flask app started in background.")

# Expose the local Flask port to the internet through an ngrok tunnel
try:
    # Authenticate with ngrok before opening the tunnel
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    print("ngrok authentication token set.")

    # Open a tunnel to local port 5000 and keep its public address
    public_tunnel = ngrok.connect(5000)
    ngrok_url = public_tunnel.public_url
    print(f" * Public URL for AI API (Ngrok): {ngrok_url}")
    print("Keep this cell running in Colab to keep the tunnel active.")

except Exception as e:
    # Report the failure and how to recover instead of aborting the cell
    failure_messages = (
        f"Error starting ngrok: {e}",
        "Please ensure your NGROK_AUTH_TOKEN is correct and try again.",
        "If issues persist, try restarting the Colab runtime.",
    )
    for message in failure_messages:
        print(message)

Collecting Flask-CORS
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Downloading flask_cors-6.0.1-py3-none-any.whl (13 kB)
Downloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok, Flask-CORS
Successfully installed Flask-CORS-6.0.1 pyngrok-7.2.11
Flask app started in background.
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:Press CTRL+C to quit


ngrok authentication token set.
 * Public URL for AI API (Ngrok): https://0913-34-125-81-73.ngrok-free.app
Keep this cell running in Colab to keep the tunnel active.
