<a href="https://colab.research.google.com/github/celynhs/Bangkit-project/blob/main/Deployment_for_CC_BeBi_(Beli_Bijak).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Upload Datasets

Connect Google Drive to Colab (this step is not needed in the exported .py file).


In [None]:
# Mount Google Drive at /content/drive so that files stored there can be
# opened by path (this import is only available inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


import modules

In [None]:
# import modules
import math
import scipy
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.models import load_model

# Model Deployment 

## deploy model

Functions that run the two models, combine their outputs, and post-process the results.

In [None]:
# get recommendation from collaborative filtering
def collaborative_filtering_model(df, dataset, user_id, clfmodel, n=10):
    """Return the top-``n`` collaborative-filtering recommendations for a user.

    Every distinct product in ``dataset`` is scored for ``user_id`` with
    ``clfmodel``; the ``n`` highest-scored products are then joined with
    ``df`` to attach the product information.

    Args:
        df: Table used to look up product information. It must contain
            ``product_id`` and the columns dropped below (``user_id``,
            ``add_to_cart_order``, ``reordered``, ``actual_price``,
            ``ratings``, ``brand``).
        dataset: Table with ``user_id`` and ``product_id`` columns. Users and
            products are encoded as their position in ``unique()`` order --
            presumably the same encoding the model was trained with; verify
            against the training code.
        user_id: Identifier of the user to recommend for.
        clfmodel: Object whose ``predict([user_indices, product_indices])``
            returns one score per (user, product) pair.
        n: Number of recommendations to keep. Defaults to 10.

    Returns:
        A DataFrame with one row per recommended product, ordered by
        descending score, whose last column is ``clf_ratings``.

    Raises:
        KeyError: If ``user_id`` does not occur in ``dataset``.
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")

    # Encode users/products by their order of first appearance.
    unique_products = dataset['product_id'].unique()
    user_mapping = {uid: idx for idx, uid in enumerate(dataset['user_id'].unique())}
    num_products = len(unique_products)

    # Score every product for this single user.
    user_indices = np.full((num_products,), user_mapping[user_id])
    product_indices = np.arange(num_products)
    predictions = clfmodel.predict([user_indices, product_indices]).flatten()

    # nlargest already returns rows in descending score order, so no separate
    # sort is needed.
    scored = pd.DataFrame({'product_id': unique_products, 'clf_ratings': predictions})
    top_recs = scored.nlargest(n, 'clf_ratings')

    # Attach product info; df has one row per transaction, so keep a single
    # row per product and discard the per-transaction columns.
    clfinfo = pd.merge(top_recs, df, on='product_id')
    clfinfo = clfinfo.drop_duplicates(subset='product_id', keep='first')
    clfinfo = clfinfo.drop(
        ['user_id', 'add_to_cart_order', 'reordered', 'actual_price', 'ratings', 'brand'],
        axis=1,
    )

    # Move the predicted rating to the last column.
    ordered_columns = [col for col in clfinfo.columns if col != 'clf_ratings'] + ['clf_ratings']
    return clfinfo[ordered_columns]

# get recommendations from content based ratings
def content_based_model(df, dataset, user_id, cbfmodel, n=10):
    """Return the top-``n`` content-based recommendations and all predicted ratings.

    All rows of ``dataset`` that do not belong to ``user_id`` are scored with
    ``cbfmodel``. For each product the highest predicted score is kept, and
    the ``n`` best products are returned together with their product info.

    Args:
        df: Unscaled table; only the min/max of its ``discount_price`` column
            are read, to map the rescaled price back to its original range.
        dataset: Preprocessed table with ``user_id``, ``product_id``,
            ``category_id``, ``discount_price``, ``reordered``,
            ``add_to_cart_order``, ``name`` and ``category`` columns.
        user_id: Rows of this user are excluded from the candidates.
        cbfmodel: Object whose ``predict([category_ids, features])`` returns
            one score per candidate row.
        n: Number of recommendations to keep. Defaults to 10.

    Returns:
        A ``(cbfinfo, pred_rating)`` tuple. ``cbfinfo`` has the columns
        ``product_id``, ``name``, ``discount_price`` (original scale),
        ``category`` and ``cbf_ratings`` for the top products, in the order
        they appear in ``dataset``. ``pred_rating`` has ``product_id`` and
        ``cbf_ratings`` for every candidate product, best score first.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")

    # Score every row that does not belong to the requesting user.
    candidates = dataset[dataset['user_id'] != user_id]
    features = [
        candidates['category_id'],
        candidates[["discount_price", "reordered", "add_to_cart_order"]],
    ]
    scores = cbfmodel.predict(features).flatten()

    # Rank (product, score) pairs best-first. sorted() is stable, so rows with
    # equal scores keep their dataset order.
    ranked_pairs = sorted(
        zip(candidates['product_id'], scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    pred_rating = pd.DataFrame(ranked_pairs, columns=['product_id', 'cbf_ratings'])
    # A product can occur in many rows; keep only its best score.
    pred_rating = pred_rating.drop_duplicates(subset='product_id', keep='first')

    # The first n distinct products of the ranking are the recommendations.
    top_ids = pred_rating['product_id'].head(n)
    top_rows = candidates[candidates['product_id'].isin(top_ids)]
    top_rows = top_rows.drop_duplicates(subset='product_id')

    cbfrecs = top_rows[['product_id', 'name', 'discount_price', 'category']]
    cbfinfo = pd.merge(cbfrecs, pred_rating, on='product_id')

    # Undo the min-max rescaling of the price using the unscaled table.
    price_min = df['discount_price'].min()
    price_max = df['discount_price'].max()
    cbfinfo['discount_price'] = cbfinfo['discount_price'] * (price_max - price_min) + price_min

    return cbfinfo, pred_rating


# function to combine the results from 2 models
def finaldataset(df,dataset,user_id,cbfmodel,clfmodel):
    """Merge both models' recommendations into one table with both scores.

    The collaborative-filtering picks get their content-based score attached,
    the content-based picks get a collaborative-filtering score predicted for
    them, and the two tables are stacked. Both score columns are then mapped
    from the rescaled range back to the range of ``df['ratings']`` and
    clipped to [0, 5]. When a product appears in both tables, the row from
    the collaborative-filtering table is the one kept.
    """
    cf_picks = collaborative_filtering_model(df, dataset, user_id, clfmodel)
    cb_picks, cb_scores = content_based_model(df, dataset, user_id, cbfmodel)

    # Collaborative picks + their content-based score (NaN when none exists).
    cf_side = cf_picks.merge(cb_scores, on='product_id', how='left')

    # Content-based picks + a collaborative score predicted for each of them.
    cb_product_ids = list(cb_picks['product_id'])
    user_index = {uid: pos for pos, uid in enumerate(dataset['user_id'].unique())}
    product_index = {pid: pos for pos, pid in enumerate(dataset['product_id'].unique())}
    encoded_users = np.full(len(cb_product_ids), user_index[user_id])
    encoded_items = np.array([product_index[pid] for pid in cb_product_ids])
    cf_scores = clfmodel.predict([encoded_users, encoded_items]).flatten()
    cf_score_table = pd.DataFrame({'product_id': cb_product_ids, 'clf_ratings': cf_scores})
    cb_side = cb_picks.merge(cf_score_table, on='product_id', how='left')

    # Stack both sides, then undo the rating rescaling and bound the result.
    combined = pd.concat([cf_side, cb_side], axis=0)
    rating_min = df['ratings'].min()
    rating_max = df['ratings'].max()
    for score_column in ('cbf_ratings', 'clf_ratings'):
        restored = combined[score_column] * (rating_max - rating_min) + rating_min
        combined[score_column] = restored.clip(lower=0.0, upper=5.0)

    return combined.drop_duplicates(subset='product_id', keep='first')

# choose the top 10 recommendations from hybrid models (combination of 2 models)
def hybrid_recommend(df, dataset, user_id, cbfmodel, clfmodel, budget, max_items=10):
    """Pick the best-rated hybrid recommendations that fit within a budget.

    The combined table from ``finaldataset`` is ranked by a blended score:
    each model's rating is multiplied by that model's mean rating over the
    table, and the two products are summed and halved. Products are then
    taken greedily in rank order; a product whose price would push the
    running total above ``budget`` is skipped and cheaper, lower-ranked
    products may still be added.

    Args:
        df, dataset, user_id, cbfmodel, clfmodel: Passed through unchanged to
            ``finaldataset``.
        budget: Maximum total ``discount_price`` of the returned products.
        max_items: Maximum number of products to return. Defaults to 10.

    Returns:
        A ``(product_ids, total_cost)`` tuple: the selected product ids in
        rank order and the sum of their prices.
    """
    all_recs = finaldataset(df, dataset, user_id, cbfmodel, clfmodel)

    # Each model's mean score acts as the weight of that model.
    clf_avg_rating = np.mean(all_recs['clf_ratings'])
    cbf_avg_rating = np.mean(all_recs['cbf_ratings'])
    all_recs['weighted_avg'] = (
        all_recs['clf_ratings'] * clf_avg_rating
        + all_recs['cbf_ratings'] * cbf_avg_rating
    ) / 2

    # Stable descending sort; rows whose blended score is NaN (a product
    # missing one of the two ratings) go last instead of scrambling the order
    # the way a plain list sort on NaN keys can.
    ranked = all_recs.sort_values('weighted_avg', ascending=False, kind='mergesort')

    # Read the columns separately so product ids keep their own type rather
    # than being coerced to float by a mixed numeric array.
    ranked_ids = ranked['product_id'].tolist()
    ranked_prices = ranked['discount_price'].tolist()

    filtered_recommendations = []
    total_cost = 0
    for product, price in zip(ranked_ids, ranked_prices):
        if len(filtered_recommendations) >= max_items:
            break
        # A NaN price makes this comparison False, so such rows are skipped.
        if total_cost + price <= budget:
            filtered_recommendations.append(product)
            total_cost += price

    return filtered_recommendations, total_cost



# JSON

In [None]:
from flask import Flask, request, jsonify

# Flask application object on which the route below is registered.
app = Flask(__name__)

# Cache filled on first use so the CSV and the two Keras models are read from
# disk once per process instead of on every request. Restart the process to
# pick up changed files.
_RESOURCES = {}


def _load_resources():
    """Load the dataset and both models on first use and reuse them afterwards."""
    if not _RESOURCES:
        # Load the transaction dataset
        df = pd.read_csv('/content/drive/MyDrive/final_dataset.csv')
        df = df.drop(["Unnamed: 0"], axis=1)

        # Preprocess the dataset (rescale numeric columns, label-encode the category)
        dataset = df.copy()
        scaled_columns = ["actual_price", "discount_price", "ratings", "add_to_cart_order"]
        dataset[scaled_columns] = MinMaxScaler().fit_transform(dataset[scaled_columns])
        dataset['category_id'] = LabelEncoder().fit_transform(dataset['category'])

        # Load the content-based and collaborative filtering models
        cbf_model = load_model('/content/drive/MyDrive/cbf_model.h5')
        clf_model = load_model('/content/drive/MyDrive/clf_model.h5')

        # Publish everything in one step so a failed load leaves the cache empty.
        _RESOURCES.update(df=df, dataset=dataset, cbf_model=cbf_model, clf_model=clf_model)
    return _RESOURCES


# Add an API endpoint for recommendations
@app.route('/recommendations', methods=['POST'])
def get_recommendations():
    """Return hybrid recommendations for the user and budget given in the JSON body.

    Expects a JSON object with ``user_id`` and a numeric ``budget``. Responds
    with the JSON form of ``hybrid_recommend``'s result (the recommended
    product ids and their total cost), or with a JSON ``error`` message and
    status 400 (malformed request) or 404 (user not in the dataset).
    """
    # silent=True yields None for a missing/invalid JSON body instead of raising.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'user_id' not in payload or 'budget' not in payload:
        return jsonify({'error': "JSON body must contain 'user_id' and 'budget'"}), 400

    user_id = payload['user_id']
    budget = payload['budget']
    if isinstance(user_id, (list, dict)) or user_id is None:
        return jsonify({'error': "'user_id' must be a single value"}), 400
    # bool is a subclass of int, so reject it explicitly.
    if isinstance(budget, bool) or not isinstance(budget, (int, float)):
        return jsonify({'error': "'budget' must be a number"}), 400

    resources = _load_resources()

    # An unknown user would otherwise fail with a KeyError deep inside the models' id mapping.
    if not resources['dataset']['user_id'].isin([user_id]).any():
        return jsonify({'error': 'unknown user_id'}), 404

    # Get the hybrid recommendations using collaborative filtering and content-based models
    recommendations = hybrid_recommend(
        resources['df'],
        resources['dataset'],
        user_id,
        resources['cbf_model'],
        resources['clf_model'],
        budget,
    )

    # Return the recommendations as a JSON response
    return jsonify(recommendations)

# Start Flask's built-in server only when this file is executed directly,
# not when it is imported.
if __name__ == '__main__':
    app.run()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
