In [1]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import KBinsDiscretizer
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas / scikit-learn warnings
# raised by later cells; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
# Fix the global NumPy seed so the synthetic table is reproducible.
np.random.seed(42)

# Number of rows in the synthetic product table.
num_rows = 2000

# ProductID is a plain 1..num_rows counter. Every other column is drawn
# uniformly from the integer range [low, high_exclusive). The draws happen in
# the listed order, which matters because they share one seeded random stream.
data = {
    'ProductID': np.arange(1, num_rows + 1),
    **{
        column: np.random.randint(low, high_exclusive, size=num_rows)
        for column, low, high_exclusive in (
            ('Revenue', 500, 2501),
            ('Amount', 200, 1001),
            ('Cashback', 20, 101),
            ('Reward', 3, 17),
        )
    },
}

df = pd.DataFrame(data)

In [3]:
# Work on an independent deep copy so the generated frame `df` stays untouched.
df1 = df.copy(deep=True)

In [4]:
# Display the working copy of the generated data.
df1

Unnamed: 0,ProductID,Revenue,Amount,Cashback,Reward
0,1,1626,887,88,12
1,2,1959,697,27,4
2,3,1360,926,44,10
3,4,1794,676,65,12
4,5,1630,793,49,11
...,...,...,...,...,...
1995,1996,834,308,25,14
1996,1997,1128,467,75,11
1997,1998,2042,644,23,4
1998,1999,636,439,42,10


In [5]:
# Count the distinct values found in each column.
df1.nunique(axis=0)

ProductID    2000
Revenue      1237
Amount        728
Cashback       81
Reward         14
dtype: int64

In [6]:

# Discretize the numeric feature columns into equal-width ordinal bins.
# The discretizer is fitted on the untouched `df` rather than on `df1`, so
# re-running this cell never refits it on values that were already binned
# (which would break discretizing raw evidence values afterwards). On a first
# run the result is the same, because `df1` starts out as a copy of `df`.
continuous_columns = ['Revenue', 'Amount', 'Cashback', 'Reward']
num_bins = 10

discretizer = KBinsDiscretizer(n_bins=num_bins, encode='ordinal', strategy='uniform')
df1[continuous_columns] = discretizer.fit_transform(df[continuous_columns])

# Prepare the data for the Naive Bayes classifier: the binned columns are the
# features and ProductID is the class label.
X = df1[continuous_columns]
y = df1['ProductID']

In [7]:
# Fit a multinomial Naive Bayes model with default settings on the feature
# frame X, using y as the class labels.
nb = MultinomialNB()
nb.fit(X=X, y=y)

In [8]:
# Raw (un-binned) feature values describing the query.
evidence_continuous = {
    'Revenue': 1270,
    'Amount': 1000,
    'Cashback': 50,
    'Reward': 11
}

# Discretize the evidence values with the already-fitted discretizer. The
# columns are selected through `continuous_columns` so they always arrive in
# the order the discretizer was fitted with, however the dict above is
# written; a missing feature raises a KeyError right here.
evidence_df = pd.DataFrame([evidence_continuous])[continuous_columns]
evidence_discretized = discretizer.transform(evidence_df)

In [9]:
# Display the evidence after it has passed through discretizer.transform.
evidence_discretized

array([[3., 9., 3., 6.]])

In [10]:
# Class probabilities for the single evidence row. The binned evidence is
# wrapped in a DataFrame carrying the training column names because `nb` was
# fitted on a DataFrame; passing a bare array makes scikit-learn warn that the
# input has no valid feature names. predict_proba returns one row per sample,
# hence the [0].
probs = nb.predict_proba(pd.DataFrame(evidence_discretized, columns=X.columns))[0]

In [11]:
# Number of probabilities returned (one entry per class).
probs.shape[0]

2000

In [12]:
# Pair every probability with the product it belongs to. predict_proba orders
# its columns by `nb.classes_`, so the IDs are taken from there instead of
# relying on the row order of `df1`. Building a fresh frame also avoids
# assigning a new column onto a slice of `df1`, which triggers pandas'
# SettingWithCopyWarning.
final_data = pd.DataFrame({'ProductID': nb.classes_, 'Probability': probs})

In [13]:
# Rank products from most to least probable and renumber the rows from 0.
temp = final_data.sort_values('Probability', ascending=False, ignore_index=True)

In [14]:
# Display the products sorted by descending probability.
temp

Unnamed: 0,ProductID,Probability
0,667,5.501836e-03
1,1225,5.410338e-03
2,595,5.356741e-03
3,37,5.356741e-03
4,325,5.325845e-03
...,...,...
1995,1032,2.379161e-12
1996,898,1.320883e-12
1997,1989,3.153661e-13
1998,1412,1.628607e-13


In [15]:
# Rows of `df1` for the five highest-ranked products (kept in df1's row order).
df1.loc[df1['ProductID'].isin(temp['ProductID'].head())]

Unnamed: 0,ProductID,Revenue,Amount,Cashback,Reward
36,37,3.0,9.0,3.0,6.0
324,325,3.0,9.0,2.0,6.0
594,595,3.0,9.0,3.0,6.0
666,667,2.0,7.0,2.0,5.0
1224,1225,2.0,7.0,2.0,4.0


In [16]:
# Rows of the generated frame `df` for the same five highest-ranked products.
df.loc[df['ProductID'].isin(temp['ProductID'].head())]

Unnamed: 0,ProductID,Revenue,Amount,Cashback,Reward
36,37,1247,938,46,11
324,325,1108,985,40,11
594,595,1155,996,47,11
666,667,1050,801,39,10
1224,1225,1025,822,37,9
