In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from scipy.sparse.linalg import svds
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Concatenate
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [2]:
bills = pd.read_csv('bills.csv')
bills

Unnamed: 0,MaKhachHang,MaSanPham,SoLuong,NgayTao
0,KH0008,SP0034,6,24/06/2021
1,KH0003,SP0009,3,01/10/2022
2,KH0010,SP0005,8,07/12/2022
3,KH0006,SP0021,8,22/05/2021
4,KH0008,SP0028,6,04/07/2024
...,...,...,...,...
995,KH0006,SP0016,8,02/03/2020
996,KH0006,SP0009,4,07/12/2022
997,KH0008,SP0022,4,31/03/2021
998,KH0005,SP0028,10,21/07/2023


In [3]:
evaluates = pd.read_csv('evaluates.csv')
evaluates

Unnamed: 0,MaKhachHang,MaSanPham,DiemDanhGia
0,KH0001,SP0012,2
1,KH0004,SP0048,1
2,KH0008,SP0037,3
3,KH0007,SP0022,2
4,KH0007,SP0049,4
...,...,...,...
995,KH0008,SP0045,4
996,KH0004,SP0029,3
997,KH0004,SP0017,2
998,KH0002,SP0043,2


In [4]:
def add_time_weight(data, lambda_decay=0.01):
    data['NgayTao'] = pd.to_datetime(data['NgayTao'], format='%d/%m/%Y')
    data['DeltaT'] = (data['NgayTao'].dt.dayofyear - 1) % 365
    data['TimeWeight'] = np.exp(-lambda_decay * data['DeltaT'])
    data['AdjustedSoLuong'] = data['SoLuong'] * data['TimeWeight']
    return data

bills = add_time_weight(bills)

bill_matrix = bills.pivot_table(index='MaKhachHang', columns='MaSanPham', values='AdjustedSoLuong', fill_value=0)
evaluate_matrix = evaluates.pivot_table(index='MaKhachHang', columns='MaSanPham', values='DiemDanhGia', fill_value=0)

total_bill = bills.groupby('MaKhachHang')['SoLuong'].sum()
total_evaluate = evaluates.groupby('MaKhachHang')['DiemDanhGia'].sum()

weights_bill = total_bill / (total_bill + total_evaluate).fillna(0.5)
weights_evaluate = 1 - weights_bill

combined_matrix = bill_matrix.mul(weights_bill, axis=0).fillna(0) + evaluate_matrix.mul(weights_evaluate, axis=0).fillna(0)

In [5]:
combined_matrix

MaSanPham,SP0001,SP0002,SP0003,SP0004,SP0005,SP0006,SP0007,SP0008,SP0009,SP0010,...,SP0041,SP0042,SP0043,SP0044,SP0045,SP0046,SP0047,SP0048,SP0049,SP0050
MaKhachHang,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
KH0001,2.379931,2.148691,2.10307,4.376578,2.283792,1.377761,1.199484,2.470321,3.244189,1.259283,...,1.765239,2.805909,2.761173,1.929224,1.948651,2.421134,1.133046,1.650817,1.343176,2.105324
KH0002,1.274874,3.274596,2.837894,1.635945,3.030434,1.515806,0.981567,2.039739,1.646816,3.028433,...,2.275134,1.996392,1.527793,0.683072,2.351119,1.043411,1.592667,1.176225,0.23824,1.876712
KH0003,5.944983,0.203099,2.435216,2.092789,1.59168,2.068737,2.283752,0.949441,1.415256,1.324126,...,0.448059,6.228161,0.97001,1.542354,1.801661,1.962615,0.92357,1.199314,1.098197,3.0
KH0004,1.934807,1.082322,3.362391,3.241807,1.396545,0.852014,0.97875,2.029196,3.399736,5.041785,...,1.956192,1.816831,2.496971,2.403143,0.337305,2.175,1.935638,1.148202,2.098256,0.435
KH0005,0.110241,1.329065,1.553422,0.425222,1.080578,1.262797,2.168491,0.627273,2.731618,1.566987,...,1.382368,1.92246,0.945067,1.956424,0.627273,0.210612,0.791693,3.571015,1.241271,1.324051
KH0006,1.793764,1.149781,4.166483,1.361004,5.294324,1.115345,0.705706,0.675063,2.172177,2.444713,...,1.73412,1.099958,0.907336,3.241013,1.29594,0.907336,0.605643,1.293871,0.302445,3.605525
KH0007,3.068035,1.329004,1.745029,2.420566,1.497847,3.987201,1.329004,1.685693,2.814763,1.216376,...,1.029277,1.521476,1.356996,1.208186,1.884794,1.927155,2.963208,0.724911,2.518806,4.983171
KH0008,1.099859,3.48936,0.245342,1.620655,2.654384,0.578687,3.72767,0.935199,1.44681,1.50933,...,4.974827,1.488242,4.076494,3.096791,2.789366,1.109533,4.129258,2.273131,0.782544,1.574565
KH0009,2.503341,0.905077,1.982514,1.13715,2.523689,0.0,1.77684,1.470961,0.362031,1.550963,...,0.905077,2.529611,0.384191,2.633239,1.75966,2.058435,1.256682,0.419906,2.022753,2.360325
KH0010,1.072841,0.0,3.297274,2.282264,1.310227,2.407189,1.962883,5.21211,3.69821,2.449738,...,2.085012,0.342304,0.974265,1.388162,1.863911,1.650037,1.792266,1.088825,1.569044,0.974265


In [6]:
user_item_matrix = combined_matrix.fillna(0).values

In [7]:
user_item_matrix

array([[2.3799307 , 2.14869139, 2.10306973, 4.37657822, 2.28379249,
        1.37776144, 1.19948382, 2.47032123, 3.24418925, 1.25928297,
        1.58694878, 1.06107306, 0.99347425, 1.38645216, 2.67363545,
        1.543379  , 1.15753425, 1.44264769, 1.35787775, 1.68927708,
        0.16339293, 1.63751502, 1.15753425, 0.87306256, 0.33392078,
        3.76302217, 1.76354039, 3.17804879, 1.81956383, 1.00046913,
        2.85178038, 3.76839249, 1.13698979, 1.8450895 , 2.87007541,
        1.25314248, 4.84471457, 2.22335265, 1.59739592, 1.66197947,
        1.76523908, 2.80590925, 2.76117337, 1.92922374, 1.94865134,
        2.42113401, 1.13304556, 1.6508175 , 1.34317618, 2.10532402],
       [1.27487418, 3.27459645, 2.83789373, 1.6359447 , 3.03043399,
        1.51580615, 0.98156682, 2.0397392 , 1.64681597, 3.02843292,
        2.36196179, 1.30875576, 1.8776423 , 2.28372607, 1.03533123,
        1.68760488, 1.63941821, 1.01760618, 1.83386812, 2.7638391 ,
        2.43884497, 2.08258328, 1.58032324, 2.0

In [8]:
# Bước 1: Áp dụng SVD
k_max = min(user_item_matrix.shape) -1
k = min(50, k_max)

u, sigma, vt = svds(user_item_matrix, k=k)
sigma_matrix = np.diag(sigma)
user_factors = np.dot(u, sigma_matrix)
item_factors = np.dot(sigma_matrix, vt).T

In [9]:
# Bước 2: Xây dựng Mạng Nơ-ron cải tiến
user_input = Input(shape=(user_factors.shape[1],), name='user_input')
item_input = Input(shape=(item_factors.shape[1],), name='item_input')

user_hidden = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(user_input)
user_hidden = Dropout(0.3)(user_hidden)
user_hidden = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(user_hidden)

item_hidden = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(item_input)
item_hidden = Dropout(0.3)(item_hidden)
item_hidden = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(item_hidden)

combined = Concatenate()([user_hidden, item_hidden])

hidden = Dense(64, activation='relu')(combined)
hidden = Dropout(0.3)(hidden)
hidden = Dense(32, activation='relu')(hidden)
hidden = Dropout(0.3)(hidden)
hidden = Dense(16, activation='relu')(hidden)
output = Dense(1, activation='linear')(hidden)

model = Model(inputs=[user_input, item_input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [10]:
model.summary()

In [11]:
# Chuẩn bị dữ liệu đầu vào
user_ids, item_ids = user_item_matrix.nonzero()
train_user_factors = user_factors[user_ids]
train_item_factors = item_factors[item_ids]
train_ratings = user_item_matrix[user_ids, item_ids]

In [34]:
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model.fit(
    [train_user_factors, train_item_factors],
    train_ratings,
    epochs=100,
    batch_size=128,
    validation_split=0.2,
    shuffle=True,
    callbacks=[early_stopping]
)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 80ms/step - accuracy: 0.0000e+00 - loss: 0.5995 - val_accuracy: 0.0000e+00 - val_loss: 1.0818
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 0.6332 - val_accuracy: 0.0000e+00 - val_loss: 1.0721
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.0000e+00 - loss: 0.6138 - val_accuracy: 0.0000e+00 - val_loss: 1.0576
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 0.6102 - val_accuracy: 0.0000e+00 - val_loss: 1.0505
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 0.6340 - val_accuracy: 0.0000e+00 - val_loss: 1.0403
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 0.5531 - val_accuracy: 0.0000e+00 - val_loss:

<keras.src.callbacks.history.History at 0x7a066fb46380>

In [20]:
user_factors

array([[ 1.86124474e+00, -1.26685423e+00, -2.57495845e+00,
         2.46211952e+00,  1.00335465e+00,  9.91548332e-01,
        -1.68657716e+00, -2.14280903e+00,  1.41563955e+01],
       [-1.97157710e+00,  3.71054581e+00, -1.44126454e+00,
         1.73652014e+00, -1.69650371e+00, -1.83816012e+00,
         2.24934679e+00,  1.68564433e-01,  1.17085219e+01],
       [-6.09519642e-01,  1.49184497e+00,  9.08404107e-01,
         1.12171579e+00,  3.17169422e+00,  1.00729228e+00,
        -5.72558368e+00, -4.07662268e+00,  1.34704068e+01],
       [-2.30389761e+00, -2.66103127e+00,  2.85400801e+00,
         3.16075373e+00,  3.89327936e-01, -6.80444919e-03,
         2.63621577e+00,  3.34490190e-02,  1.29969835e+01],
       [-9.27617752e-01,  1.26351729e-01, -5.75302070e-01,
        -3.97836359e+00,  4.94016314e+00,  1.79829561e+00,
         2.52758083e+00,  2.51244946e+00,  1.29243094e+01],
       [ 1.23031505e+00, -7.89690259e-01, -1.65997145e+00,
        -1.81796904e-01,  1.17072062e+00, -5.884203

In [21]:
def get_top_n_recommendations(user_id, all_user_ids, all_product_ids, user_factors, item_factors, model, top_n=5):
    if user_id not in all_user_ids:
        print(f"User {user_id} not found in the data.")
        return []

    # Get the index of the user in the user factors matrix
    user_index = all_user_ids.index(user_id)
    user_vector = user_factors[user_index].reshape(1, -1)  # Reshape to 2D

    # Predict ratings for all products and keep only the top-N
    product_ratings = []
    for product_index, product_id in enumerate(all_product_ids):
        # Skip products the user has already interacted with
        if user_item_matrix[user_index, product_index] > 0:
            continue

        # Get the item factor vector and predict the rating
        product_vector = item_factors[product_index].reshape(1, -1)  # Reshape to 2D
        predicted_rating = model.predict([user_vector, product_vector])[0][0]

        # Collect the product ID and predicted rating
        product_ratings.append((product_id, predicted_rating))

    # Sort by predicted rating in descending order and select the top-N products
    product_ratings.sort(key=lambda x: x[1], reverse=True)
    top_recommendations = [product_id for product_id, rating in product_ratings[:top_n]]

    return top_recommendations

In [35]:
user_id = 'KH0009'
top_n = 5
all_user_ids = list(bill_matrix.index)
all_product_ids = list(bill_matrix.columns)

recommendations = get_top_n_recommendations(user_id, all_user_ids, all_product_ids, user_factors, item_factors, model, top_n)
recommendations

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


['SP0006', 'SP0014']

In [36]:
model.save("recommendation_model.h5")



In [37]:
import numpy as np
import pickle

# Lưu user_factors, item_factors và ánh xạ ID khách hàng, sản phẩm
np.save("user_factors.npy", user_factors)
np.save("item_factors.npy", item_factors)

# Tạo và lưu ánh xạ ID
with open("user_index_mapping.pkl", "wb") as f1, open("item_index_mapping.pkl", "wb") as f2:
    pickle.dump({user_id: index for index, user_id in enumerate(bill_matrix.index)}, f1)
    pickle.dump({item_id: index for index, item_id in enumerate(bill_matrix.columns)}, f2)
