## Load Necessary Packages

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx

## Import Dataset Online_Retail


In [None]:
# Load the Online_Retail workbook into a DataFrame.
df = pd.read_excel("C:\\Users\\Rog\\Downloads\\Online_Retail.xlsx")

# Parse InvoiceDate as datetime, then store its calendar month in a new
# 'Month' column. Only the month number is kept, so the year is not part
# of the filter below.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
df['Month'] = df['InvoiceDate'].dt.month

# Keep only the transactions dated February through June (both inclusive).
data = df[df['Month'].between(2, 6)]

# Drop every row that has a missing value in any column.
data_cleaned = data.dropna()

## Tạo ma trận khách hàng - sản phẩm

In [20]:
# Build the customer-by-product matrix: one row per CustomerID, one column
# per StockCode, cells holding the aggregated Quantity. No aggfunc is
# passed, so pandas' default aggregation (mean) is used; customer/product
# pairs with no purchases are filled with 0.
customer_item_matrix = data_cleaned.pivot_table(
    values="Quantity",
    index="CustomerID",
    columns="StockCode",
    fill_value=0,
)

# Compute the pairwise cosine similarity between products. The matrix is
# transposed so that each product becomes a row vector over customers.
# NOTE(review): quantities are used as-is, so negative quantities (if any
# are present in the data) can yield negative similarities.
item_item_sim_matrix = pd.DataFrame(
    cosine_similarity(customer_item_matrix.transpose()),
    index=customer_item_matrix.columns,
    columns=customer_item_matrix.columns,
)

item_item_sim_matrix


StockCode,10002,10080,10120,10125,10133,10135,11001,15030,15034,15036,...,90214P,90214R,90214V,90214Y,BANK CHARGES,C2,D,M,PADS,POST
StockCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10002,1.000000,0.0,0.004667,0.701142,0.246778,0.000000,0.022515,0.000000,0.000134,0.000851,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.000058,0.0,-0.019462
10080,0.000000,1.0,0.000000,0.000000,0.000000,0.013975,0.000000,0.000000,0.019202,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
10120,0.004667,0.0,1.000000,0.004081,0.000000,0.000000,0.000000,0.000000,0.003290,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
10125,0.701142,0.0,0.004081,1.000000,0.027345,0.010777,0.000000,0.000000,0.000108,0.000085,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.000302,0.001312,0.0,-0.007800
10133,0.246778,0.0,0.000000,0.027345,1.000000,0.036062,0.009675,0.019398,0.006651,0.000320,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000168,0.0,0.000976
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C2,0.000000,0.0,0.000000,0.000000,0.000000,0.067847,0.054099,0.000000,0.000000,0.007155,...,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.0,0.010912
D,0.000000,0.0,0.000000,-0.000302,0.000000,0.000000,0.000000,0.000000,0.000000,-0.000322,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.007795,0.0,0.000059
M,-0.000058,0.0,0.000000,0.001312,0.000168,0.000012,0.002353,0.001139,0.000800,0.000012,...,0.000727,0.000727,0.000727,0.000727,0.000514,0.000000,0.007795,1.000000,0.0,0.000229
PADS,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0,0.000000


## Giả sử khách hàng mua một sản phẩm với mã stockcode là 22708

### Tìm ra một số sản phẩm mà khách hàng đó có khả năng cao sẽ mua
### Tìm ra 10 sản phẩm giống nhất với sản phẩm có stock code 22708 (danh sách bao gồm cả chính sản phẩm 22708)

In [26]:
# Take the similarity row for stock code 22708 and order it from the most
# similar product to the least similar one.
ranked_by_similarity = item_item_sim_matrix.loc[22708].sort_values(ascending=False)

# Keep the stock codes of the first 10 entries. The product's similarity
# with itself is part of this row, so 22708 is expected to appear here too.
top_10_similar_items_22708 = list(ranked_by_similarity.head(10).index)
top_10_similar_items_22708

[22708, 22308, '85071C', 21902, 23176, 21899, 22981, 21900, 22615, 23242]

### Mô tả 10 sản phẩm giống với sản phẩm có stock code 22708

In [27]:
# Select the rows of the full dataset whose StockCode is in the top-10 list.
is_similar_item = df['StockCode'].isin(top_10_similar_items_22708)

# Show each distinct (StockCode, Description) pair, indexed by StockCode and
# ordered like the similarity ranking. A stock code that carries more than
# one distinct description appears once per description.
(
    df.loc[is_similar_item, ['StockCode', 'Description']]
    .drop_duplicates()
    .set_index('StockCode')
    .loc[top_10_similar_items_22708]
)

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
22708,WRAP DOLLY GIRL
22308,TEA COSY BLUE STRIPE
85071C,"CHARLIE+LOLA""EXTREMELY BUSY"" SIGN"
21902,"KEY FOB , FRONT DOOR"
23176,ABC TREASURE BOOK BOX
21899,"KEY FOB , GARAGE DESIGN"
21899,GARAGE KEY FOB
22981,PANTRY APPLE CORER
21900,"KEY FOB , SHED"
22615,PACK OF 12 CIRCUS PARADE TISSUES
