In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
# from underthesea import word_tokenize, pos_tag, sent_tokenize
from pyvi.ViTokenizer import tokenize
from pyvi import ViTokenizer
from underthesea import word_tokenize
import warnings
from gensim import corpora, models, similarities
from tqdm import tqdm

import re

warnings.filterwarnings("ignore")

In [2]:
df = pd.read_excel('data_motobikes.xlsx')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7208 entries, 0 to 7207
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   7208 non-null   int64 
 1   Ti√™u ƒë·ªÅ              7207 non-null   object
 2   Gi√°                  7206 non-null   object
 3   Kho·∫£ng gi√° min       7006 non-null   object
 4   Kho·∫£ng gi√° max       7011 non-null   object
 5   ƒê·ªãa ch·ªâ              7167 non-null   object
 6   M√¥ t·∫£ chi ti·∫øt       7208 non-null   object
 7   Th∆∞∆°ng hi·ªáu          7205 non-null   object
 8   D√≤ng xe              7205 non-null   object
 9   NƒÉm ƒëƒÉng k√Ω          7208 non-null   object
 10  S·ªë Km ƒë√£ ƒëi          7208 non-null   int64 
 11  T√¨nh tr·∫°ng           7208 non-null   object
 12  Lo·∫°i xe              7208 non-null   object
 13  Dung t√≠ch xe         7208 non-null   object
 14  Xu·∫•t x·ª©              7208 non-null   object
 15  Ch√≠nh s√°

In [4]:
df.columns

Index(['id', 'Ti√™u ƒë·ªÅ', 'Gi√°', 'Kho·∫£ng gi√° min', 'Kho·∫£ng gi√° max', 'ƒê·ªãa ch·ªâ',
       'M√¥ t·∫£ chi ti·∫øt', 'Th∆∞∆°ng hi·ªáu', 'D√≤ng xe', 'NƒÉm ƒëƒÉng k√Ω',
       'S·ªë Km ƒë√£ ƒëi', 'T√¨nh tr·∫°ng', 'Lo·∫°i xe', 'Dung t√≠ch xe', 'Xu·∫•t x·ª©',
       'Ch√≠nh s√°ch b·∫£o h√†nh', 'Tr·ªçng l∆∞·ª£ng', 'Href'],
      dtype='object')

In [5]:
data = df[['id', 'Ti√™u ƒë·ªÅ','M√¥ t·∫£ chi ti·∫øt']]
data.head()

Unnamed: 0,id,Ti√™u ƒë·ªÅ,M√¥ t·∫£ chi ti·∫øt
0,1,"B√°n Vespa Sprint 125cc 2024 xanh d∆∞∆°ng, xe ƒë·∫πp...",B√°n xe #Vespa Sprint 125cc. Mua m·ªõi t·∫°i #Topco...
1,2,üî•üî•SH 150i Th·∫Øng ABS 2019 BSTP Ch√≠nh Ch·ªß,"_B√°n SH 150i Th·∫Øng ABS 2019 X√°m B·∫°c, √öp Team X..."
2,3,CC Vision Th·ªÉ Thao 2023 ƒêen+b·ªô ƒë√®n Demi audi A7,Ch√≠nh ch·ªß b√°n Vision phi√™n b·∫£n Th·ªÉ Thao 2023 ƒê...
3,4,Vespa Sprint 2019 -125- ƒêen ƒêoÃâ Sport -CHIÃÅNH ...,XE CAÃÅ NH√ÇN BAÃÅN - XE DO EM ƒê∆ØÃÅNG T√äN CHIÃÅNH C...
4,5,Xe tay ga Yamaha Latte 125 ‚Äì ƒêƒÉng k√Ω 2021,üõµ Th√¥ng tin xe:\n\nD√≤ng xe tay ga cao c·∫•p Yama...


In [6]:
data['Content'] = data['M√¥ t·∫£ chi ti·∫øt'].apply(lambda x: ' '.join(x.split()[:200]))
data.head()

Unnamed: 0,id,Ti√™u ƒë·ªÅ,M√¥ t·∫£ chi ti·∫øt,Content
0,1,"B√°n Vespa Sprint 125cc 2024 xanh d∆∞∆°ng, xe ƒë·∫πp...",B√°n xe #Vespa Sprint 125cc. Mua m·ªõi t·∫°i #Topco...,B√°n xe #Vespa Sprint 125cc. Mua m·ªõi t·∫°i #Topco...
1,2,üî•üî•SH 150i Th·∫Øng ABS 2019 BSTP Ch√≠nh Ch·ªß,"_B√°n SH 150i Th·∫Øng ABS 2019 X√°m B·∫°c, √öp Team X...","_B√°n SH 150i Th·∫Øng ABS 2019 X√°m B·∫°c, √öp Team X..."
2,3,CC Vision Th·ªÉ Thao 2023 ƒêen+b·ªô ƒë√®n Demi audi A7,Ch√≠nh ch·ªß b√°n Vision phi√™n b·∫£n Th·ªÉ Thao 2023 ƒê...,Ch√≠nh ch·ªß b√°n Vision phi√™n b·∫£n Th·ªÉ Thao 2023 ƒê...
3,4,Vespa Sprint 2019 -125- ƒêen ƒêoÃâ Sport -CHIÃÅNH ...,XE CAÃÅ NH√ÇN BAÃÅN - XE DO EM ƒê∆ØÃÅNG T√äN CHIÃÅNH C...,XE CAÃÅ NH√ÇN BAÃÅN - XE DO EM ƒê∆ØÃÅNG T√äN CHIÃÅNH C...
4,5,Xe tay ga Yamaha Latte 125 ‚Äì ƒêƒÉng k√Ω 2021,üõµ Th√¥ng tin xe:\n\nD√≤ng xe tay ga cao c·∫•p Yama...,üõµ Th√¥ng tin xe: D√≤ng xe tay ga cao c·∫•p Yamaha ...


In [7]:
data.columns

Index(['id', 'Ti√™u ƒë·ªÅ', 'M√¥ t·∫£ chi ti·∫øt', 'Content'], dtype='object')

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7208 entries, 0 to 7207
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              7208 non-null   int64 
 1   Ti√™u ƒë·ªÅ         7207 non-null   object
 2   M√¥ t·∫£ chi ti·∫øt  7208 non-null   object
 3   Content         7208 non-null   object
dtypes: int64(1), object(3)
memory usage: 225.4+ KB


In [9]:
stop_word_file = 'files/vietnamese-stopwords.txt'
emojicon_file = 'files/emojicon.txt'
teencode_file = 'files/teencode.txt'


In [10]:
with open(stop_word_file, 'r', encoding='utf-8') as f:
    stopwords = set([w.strip() for w in f.readlines() if w.strip()])

with open(emojicon_file, 'r', encoding='utf-8') as f:
    emojicons = [w.strip() for w in f.readlines() if w.strip()]

with open(teencode_file, 'r', encoding='utf-8') as f:
    teencode_map = {}
    for line in f:
        parts = line.strip().split()
        if len(parts) >= 2:
            teencode_map[parts[0]] = " ".join(parts[1:])


special_tokens = ['', ' ', ',', '.', '...', '-', ':', ';', '?', '%', '(', ')', '+', '/', "'", '&', '#', '*', '!', '"', '_', '=', '[', ']', '{', '}', '~', '`', '|', '\\']


print(f"Stopwords: {len(stopwords)}, Emojis: {len(emojicons)}, Teencode: {len(teencode_map)}")


Stopwords: 1957, Emojis: 67, Teencode: 416


In [11]:
# C√°c h√†m x·ª≠ l√Ω
def remove_emojis(text):
    for emo in emojicons:
        text = text.replace(emo, ' ')
    return text

def normalize_teencode(text):
    for key, val in teencode_map.items():
        text = re.sub(rf'\b{re.escape(key)}\b', val, text)
    return text

def remove_special_chars(text):
    text = re.sub(r'[^\w\s]', ' ', text)  # lo·∫°i k√Ω t·ª± ƒë·∫∑c bi·ªát
    text = re.sub(r'\s+', ' ', text).strip()  # lo·∫°i kho·∫£ng tr·∫Øng th·ª´a
    return text

# -----------------------
# 4. T√ÅCH STOPWORD RI√äNG
# -----------------------
def remove_stopwords(text):
    tokens = word_tokenize(text, format="text").split()
    tokens = [t for t in tokens if t not in stopwords]
    return ' '.join(tokens)

# -----------------------
# 5. CHU·∫®N H√ìA T·ªîNG H·ª¢P
# -----------------------
def clean_text(text):
    text = str(text).lower()
    text = remove_emojis(text)
    text = normalize_teencode(text)
    text = remove_special_chars(text)
    text = remove_stopwords(text)
    return text

In [12]:
data['clean_text'] = data['Content'].apply(clean_text)
data['clean_text']

0       xe vespa sprint 125 cc mua topcom 01 2024 xe c...
1       _b√°n sh 150 th·∫Øng abs 2019 x√°m b·∫°c √∫p team x√°m...
2       ch·ªß vision phi√™n_b·∫£n th·ªÉ_thao 2023 ƒëen xe mua ...
3       xe ca_nh√¢n n xe ƒë∆∞ chi nh chu_chi ti√™ t hi_nh ...
4       th√¥ng_tin xe d√≤ng xe ga cao_c·∫•p yamaha latte 1...
                              ...                        
7203    xe mua ch·ªß 2024 ch·∫°y 1000 km xe bao zin nhu_c·∫ß...
7204    mua xe lead 12 tr 1 gi√°o gi√°o_vi√™n thay l·ªëp n·ªì...
7205    xe yamaha janus 2017 fi smart key m√†u xanh ch·ªß...
7206    xe c≈© ƒë·∫πp li·ªÅn l·∫°c xe ch·∫•t zin xe ch·ªß_k√≠ uy_qu...
7207    ƒë·ªïi xe ducati_scrambler s·∫£n_xu·∫•t 2019 ƒëk 2020 ...
Name: clean_text, Length: 7208, dtype: object

In [13]:
data.head()

Unnamed: 0,id,Ti√™u ƒë·ªÅ,M√¥ t·∫£ chi ti·∫øt,Content,clean_text
0,1,"B√°n Vespa Sprint 125cc 2024 xanh d∆∞∆°ng, xe ƒë·∫πp...",B√°n xe #Vespa Sprint 125cc. Mua m·ªõi t·∫°i #Topco...,B√°n xe #Vespa Sprint 125cc. Mua m·ªõi t·∫°i #Topco...,xe vespa sprint 125 cc mua topcom 01 2024 xe c...
1,2,üî•üî•SH 150i Th·∫Øng ABS 2019 BSTP Ch√≠nh Ch·ªß,"_B√°n SH 150i Th·∫Øng ABS 2019 X√°m B·∫°c, √öp Team X...","_B√°n SH 150i Th·∫Øng ABS 2019 X√°m B·∫°c, √öp Team X...",_b√°n sh 150 th·∫Øng abs 2019 x√°m b·∫°c √∫p team x√°m...
2,3,CC Vision Th·ªÉ Thao 2023 ƒêen+b·ªô ƒë√®n Demi audi A7,Ch√≠nh ch·ªß b√°n Vision phi√™n b·∫£n Th·ªÉ Thao 2023 ƒê...,Ch√≠nh ch·ªß b√°n Vision phi√™n b·∫£n Th·ªÉ Thao 2023 ƒê...,ch·ªß vision phi√™n_b·∫£n th·ªÉ_thao 2023 ƒëen xe mua ...
3,4,Vespa Sprint 2019 -125- ƒêen ƒêoÃâ Sport -CHIÃÅNH ...,XE CAÃÅ NH√ÇN BAÃÅN - XE DO EM ƒê∆ØÃÅNG T√äN CHIÃÅNH C...,XE CAÃÅ NH√ÇN BAÃÅN - XE DO EM ƒê∆ØÃÅNG T√äN CHIÃÅNH C...,xe ca_nh√¢n n xe ƒë∆∞ chi nh chu_chi ti√™ t hi_nh ...
4,5,Xe tay ga Yamaha Latte 125 ‚Äì ƒêƒÉng k√Ω 2021,üõµ Th√¥ng tin xe:\n\nD√≤ng xe tay ga cao c·∫•p Yama...,üõµ Th√¥ng tin xe: D√≤ng xe tay ga cao c·∫•p Yamaha ...,th√¥ng_tin xe d√≤ng xe ga cao_c·∫•p yamaha latte 1...


In [14]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7208 entries, 0 to 7207
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              7208 non-null   int64 
 1   Ti√™u ƒë·ªÅ         7207 non-null   object
 2   M√¥ t·∫£ chi ti·∫øt  7208 non-null   object
 3   Content         7208 non-null   object
 4   clean_text      7208 non-null   object
dtypes: int64(1), object(4)
memory usage: 281.7+ KB


In [15]:
data.columns

Index(['id', 'Ti√™u ƒë·ªÅ', 'M√¥ t·∫£ chi ti·∫øt', 'Content', 'clean_text'], dtype='object')

In [16]:
vectorizer = TfidfVectorizer(
    analyzer='word',
    max_features=8000
)
tfidf_matrix = vectorizer.fit_transform(data['clean_text'])
print("Shape TF-IDF:", tfidf_matrix.shape)

Shape TF-IDF: (7208, 8000)


### Consin Simalarity

In [17]:
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)
print("Cosine similarity matrix shape:", cosine_sim_matrix.shape)

Cosine similarity matrix shape: (7208, 7208)


In [18]:
data_show = pd.DataFrame(cosine_sim_matrix)
data_show

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7198,7199,7200,7201,7202,7203,7204,7205,7206,7207
0,1.000000,0.010010,0.042739,0.010077,0.058899,0.062430,0.117835,0.035624,0.051165,0.078843,...,0.054485,0.048460,0.059432,0.063983,0.029400,0.207776,0.057208,0.074375,0.059106,0.004182
1,0.010010,1.000000,0.026529,0.024690,0.014651,0.004013,0.007642,0.029320,0.018933,0.034610,...,0.036796,0.034389,0.086733,0.007490,0.004468,0.033342,0.058151,0.061670,0.022135,0.034215
2,0.042739,0.026529,1.000000,0.015068,0.062520,0.008429,0.016051,0.057184,0.055128,0.058608,...,0.020041,0.031258,0.023835,0.050278,0.022892,0.087660,0.043359,0.026643,0.038182,0.023762
3,0.010077,0.024690,0.015068,1.000000,0.015735,0.007525,0.041835,0.041549,0.019059,0.038008,...,0.008401,0.035247,0.007807,0.014045,0.008379,0.030416,0.007874,0.015101,0.021854,0.006579
4,0.058899,0.014651,0.062520,0.015735,1.000000,0.023428,0.053589,0.033012,0.023118,0.078809,...,0.022728,0.050014,0.026261,0.057024,0.042388,0.056372,0.022769,0.104887,0.047744,0.027955
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7203,0.207776,0.033342,0.087660,0.030416,0.056372,0.045687,0.079550,0.074161,0.160331,0.107681,...,0.052548,0.072610,0.040834,0.198860,0.062357,1.000000,0.079355,0.045645,0.109901,0.052784
7204,0.057208,0.058151,0.043359,0.007874,0.022769,0.005360,0.010208,0.007938,0.025290,0.059104,...,0.046551,0.012830,0.119726,0.030616,0.005969,0.079355,1.000000,0.084363,0.034469,0.003268
7205,0.074375,0.061670,0.026643,0.015101,0.104887,0.010280,0.019578,0.040752,0.041015,0.105069,...,0.040970,0.047482,0.138863,0.019188,0.149839,0.045645,0.084363,1.000000,0.053000,0.026978
7206,0.059106,0.022135,0.038182,0.021854,0.047744,0.062623,0.023281,0.037695,0.064465,0.099562,...,0.111308,0.034642,0.012683,0.047505,0.013613,0.109901,0.034469,0.053000,1.000000,0.016634


In [19]:
def recommend(item_id: int, top_n: int = 5):
    """
    Recommend similar motorbikes based on cosine similarity.
    Args:
        item_id (int): id ho·∫∑c index c·ªßa xe trong DataFrame
        top_n (int): s·ªë l∆∞·ª£ng g·ª£i √Ω mu·ªën l·∫•y
    Returns:
        DataFrame ch·ª©a c√°c xe t∆∞∆°ng t·ª±
    """
    if item_id not in data.index:
        raise ValueError(f"id {item_id} kh√¥ng t·ªìn t·∫°i trong DataFrame")

    # L·∫•y h√†ng t∆∞∆°ng ·ª©ng trong ma tr·∫≠n cosine
    sim_scores = list(enumerate(cosine_sim_matrix[item_id]))

    # S·∫Øp x·∫øp theo ƒë·ªô t∆∞∆°ng ƒë·ªìng gi·∫£m d·∫ßn, b·ªè ch√≠nh n√≥
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1: top_n + 1]

    # L·∫•y index xe t∆∞∆°ng t·ª±
    similar_indices = [i[0] for i in sim_scores]
    similar_scores = [i[1] for i in sim_scores]

    # T·∫°o DataFrame k·∫øt qu·∫£
    recommendations = data.loc[similar_indices, ['id', 'Ti√™u ƒë·ªÅ', 'Content']].copy()
    recommendations['similarity'] = similar_scores
    return recommendations.reset_index(drop=True)

In [20]:
def recommend_cosine(item_id: int, top_n: int = 5):
    sim_scores = cosine_sim_matrix[item_id]
    sim_scores[item_id] = -1  # b·ªè ch√≠nh n√≥

    similar_idx = sim_scores.argsort()[::-1][:top_n]
    similar_scores = sim_scores[similar_idx]

    results = data.iloc[similar_idx][['id', 'Ti√™u ƒë·ªÅ', 'Content']].copy()
    results['similarity'] = similar_scores
    return results.reset_index(drop=True)

In [21]:
sample_id = 3
print(f"\nXe g·ªëc:\n{data.loc[sample_id, ['id', 'Ti√™u ƒë·ªÅ', 'Content']]}")
print("\nG·ª£i √Ω c√°c xe t∆∞∆°ng t·ª±:")
recommendation = recommend(sample_id, top_n=5)
recommendation


Xe g·ªëc:
id                                                         4
Ti√™u ƒë·ªÅ    Vespa Sprint 2019 -125- ƒêen ƒêoÃâ Sport -CHIÃÅNH ...
Content    XE CAÃÅ NH√ÇN BAÃÅN - XE DO EM ƒê∆ØÃÅNG T√äN CHIÃÅNH C...
Name: 3, dtype: object

G·ª£i √Ω c√°c xe t∆∞∆°ng t·ª±:


Unnamed: 0,id,Ti√™u ƒë·ªÅ,Content,similarity
0,4116,xe nh√† ƒëang x·ª≠ d·ª•ng n√™n c·∫ßn b√°n a,Xe m√†u ƒëen nh√°m √°o m·ªõi l√†m keo trong Xe dan ba...,0.257452
1,6515,tr∆∞·ªõc mua c·ªß ·ªü c·ª≠a h√†ng xe m√°y . bi·ªÉn s·ªë sg q1 .,Xe ko ch√≠nh ch·ªß xe ddag ·ªü c·ªß chi . Nay minh ƒë·ªï...,0.252209
2,3251,Epicupro chiÃÅnh chuÃâ,Xe hoaÃÄng haÃâo ƒëi√™Ã£n n∆∞∆°ÃÅc caÃÅc ki√™Ãâu √¥Ãân ƒëiÃ£n...,0.247112
3,3172,439 ho hoc lam,Xe bi m·∫•t ch√¨a kh√≥a m√°y im xe nh∆∞ h√¨nh b√°n cho...,0.245392
4,6048,SH 150i ABS ƒêEN NHAÃÅM CU·ªêI 2021 CHIÃÅNH CHUÃâ.,üåàSH 150i ABS ƒêEN NHAÃÅM CU·ªêI 2021. ‚ûñ#BS_80508. ...,0.238655


In [22]:
import joblib

# 2Ô∏è L∆∞u cosine matrix v√† vectorizer ƒë·ªÉ t√°i s·ª≠ d·ª•ng
joblib.dump(cosine_sim_matrix, 'cosine_sim_matrix.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')
print("ƒê√£ l∆∞u cosine_sim_matrix.pkl v√† tfidf_vectorizer.pkl")


ƒê√£ l∆∞u cosine_sim_matrix.pkl v√† tfidf_vectorizer.pkl


In [23]:
def load_cosine_data():
    """
    Load cosine similarity matrix v√† vectorizer ƒë√£ l∆∞u tr∆∞·ªõc ƒë√≥.
    """
    cosine_matrix = joblib.load('cosine_sim_matrix.pkl')
    vectorizer_loaded = joblib.load('tfidf_vectorizer.pkl')
    print("ƒê√£ load cosine matrix & vectorizer th√†nh c√¥ng.")
    return cosine_matrix, vectorizer_loaded

# Gensim

In [24]:
content_gem = [[text for text in x.split()] for x in data.clean_text]

In [25]:
len(content_gem)

7208

In [26]:
content_gem[:1]

[['xe',
  'vespa',
  'sprint',
  '125',
  'cc',
  'mua',
  'topcom',
  '01',
  '2024',
  'xe',
  'ch·ªß',
  'ch·∫°y',
  '14',
  '000',
  'km',
  'gi√°',
  '66',
  'tr',
  'th∆∞∆°ng_l∆∞·ª£ng',
  'anh_ch·ªã',
  'nh·∫Øn_Zalo',
  'c√¥ng_ch·ª©ng',
  'h·ª£p_ƒë·ªìng',
  'mua_b√°n']]

In [27]:
dictionary = corpora.Dictionary(content_gem)

In [28]:
dictionary.token2id

{'000': 0,
 '01': 1,
 '125': 2,
 '14': 3,
 '2024': 4,
 '66': 5,
 'anh_ch·ªã': 6,
 'cc': 7,
 'ch·∫°y': 8,
 'ch·ªß': 9,
 'c√¥ng_ch·ª©ng': 10,
 'gi√°': 11,
 'h·ª£p_ƒë·ªìng': 12,
 'km': 13,
 'mua': 14,
 'mua_b√°n': 15,
 'nh·∫Øn_Zalo': 16,
 'sprint': 17,
 'th∆∞∆°ng_l∆∞·ª£ng': 18,
 'topcom': 19,
 'tr': 20,
 'vespa': 21,
 'xe': 22,
 '1': 23,
 '150': 24,
 '2019': 25,
 '_b√°n': 26,
 '_mua': 27,
 '_xe': 28,
 '_xem': 29,
 'abs': 30,
 'bi·ªÉn': 31,
 'b·∫°c': 32,
 'b·ªëc': 33,
 'gi·∫•y': 34,
 'keng': 35,
 'k√Ω': 36,
 'k·ªπ': 37,
 'm√°y_m√≥c': 38,
 'nguy√™n': 39,
 'nh√†_t√¢n': 40,
 'nh√¨': 41,
 'nh·∫°c': 42,
 'n·ªët': 43,
 'p_t√¢n': 44,
 'q': 45,
 'sang_t√™n': 46,
 'sh': 47,
 's∆°n': 48,
 'team': 49,
 'test': 50,
 'tho·∫£i_m√°i': 51,
 'th√†nh_ph·ªë': 52,
 'th·∫Øng': 53,
 't√¢n_ph√∫': 54,
 'xi_mƒÉng': 55,
 'x√°m': 56,
 'zin': 57,
 '√™m': 58,
 '√∫p': 59,
 '09': 60,
 '10': 61,
 '2023': 62,
 '3': 63,
 '5': 64,
 '7': 65,
 'a7': 66,
 'a7_gi√°': 67,
 'audi': 68,
 'b·∫£o_d∆∞·ª°ng': 69,
 'demi': 70,
 'gia_ƒë

In [29]:
feature_cnt = len(dictionary.token2id)
feature_cnt

12231

In [30]:
corpus = [dictionary.doc2bow(text) for text in content_gem]

In [31]:
corpus[1]

[(9, 1),
 (22, 2),
 (23, 1),
 (24, 1),
 (25, 1),
 (26, 1),
 (27, 1),
 (28, 2),
 (29, 1),
 (30, 1),
 (31, 1),
 (32, 1),
 (33, 1),
 (34, 1),
 (35, 1),
 (36, 1),
 (37, 1),
 (38, 1),
 (39, 1),
 (40, 1),
 (41, 2),
 (42, 1),
 (43, 1),
 (44, 1),
 (45, 1),
 (46, 1),
 (47, 1),
 (48, 2),
 (49, 1),
 (50, 1),
 (51, 1),
 (52, 1),
 (53, 1),
 (54, 1),
 (55, 1),
 (56, 2),
 (57, 1),
 (58, 1),
 (59, 1)]

In [32]:
# Use TF-IDF Model to process corpus, obtaining index
tfidf = models.TfidfModel(corpus)
# t√≠nh to√°n s·ª± t∆∞∆°ng t·ª± trong ma tr·∫≠n th∆∞a th·ªõt
index = similarities.SparseMatrixSimilarity(tfidf[corpus],
                                            num_features = feature_cnt)
# ma tran: n x n

In [33]:
data_1 = pd.DataFrame(index)
data_1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7198,7199,7200,7201,7202,7203,7204,7205,7206,7207
0,1.000000,0.001832,0.017369,0.000165,0.032576,0.040736,0.085422,0.012458,0.015705,0.037873,...,0.028935,0.031461,0.041328,0.031471,0.011843,0.158824,0.026359,0.037390,0.024457,0.000079
1,0.001832,1.000000,0.010755,0.013058,0.006481,0.008879,0.000151,0.017103,0.003490,0.011595,...,0.019793,0.021403,0.066909,0.000121,0.009049,0.010653,0.046150,0.033392,0.006666,0.022845
2,0.017369,0.010755,1.000000,0.001102,0.040386,0.000157,0.000307,0.070246,0.015209,0.017002,...,0.004927,0.012582,0.006267,0.019507,0.006677,0.032327,0.015719,0.003613,0.008858,0.010501
3,0.000165,0.013058,0.001102,1.000000,0.002835,0.005770,0.017066,0.015852,0.000314,0.005768,...,0.000137,0.016721,0.000143,0.000195,0.005880,0.004416,0.007783,0.000233,0.001796,0.000764
4,0.032576,0.006481,0.040386,0.002835,1.000000,0.017124,0.026969,0.008547,0.000441,0.031863,...,0.007130,0.026201,0.012603,0.025244,0.027224,0.019865,0.019893,0.059628,0.014750,0.015008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7203,0.158824,0.010653,0.032327,0.004416,0.019865,0.017760,0.042014,0.029043,0.072953,0.033360,...,0.021127,0.033864,0.013678,0.105504,0.028798,1.000000,0.031037,0.007886,0.043065,0.029803
7204,0.026359,0.046150,0.015719,0.007783,0.019893,0.033485,0.000191,0.000128,0.004413,0.027712,...,0.017817,0.015888,0.088036,0.010867,0.034127,0.031037,1.000000,0.048107,0.009750,0.000054
7205,0.037390,0.033392,0.003613,0.000233,0.059628,0.000203,0.000396,0.011887,0.006278,0.049021,...,0.019168,0.027542,0.101994,0.000319,0.101571,0.007886,0.048107,1.000000,0.018027,0.016921
7206,0.024457,0.006666,0.008858,0.001796,0.014750,0.029891,0.000500,0.008843,0.017129,0.032788,...,0.065674,0.012692,0.000295,0.010614,0.000261,0.043065,0.009750,0.018027,1.000000,0.004622


In [34]:
# gi·∫£ s·ª≠ df_1 l√† ma tr·∫≠n t∆∞∆°ng ƒë·ªìng n√ón, index v√† columns ƒë·ªÅu l√† ch·ªâ s·ªë/ID
id = 3
row = data_1.loc[id]

top5 = row.drop(id, errors='ignore').nlargest(5)   # b·ªè ch√≠nh n√≥
top5_indices = top5.index.tolist()                # <-- l·∫•y index c·ªßa Series
print(top5_indices)   
data.iloc[top5_indices]                            

[6200, 436, 6849, 295, 296]


Unnamed: 0,id,Ti√™u ƒë·ªÅ,M√¥ t·∫£ chi ti·∫øt,Content,clean_text
6200,6201,BaÃÅn xe chiÃÅnh chuÃâ,"Xe chiÃÅnh chuÃâ c√¢ÃÄn baÃÅn honda deam luÃÄn, xe s...","Xe chiÃÅnh chuÃâ c√¢ÃÄn baÃÅn honda deam luÃÄn, xe s...",xe chi nh chu_c√¢ n n honda deam lu n xe s∆∞u_t√¢...
436,437,Vario 160 1 ƒë∆°ÃÄi chuÃâ Ohlis + 4road bs qu√¢Ã£n 5,Vario 160 indo 1 ƒë∆°ÃÄi chuÃâ bi√™Ãân s√¥ÃÅ qu√¢Ã£n 5. ...,Vario 160 indo 1 ƒë∆°ÃÄi chuÃâ bi√™Ãân s√¥ÃÅ qu√¢Ã£n 5. ...,vario 160 indo 1 ƒë∆° chu_bi√™ n s√¥ qu√¢ n 5 xe 23...
6849,6850,suzuki viva,nhaÃÄ coÃÄn 1 chi√™ÃÅc xe nh∆∞ hiÃÄnh baÃÅn cho ai c√¢...,nhaÃÄ coÃÄn 1 chi√™ÃÅc xe nh∆∞ hiÃÄnh baÃÅn cho ai c√¢...,nha co_n 1 chi√™ xe hi_nh n c√¢ n la m mai_mo sa...
295,296,Exciter 150 2015 bstp,Exciter 150 2015 bstp gi√¢ÃÅy t∆°ÃÄ ƒë√¢ÃÄy ƒëuÃâ kh√¥ng...,Exciter 150 2015 bstp gi√¢ÃÅy t∆°ÃÄ ƒë√¢ÃÄy ƒëuÃâ kh√¥ng...,exciter 150 2015 bstp gi√¢ y_t∆° ƒë√¢ y_ƒëu chi nh ...
296,297,Exciter 150 2015 bstp,Exciter 150 2015 bstp gi√¢ÃÅy t∆°ÃÄ ƒë√¢ÃÄy ƒëuÃâ kh√¥ng...,Exciter 150 2015 bstp gi√¢ÃÅy t∆°ÃÄ ƒë√¢ÃÄy ƒëuÃâ kh√¥ng...,exciter 150 2015 bstp gi√¢ y_t∆° ƒë√¢ y_ƒëu chi nh ...


In [35]:
# Tr∆∞·ªùng h·ª£p kh√°ch h√†ng nh·∫≠p th√¥ng tin t√¨m k·∫øm
search_str = "xe ch√≠nh ch·ªß, nguy√™n ki·ªán"
search_str_wt = clean_text(search_str)
print(search_str_wt.split())
# content_gem_re[:1]

['xe', 'ch·ªß_nguy√™n', 'ki·ªán']


In [36]:
view_content = search_str_wt.split()
kw_vector = dictionary.doc2bow(view_content)
sim = index[tfidf[kw_vector]]

In [37]:
# sim l√† numpy array ch·ª©a ƒë·ªô t∆∞∆°ng ƒë·ªìng
# T·∫°o DataFrame g·ªìm 2 c·ªôt: id v√† sim
df_sim = pd.DataFrame({
    "id": range(len(sim)),
    "sim": sim
})

# S·∫Øp x·∫øp theo sim gi·∫£m d·∫ßn
df_sorted_search = df_sim.sort_values(by="sim", ascending=False)
recommend = df_sorted_search.head()
recommend

Unnamed: 0,id,sim
3735,3735,0.514529
3449,3449,0.428086
5605,5605,0.291052
4609,4609,0.273028
1501,1501,0.247979


In [38]:
data.iloc[recommend["id"]]

Unnamed: 0,id,Ti√™u ƒë·ªÅ,M√¥ t·∫£ chi ti·∫øt,Content,clean_text
3735,3736,"B√°n xe Winner 150 ch√≠nh ch·ªß,nguy√™n zin","B√°n xe winner 150 ch√≠nh ch·ªß.nguy√™n zin, m·ªõi le...","B√°n xe winner 150 ch√≠nh ch·ªß.nguy√™n zin, m·ªõi le...",xe winner 150 ch·ªß_nguy√™n zin leng keng li√™n_h·ªá
3449,3450,C·∫ßn b√°n g·∫•p wave ch√≠nh ch·ªß 110 2023 bstp,Wave 110 ƒë·ªùi 2023\nodo 19k\nbi·ªÉn s·ªë tp ch√≠nh c...,Wave 110 ƒë·ªùi 2023 odo 19k bi·ªÉn s·ªë tp ch√≠nh ch·ªß...,wave 110 ƒë·ªùi 2023 odo 19 k bi·ªÉn tp ch·ªß_nguy√™n ...
5605,5606,Future led 8/2022 bstp ch√≠nh ch·ªß xe √≠t s·ª≠ d·ª•ng,Future led bstp ch√≠nh ch·ªß 8/2022 ƒë·∫ßy ƒë·ªß ph·ª• ki...,Future led bstp ch√≠nh ch·ªß 8/2022 ƒë·∫ßy ƒë·ªß ph·ª• ki...,future led bstp ch·ªß 8 2022 ƒë·∫ßy_ƒë·ªß ph·ª• ki·ªán xe ...
4609,4610,Ch√≠nh ch·ªß c·∫ßn b√°n Exciter 150 2017 m√°y zin,B√°n xe exciter 150 ch√≠nh ch·ªß nguy√™n zin\n- Bi·ªÉ...,B√°n xe exciter 150 ch√≠nh ch·ªß nguy√™n zin - Bi·ªÉn...,xe exciter 150 ch·ªß_nguy√™n zin bi·ªÉn l√¢m_ƒë·ªìng 49...
1501,1502,"C·∫ßn b√°n Wave Rs b·∫£n 2010 , xe 1 ƒë·ªùi ch·ªß nguy√™n...",C·∫ßn b√°n Wave Rs 110 ƒë·ªùi 2010 xe 1 ƒë·ªùi ch·ªß nguy...,C·∫ßn b√°n Wave Rs 110 ƒë·ªùi 2010 xe 1 ƒë·ªùi ch·ªß nguy...,wave rs 110 ƒë·ªùi 2010 xe 1 ƒë·ªùi ch·ªß_nguy√™n zin h...


In [39]:
class Recommender:
    def __init__(self, dictionary, tfidf, index, data: pd.DataFrame, clean_text):
        self.dictionary = dictionary
        self.tfidf = tfidf
        self.index = index
        self.data = data
        self.clean_text = clean_text

    def recommend_by_text(self, search_str: str, top_n: int = 5) -> pd.DataFrame:
        """G·ª£i √Ω theo n·ªôi dung vƒÉn b·∫£n."""
        search_str_wt = self.clean_text(search_str)
        tokens = search_str_wt.split()
        kw_vector = self.dictionary.doc2bow(tokens)
        sim = self.index[self.tfidf[kw_vector]]
        df_sim = pd.DataFrame({"id": range(len(sim)), "sim": sim}).sort_values(by="sim", ascending=False)
        recommend = df_sim.head(top_n)
        results = self.data.iloc[recommend["id"].to_list()].copy()
        results["sim"] = recommend["sim"].values
        return results

    def recommend_by_id(self, id: int, top_n: int = 5) -> pd.DataFrame:
        """G·ª£i √Ω c√°c t√†i li·ªáu t∆∞∆°ng t·ª± v·ªõi doc_id tr√™n ma tr·∫≠n t∆∞∆°ng ƒë·ªìng n√ón."""
        # L·∫•y d√≤ng t∆∞∆°ng ƒë·ªìng t·ª´ ma tr·∫≠n
        data_1 = pd.DataFrame(self.index)
        try:
            row = data_1.loc[id]
        except KeyError:
            raise ValueError(f"id {id} kh√¥ng t·ªìn t·∫°i trong ma tr·∫≠n t∆∞∆°ng ƒë·ªìng.")

        # B·ªè ch√≠nh n√≥, l·∫•y top N
        top_n_sim = row.drop(id, errors="ignore").nlargest(top_n)
        top_n_indices = top_n_sim.index.tolist()

        # Truy xu·∫•t d·ªØ li·ªáu t∆∞∆°ng ·ª©ng
        results = self.data.iloc[top_n_indices].copy()
        results["sim"] = top_n_sim.values

        return results

In [40]:
# gi·∫£ s·ª≠ df_1 l√† ma tr·∫≠n t∆∞∆°ng ƒë·ªìng n√ón, index v√† columns ƒë·ªÅu l√† ch·ªâ s·ªë/ID
id = 0
row = data_1.loc[id]

top5 = row.drop(id, errors='ignore').nlargest(5)   # b·ªè ch√≠nh n√≥
top5_indices = top5.index.tolist()                 # <-- l·∫•y index c·ªßa Series
print(top5_indices)                               # [ch·ªâ s·ªë c√°c item t∆∞∆°ng t·ª± nh·∫•t]

[3817, 6730, 4410, 3456, 4726]


In [41]:
rec = Recommender(dictionary, tfidf, index, data, clean_text)
recommend_by_text = rec.recommend_by_text
recommend_by_id = rec.recommend_by_id

In [42]:
search_str = "xe c√≤n m·ªõi, m√°y √™m, hao xƒÉng √≠t, ƒë·ªùi t·ª´ 2019 tr·ªü l√™n. N·∫øu c√≥ Vision ho·∫∑c Janus ch·∫°y d∆∞·ªõi 10.000km th√¨ c√†ng t·ªët."
recommend_by_text(search_str)


Unnamed: 0,id,Ti√™u ƒë·ªÅ,M√¥ t·∫£ chi ti·∫øt,Content,clean_text,sim
6604,6605,Xe janus x√°m ƒëen b·∫£ng smartkey,C·∫ßn b√°n xe janus \nDo mua xe m·ªõi n√™n c·∫ßn b√°n l·∫°i,C·∫ßn b√°n xe janus Do mua xe m·ªõi n√™n c·∫ßn b√°n l·∫°i,xe janus mua xe,0.447361
6442,6443,C·∫ßn b√°n xe tay ga Yamaha Janus,Xe ch√≠nh ch·ªß c·∫ßn b√°n xe Yamaha Janus ai c·∫ßn mu...,Xe ch√≠nh ch·ªß c·∫ßn b√°n xe Yamaha Janus ai c·∫ßn mu...,xe ch·ªß xe yamaha janus mua li√™n_h·ªá xe,0.349798
1992,1993,janus,Janus ƒë·ªè ƒëen ƒë√®n r·∫•t s√°ng gi·∫•y t·ªù ƒë·∫ßy ƒë·ªß,Janus ƒë·ªè ƒëen ƒë√®n r·∫•t s√°ng gi·∫•y t·ªù ƒë·∫ßy ƒë·ªß,janus ƒë·ªè ƒëen ƒë√®n gi·∫•y_t·ªù ƒë·∫ßy_ƒë·ªß,0.304264
3123,3124,B√°n Janus xe n·ªØ ƒëi ƒë·∫ßu ƒë·ªß gi·∫•y t·ªù,B√°n Janus xe n·ªØ ƒëi ƒë·∫ßu ƒë·ªß gi·∫•y t·ªù \nGi√° c·∫£ th∆∞...,B√°n Janus xe n·ªØ ƒëi ƒë·∫ßu ƒë·ªß gi·∫•y t·ªù Gi√° c·∫£ th∆∞∆°n...,janus xe n·ªØ ƒë·∫ßu gi·∫•y_t·ªù gi√°_c·∫£ th∆∞∆°ng_l∆∞·ª£ng,0.284921
6576,6577,B√°n Vision 215 m√†u ƒë·ªè bi·ªÉn tp th√¨ chia l·∫°i cho e,Anh ch·ªã n√†o mu·ªën l√™n ƒë·ªùi ho·∫∑c nh√† d∆∞ vision ƒë·ªù...,Anh ch·ªã n√†o mu·ªën l√™n ƒë·ªùi ho·∫∑c nh√† d∆∞ vision ƒë·ªù...,anh_ch·ªã ƒë·ªùi nh√†_d∆∞ vision ƒë·ªùi 215 tr·ªü m√†u ƒë·ªè b...,0.269901


In [43]:
id = 3
sample_id = df.loc[id, 'id']
print(f"\nXe g·ªëc:\n{df.loc[df['id'] == sample_id, ['id', 'Ti√™u ƒë·ªÅ', 'M√¥ t·∫£ chi ti·∫øt']].iloc[0]}")
print("\nG·ª£i √Ω c√°c xe t∆∞∆°ng t·ª±:")
recommend_by_id(id)


Xe g·ªëc:
id                                                                4
Ti√™u ƒë·ªÅ           Vespa Sprint 2019 -125- ƒêen ƒêoÃâ Sport -CHIÃÅNH ...
M√¥ t·∫£ chi ti·∫øt    XE CAÃÅ NH√ÇN BAÃÅN - XE DO EM ƒê∆ØÃÅNG T√äN CHIÃÅNH C...
Name: 3, dtype: object

G·ª£i √Ω c√°c xe t∆∞∆°ng t·ª±:


Unnamed: 0,id,Ti√™u ƒë·ªÅ,M√¥ t·∫£ chi ti·∫øt,Content,clean_text,sim
6200,6201,BaÃÅn xe chiÃÅnh chuÃâ,"Xe chiÃÅnh chuÃâ c√¢ÃÄn baÃÅn honda deam luÃÄn, xe s...","Xe chiÃÅnh chuÃâ c√¢ÃÄn baÃÅn honda deam luÃÄn, xe s...",xe chi nh chu_c√¢ n n honda deam lu n xe s∆∞u_t√¢...,0.331512
436,437,Vario 160 1 ƒë∆°ÃÄi chuÃâ Ohlis + 4road bs qu√¢Ã£n 5,Vario 160 indo 1 ƒë∆°ÃÄi chuÃâ bi√™Ãân s√¥ÃÅ qu√¢Ã£n 5. ...,Vario 160 indo 1 ƒë∆°ÃÄi chuÃâ bi√™Ãân s√¥ÃÅ qu√¢Ã£n 5. ...,vario 160 indo 1 ƒë∆° chu_bi√™ n s√¥ qu√¢ n 5 xe 23...,0.329765
6849,6850,suzuki viva,nhaÃÄ coÃÄn 1 chi√™ÃÅc xe nh∆∞ hiÃÄnh baÃÅn cho ai c√¢...,nhaÃÄ coÃÄn 1 chi√™ÃÅc xe nh∆∞ hiÃÄnh baÃÅn cho ai c√¢...,nha co_n 1 chi√™ xe hi_nh n c√¢ n la m mai_mo sa...,0.309942
295,296,Exciter 150 2015 bstp,Exciter 150 2015 bstp gi√¢ÃÅy t∆°ÃÄ ƒë√¢ÃÄy ƒëuÃâ kh√¥ng...,Exciter 150 2015 bstp gi√¢ÃÅy t∆°ÃÄ ƒë√¢ÃÄy ƒëuÃâ kh√¥ng...,exciter 150 2015 bstp gi√¢ y_t∆° ƒë√¢ y_ƒëu chi nh ...,0.299775
296,297,Exciter 150 2015 bstp,Exciter 150 2015 bstp gi√¢ÃÅy t∆°ÃÄ ƒë√¢ÃÄy ƒëuÃâ kh√¥ng...,Exciter 150 2015 bstp gi√¢ÃÅy t∆°ÃÄ ƒë√¢ÃÄy ƒëuÃâ kh√¥ng...,exciter 150 2015 bstp gi√¢ y_t∆° ƒë√¢ y_ƒëu chi nh ...,0.299775


In [44]:
def evaluate_cosine(data, cosine_sim_matrix, sample_size=200):
    indices = np.random.choice(data.index, sample_size, replace=False)

    top1_scores = []
    top3_scores = []
    times = []

    for idx in tqdm(indices, desc="Evaluating Cosine"):
        t0 = time.time()

        sim_scores = cosine_sim_matrix[idx]
        sim_scores[idx] = -1  # lo·∫°i ch√≠nh n√≥

        top3_idx = np.argsort(sim_scores)[-3:][::-1]
        top3_sim = sim_scores[top3_idx]

        t1 = time.time() - t0
        times.append(t1)

        top1_scores.append(top3_sim[0])
        top3_scores.append(np.mean(top3_sim))

    return {
        "Model": "Cosine-Similarity",
        "Avg_Time": np.mean(times),
        "Avg_Top1_Sim": np.mean(top1_scores),
        "Avg_Top3_Sim": np.mean(top3_scores)
    }



In [45]:
def evaluate_gensim(model, data, sample_size=200):
    indices = np.random.choice(len(data), size=sample_size, replace=False)

    top1_scores = []
    top3_scores = []
    times = []

    for idx in tqdm(indices, desc="Evaluating Gensim"):
        t0 = time.time()
        recs = model.recommend_by_id(idx, top_n=3)  # d√πng ƒë√∫ng h√†m c·ªßa b·∫°n
        t1 = time.time() - t0
        times.append(t1)

        top1_scores.append(recs.iloc[0]["sim"])
        top3_scores.append(recs["sim"].mean())

    return {
        "Model": "Gensim",
        "Avg_Time": np.mean(times),
        "Avg_Top1_Sim": np.mean(top1_scores),
        "Avg_Top3_Sim": np.mean(top3_scores)
    }


In [46]:
import random
import time

results = []

res_gensim = evaluate_gensim(rec, data, sample_size=10)
results.append(res_gensim)

res_cosine = evaluate_cosine(data, cosine_sim_matrix, sample_size=10)
results.append(res_cosine)

df_results = pd.DataFrame(results)
df_results



Evaluating Gensim: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [03:31<00:00, 21.10s/it]
Evaluating Cosine: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:00<00:00, 1681.56it/s]


Unnamed: 0,Model,Avg_Time,Avg_Top1_Sim,Avg_Top3_Sim
0,Gensim,21.099047,0.409649,0.320596
1,Cosine-Similarity,0.000595,0.451329,0.412692


In [47]:
def recommend_cosine_by_text(query: str, top_n: int = 5):
    """
    G·ª£i √Ω xe m√°y t∆∞∆°ng t·ª± d·ª±a tr√™n vƒÉn b·∫£n ng∆∞·ªùi d√πng nh·∫≠p v√†o.
    
    Args:
        query (str): vƒÉn b·∫£n t√¨m ki·∫øm
        top_n (int): s·ªë l∆∞·ª£ng g·ª£i √Ω
    
    Returns:
        DataFrame: danh s√°ch xe t∆∞∆°ng t·ª± + ƒë·ªô t∆∞∆°ng ƒë·ªìng
    """

    # 1. Ti·ªÅn x·ª≠ l√Ω query b·∫±ng h√†m clean_text c·ªßa b·∫°n
    clean_query = clean_text(query)

    # 2. Vector h√≥a query
    query_vec = vectorizer.transform([clean_query])

    # 3. T√≠nh ƒë·ªô t∆∞∆°ng ƒë·ªìng cosine gi·ªØa query v√† to√†n b·ªô item
    sims = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # 4. L·∫•y top N k·∫øt qu·∫£ cao nh·∫•t
    top_idx = sims.argsort()[::-1][:top_n]
    top_scores = sims[top_idx]

    # 5. Tr·∫£ v·ªÅ DataFrame k·∫øt qu·∫£
    result = data.iloc[top_idx][['id', 'Ti√™u ƒë·ªÅ', 'Content']].copy()
    result["similarity"] = top_scores

    return result.reset_index(drop=True)


In [48]:
recommend_cosine_by_text("xe c√≤n m·ªõi, m√°y √™m, hao xƒÉng √≠t, ƒë·ªùi t·ª´ 2019 tr·ªü l√™n. N·∫øu c√≥ Vision ho·∫∑c Janus ch·∫°y d∆∞·ªõi 10.000km th√¨ c√†ng t·ªët.", top_n=10)


Unnamed: 0,id,Ti√™u ƒë·ªÅ,Content,similarity
0,6605,Xe janus x√°m ƒëen b·∫£ng smartkey,C·∫ßn b√°n xe janus Do mua xe m·ªõi n√™n c·∫ßn b√°n l·∫°i,0.405727
1,6427,C·∫ßn b√°n xe nh∆∞ h√¨nh m·ªôt ch·ªß,Xe c√≤n t·ªët √≠t ƒëi M·ªôt ƒë·ªùi ch·ªß Xem xe t·∫°i nh√† Gi...,0.394077
2,1906,Gia ƒë√¨nh c·∫ßn ƒë·ªïi xe c·∫ßn b√°n l·∫°i xe janus ƒë·ªùi 2016,Gia ƒë√¨nh c·∫ßn ƒë·ªïi xe mu·ªën b√°n l·∫°i xe janus ƒë·ªùi ...,0.342975
3,6443,C·∫ßn b√°n xe tay ga Yamaha Janus,Xe ch√≠nh ch·ªß c·∫ßn b√°n xe Yamaha Janus ai c·∫ßn mu...,0.308978
4,5601,"B√°n xe Vision 2018 - ch·∫°y 30.000km - Gi√°: 17,5...",B√°n xe Vision 2018 nh√† ƒëang s·ª≠ d·ª•ng - ch·∫°y 30....,0.291242
5,774,"C√° nh√¢n ƒë·ªïi xe, n√™n b√°n - Vespa 2014",Xe c√≤n ƒëi t·ªët B√°n c√° nh√¢n ƒê√£ qua 1 ƒë·ªùi ch·ªß. Ti...,0.280459
6,5978,B√°n,Xe ch√≠nh ch·ªß m√°y zin ch∆∞a r·ªõt Odo ~10k km Xe n...,0.279231
7,1479,c·∫ßn b√°n xe sh150i abs m·ªõi 99% tr·∫Øng sx 2019,M√¨nh c·∫ßn b√°n xe sh150i 2019 abs m√†u tr·∫Øng - xe...,0.261588
8,1993,janus,Janus ƒë·ªè ƒëen ƒë√®n r·∫•t s√°ng gi·∫•y t·ªù ƒë·∫ßy ƒë·ªß,0.261481
9,6577,B√°n Vision 215 m√†u ƒë·ªè bi·ªÉn tp th√¨ chia l·∫°i cho e,Anh ch·ªã n√†o mu·ªën l√™n ƒë·ªùi ho·∫∑c nh√† d∆∞ vision ƒë·ªù...,0.257763
