In [47]:
import pandas as pd
import numpy as np
from pythainlp import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics.pairwise import pairwise_kernels
from scipy.spatial.distance import cdist

import joblib
import seaborn as sns
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE

In [34]:
from pythainlp.corpus.common import thai_stopwords
from pythainlp.tokenize import word_tokenize
import re

from gensim.models import Word2Vec, KeyedVectors

In [35]:
from pythainlp import word_vector
thai2vec_model = word_vector.WordVector(model_name="thai2fit_wv").get_model()

In [36]:
attractions_df = pd.read_csv("./frontend/merged_tat_attractions.csv")

In [37]:
attractions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4325 entries, 0 to 4324
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   placeId               4325 non-null   object 
 1   place_name_th         4324 non-null   object 
 2   introduction_th       2546 non-null   object 
 3   category_name         4325 non-null   object 
 4   categoryId            4325 non-null   int64  
 5   latitude              4313 non-null   float64
 6   longitude             4313 non-null   float64
 7   postcode              4215 non-null   float64
 8   thumbnail_url         3601 non-null   object 
 9   tags                  857 non-null    object 
 10  province_Id           4325 non-null   int64  
 11  province_name_th      4325 non-null   object 
 12  district_Id           4317 non-null   float64
 13  district_name_th      4317 non-null   object 
 14  sub_district_Id       4217 non-null   float64
 15  sub_district_name_th 

In [38]:
def clean_thai_text(text):
    if not isinstance(text, str):
        return ""
    stopwords_list = set(thai_stopwords())
    # Remove non-Thai characters and numbers
    text = re.sub(r"[^ก-๙\s]", "", text)
    # Tokenize and remove stopwords
    tokens = word_tokenize(text)
    cleaned_tokens = [word for word in tokens if word not in stopwords_list]
    return " ".join(cleaned_tokens)

In [39]:
attractions_df = attractions_df.dropna(subset=["introduction_th"])
attractions_df["cleaned_introduction_th"] = attractions_df["introduction_th"].apply(clean_thai_text)


input_text = "เล่นน้ำเย็นสบาย"
cleaned_input_text = clean_thai_text(input_text)


In [40]:
# TF-IDF Vectorization
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(attractions_df["cleaned_introduction_th"])
input_tfidf_vector = tfidf_vectorizer.transform([cleaned_input_text])

In [41]:
# Train Word2Vec model
w2v_model = Word2Vec(sentences=[word_tokenize(text) for text in attractions_df["cleaned_introduction_th"]],
                     vector_size=100, min_count=1)

In [42]:
def vectorize_word2vec(text, model):
    tokens = word_tokenize(text)
    vectors = [model.wv[word] for word in tokens if word in model.wv]
    return np.mean(vectors, axis=0) if vectors else np.zeros(model.vector_size)

w2v_review_vectors = np.array([vectorize_word2vec(text, w2v_model) for text in attractions_df["cleaned_introduction_th"]])
w2v_input_vector = vectorize_word2vec(cleaned_input_text, w2v_model)

In [43]:
# Step 5: Thai2Vec Implementation
# Load pre-trained Thai2Vec model
# thai2vec_model_path = "thai2vec_no_neg.bin"  # Update this path
# thai2vec_model = KeyedVectors.load_word2vec_format(thai2vec_model_path, binary=True)

def vectorize_thai2vec(text, model):
    tokens = word_tokenize(text)
    vectors = [model[word] for word in tokens if word in model]
    return np.mean(vectors, axis=0) if vectors else np.zeros(model.vector_size)

thai2vec_review_vectors = np.array([vectorize_thai2vec(text, thai2vec_model) for text in attractions_df["cleaned_introduction_th"]])
thai2vec_input_vector = vectorize_thai2vec(cleaned_input_text, thai2vec_model)

In [44]:
def combine_word2vec_tfidf(tfidf_vectorizer, tfidf_matrix, text, w2v_model, reviews):
    tfidf_weights = tfidf_vectorizer.transform([text]).toarray()
    word2vec_combined_vectors = []

    for i, review in enumerate(reviews):
        word2vec_vector = vectorize_word2vec(review, w2v_model)
        combined_vector = word2vec_vector * tfidf_weights[0, i]
        word2vec_combined_vectors.append(combined_vector)

    return np.mean(word2vec_combined_vectors, axis=0)

word2vec_tfidf_input_vector = combine_word2vec_tfidf(tfidf_vectorizer, tfidf_matrix, cleaned_input_text, w2v_model,
                                                     attractions_df["cleaned_introduction_th"])
word2vec_tfidf_similarity_scores = cosine_similarity([word2vec_tfidf_input_vector], w2v_review_vectors)
word2vec_tfidf_most_similar_index = np.argmax(word2vec_tfidf_similarity_scores)

In [45]:
# Thai2Vec + TF-IDF Combination
def combine_thai2vec_tfidf(tfidf_vectorizer, tfidf_matrix, text, thai2vec_model, reviews):
    tfidf_weights = tfidf_vectorizer.transform([text]).toarray()
    thai2vec_combined_vectors = []

    for i, review in enumerate(reviews):
        thai2vec_vector = vectorize_thai2vec(review, thai2vec_model)
        combined_vector = thai2vec_vector * tfidf_weights[0, i]
        thai2vec_combined_vectors.append(combined_vector)

    return np.mean(thai2vec_combined_vectors, axis=0)

thai2vec_tfidf_input_vector = combine_thai2vec_tfidf(tfidf_vectorizer, tfidf_matrix, cleaned_input_text, thai2vec_model,
                                                     attractions_df["cleaned_introduction_th"])


In [69]:

# Laplacian Kernel
def rbf_kernel(x, y, gamma=1.0):
    # ตรวจสอบขนาด input
    if x.ndim == 1:
        x = x.reshape(1, -1)
    if y.ndim == 1 or not isinstance(y, np.ndarray):
        y = np.array(y).reshape(-1, x.shape[1])
    # คำนวณระยะทางและ kernel
    distance = cdist(x, y, metric='euclidean')  # Euclidean distance
    return np.exp(-gamma * (distance ** 2))

# Linear Kernel
def linear_kernel(x, y):
    return np.dot(x, y.T)

# RBF Kernel
def rbf_kernel(x, y, gamma=1.0):
    distance = cdist(x, y, metric='euclidean')  # Euclidean distance (L2)
    return np.exp(-gamma * (distance ** 2))

# Pairwise Kernel
def pairwise_custom_kernel(x, y, metric='cosine'):
    return pairwise_kernels(x, y, metric=metric)

def laplacian_kernel(x, y, sigma=1.0):
    distance = cdist(x, y, metric='cityblock')  # Manhattan distance (L1)
    return np.exp(-distance / sigma)

In [65]:
# ตรวจสอบ tfidf_matrix
if not isinstance(tfidf_matrix, np.ndarray):
    tfidf_matrix = tfidf_matrix.toarray()  # แปลงจาก sparse matrix เป็น array

# ตรวจสอบ input_tfidf_vector
if not isinstance(input_tfidf_vector, np.ndarray):
    input_tfidf_vector = input_tfidf_vector.toarray()

In [66]:
print("Shape of input_tfidf_vector:", input_tfidf_vector.shape)  # ต้องเป็น (1, N)
print("Shape of tfidf_matrix:", tfidf_matrix.shape)       

Shape of input_tfidf_vector: (1, 4971)
Shape of tfidf_matrix: (2546, 4971)


In [58]:
input_tfidf_vector_2d = input_tfidf_vector.reshape(1, -1)  # Make sure input vector is 2D
tfidf_matrix_2d = tfidf_matrix if len(tfidf_matrix.shape) == 2 else tfidf_matrix.toarray()
if input_tfidf_vector.size == 0:
    print("Error: input_tfidf_vector is empty!")


In [64]:
print("Shape of input_tfidf_vector:", input_tfidf_vector.shape)  # ต้องเป็น (1, N)
print("Shape of tfidf_matrix:", tfidf_matrix.shape)       

Shape of input_tfidf_vector: (1, 4971)
Shape of tfidf_matrix: (2546, 4971)


In [71]:
print("\nINPUT TEXT: ", input_text)

# **TF-IDF Model**
print("\n=== TF-IDF Model ===")

# Cosine Similarity
tfidf_cosine_similarity_scores = cosine_similarity(input_tfidf_vector, tfidf_matrix)
tfidf_cosine_most_similar_index = np.argmax(tfidf_cosine_similarity_scores)
print("\nCosine Similarity")
print("Attraction Name:", attractions_df.iloc[tfidf_cosine_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[tfidf_cosine_most_similar_index]["introduction_th"])
print("Similarity Score:", tfidf_cosine_similarity_scores[0, tfidf_cosine_most_similar_index])

# Laplacian Kernel
laplacian_similarity_scores = laplacian_kernel(input_tfidf_vector, tfidf_matrix, sigma=1.0)
laplacian_most_similar_index = np.argmax(laplacian_similarity_scores)
print("\nLaplacian Kernel")
print("Attraction Name:", attractions_df.iloc[laplacian_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[laplacian_most_similar_index]["introduction_th"])
print("Similarity Score:", laplacian_similarity_scores[0, laplacian_most_similar_index])

# Linear Kernel
linear_similarity_scores = linear_kernel(input_tfidf_vector, tfidf_matrix)
linear_most_similar_index = np.argmax(linear_similarity_scores)
print("\nLinear Kernel")
print("Attraction Name:", attractions_df.iloc[linear_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[linear_most_similar_index]["introduction_th"])
print("Similarity Score:", linear_similarity_scores[0, linear_most_similar_index])

# RBF Kernel
rbf_similarity_scores = rbf_kernel(input_tfidf_vector, tfidf_matrix, gamma=0.5)
rbf_most_similar_index = np.argmax(rbf_similarity_scores)
print("\nRBF Kernel")
print("Attraction Name:", attractions_df.iloc[rbf_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[rbf_most_similar_index]["introduction_th"])
print("Similarity Score:", rbf_similarity_scores[0, rbf_most_similar_index])

# Pairwise Kernel
pairwise_similarity_scores = pairwise_custom_kernel(input_tfidf_vector, tfidf_matrix, metric='cosine')
pairwise_most_similar_index = np.argmax(pairwise_similarity_scores)
print("\nPairwise Kernel")
print("Attraction Name:", attractions_df.iloc[pairwise_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[pairwise_most_similar_index]["introduction_th"])
print("Similarity Score:", pairwise_similarity_scores[0, pairwise_most_similar_index])



INPUT TEXT:  เล่นน้ำเย็นสบาย

=== TF-IDF Model ===

Cosine Similarity
Attraction Name: อ่างเก็บน้ำรัตนัย
Most Similar Review: ในบริเวณอ่างเก็บน้ำมีลักษณะคล้ายทะเลสาบ ในตอนเย็นจะมีลมพัดเย็นสบายเหมาะสำหรับนั่งพักผ่อน
Similarity Score: 0.3880524701259517

Laplacian Kernel
Attraction Name: หาดตะโละกาโปร์
Most Similar Review:  
Similarity Score: 0.18534920708658978

Linear Kernel
Attraction Name: อ่างเก็บน้ำรัตนัย
Most Similar Review: ในบริเวณอ่างเก็บน้ำมีลักษณะคล้ายทะเลสาบ ในตอนเย็นจะมีลมพัดเย็นสบายเหมาะสำหรับนั่งพักผ่อน
Similarity Score: 0.3880524701259517

RBF Kernel
Attraction Name: หาดตะโละกาโปร์
Most Similar Review:  
Similarity Score: 0.6065306597126334

Pairwise Kernel
Attraction Name: อ่างเก็บน้ำรัตนัย
Most Similar Review: ในบริเวณอ่างเก็บน้ำมีลักษณะคล้ายทะเลสาบ ในตอนเย็นจะมีลมพัดเย็นสบายเหมาะสำหรับนั่งพักผ่อน
Similarity Score: 0.3880524701259517


In [72]:
# **Word2Vec Model**
print("\n=== Word2Vec Model ===")

# Cosine Similarity
w2v_cosine_similarity_scores = cosine_similarity([w2v_input_vector], w2v_review_vectors)
w2v_cosine_most_similar_index = np.argmax(w2v_cosine_similarity_scores)
print("\nCosine Similarity")
print("Attraction Name:", attractions_df.iloc[w2v_cosine_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[w2v_cosine_most_similar_index]["introduction_th"])
print("Similarity Score:", w2v_cosine_similarity_scores[0, w2v_cosine_most_similar_index])

# Laplacian Kernel
w2v_laplacian_similarity_scores = laplacian_kernel([w2v_input_vector], w2v_review_vectors, sigma=1.0)
w2v_laplacian_most_similar_index = np.argmax(w2v_laplacian_similarity_scores)
print("\nLaplacian Kernel")
print("Attraction Name:", attractions_df.iloc[w2v_laplacian_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[w2v_laplacian_most_similar_index]["introduction_th"])
print("Similarity Score:", w2v_laplacian_similarity_scores[0, w2v_laplacian_most_similar_index])

# Linear Kernel
w2v_linear_similarity_scores = linear_kernel([w2v_input_vector], w2v_review_vectors)
w2v_linear_most_similar_index = np.argmax(w2v_linear_similarity_scores)
print("\nLinear Kernel")
print("Attraction Name:", attractions_df.iloc[w2v_linear_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[w2v_linear_most_similar_index]["introduction_th"])
print("Similarity Score:", w2v_linear_similarity_scores[0, w2v_linear_most_similar_index])

# RBF Kernel
w2v_rbf_similarity_scores = rbf_kernel([w2v_input_vector], w2v_review_vectors, gamma=0.5)
w2v_rbf_most_similar_index = np.argmax(w2v_rbf_similarity_scores)
print("\nRBF Kernel")
print("Attraction Name:", attractions_df.iloc[w2v_rbf_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[w2v_rbf_most_similar_index]["introduction_th"])
print("Similarity Score:", w2v_rbf_similarity_scores[0, w2v_rbf_most_similar_index])

# Pairwise Kernel
w2v_pairwise_similarity_scores = pairwise_custom_kernel([w2v_input_vector], w2v_review_vectors, metric='cosine')
w2v_pairwise_most_similar_index = np.argmax(w2v_pairwise_similarity_scores)
print("\nPairwise Kernel")
print("Attraction Name:", attractions_df.iloc[w2v_pairwise_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[w2v_pairwise_most_similar_index]["introduction_th"])
print("Similarity Score:", w2v_pairwise_similarity_scores[0, w2v_pairwise_most_similar_index])


# **Thai2Vec Model**
print("\n=== Thai2Vec Model ===")

# Cosine Similarity
thai2vec_cosine_similarity_scores = cosine_similarity([thai2vec_input_vector], thai2vec_review_vectors)
thai2vec_cosine_most_similar_index = np.argmax(thai2vec_cosine_similarity_scores)
print("\nCosine Similarity")
print("Attraction Name:", attractions_df.iloc[thai2vec_cosine_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[thai2vec_cosine_most_similar_index]["introduction_th"])
print("Similarity Score:", thai2vec_cosine_similarity_scores[0, thai2vec_cosine_most_similar_index])

# Laplacian Kernel
thai2vec_laplacian_similarity_scores = laplacian_kernel([thai2vec_input_vector], thai2vec_review_vectors, sigma=1.0)
thai2vec_laplacian_most_similar_index = np.argmax(thai2vec_laplacian_similarity_scores)
print("\nLaplacian Kernel")
print("Attraction Name:", attractions_df.iloc[thai2vec_laplacian_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[thai2vec_laplacian_most_similar_index]["introduction_th"])
print("Similarity Score:", thai2vec_laplacian_similarity_scores[0, thai2vec_laplacian_most_similar_index])

# Linear Kernel
thai2vec_linear_similarity_scores = linear_kernel([thai2vec_input_vector], thai2vec_review_vectors)
thai2vec_linear_most_similar_index = np.argmax(thai2vec_linear_similarity_scores)
print("\nLinear Kernel")
print("Attraction Name:", attractions_df.iloc[thai2vec_linear_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[thai2vec_linear_most_similar_index]["introduction_th"])
print("Similarity Score:", thai2vec_linear_similarity_scores[0, thai2vec_linear_most_similar_index])

# RBF Kernel
thai2vec_rbf_similarity_scores = rbf_kernel([thai2vec_input_vector], thai2vec_review_vectors, gamma=0.5)
thai2vec_rbf_most_similar_index = np.argmax(thai2vec_rbf_similarity_scores)
print("\nRBF Kernel")
print("Attraction Name:", attractions_df.iloc[thai2vec_rbf_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[thai2vec_rbf_most_similar_index]["introduction_th"])
print("Similarity Score:", thai2vec_rbf_similarity_scores[0, thai2vec_rbf_most_similar_index])

# Pairwise Kernel
thai2vec_pairwise_similarity_scores = pairwise_custom_kernel([thai2vec_input_vector], thai2vec_review_vectors, metric='cosine')
thai2vec_pairwise_most_similar_index = np.argmax(thai2vec_pairwise_similarity_scores)
print("\nPairwise Kernel")
print("Attraction Name:", attractions_df.iloc[thai2vec_pairwise_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[thai2vec_pairwise_most_similar_index]["introduction_th"])
print("Similarity Score:", thai2vec_pairwise_similarity_scores[0, thai2vec_pairwise_most_similar_index])


# **Word2Vec + TF-IDF Model**
print("\n=== Word2Vec + TF-IDF Model ===")

# Cosine Similarity
word2vec_tfidf_cosine_similarity_scores = cosine_similarity([word2vec_tfidf_input_vector], w2v_review_vectors)
word2vec_tfidf_cosine_most_similar_index = np.argmax(word2vec_tfidf_cosine_similarity_scores)
print("\nCosine Similarity")
print("Attraction Name:", attractions_df.iloc[word2vec_tfidf_cosine_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[word2vec_tfidf_cosine_most_similar_index]["introduction_th"])
print("Similarity Score:", word2vec_tfidf_cosine_similarity_scores[0, word2vec_tfidf_cosine_most_similar_index])

# Laplacian Kernel
word2vec_tfidf_laplacian_similarity_scores = laplacian_kernel([word2vec_tfidf_input_vector], w2v_review_vectors, sigma=1.0)
word2vec_tfidf_laplacian_most_similar_index = np.argmax(word2vec_tfidf_laplacian_similarity_scores)
print("\nLaplacian Kernel")
print("Attraction Name:", attractions_df.iloc[word2vec_tfidf_laplacian_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[word2vec_tfidf_laplacian_most_similar_index]["introduction_th"])
print("Similarity Score:", word2vec_tfidf_laplacian_similarity_scores[0, word2vec_tfidf_laplacian_most_similar_index])

# Linear Kernel
word2vec_tfidf_linear_similarity_scores = linear_kernel([word2vec_tfidf_input_vector], w2v_review_vectors)
word2vec_tfidf_linear_most_similar_index = np.argmax(word2vec_tfidf_linear_similarity_scores)
print("\nLinear Kernel")
print("Attraction Name:", attractions_df.iloc[word2vec_tfidf_linear_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[word2vec_tfidf_linear_most_similar_index]["introduction_th"])
print("Similarity Score:", word2vec_tfidf_linear_similarity_scores[0, word2vec_tfidf_linear_most_similar_index])

# RBF Kernel
word2vec_tfidf_rbf_similarity_scores = rbf_kernel([word2vec_tfidf_input_vector], w2v_review_vectors, gamma=0.5)
word2vec_tfidf_rbf_most_similar_index = np.argmax(word2vec_tfidf_rbf_similarity_scores)
print("\nRBF Kernel")
print("Attraction Name:", attractions_df.iloc[word2vec_tfidf_rbf_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[word2vec_tfidf_rbf_most_similar_index]["introduction_th"])
print("Similarity Score:", word2vec_tfidf_rbf_similarity_scores[0, word2vec_tfidf_rbf_most_similar_index])

# Pairwise Kernel
word2vec_tfidf_pairwise_similarity_scores = pairwise_custom_kernel([word2vec_tfidf_input_vector], w2v_review_vectors, metric='cosine')
word2vec_tfidf_pairwise_most_similar_index = np.argmax(word2vec_tfidf_pairwise_similarity_scores)
print("\nPairwise Kernel")
print("Attraction Name:", attractions_df.iloc[word2vec_tfidf_pairwise_most_similar_index]["place_name_th"])
print("Most Similar Review:", attractions_df.iloc[word2vec_tfidf_pairwise_most_similar_index]["introduction_th"])
print("Similarity Score:", word2vec_tfidf_pairwise_similarity_scores[0, word2vec_tfidf_pairwise_most_similar_index])



=== Word2Vec Model ===

Cosine Similarity
Attraction Name: น้ำตกธารรัตนา
Most Similar Review: มีสภาพเป็นแก่งน้ำไหลตามหุบเขาในเทือกเขาใหญ่มีน้ำมากในช่วงฤดูฝน มีกิจกรรมโรยตัวจากหน้าผาของน้ำตก
Similarity Score: 0.9999455

Laplacian Kernel
Attraction Name: น้ำตกธารรัตนา
Most Similar Review: มีสภาพเป็นแก่งน้ำไหลตามหุบเขาในเทือกเขาใหญ่มีน้ำมากในช่วงฤดูฝน มีกิจกรรมโรยตัวจากหน้าผาของน้ำตก
Similarity Score: 0.664967293380787

Linear Kernel
Attraction Name: หาดตะโละกาโปร์
Most Similar Review:  
Similarity Score: 34.086494

RBF Kernel
Attraction Name: น้ำตกธารรัตนา
Most Similar Review: มีสภาพเป็นแก่งน้ำไหลตามหุบเขาในเทือกเขาใหญ่มีน้ำมากในช่วงฤดูฝน มีกิจกรรมโรยตัวจากหน้าผาของน้ำตก
Similarity Score: 0.9986217486372876

Pairwise Kernel
Attraction Name: น้ำตกธารรัตนา
Most Similar Review: มีสภาพเป็นแก่งน้ำไหลตามหุบเขาในเทือกเขาใหญ่มีน้ำมากในช่วงฤดูฝน มีกิจกรรมโรยตัวจากหน้าผาของน้ำตก
Similarity Score: 0.9999455

=== Thai2Vec Model ===

Cosine Similarity
Attraction Name: สวนน้ำสวนสัตว์สงขลา
Most Simila

In [75]:

# สร้าง DataFrame เปล่าสำหรับเก็บผลลัพธ์
results_df = pd.DataFrame(columns=["Model", "Kernel", "Attraction Name", "Most Similar Review", "Similarity Score"])

# ฟังก์ชันช่วยเพิ่มข้อมูลในตาราง
def add_to_results(model, kernel, attraction_name, review, score):
    global results_df
    results_df = pd.concat([
        results_df,
        pd.DataFrame({
            "Model": [model],
            "Kernel": [kernel],
            "Attraction Name": [attraction_name],
            "Most Similar Review": [review],
            "Similarity Score": [score]
        })
    ], ignore_index=True)

# === TF-IDF Model ===
model = "TF-IDF"
# Cosine Similarity
tfidf_cosine_most_similar_index = np.argmax(tfidf_cosine_similarity_scores)
add_to_results(
    model, "Cosine Similarity",
    attractions_df.iloc[tfidf_cosine_most_similar_index]["place_name_th"],
    attractions_df.iloc[tfidf_cosine_most_similar_index]["introduction_th"],
    tfidf_cosine_similarity_scores[0, tfidf_cosine_most_similar_index]
)
# Laplacian Kernel
tfidf_laplacian_most_similar_index = np.argmax(laplacian_similarity_scores)
add_to_results(
    model, "Laplacian Kernel",
    attractions_df.iloc[tfidf_laplacian_most_similar_index]["place_name_th"],
    attractions_df.iloc[tfidf_laplacian_most_similar_index]["introduction_th"],
    laplacian_similarity_scores[0, tfidf_laplacian_most_similar_index]
)
# Linear Kernel
tfidf_linear_most_similar_index = np.argmax(linear_similarity_scores)
add_to_results(
    model, "Linear Kernel",
    attractions_df.iloc[tfidf_linear_most_similar_index]["place_name_th"],
    attractions_df.iloc[tfidf_linear_most_similar_index]["introduction_th"],
    linear_similarity_scores[0, tfidf_linear_most_similar_index]
)
# RBF Kernel
tfidf_rbf_most_similar_index = np.argmax(rbf_similarity_scores)
add_to_results(
    model, "RBF Kernel",
    attractions_df.iloc[tfidf_rbf_most_similar_index]["place_name_th"],
    attractions_df.iloc[tfidf_rbf_most_similar_index]["introduction_th"],
    rbf_similarity_scores[0, tfidf_rbf_most_similar_index]
)
# Pairwise Kernel
tfidf_pairwise_most_similar_index = np.argmax(pairwise_similarity_scores)
add_to_results(
    model, "Pairwise Kernel",
    attractions_df.iloc[tfidf_pairwise_most_similar_index]["place_name_th"],
    attractions_df.iloc[tfidf_pairwise_most_similar_index]["introduction_th"],
    pairwise_similarity_scores[0, tfidf_pairwise_most_similar_index]
)

# === Word2Vec Model ===
model = "Word2Vec"
# Cosine Similarity
w2v_cosine_most_similar_index = np.argmax(w2v_cosine_similarity_scores)
add_to_results(
    model, "Cosine Similarity",
    attractions_df.iloc[w2v_cosine_most_similar_index]["place_name_th"],
    attractions_df.iloc[w2v_cosine_most_similar_index]["introduction_th"],
    w2v_cosine_similarity_scores[0, w2v_cosine_most_similar_index]
)
# Laplacian Kernel
w2v_laplacian_most_similar_index = np.argmax(w2v_laplacian_similarity_scores)
add_to_results(
    model, "Laplacian Kernel",
    attractions_df.iloc[w2v_laplacian_most_similar_index]["place_name_th"],
    attractions_df.iloc[w2v_laplacian_most_similar_index]["introduction_th"],
    w2v_laplacian_similarity_scores[0, w2v_laplacian_most_similar_index]
)
# Linear Kernel
w2v_linear_most_similar_index = np.argmax(w2v_linear_similarity_scores)
add_to_results(
    model, "Linear Kernel",
    attractions_df.iloc[w2v_linear_most_similar_index]["place_name_th"],
    attractions_df.iloc[w2v_linear_most_similar_index]["introduction_th"],
    w2v_linear_similarity_scores[0, w2v_linear_most_similar_index]
)
# RBF Kernel
w2v_rbf_most_similar_index = np.argmax(w2v_rbf_similarity_scores)
add_to_results(
    model, "RBF Kernel",
    attractions_df.iloc[w2v_rbf_most_similar_index]["place_name_th"],
    attractions_df.iloc[w2v_rbf_most_similar_index]["introduction_th"],
    w2v_rbf_similarity_scores[0, w2v_rbf_most_similar_index]
)
# Pairwise Kernel
w2v_pairwise_most_similar_index = np.argmax(w2v_pairwise_similarity_scores)
add_to_results(
    model, "Pairwise Kernel",
    attractions_df.iloc[w2v_pairwise_most_similar_index]["place_name_th"],
    attractions_df.iloc[w2v_pairwise_most_similar_index]["introduction_th"],
    w2v_pairwise_similarity_scores[0, w2v_pairwise_most_similar_index]
)

# === Thai2Vec Model ===
# (ทำเหมือน Word2Vec)

# === Word2Vec + TF-IDF Model ===
# (ทำเหมือน Word2Vec)

# แสดงผลลัพธ์เป็นตาราง



  results_df = pd.concat([


In [76]:
from IPython.display import display
display(results_df)

Unnamed: 0,Model,Kernel,Attraction Name,Most Similar Review,Similarity Score
0,TF-IDF,Cosine Similarity,อ่างเก็บน้ำรัตนัย,ในบริเวณอ่างเก็บน้ำมีลักษณะคล้ายทะเลสาบ ในตอนเ...,0.388052
1,TF-IDF,Laplacian Kernel,หาดตะโละกาโปร์,,0.185349
2,TF-IDF,Linear Kernel,อ่างเก็บน้ำรัตนัย,ในบริเวณอ่างเก็บน้ำมีลักษณะคล้ายทะเลสาบ ในตอนเ...,0.388052
3,TF-IDF,RBF Kernel,หาดตะโละกาโปร์,,0.606531
4,TF-IDF,Pairwise Kernel,อ่างเก็บน้ำรัตนัย,ในบริเวณอ่างเก็บน้ำมีลักษณะคล้ายทะเลสาบ ในตอนเ...,0.388052
5,Word2Vec,Cosine Similarity,น้ำตกธารรัตนา,มีสภาพเป็นแก่งน้ำไหลตามหุบเขาในเทือกเขาใหญ่มีน...,0.999946
6,Word2Vec,Laplacian Kernel,น้ำตกธารรัตนา,มีสภาพเป็นแก่งน้ำไหลตามหุบเขาในเทือกเขาใหญ่มีน...,0.664967
7,Word2Vec,Linear Kernel,หาดตะโละกาโปร์,,34.086494
8,Word2Vec,RBF Kernel,น้ำตกธารรัตนา,มีสภาพเป็นแก่งน้ำไหลตามหุบเขาในเทือกเขาใหญ่มีน...,0.998622
9,Word2Vec,Pairwise Kernel,น้ำตกธารรัตนา,มีสภาพเป็นแก่งน้ำไหลตามหุบเขาในเทือกเขาใหญ่มีน...,0.999946
