In [21]:
import pandas as pd
import openpyxl
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

In [22]:
file_path =  r'C:\Users\Multi 03\Desktop\code\reviewdata.xlsx'
review_data = pd.read_excel(file_path)

In [42]:
stop_words_ko = [
    "수", "것", "들", "점", "등", "더", "이", "그", "저", 
    "때", "거", "왜", "이런", "저런", "그런", "너무", "정말", 
    "진짜", "좀", "많이", "안", "못", "매우", "아주"
]


In [43]:
def preprocess_text(text):
    # Remove special characters and lowercase
    text = re.sub(r'[^a-zA-Z가-힣\s]', '', str(text))
    text = text.lower()
    # Tokenize and remove stop words
    tokens = [word for word in text.split() if word not in stop_words_ko]
    return tokens

In [44]:
review_data['Processed_Text'] = review_data['Review'].fillna('').astype(str) + ' ' + review_data['Menu'].fillna('')
review_data['Tokens'] = review_data['Processed_Text'].apply(preprocess_text)

In [45]:
sentences = review_data['Tokens'].tolist()

In [46]:
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

In [47]:
def text_to_vector(tokens, model):
    vectors = [model.wv[token] for token in tokens if token in model.wv]
    if vectors:
        return np.mean(vectors, axis=0)
    else:
        return np.zeros(model.vector_size)

In [48]:
review_data['Vector'] = review_data['Tokens'].apply(lambda x: text_to_vector(x, model))

In [49]:
def find_similar_words(word, top_n=5):
    if word not in model.wv:
        return f"The word '{word}' is not in the vocabulary."
    similar_words = model.wv.most_similar(word, topn=top_n)
    return similar_words

In [108]:
def recommend_similar_menu_with_restaurant(menu_name, top_n=5):
    # Find the vector for the given menu
    menu_index = review_data[review_data['Menu'].str.contains(menu_name, na=False)].index
    if len(menu_index) == 0:
        return f"No menu found matching: {menu_name}"
    
    menu_vector = review_data.loc[menu_index[0], 'Vector']
    
    # Calculate cosine similarity with all other menus
    similarities = cosine_similarity([menu_vector], np.stack(review_data['Vector'].values))
    
    # Get indices of top N most similar menus (excluding the input menu itself)
    similar_indices = similarities[0].argsort()[-top_n-1:-1][::-1]
    
    # Retrieve recommended menus with restaurant information
    recommendations = review_data.iloc[similar_indices][['Restaurant', 'Menu', 'Review']]
    return recommendations

In [112]:
similar_words_example = find_similar_words("돼지")
print("Words similar to '냉면':", similar_words_example)

Words similar to '냉면': [('했고', 0.9923644065856934), ('간', 0.9899376034736633), ('반찬', 0.9896684288978577), ('밥에', 0.9894627332687378), ('가게에', 0.989234983921051)]


In [110]:
similar_menus_with_restaurants = recommend_similar_menu_with_restaurant("물냉면")
print("Menus similar to '물냉면' with restaurant information:")
print(similar_menus_with_restaurants)

Menus similar to '물냉면' with restaurant information:
       Restaurant                                               Menu  \
65934  교촌치킨-미장수송점                                허니콤보/1(세트 선택(선택안함))   
31691       손주연푸드  우엉김밥/2(초장 추가선택(추가안함)),손주연김밥/2(초장 추가선택(추가안함)),김...   
7522    카페인중독-군산점                               국물떡볶이（달콤）/1,청사과에이드/1   
37758     형제족발&보쌈                                  형제보쌈/1(사이즈 선택(小))   
45656       한우네곱창                           곱창＋막창＋갈비＋음료수/1(맛 선택(양념))   

                                                  Review  
65934  여기는 매번 기본 1시간은 걸리는 거 같아요.. 맛은 맛있으나 치킨먹고싶어서 시켰는...  
31691  사장님~~~ 제가 너무너무 좋아하는집에 우엉김밥인데~~쬐끔 짜용~~ 한번도 안그랫는...  
7522   전에 여기 떡볶이가 넘 맛잇엇던 기억이 잇어서 다이어트 끝나자마자 바로 주문햇습니다...  
37758  맛있어요~~~ 다음날 상온에 두고 먹었는데 냄새 안나고 부드럽고 기름 굳은것도 없고...  
45656  요기요에선 왜 별점이 낮은지 모르겠어요. ㅂㅁ은 괜찮던데.. 솔직히 군산 탑이에요....  


In [21]:
def recommend_restaurants_for_similar_words(base_word, top_n=5):
    if base_word not in model.wv:
        return f"The word '{base_word}' is not in the vocabulary."
    
    # Find top N most similar words
    similar_words = model.wv.most_similar(base_word, topn=top_n)
    similar_words_list = [word for word, _ in similar_words]
    
    # Filter menus containing similar words
    matched_menus = review_data[review_data['Tokens'].apply(lambda tokens: any(word in tokens for word in similar_words_list))]
    
    # Retrieve restaurants and menus
    recommendations = matched_menus[['Restaurant', 'Menu', 'Review']]
    return recommendations

In [None]:
restaurants_for_similar_words = recommend_restaurants_for_similar_words("냉면")

# Display the results
print("Restaurants and menus for words similar to '냉면':")
print(restaurants_for_similar_words)

Restaurants and menus for words similar to '냉면':
       Restaurant                                               Menu  \
4561   점프JUMP-나운점  ＋과일잼 와플 ＋/1(와플 선택(딸기잼 와플),생크림 선택(생크림 x),Waffle...   
4562   점프JUMP-나운점  ＋과일잼 와플 ＋/2(와플 선택(블루베리잼 와플),생크림 선택(생크림 기본)),카라...   
4566   점프JUMP-나운점  유산균 요거트 점프치노Yogurt/1(메뉴 선택(딸기 요거트 점프치노)),[한정판]...   
4567   점프JUMP-나운점  유산균 요거트 점프치노Yogurt/1(메뉴 선택(애플망고 요거트 점프치노)),[한정...   
4605   점프JUMP-나운점  수제 후랑크 독일소세지/1(Sausage 추가선택(케찹)),과일주스 Fruits J...   
...           ...                                                ...   
66245  쉬즈베이글-군산대점  콰트로불고기플러스 토스트/1(빵 변경 선택(변경 안함)),콰트로핫새우플러스 토스트/...   
66246  쉬즈베이글-군산대점  콰트로베이컨플러스 토스트/1(빵 변경 선택(변경 안함)),크림치즈베이글/1,바닐라라...   
66247  쉬즈베이글-군산대점  콰트로불고기플러스 토스트/1(빵 변경 선택(베이글로 변경),토핑 추가(포테이토 추가...   
66249  쉬즈베이글-군산대점  콰트로불고기플러스 토스트/1(빵 변경 선택(베이글로 변경),토핑 추가(포테이토 추가...   
66251  쉬즈베이글-군산대점  콰트로불고기플러스 토스트/1(빵 변경 선택(베이글로 변경)),포테이토치즈베이글/1(...   

                                                  Review  
4561                       

In [54]:

def recommend_specific_menu_and_restaurants(base_word, top_n=5):
    if base_word not in model.wv:
        return f"The word '{base_word}' is not in the vocabulary."

    similar_words = model.wv.most_similar(base_word, topn=top_n)

    results = []
    for similar_word, similarity_score in similar_words: #요기서 불용 명사 제거, 조사 제거
        matching_rows = review_data[review_data['Tokens'].apply(lambda tokens: similar_word in tokens)]
        
        for _, row in matching_rows.iterrows():
            results.append({
                "Similar Word": similar_word,
                "Similarity Score": similarity_score,
                "Restaurant": row['Restaurant'],
                "Menu": row['Menu'],
                "Review": row['Review']
            })

    results_df = pd.DataFrame(results)
    

    results_df = results_df.drop_duplicates(subset=["Similar Word", "Restaurant"])
    
    results_df = results_df.sort_values(by="Similarity Score", ascending=False)
    
    return results_df

specific_menu_recommendations = recommend_specific_menu_and_restaurants("물냉면")


print("Recommendations for menus and restaurants similar to '냉면':")
print(specific_menu_recommendations.to_string(index=False))


Recommendations for menus and restaurants similar to '냉면':
Similar Word  Similarity Score       Restaurant                                                                                                                                    Menu                                                                                                                                            Review
          열무          0.983786            맛남-본점                                                                                                                      얼큰수제비만두국/1,수제돈까스/1                                                         깍두기의 열무 짱 아삭아삭 반찬가게인줄,,, 얼큰수제비는 전에 먹었을때보다 자극적이지 않고 좋았어요 얼큰이라 맵고 강했는데 저는 오늘이 부담스럽지 않고 좋았어요
          열무          0.983786     메밀냉면&굴구이통삼겹살 둘이 먹을꺼야 세트（간장 수육）/1(냉면 선택 1(메밀 물냉면),냉면 선택 2(열무 물 냉면),만두 선택 1(고기만두),만두 선택 2(김치만두),양념장 선택 1(보통 양념장),양념장 선택 2(보통 양념장),물 육수 추가 선택(물 육수 추가))                                                                                                  

In [41]:
# Example usage: Find menus and restaurants similar to "냉면"
specific_menu_recommendations = recommend_specific_menu_and_restaurants("짜장면")

# Display the results
print("Recommendations for menus and restaurants similar to '냉면':")
print(specific_menu_recommendations.to_string(index=False))


Recommendations for menus and restaurants similar to '냉면':
Similar Word  Similarity Score        Restaurant                                                                                                                                                                                                                                                                                                                                                                                                                                       Menu                                                                                                                                                                                                                                                                                                      Review
         짬뽕이          0.989952        국수나무-군산나운점                                                                                                                     

JVMNotFoundException: No JVM shared library file (jvm.dll) found. Try setting up the JAVA_HOME environment variable properly.

In [None]:
# Function to recommend menus and restaurants based on menu names (not review tokens)
def recommend_specific_menu_and_restaurants_from_menu(base_menu_name, top_n=5):
    if base_menu_name not in model.wv:
        return f"The menu name '{base_menu_name}' is not in the vocabulary."
    
    # Find top N most similar words (menu names) to the base menu name
    similar_menus = model.wv.most_similar(base_menu_name, topn=top_n)
    
    # Prepare results with similarity scores and matching restaurant info
    results = []
    for similar_menu, similarity_score in similar_menus:
        # Filter rows where the similar menu appears in the "Menu" column
        matching_rows = review_data[review_data['Menu'].str.contains(similar_menu, na=False)]
        
        for _, row in matching_rows.iterrows():
            results.append({
                "Similar Menu Name": similar_menu,
                "Similarity Score": similarity_score,
                "Restaurant": row['Restaurant'],
                "Menu": row['Menu'],
                "Review": row['Review']
            })
    
    # Convert to a DataFrame for better readability
    results_df = pd.DataFrame(results)
    return results_df

# Example usage: Find menus and restaurants similar to "물냉면"
specific_menu_recommendations = recommend_specific_menu_and_restaurants_from_menu("물냉면")

# Display the results
print("Menu and Restaurant Recommendations (From Menu Names):")
print(specific_menu_recommendations.to_string(index=False))


Menu and Restaurant Recommendations (From Menu Names):
Similar Menu Name  Similarity Score       Restaurant                                                                                                                                                                                                                                                                                                                                                                                                                                                    Menu                                                                                                                                                                                                                                                 Review
               반한          0.993313        카페인중독-군산점                                                                                                                                                         

In [7]:
# Step 1: Load the data
import pandas as pd
import re

# Load the Excel file
file_path = 'reviewdata.xlsx'  # Ensure this path points to your actual file location
review_data = pd.read_excel(file_path)

# Clean the Corrected_Food_Names column
def extract_nouns_simple(menu_list):
    cleaned_menu = []
    for menu in menu_list:
        menu = re.sub(r'[^\w\s]', '', menu)  # Remove unwanted characters
        words = re.findall(r'[가-힣]+', menu)  # Extract Hangul words
        cleaned_menu.extend(words)
    return cleaned_menu

review_data['Cleaned_Nouns'] = review_data['Corrected_Food_Names'].apply(
    lambda x: extract_nouns_simple(eval(x)) if isinstance(x, str) else []
)

# Step 2: Define the recommendation function
def recommend_specific_menu_and_restaurants(keyword):
    # Filter data where the keyword is in 'Cleaned_Nouns'
    related_data = review_data[review_data['Cleaned_Nouns'].apply(lambda nouns: keyword in nouns)]
    
    # Analyze and summarize the most frequent menus and restaurants
    top_menus = related_data['Menu'].value_counts().head(10)
    top_restaurants = related_data['Restaurant'].value_counts().head(10)
    
    # Create a DataFrame for results
    recommendations = pd.DataFrame({
        'Menu': top_menus.index,
        'Menu Frequency': top_menus.values,
        'Restaurant': top_restaurants.index,
        'Restaurant Frequency': top_restaurants.values
    })
    return recommendations

# Step 3: Use the function
specific_menu_recommendations = recommend_specific_menu_and_restaurants("족발")

# Step 4: Display the results
print("Recommendations for menus and restaurants similar to '냉면':")
print(specific_menu_recommendations.to_string(index=False))


ValueError: All arrays must be of the same length