In [38]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances, manhattan_distances 
from tabulate import tabulate

class RestaurantRecommender:
    """Content-based restaurant recommender driven by menu-text similarity.

    Expected call order: ``preprocess_data`` -> ``calculate_similarity`` ->
    ``calculate_weighted_ratings`` -> ``find_similar_restaurant`` /
    ``evaluate__similarity``.

    Columns read from the table: '업소명' (restaurant name), '메뉴' (menu text),
    '별점' (rating) and '별점 수' (number of ratings).
    Columns written to the table: '가중 별점' (weighted rating) and '유사도'
    (similarity to the most recently queried restaurant).
    """

    def __init__(self, data_path):
        """Load the restaurant table from the CP949-encoded CSV at ``data_path``."""
        self.data = pd.read_csv(data_path, encoding='cp949')

    @staticmethod
    def _menu_to_text(menu):
        """Return ``menu`` as a single string; a missing value becomes ''."""
        if isinstance(menu, str):
            return menu
        # The scalar check comes first because pd.isna() on a list returns an array.
        if pd.api.types.is_scalar(menu) and pd.isna(menu):
            return ''
        return ''.join(menu)

    def preprocess_data(self):
        """Normalise the '메뉴' column so that every row holds one string.

        Rows with a missing menu get an empty string instead of raising.
        """
        self.data['메뉴'] = self.data['메뉴'].apply(self._menu_to_text)

    def calculate_similarity(self):
        """Vectorise the menus and store their pairwise cosine similarity.

        Sets ``self.menu_mat`` (uni-/bi-gram count matrix) and ``self.menu_sim``
        (square similarity matrix, one row/column per row of ``self.data``).
        """
        # min_df=1 keeps every term (the same vocabulary min_df=0 produced) and is
        # accepted by scikit-learn releases that reject an integer min_df below 1.
        count_vec = CountVectorizer(min_df=1, ngram_range=(1, 2))
        self.menu_mat = count_vec.fit_transform(self.data['메뉴'])
        self.menu_sim = cosine_similarity(self.menu_mat, self.menu_mat)

    def weighted_vote_average(self, record, m, C):
        """Blend one restaurant's rating with the global mean rating.

        Parameters:
        - record: mapping/row providing '별점 수' (vote count) and '별점' (rating)
        - m: vote-count threshold used as the weight of the global mean
        - C: global mean rating

        Returns:
        - (v / (v + m)) * R + (m / (v + m)) * C, or ``C`` when ``v + m`` is 0
        """
        v = record['별점 수']
        R = record['별점']
        total = v + m
        if total == 0:
            # No votes and no threshold: nothing to weigh, fall back to the mean.
            return C
        return ((v / total) * R) + ((m / total) * C)

    def calculate_weighted_ratings(self, percentile):
        """Add the '가중 별점' column.

        ``percentile`` selects the quantile of '별점 수' that is used as the
        threshold ``m``; the mean of '별점' is used as ``C``.
        """
        m = self.data['별점 수'].quantile(percentile)
        C = self.data['별점'].mean()
        self.data['가중 별점'] = self.data.apply(self.weighted_vote_average, axis=1, args=(m, C))

    def _score_against(self, title_name):
        """Fill the '유사도' column with every row's similarity to ``title_name``.

        Sets ``self.rest_menu`` (all rows carrying that name) and
        ``self.rest_index`` (their index labels). When several rows share the
        name, the first one is used as the query.

        Returns:
        - the index label of the query row

        Raises:
        - ValueError: if no row is named ``title_name``
        """
        name_matches = (self.data['업소명'] == title_name).to_numpy()
        positions = name_matches.nonzero()[0]
        if positions.size == 0:
            raise ValueError(f"No restaurant named {title_name!r} in the data.")

        self.rest_menu = self.data[name_matches]
        self.rest_index = self.rest_menu.index.values

        # menu_sim is ordered by row position, so it must be addressed by position,
        # not by index label (the two only coincide for an untouched RangeIndex).
        query_pos = positions[0]
        self.data["유사도"] = self.menu_sim[query_pos]
        return self.data.index[query_pos]

    def find_similar_restaurant(self, title_name, top_n=10):
        """Return the ``top_n`` restaurants most similar to ``title_name``.

        Rows are ranked by '유사도' and then by '가중 별점', both descending, and
        the query row itself is left out. Requires ``calculate_similarity`` and
        ``calculate_weighted_ratings`` to have been run.

        Raises:
        - ValueError: if no row is named ``title_name``
        """
        query_label = self._score_against(title_name)
        self.pre_temp = self.data.sort_values(by=["유사도", "가중 별점"], ascending=False)
        others = self.pre_temp[self.pre_temp.index != query_label]
        # Slice the ranked frame directly instead of re-selecting from self.data
        # with iloc, which would treat index labels as positions.
        return others.iloc[:top_n].copy()

    def display_recommendations(self, recommendations):
        """Return the name / rating / similarity columns of ``recommendations``.

        Nothing is printed; the caller decides how to render the returned table.
        """
        return recommendations[['업소명', '별점', '유사도']]

    def evaluate__similarity(self, title_name):
        """Print the query restaurant and the ten highest similarity scores.

        Prints the name and menu of the row(s) named ``title_name``, then the
        first ten '유사도' values after sorting all rows by similarity.

        Raises:
        - ValueError: if no row is named ``title_name``
        """
        self._score_against(title_name)

        # Show the name and the menu of the input restaurant.
        print(self.rest_menu['업소명'])
        print(self.rest_menu['메뉴'])

        # Rank every restaurant by menu similarity to the input restaurant.
        self.pre_temp = self.data.sort_values(by=["유사도"], ascending=False)
        print(self.pre_temp["유사도"].head(10))
        # NOTE: precision / recall / F1 are not computed here; they need
        # ground-truth relevance labels to compare the ranking against.

In [39]:
if __name__ == '__main__':
    # Load the CSV and prepare everything the recommender needs before querying.
    recommender = RestaurantRecommender(data_path='./source.csv')
    recommender.preprocess_data()
    recommender.calculate_similarity()
    recommender.calculate_weighted_ratings(percentile=0.6)

    # Fetch the ten restaurants closest to the query and pass them to the display step.
    similar_restaurants = recommender.find_similar_restaurant(title_name='충만치킨', top_n=10)
    recommender.display_recommendations(recommendations=similar_restaurants)

    # Print the query restaurant and its highest similarity scores.
    recommender.evaluate__similarity(title_name="충만치킨")


27    충만치킨
Name: 업소명, dtype: object
27    후라이드치킨, 양념치킨, 간장치킨, 어니언치킨
Name: 메뉴, dtype: object
27     1.000000
74     0.714286
41     0.676123
102    0.585540
96     0.571429
90     0.503953
81     0.458349
66     0.428571
88     0.285714
68     0.251976
Name: 유사도, dtype: float64


In [10]:
def precision_at_k(true_items, predicted_items, k):
    """
    Calculate Precision@k.

    The score is the number of distinct relevant items among the top-k
    predictions, divided by the number of predictions actually considered
    (k, or fewer when fewer than k predictions were supplied).

    Parameters:
    - true_items: Iterable of true items (ground truth)
    - predicted_items: Iterable of predicted items, best first
    - k: Number of top items to consider

    Returns:
    - Precision@k as a float; 0.0 when there are no predictions

    Raises:
    - ValueError: if k is not positive
    """
    if k <= 0:
        raise ValueError("k must be a positive integer.")

    # Materialise once so that iterators/generators can be sliced and measured.
    predicted = list(predicted_items)
    if not predicted:
        # Nothing was recommended, so nothing can be a hit (and avoids 0 / 0).
        return 0.0

    top_k = predicted[:k]
    hits = set(true_items) & set(top_k)

    # len(top_k) equals min(k, len(predicted)).
    return len(hits) / len(top_k)

# Example usage: score a toy prediction list against a toy ground truth.
k = 3
true_items = [1, 2, 3, 4, 5]
predicted_items = [2, 4, 6, 8, 10]

precision_at_k_value = precision_at_k(
    true_items=true_items,
    predicted_items=predicted_items,
    k=k,
)

print(f'Precision@{k}: {precision_at_k_value}')



Precision: 0.5
Recall: 1.0
F1 Score: 0.6666666666666666
