In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.impute import SimpleImputer

# NOTE(review): a `global` statement at module level has no effect, and
# `raw_restaurant_dt` is never assigned in the visible code — confirm whether
# this line is still needed.
global raw_restaurant_dt
# 1. 데이터 로드 및 전처리
def load_and_preprocess(file_path_user, file_paths_restaurant):
    """Load the user and restaurant CSV files and build scaled feature matrices.

    Only ``float64``/``int64`` columns are used as features. Missing values are
    replaced by the column mean, every restaurant row is converted to
    proportions of its own row total, and both matrices are standardized.

    Args:
        file_path_user: Path of the user CSV; it must contain a ``nickname``
            column, which is used as the user id.
        file_paths_restaurant: Path of the restaurant CSV; it must contain a
            ``가게명`` column, which is used as the restaurant id.

    Returns:
        A tuple ``(user_data_scaled, restaurant_data_scaled, user_ids,
        restaurant_ids, restaurant_data)``: the two scaled feature matrices,
        the two id lists (in file row order), and the unmodified restaurant
        DataFrame as read from disk.
    """
    user_data = pd.read_csv(file_path_user)
    restaurant_data = pd.read_csv(file_paths_restaurant)

    # Extract the user and restaurant identifiers.
    user_ids = user_data['nickname'].tolist()
    restaurant_ids = restaurant_data['가게명'].tolist()

    # Keep only the numeric columns as features.
    user_data_numeric = user_data.select_dtypes(include=['float64', 'int64'])
    restaurant_data_numeric = restaurant_data.select_dtypes(include=['float64', 'int64'])

    # Fill missing values with the column mean. Each dataset gets its own
    # imputer so that no fitted state is shared between the two.
    user_data_imputed = SimpleImputer(strategy='mean').fit_transform(user_data_numeric)
    restaurant_data_imputed = SimpleImputer(strategy='mean').fit_transform(
        restaurant_data_numeric
    )

    # Convert counts to proportions of the row total. A row that sums to zero
    # would otherwise produce NaN/inf through division by zero and break the
    # later scaling/PCA steps, so its divisor is replaced by 1 (the row then
    # stays all zeros).
    row_totals = restaurant_data_imputed.sum(axis=1, keepdims=True)
    safe_totals = np.where(row_totals == 0, 1.0, row_totals)
    restaurant_data_proportions = restaurant_data_imputed / safe_totals

    # Standardize each matrix with its own scaler.
    user_data_scaled = StandardScaler().fit_transform(user_data_imputed)
    restaurant_data_scaled = StandardScaler().fit_transform(restaurant_data_proportions)

    return user_data_scaled, restaurant_data_scaled, user_ids, restaurant_ids, restaurant_data

# 2. PCA 수행
def perform_pca(data, n_components=3):
    """Fit a fresh PCA on ``data`` and return its projection.

    Args:
        data: Feature matrix to reduce.
        n_components: Number of principal components to keep.

    Returns:
        ``data`` projected onto the fitted principal components.
    """
    return PCA(n_components=n_components).fit_transform(data)

# 3. 유사도 계산
def calculate_recommendation_scores(user_features, restaurant_features, weights=None):
    """Return the cosine similarity of every user row against every restaurant row.

    Args:
        user_features: Feature matrix with one row per user.
        restaurant_features: Feature matrix with one row per restaurant.
        weights: Optional per-feature weights. When given, they are multiplied
            into the restaurant features only; the user features are left
            unweighted.

    Returns:
        The similarity matrix produced by ``cosine_similarity``.
    """
    if weights is None:
        return cosine_similarity(user_features, restaurant_features)

    # Apply the weights to the restaurant side before comparing.
    weighted_restaurants = restaurant_features * np.array(weights)
    return cosine_similarity(user_features, weighted_restaurants)


# 4. 추천 리스트 생성
def generate_recommendations(scores, user_ids, restaurant_ids, top_n=5):
    """Build a table with the ``top_n`` best-scoring restaurants for each user.

    Args:
        scores: 2-D score array indexed as ``scores[user_index][restaurant_index]``.
        user_ids: User identifiers, in the same order as the rows of ``scores``.
        restaurant_ids: Restaurant identifiers, in the same order as the
            columns of ``scores``.
        top_n: Maximum number of restaurants to keep per user.

    Returns:
        A DataFrame with one row per user and the columns ``user_id``,
        ``recommended_restaurants`` and ``scores``; the two list columns are
        ordered from highest to lowest score.
    """
    def _entry(position, user_id):
        # Sort ascending, reverse to get descending order, keep the best top_n.
        user_scores = scores[position]
        best = user_scores.argsort()[::-1][:top_n]
        return {
            "user_id": user_id,
            "recommended_restaurants": [restaurant_ids[k] for k in best],
            "scores": [user_scores[k] for k in best],
        }

    return pd.DataFrame(
        [_entry(position, user_id) for position, user_id in enumerate(user_ids)]
    )

# 5. 결과 출력 함수
def print_recommendations(recommendations):
    """Print every user's recommended restaurants with their scores.

    Args:
        recommendations: DataFrame with the columns ``user_id``,
            ``recommended_restaurants`` and ``scores``, where the last two
            hold equally ordered sequences for each user.
    """
    for entry in recommendations.to_dict("records"):
        lines = [
            f"User ID: {entry['user_id']}",
            "Recommended Restaurants and Scores:",
        ]
        lines.extend(
            f"  - {restaurant}: {score:.4f}"
            for restaurant, score in zip(entry['recommended_restaurants'], entry['scores'])
        )
        # The trailing empty entry yields the blank separator line after each user.
        lines.append("")
        print("\n".join(lines))
        
def main(file_path_user, file_paths_restaurant, n_components=3, top_n=5):
    """Run the full pipeline: preprocess, reduce with PCA, score, and print.

    Args:
        file_path_user: Path of the user CSV file.
        file_paths_restaurant: Path of the restaurant CSV file.
        n_components: Number of PCA components used for both datasets.
        top_n: Number of restaurants printed per user.
    """
    # Load and preprocess; the raw restaurant DataFrame (last item) is not used here.
    preprocessed = load_and_preprocess(file_path_user, file_paths_restaurant)
    user_matrix, restaurant_matrix, user_ids, restaurant_ids, _ = preprocessed

    # Reduce each dataset with PCA.
    # NOTE(review): the two PCA models are fitted independently, so the
    # component axes of the user space and the restaurant space are not
    # guaranteed to correspond — confirm that comparing them is intended.
    user_features = perform_pca(user_matrix, n_components)
    restaurant_features = perform_pca(restaurant_matrix, n_components)

    # Score every user against every restaurant.
    scores = calculate_recommendation_scores(user_features, restaurant_features)

    # Build the recommendation table and print it.
    print_recommendations(
        generate_recommendations(scores, user_ids, restaurant_ids, top_n)
    )


# Run the pipeline on the two CSV inputs.
user_file_path = "dacos_tagging_adjusted_sorted.csv"
restaurant_file_paths = "combined_restaurant_data.csv"
main(
    file_path_user=user_file_path,
    file_paths_restaurant=restaurant_file_paths,
    n_components=3,
    top_n=5,
)

User ID: 369369z
Recommended Restaurants and Scores:
  - 짚신매운갈비찜 숙대점: 0.9886
  - 쌍대포소금구이 본점: 0.9879
  - 상록수: 0.9867
  - 원동미나리삼겹살: 0.9839
  - 청기와타운 남영점: 0.9834

User ID: A8뜨뚜
Recommended Restaurants and Scores:
  - 구복만두: 0.9913
  - 옛날감자전: 0.9902
  - 품계: 0.9896
  - 굴다리소곱창: 0.9862
  - 버거인: 0.9831

User ID: Chiabata
Recommended Restaurants and Scores:
  - 네코노스시: 0.9801
  - 베스트프렌드: 0.9781
  - 부암동치킨: 0.9636
  - 비일: 0.9628
  - 나폴리키친: 0.9590

User ID: DEJAVU
Recommended Restaurants and Scores:
  - 별진화로구이: 0.9745
  - 홍짜장 숙대점: 0.9444
  - 두리식당: 0.9381
  - 김밥2000: 0.9356
  - 대관령목장: 0.9193

User ID: FromA
Recommended Restaurants and Scores:
  - 하이퐁가든: 0.9683
  - 솔티드 스모크: 0.9192
  - 윤지식당: 0.8751
  - 홍곱창 숙명여대점: 0.8605
  - 별진화로구이: 0.8425

User ID: Junns
Recommended Restaurants and Scores:
  - 마시앤바시: 0.9526
  - 킷테: 0.8714
  - 사이공마켓 숙대점: 0.6843
  - 스타벅스 숙대점: 0.6206
  - 스택빈 숙대점: 0.6085

User ID: Reenfldos
Recommended Restaurants and Scores:
  - 멘타미: 0.9428
  - 오복함흥냉면: 0.9093
  - 온센 용산구점: 0.9089
  - 품계: 0