데이터 수집 및 처리

In [13]:
import pandas as pd

# Sample purchase history (the same data that was generated in the previous step).
# Each column is given as a keyword; row i across the four lists is one purchase.
purchase_data = pd.DataFrame(
    dict(
        user_id=['user1', 'user1', 'user2', 'user2', 'user3', 'user3'],
        product_id=['product1', 'product2', 'product2', 'product4', 'product1', 'product5'],
        purchase_date=pd.to_datetime(
            ['2024-01-01', '2024-01-05', '2024-01-03', '2024-01-07', '2024-01-02', '2024-01-08']
        ),
        quantity=[1, 2, 1, 3, 1, 2],
    )
)

# Define the data preprocessing function
def preprocess_data(df):
    """Return a cleaned copy of ``df`` without modifying the input.

    Two steps are applied, in this order:

    1. Missing values in every column are replaced with 0.
    2. Fully duplicated rows are dropped (the first occurrence is kept and
       the surviving rows keep their original index labels).

    Args:
        df: The pandas DataFrame to clean.

    Returns:
        A new DataFrame; ``df`` itself is left untouched, so callers that
        still hold the raw data do not see it change underneath them.
    """
    # Use the non-inplace forms: each call returns a new frame, which avoids
    # mutating the caller's object as a hidden side effect.
    filled = df.fillna(0)
    return filled.drop_duplicates()

# Run the purchase history through the preprocessing step
purchase_data = preprocess_data(purchase_data)

# Header line followed by the frame itself, each on its own line
print("Preprocessed Purchase Data:", purchase_data, sep="\n")


Preprocessed Purchase Data:
  user_id product_id purchase_date  quantity
0   user1   product1    2024-01-01         1
1   user1   product2    2024-01-05         2
2   user2   product2    2024-01-03         1
3   user2   product4    2024-01-07         3
4   user3   product1    2024-01-02         1
5   user3   product5    2024-01-08         2


1. 데이터 구조 정의

In [15]:
class ListNode:
    """One key/value entry in a hash-table bucket's singly linked chain."""

    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.next = None  # following node in the same bucket; None at the tail


class HashTable:
    """Fixed-size hash table that resolves collisions by separate chaining.

    Each slot of ``table`` is either ``None`` or the head ``ListNode`` of a
    linked chain holding every entry whose key hashes to that slot. The
    table never resizes.
    """

    def __init__(self, size=100):
        """Create an empty table with ``size`` buckets.

        Raises:
            ValueError: If ``size`` is not positive (the modulo in ``_hash``
                needs at least one bucket).
        """
        if size <= 0:
            raise ValueError("size must be a positive integer")
        self.size = size
        self.table = [None] * size

    def _hash(self, key):
        """Map ``key`` to a bucket index in ``range(self.size)``."""
        return hash(key) % self.size

    def insert(self, key, value):
        """Store ``value`` under ``key``, replacing any existing value.

        If ``key`` is already present its node is updated in place, so a key
        appears at most once in the table; otherwise a new node is appended
        to the end of the bucket's chain.
        """
        index = self._hash(key)
        current = self.table[index]
        if current is None:
            self.table[index] = ListNode(key, value)
            return

        # Walk the chain: overwrite on a key match, otherwise stop at the tail.
        while True:
            if current.key == key:
                current.value = value
                return
            if current.next is None:
                break
            current = current.next
        current.next = ListNode(key, value)

    def search(self, key):
        """Return the value stored under ``key``, or ``None`` if absent.

        A stored value of ``None`` cannot be told apart from a missing key.
        """
        current = self.table[self._hash(key)]
        while current is not None:
            if current.key == key:
                return current.value
            current = current.next
        return None


2. 데이터 저장

In [17]:
# Hash table that will hold each user's purchase history
purchase_history = HashTable()

# Group the (product, quantity) pairs of every purchase under its user id
for _, row in purchase_data.iterrows():
    user_id, product_id, quantity = row['user_id'], row['product_id'], row['quantity']
    purchases = purchase_history.search(user_id)
    if not purchases:
        # First purchase seen for this user: start a new list
        purchase_history.insert(user_id, [(product_id, quantity)])
        continue
    purchases.append((product_id, quantity))

# Dump every non-empty bucket as "Index i: (key, value) -> ... -> None"
for index, node in enumerate(purchase_history.table):
    if not node:
        continue
    current = node
    chain = []
    while current:
        chain.append(f"({current.key}, {current.value}) -> ")
        current = current.next
    print(f"Index {index}: " + "".join(chain) + "None")


Index 7: (user1, [('product1', 1), ('product2', 2)]) -> None
Index 21: (user2, [('product2', 1), ('product4', 3)]) -> None
Index 65: (user3, [('product1', 1), ('product5', 2)]) -> None


3. 유사도 계산

In [19]:
def calculate_similarity(user1_data, user2_data):
    """Score how much two purchase histories overlap.

    Each history is an iterable of ``(product, quantity)`` pairs. Quantities
    of the same product are first totalled per user, so a product bought in
    several separate purchases counts with its full quantity. The score is
    the sum, over products both users bought, of the smaller of the two
    totals.

    Args:
        user1_data: ``(product, quantity)`` pairs of the first user.
        user2_data: ``(product, quantity)`` pairs of the second user.

    Returns:
        The overlap score; 0 when the users share no product.
    """
    def total_per_product(purchases):
        # Build the per-product totals once instead of per common product.
        totals = {}
        for product, quantity in purchases:
            totals[product] = totals.get(product, 0) + quantity
        return totals

    totals1 = total_per_product(user1_data)
    totals2 = total_per_product(user2_data)
    return sum(
        min(quantity, totals2[product])
        for product, quantity in totals1.items()
        if product in totals2
    )

# Compute the similarity between every pair of users.
# Distinct user ids; going through a set means their order is not fixed.
user_ids = list(set(purchase_data['user_id']))

similarity_scores = {}
for i, user1 in enumerate(user_ids):
    user1_data = purchase_history.search(user1)
    # Pair each user only with the users that come after it, so every
    # unordered pair is scored exactly once.
    for user2 in user_ids[i + 1:]:
        user2_data = purchase_history.search(user2)
        if not (user1_data and user2_data):
            continue
        similarity_scores[(user1, user2)] = calculate_similarity(user1_data, user2_data)

# Print the similarity scores
print("\nUser Similarity Scores:")
for pair, score in similarity_scores.items():
    print(f"{pair[0]} <-> {pair[1]}: {score}")



User Similarity Scores:
user2 <-> user1: 1
user2 <-> user3: 0
user1 <-> user3: 1


4. 정렬 알고리즘 추가 (삽입 정렬 사용)

In [21]:
# Insertion sort
def insertion_sort(items):
    """Sort ``items`` in place by element ``[1]``, largest first.

    Elements that compare equal on ``[1]`` keep their relative order.
    The same list object is returned for convenience.
    """
    for boundary in range(1, len(items)):
        position = boundary
        # Bubble the new element towards the front while the element just
        # before it has a strictly smaller sort value.
        while position > 0 and items[position - 1][1] < items[position][1]:
            items[position - 1], items[position] = items[position], items[position - 1]
            position -= 1
    return items


5. 큐(Queue)를 사용한 유사 사용자 탐색

In [23]:
from collections import deque

# Breadth-first search for similar users using a queue
def bfs_similarity(user_id, similarity_scores, min_score=None):
    """List every user reachable from ``user_id`` in the similarity graph.

    ``similarity_scores`` maps ``(user_a, user_b)`` pairs to a score; each
    pair is treated as an undirected edge. Users are returned in the order a
    breadth-first traversal first discovers them, and each user appears at
    most once.

    Args:
        user_id: The user to start from (never included in the result).
        similarity_scores: Mapping of ``(user_a, user_b)`` to a score.
        min_score: Optional threshold. When given, pairs whose score is
            below it are ignored. With the default ``None`` every pair is an
            edge regardless of its score, including pairs scored 0.

    Returns:
        A list of user ids, without duplicates, in discovery order.
    """
    # Build the neighbour lists once instead of rescanning all pairs for
    # every dequeued user. Per-user neighbour order follows the dict order.
    adjacency = {}
    for (user1, user2), score in similarity_scores.items():
        if min_score is not None and score < min_score:
            continue
        adjacency.setdefault(user1, []).append(user2)
        adjacency.setdefault(user2, []).append(user1)

    # Mark users when they are enqueued (not when dequeued) so a user that is
    # adjacent to several already-processed users is only reported once.
    discovered = {user_id}
    queue = deque([user_id])
    similar_users = []

    while queue:
        current_user = queue.popleft()
        for neighbor in adjacency.get(current_user, ()):
            if neighbor in discovered:
                continue
            discovered.add(neighbor)
            queue.append(neighbor)
            similar_users.append(neighbor)
    return similar_users


6. 다익스트라 알고리즘 사용

In [25]:
import heapq

def dijkstra_similarity(user_id, similarity_scores):
    """Compute shortest "dissimilarity" distances from ``user_id``.

    ``similarity_scores`` maps ``(user_a, user_b)`` pairs to a score; each
    pair is an undirected edge whose length is ``1 - score``, clamped at 0.
    The clamp matters because Dijkstra's algorithm requires non-negative
    edge lengths: a score above 1 would otherwise yield a negative edge and
    the relaxation loop would never terminate.

    Args:
        user_id: The user to measure distances from.
        similarity_scores: Mapping of ``(user_a, user_b)`` to a score.

    Returns:
        A dict mapping ``user_id`` and every user that appears in
        ``similarity_scores`` to its distance from ``user_id``
        (``float('inf')`` for users that cannot be reached).
    """
    # Build the weighted neighbour lists once. The node set comes from the
    # scores themselves, so no module-level user list is required.
    adjacency = {user_id: []}
    for (user1, user2), score in similarity_scores.items():
        weight = max(0, 1 - score)  # turn the similarity score into a distance
        adjacency.setdefault(user1, []).append((user2, weight))
        adjacency.setdefault(user2, []).append((user1, weight))

    distances = {user: float('inf') for user in adjacency}
    distances[user_id] = 0
    priority_queue = [(0, user_id)]

    while priority_queue:
        current_distance, current_user = heapq.heappop(priority_queue)

        # Stale heap entry: a shorter path to this user was already settled.
        if current_distance > distances[current_user]:
            continue

        for neighbor, weight in adjacency[current_user]:
            distance = current_distance + weight
            if distance < distances[neighbor]:
                distances[neighbor] = distance
                heapq.heappush(priority_queue, (distance, neighbor))

    return distances


7. 백트래킹을 사용한 최적의 추천 상품 조합 찾기

In [27]:
def find_best_combination(products, target, current_combination=None, best_combination=None):
    """Find the longest set of products whose quantities sum to ``target``.

    ``products`` is a sequence of ``(product, quantity)`` pairs. Products are
    tried in list order and an entry equal to one already chosen is never
    added twice. Among combinations of equal length the first one found
    wins.

    Args:
        products: Candidate ``(product, quantity)`` pairs.
        target: The total quantity the chosen products must add up to.
        current_combination: Optional list of pairs that are already chosen.
            It is used as the working list during the search and is restored
            to its original contents before returning.
        best_combination: Optional list to receive the result. It is updated
            in place whenever a longer matching combination is found.

    Returns:
        The best combination found (``best_combination`` itself when one was
        passed in); an empty list when no combination reaches ``target``.
    """
    # Fresh lists per call: mutable defaults would carry the previous call's
    # result over and make later calls compare against stale state.
    if current_combination is None:
        current_combination = []
    if best_combination is None:
        best_combination = []

    def search(start, running_total):
        if running_total == target and len(current_combination) > len(best_combination):
            best_combination[:] = current_combination
        # Only look at later positions: every subset is visited once instead
        # of once per ordering of its members.
        for position in range(start, len(products)):
            product = products[position]
            if product in current_combination:
                continue
            current_combination.append(product)
            search(position + 1, running_total + product[1])
            current_combination.pop()

    search(0, sum(quantity for _, quantity in current_combination))
    return best_combination


8. 추천 시스템 구현 (머신러닝 알고리즘 사용)

In [29]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

def train_recommendation_model(purchase_data):
    """Fit a 3-nearest-neighbour classifier on the purchase history.

    The ``user_id`` and ``product_id`` columns are each label-encoded to
    integers; every purchase row becomes one ``[user_code, product_code]``
    sample whose class label is that row's ``quantity``.

    Args:
        purchase_data: DataFrame with ``user_id``, ``product_id`` and
            ``quantity`` columns.

    Returns:
        A tuple ``(model, user_encoder, product_encoder)`` holding the fitted
        classifier and the two fitted label encoders.
    """
    label_encoder_user = LabelEncoder()
    label_encoder_product = LabelEncoder()
    user_codes = label_encoder_user.fit_transform(purchase_data['user_id'])
    product_codes = label_encoder_product.fit_transform(purchase_data['product_id'])

    # One [user, product] feature pair per purchase row, labelled by quantity
    features = [[user_code, product_code] for user_code, product_code in zip(user_codes, product_codes)]
    labels = list(purchase_data['quantity'].to_numpy())

    model = KNeighborsClassifier(n_neighbors=3)
    model.fit(features, labels)
    return model, label_encoder_user, label_encoder_product

# Train the machine-learning model and keep the fitted encoders alongside it
(
    recommendation_model,
    user_encoder,
    product_encoder,
) = train_recommendation_model(purchase_data)

def recommend_products(user_id, purchase_history, similarity_scores, recommendation_model, user_encoder, product_encoder, target_quantity=None):
    """Recommend products to ``user_id`` based on similar users' purchases.

    Steps:

    1. Collect the users reachable from ``user_id`` via ``bfs_similarity``.
    2. Pool their ``(product, quantity)`` purchases, counting each user once.
    3. Sort the pool by quantity (largest first) with ``insertion_sort``.
    4. Pick the longest combination whose quantities sum to the target with
       ``find_best_combination``.

    Args:
        user_id: The user to recommend for.
        purchase_history: Table whose ``search(user)`` returns that user's
            list of ``(product, quantity)`` pairs, or a falsy value.
        similarity_scores: Mapping of ``(user_a, user_b)`` to a score.
        recommendation_model: Fitted model exposing ``predict``.
        user_encoder: Fitted encoder for user ids.
        product_encoder: Fitted encoder for product ids.
        target_quantity: Total quantity the recommended products should add
            up to. When omitted, the module-level ``target_quantity``
            variable is used.

    Returns:
        A list of recommended product ids; empty when no combination of the
        pooled purchases reaches the target.

    Raises:
        NameError: If ``target_quantity`` is neither passed nor defined at
            module level.
    """
    if target_quantity is None:
        # Backward compatibility: earlier callers set a module-level variable
        # instead of passing the target explicitly.
        try:
            target_quantity = globals()['target_quantity']
        except KeyError:
            raise NameError(
                "target_quantity must be passed or defined at module level"
            ) from None

    # Pool the purchases of every similar user exactly once, even if the
    # traversal reports the same user more than once.
    recommendations = []
    seen_users = {user_id}
    for similar_user in bfs_similarity(user_id, similarity_scores):
        if similar_user in seen_users:
            continue
        seen_users.add(similar_user)
        user_data = purchase_history.search(similar_user)
        if user_data:
            recommendations.extend(user_data)

    sorted_recommendations = insertion_sort(recommendations)
    best_combination = find_best_combination(sorted_recommendations, target_quantity)
    if not best_combination:
        # Nothing to recommend; also avoids asking the model to predict on an
        # empty sample list.
        return []

    recommended_products = [product for product, _ in best_combination]

    # NOTE(review): the predicted quantities are not used to rank or filter
    # the result; the call is kept so unknown users/products still fail in
    # the encoders as before. Confirm how the predictions should be used.
    user_id_encoded = user_encoder.transform([user_id])[0]
    product_ids_encoded = product_encoder.transform(recommended_products)
    X_new = [[user_id_encoded, product_id] for product_id in product_ids_encoded]
    recommendation_model.predict(X_new)

    return recommended_products

# Generate product recommendations for user 'user1'
user_id, target_quantity = 'user1', 5
recommendations = recommend_products(
    user_id,
    purchase_history,
    similarity_scores,
    recommendation_model,
    user_encoder,
    product_encoder,
)

print(f"\nRecommendations for {user_id}: {recommendations}")



Recommendations for user1: ['product4', 'product2', 'product1']
