In [None]:
import matplotlib
print(matplotlib.__version__)

3.7.5


In [45]:
# 필요한 라이브러리 임포트
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report, confusion_matrix
import ast
from tqdm import tqdm
from collections import Counter

# 모델 정의
class CocktailRecommendationModel(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(CocktailRecommendationModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.batch_norm1 = nn.BatchNorm1d(hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.batch_norm2 = nn.BatchNorm1d(hidden_size2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.batch_norm1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.batch_norm2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# GPU 사용 여부 확인 및 CPU로 강제 설정
device = torch.device("cpu")
print(f"Using device: {device}")

# 데이터 로드 및 전처리
file_path = "final_cocktails.csv"
data = pd.read_csv(file_path)

# 데이터 확인 (필요시 주석 해제)
# print("Available cocktails in the dataset:")
# print(data['category'].unique())

# 'ingredients' 데이터를 리스트로 변환하고 텍스트로 병합
data['ingredients'] = data['ingredients'].apply(ast.literal_eval)
data['ingredients_text'] = data['ingredients'].apply(lambda x: ' '.join(x))

# TF-IDF 벡터화
vectorizer = TfidfVectorizer(max_features=100)
X = vectorizer.fit_transform(data['ingredients_text']).toarray()

# 'category' 레이블 인코딩
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['category'])

# 클래스 불균형 확인
class_counts = Counter(y)
print("Class counts before SMOTE:", class_counts)

# 클래스 불균형 해결 (SMOTE)
min_samples = min(class_counts.values())
smote = SMOTE(random_state=42, k_neighbors=min(min_samples - 1, 5))
X_resampled, y_resampled = smote.fit_resample(X, y)

# 데이터 정규화
scaler = StandardScaler()
X_resampled = scaler.fit_transform(X_resampled)

# 학습/테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# 텐서 변환 및 DataLoader 생성
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# 모델 초기화
input_size = 100
hidden_size1 = 256
hidden_size2 = 128
output_size = len(np.unique(y_resampled))
model = CocktailRecommendationModel(input_size=input_size, hidden_size1=hidden_size1, hidden_size2=hidden_size2, output_size=output_size).to(device)

# 손실 함수와 최적화기
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# 학습 루프
num_epochs = 100
losses = []

for epoch in range(num_epochs):
    model.train()  # 학습 모드 활성화
    running_loss = 0.0
    for batch_X, batch_y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        # 순전파
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # 역전파 및 가중치 업데이트
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    losses.append(running_loss / len(train_loader))
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

    # 10 에포크마다 모델 저장
    if (epoch + 1) % 10 == 0:
        interim_save_path = os.path.join("models", f"cocktail_model_epoch{epoch+1}.pth")
        os.makedirs("models", exist_ok=True)
        torch.save(model.state_dict(), interim_save_path)
        print(f"Model saved at epoch {epoch+1}")

# 평가 루프
model.eval()  # 평가 모드 활성화
correct = 0
total = 0
all_predictions = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        predictions = torch.argmax(outputs, dim=1)
        correct += (predictions == batch_y).sum().item()
        total += batch_y.size(0)
        all_predictions.extend(predictions.cpu().numpy())

accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# 혼동 행렬 및 분류 리포트 출력
print("Confusion Matrix:")
print(confusion_matrix(y_test_tensor.cpu(), np.array(all_predictions)))
print("\nClassification Report:")
print(classification_report(y_test_tensor.cpu(), np.array(all_predictions), target_names=label_encoder.classes_))

# 최종 모델 저장
final_save_path = os.path.join("models", "cocktail_model.pth")
torch.save(model.state_dict(), final_save_path)
print(f"Final model saved to {final_save_path}")

Using device: cpu
Class counts before SMOTE: Counter({5: 192, 1: 100, 9: 33, 7: 24, 6: 23, 3: 23, 0: 7, 8: 7, 10: 6, 4: 5, 2: 5})


Epoch 1/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 123.54it/s]


Epoch [1/100], Loss: 1.2934


Epoch 2/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 174.16it/s]


Epoch [2/100], Loss: 0.4983


Epoch 3/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 188.40it/s]


Epoch [3/100], Loss: 0.3057


Epoch 4/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 206.88it/s]


Epoch [4/100], Loss: 0.2144


Epoch 5/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 248.01it/s]


Epoch [5/100], Loss: 0.1611


Epoch 6/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 195.27it/s]


Epoch [6/100], Loss: 0.1301


Epoch 7/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 192.44it/s]


Epoch [7/100], Loss: 0.1136


Epoch 8/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 200.36it/s]


Epoch [8/100], Loss: 0.0956


Epoch 9/100: 100%|███████████████████████████| 106/106 [00:00<00:00, 243.78it/s]


Epoch [9/100], Loss: 0.0880


Epoch 10/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 197.31it/s]


Epoch [10/100], Loss: 0.0798
Model saved at epoch 10


Epoch 11/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 209.10it/s]


Epoch [11/100], Loss: 0.0702


Epoch 12/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 218.34it/s]


Epoch [12/100], Loss: 0.0709


Epoch 13/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 213.16it/s]


Epoch [13/100], Loss: 0.0615


Epoch 14/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 217.06it/s]


Epoch [14/100], Loss: 0.0513


Epoch 15/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 212.70it/s]


Epoch [15/100], Loss: 0.0560


Epoch 16/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 256.61it/s]


Epoch [16/100], Loss: 0.0587


Epoch 17/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 177.59it/s]


Epoch [17/100], Loss: 0.0489


Epoch 18/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 201.25it/s]


Epoch [18/100], Loss: 0.0447


Epoch 19/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 193.24it/s]


Epoch [19/100], Loss: 0.0501


Epoch 20/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 191.93it/s]


Epoch [20/100], Loss: 0.0490
Model saved at epoch 20


Epoch 21/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 193.89it/s]


Epoch [21/100], Loss: 0.0493


Epoch 22/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 199.81it/s]


Epoch [22/100], Loss: 0.0604


Epoch 23/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 235.49it/s]


Epoch [23/100], Loss: 0.0458


Epoch 24/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 197.86it/s]


Epoch [24/100], Loss: 0.0440


Epoch 25/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 177.64it/s]


Epoch [25/100], Loss: 0.0407


Epoch 26/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 174.96it/s]


Epoch [26/100], Loss: 0.0471


Epoch 27/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 173.32it/s]


Epoch [27/100], Loss: 0.0437


Epoch 28/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 169.83it/s]


Epoch [28/100], Loss: 0.0465


Epoch 29/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 173.97it/s]


Epoch [29/100], Loss: 0.0403


Epoch 30/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 198.69it/s]


Epoch [30/100], Loss: 0.0455
Model saved at epoch 30


Epoch 31/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 171.11it/s]


Epoch [31/100], Loss: 0.0330


Epoch 32/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 164.54it/s]


Epoch [32/100], Loss: 0.0361


Epoch 33/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 172.26it/s]


Epoch [33/100], Loss: 0.0359


Epoch 34/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 161.91it/s]


Epoch [34/100], Loss: 0.0345


Epoch 35/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 178.99it/s]


Epoch [35/100], Loss: 0.0422


Epoch 36/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 162.05it/s]


Epoch [36/100], Loss: 0.0342


Epoch 37/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 175.28it/s]


Epoch [37/100], Loss: 0.0328


Epoch 38/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 179.55it/s]


Epoch [38/100], Loss: 0.0351


Epoch 39/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 168.61it/s]


Epoch [39/100], Loss: 0.0353


Epoch 40/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 165.18it/s]


Epoch [40/100], Loss: 0.0434
Model saved at epoch 40


Epoch 41/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 159.86it/s]


Epoch [41/100], Loss: 0.0361


Epoch 42/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 149.43it/s]


Epoch [42/100], Loss: 0.0362


Epoch 43/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 178.99it/s]


Epoch [43/100], Loss: 0.0371


Epoch 44/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 149.66it/s]


Epoch [44/100], Loss: 0.0313


Epoch 45/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 190.78it/s]


Epoch [45/100], Loss: 0.0340


Epoch 46/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 167.03it/s]


Epoch [46/100], Loss: 0.0323


Epoch 47/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 151.10it/s]


Epoch [47/100], Loss: 0.0338


Epoch 48/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 271.99it/s]


Epoch [48/100], Loss: 0.0372


Epoch 49/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 262.23it/s]


Epoch [49/100], Loss: 0.0334


Epoch 50/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 276.79it/s]


Epoch [50/100], Loss: 0.0342
Model saved at epoch 50


Epoch 51/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 187.73it/s]


Epoch [51/100], Loss: 0.0305


Epoch 52/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 182.47it/s]


Epoch [52/100], Loss: 0.0367


Epoch 53/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 151.89it/s]


Epoch [53/100], Loss: 0.0332


Epoch 54/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 154.16it/s]


Epoch [54/100], Loss: 0.0323


Epoch 55/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 181.58it/s]


Epoch [55/100], Loss: 0.0377


Epoch 56/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 166.99it/s]


Epoch [56/100], Loss: 0.0404


Epoch 57/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 173.89it/s]


Epoch [57/100], Loss: 0.0317


Epoch 58/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 198.58it/s]


Epoch [58/100], Loss: 0.0375


Epoch 59/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 189.67it/s]


Epoch [59/100], Loss: 0.0316


Epoch 60/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 188.00it/s]


Epoch [60/100], Loss: 0.0284
Model saved at epoch 60


Epoch 61/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 173.97it/s]


Epoch [61/100], Loss: 0.0286


Epoch 62/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 176.20it/s]


Epoch [62/100], Loss: 0.0248


Epoch 63/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 189.29it/s]


Epoch [63/100], Loss: 0.0280


Epoch 64/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 168.23it/s]


Epoch [64/100], Loss: 0.0288


Epoch 65/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 188.64it/s]


Epoch [65/100], Loss: 0.0295


Epoch 66/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 190.45it/s]


Epoch [66/100], Loss: 0.0323


Epoch 67/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 196.12it/s]


Epoch [67/100], Loss: 0.0318


Epoch 68/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 176.47it/s]


Epoch [68/100], Loss: 0.0257


Epoch 69/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 191.80it/s]


Epoch [69/100], Loss: 0.0252


Epoch 70/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 186.66it/s]


Epoch [70/100], Loss: 0.0322
Model saved at epoch 70


Epoch 71/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 165.12it/s]


Epoch [71/100], Loss: 0.0354


Epoch 72/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 175.18it/s]


Epoch [72/100], Loss: 0.0299


Epoch 73/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 183.15it/s]


Epoch [73/100], Loss: 0.0344


Epoch 74/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 176.52it/s]


Epoch [74/100], Loss: 0.0299


Epoch 75/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 171.47it/s]


Epoch [75/100], Loss: 0.0298


Epoch 76/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 209.01it/s]


Epoch [76/100], Loss: 0.0320


Epoch 77/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 157.52it/s]


Epoch [77/100], Loss: 0.0255


Epoch 78/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 188.43it/s]


Epoch [78/100], Loss: 0.0321


Epoch 79/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 159.63it/s]


Epoch [79/100], Loss: 0.0292


Epoch 80/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 163.35it/s]


Epoch [80/100], Loss: 0.0300
Model saved at epoch 80


Epoch 81/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 196.90it/s]


Epoch [81/100], Loss: 0.0257


Epoch 82/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 226.36it/s]


Epoch [82/100], Loss: 0.0259


Epoch 83/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 246.97it/s]


Epoch [83/100], Loss: 0.0304


Epoch 84/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 248.35it/s]


Epoch [84/100], Loss: 0.0262


Epoch 85/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 215.39it/s]


Epoch [85/100], Loss: 0.0265


Epoch 86/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 226.30it/s]


Epoch [86/100], Loss: 0.0303


Epoch 87/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 218.93it/s]


Epoch [87/100], Loss: 0.0249


Epoch 88/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 213.10it/s]


Epoch [88/100], Loss: 0.0292


Epoch 89/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 223.08it/s]


Epoch [89/100], Loss: 0.0277


Epoch 90/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 217.43it/s]


Epoch [90/100], Loss: 0.0306
Model saved at epoch 90


Epoch 91/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 249.04it/s]


Epoch [91/100], Loss: 0.0302


Epoch 92/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 245.50it/s]


Epoch [92/100], Loss: 0.0278


Epoch 93/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 247.02it/s]


Epoch [93/100], Loss: 0.0268


Epoch 94/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 305.19it/s]


Epoch [94/100], Loss: 0.0263


Epoch 95/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 226.35it/s]


Epoch [95/100], Loss: 0.0276


Epoch 96/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 257.98it/s]


Epoch [96/100], Loss: 0.0260


Epoch 97/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 277.34it/s]


Epoch [97/100], Loss: 0.0321


Epoch 98/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 199.43it/s]


Epoch [98/100], Loss: 0.0311


Epoch 99/100: 100%|██████████████████████████| 106/106 [00:00<00:00, 192.64it/s]


Epoch [99/100], Loss: 0.0297


Epoch 100/100: 100%|█████████████████████████| 106/106 [00:00<00:00, 193.89it/s]

Epoch [100/100], Loss: 0.0251
Model saved at epoch 100
Test Accuracy: 94.80%
Confusion Matrix:
[[52  0  0  0  0  0  0  0  0  0  0]
 [ 0 32  0  0  0  3  0  0  0  0  0]
 [ 0  0 36  0  0  0  0  0  0  0  0]
 [ 0  0  0 46  0  0  0  0  0  0  0]
 [ 0  0  0  0 34  0  0  0  0  0  0]
 [ 0  4  0  0  0 23  0  0  0  4  0]
 [ 4  0  0  0  0  0 35  0  0  0  0]
 [ 0  0  0  0  0  0  0 35  0  3  0]
 [ 0  0  0  0  0  0  0  0 35  0  0]
 [ 3  0  0  0  0  0  0  0  1 39  0]
 [ 0  0  0  0  0  0  0  0  0  0 34]]

Classification Report:
                     precision    recall  f1-score   support

               Beer       0.88      1.00      0.94        52
           Cocktail       0.89      0.91      0.90        35
              Cocoa       1.00      1.00      1.00        36
       Coffee / Tea       1.00      1.00      1.00        46
   Homemade Liqueur       1.00      1.00      1.00        34
     Ordinary Drink       0.88      0.74      0.81        31
    Other / Unknown       1.00      0.90      0.95      




In [None]:
# #모델이 유사 칵테일을 추천하는 방식

In [None]:
# 1. 모델과 도구 준비
# 1-1. 모댈 로드 

In [46]:
# 모델 정의와 초기화
model = CocktailRecommendationModel(input_size=100, hidden_size1=256, hidden_size2=128, output_size=output_size).to(device)

# 학습된 모델 가중치 로드
# 가중치만 로드하도록 명시적으로 설정
model.load_state_dict(torch.load("models/cocktail_model.pth", map_location=device, weights_only=True), strict=True)
model.eval()

CocktailRecommendationModel(
  (fc1): Linear(in_features=100, out_features=256, bias=True)
  (batch_norm1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (batch_norm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=128, out_features=11, bias=True)
)

In [47]:
# 1-2. 벡터화기 및 인코더 준비
# 이후의 유사 칵테일 추천 기능을 실행할 때 필요

In [48]:
import pickle

# vectorizer와 label_encoder 저장
with open("vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)

with open("label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

print("TF-IDF Vectorizer와 Label Encoder가 저장되었습니다.")

TF-IDF Vectorizer와 Label Encoder가 저장되었습니다.


In [49]:
with open("vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)

with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

In [50]:
# 2. 유사 칵테일 추천 함수

In [65]:
def recommend_similar_cocktails_by_name(input_cocktail_name, vectorizer, model, label_encoder, cocktail_data, top_k=5, min_probability=0.8):
    """
    입력 칵테일 이름을 기반으로 유사한 칵테일을 추천하는 함수
    - input_cocktail_name: 사용자 입력 칵테일 이름 (str)
    - vectorizer: TF-IDF 벡터화기
    - model: 학습된 추천 모델
    - label_encoder: 카테고리 레이블 인코더
    - cocktail_data: 원본 칵테일 데이터셋 (DataFrame)
    - top_k: 추천할 유사 칵테일 개수
    - min_probability: 최소 유사도 확률 (default=0.5)
    """
    input_cocktail_name = input_cocktail_name.strip().lower()

    # 입력된 칵테일 이름으로 재료를 추출
    cocktail_row = cocktail_data[cocktail_data['category_lower'] == input_cocktail_name]
    
    if cocktail_row.empty:
        print(f"Error: '{input_cocktail_name}' 칵테일을 찾을 수 없습니다.")
        print("Available cocktails in the dataset:")
        print(cocktail_data['category'].unique())
        return []
    
    input_ingredients = cocktail_row.iloc[0]['ingredients_text']

    # 재료를 TF-IDF로 벡터화
    input_vector = vectorizer.transform([input_ingredients]).toarray()
    input_tensor = torch.tensor(input_vector, dtype=torch.float32).to(device)

    # 모델을 통해 클래스 확률 계산
    with torch.no_grad():
        output_probs = model(input_tensor)
        output_probs = torch.softmax(output_probs, dim=1).cpu().numpy()

    # 클래스별 확률 추출
    class_indices = np.argsort(output_probs[0])[::-1]
    class_probs = output_probs[0][class_indices]

    # 추천 결과 필터링 (유사도 80% 이상)
    recommendations = []
    for idx, class_index in enumerate(class_indices[:top_k]):
        if class_probs[idx] >= min_probability:
            cocktail_name = label_encoder.classes_[class_index]
            cocktail_info = cocktail_data[cocktail_data['category_lower'] == cocktail_name.lower()].iloc[0]
            recommendations.append({
                "name": cocktail_name,
                "probability": class_probs[idx],
                "alcohol_content": cocktail_info.get('alcohol_content', '정보 없음'),
                "ingredients": cocktail_info.get('ingredients_text', '정보 없음')
            })
    
    return recommendations

In [66]:
# 3. 유사 칵테일 추천
# 유사도 80% 이상만 추천

In [None]:
# 허깅 페이스 데이터 

In [87]:
# 데이터셋에 category_lower 열 추가 (대소문자 구분 제거)
data['category_lower'] = data['category'].str.strip().str.lower()

# 데이터셋에 'Cocktail'이 존재하는지 확인
if data[data['category_lower'] == "cocktail"].empty:
    print("Cocktail not found in dataset. Adding a sample entry.")
    new_cocktail = {
        'category': 'Cocktail',
        'ingredients_text': 'vodka lime juice sugar syrup soda water',
        'alcohol_content': '12%'
    }
    data = data.append(new_cocktail, ignore_index=True)

# 테스트 입력
input_cocktail_name = "Cocktail"

# 유사 칵테일 추천
recommendations = recommend_similar_cocktails_by_name(
    input_cocktail_name=input_cocktail_name,
    vectorizer=vectorizer,
    model=model,
    label_encoder=label_encoder,
    cocktail_data=data,
    top_k=10,  # 상위 10개 추천
    min_probability=0.002
    # 최소 유사도 10% => 최소 유사도 20%하면 2개밖에 안나옴
)

# 결과 출력
print("Top Similar Cocktails:")
if not recommendations:
    print("No cocktails found with similarity above 20%.")
else:
    for rec in recommendations:
        print(f"Name: {rec['name']}")
        print(f"   Probability: {rec['probability'] * 100:.2f}%")
        print(f"Ingredients: {rec['ingredients']}")
        print("-" * 40)

Top Similar Cocktails:
Name: Other / Unknown
   Probability: 46.26%
Ingredients: Orange juice Grapefruit juice Apple juice Maraschino cherry
----------------------------------------
Name: Shot
   Probability: 29.28%
Ingredients: Amaretto Baileys irish cream Cognac
----------------------------------------
Name: Homemade Liqueur
   Probability: 16.44%
Ingredients: Water Sugar Coffee Vanilla Vodka Caramel coloring
----------------------------------------
Name: Punch / Party Drink
   Probability: 4.85%
Ingredients: Everclear Vodka Mountain Dew Surge Lemon juice Rum
----------------------------------------
Name: Coffee / Tea
   Probability: 2.04%
Ingredients: Kahlua Baileys irish cream Frangelico Coffee Cream
----------------------------------------
Name: Cocktail
   Probability: 0.52%
Ingredients: Gin Grand Marnier Lemon Juice Grenadine
----------------------------------------
Name: Shake
   Probability: 0.25%
Ingredients: Kahlua Milk Vanilla ice-cream
-------------------------------------