In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import os
from sklearn.preprocessing import StandardScaler

In [2]:
# GRU-based classifier definition
class GRUModel(nn.Module):
    """GRU encoder followed by a linear layer applied to the final time step.

    Args:
        input_size: Feature size of each time step fed to ``nn.GRU``.
        hidden_size: Number of hidden units in each GRU layer.
        num_layers: Number of stacked GRU layers.
        output_size: Number of output features of the linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # state_dict keys are derived from the attribute names `gru` and `fc`,
        # so they must stay as they are for saved weights to load.
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the linear projection of the GRU output at the last time step.

        With ``batch_first=True`` the GRU output is indexed as
        (batch, time, hidden); only the ``[:, -1, :]`` slice is passed to ``fc``.
        """
        sequence_output, _final_hidden = self.gru(x)
        last_step = sequence_output[:, -1, :]
        return self.fc(last_step)

# Hyperparameters used to construct the model
input_size = 9  # number of columns (features) used
hidden_size = 64
num_layers = 1
output_size = 25  # number of output classes

In [3]:
# Build the model from the hyperparameters defined earlier in the notebook
model = GRUModel(input_size, hidden_size, num_layers, output_size)

# Load the trained weights.
# map_location="cpu" lets a checkpoint that was saved from a GPU be
# deserialized on a machine without CUDA; the model is never moved off the CPU
# in this notebook, so CPU tensors are what load_state_dict needs anyway.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load(
        r'C:\Users\csh16\Desktop\2024-2\졸업프로젝트\gru_model_weights.pth',
        map_location="cpu",
    )
)
model.eval()  # switch to evaluation mode (also displays the module summary)

GRUModel(
  (gru): GRU(9, 64, batch_first=True)
  (fc): Linear(in_features=64, out_features=25, bias=True)
)

In [4]:
# 1. Load the spending history
file_path = r"C:\Users\csh16\Desktop\2024-2\졸업프로젝트\dataset\정연주_240730_241030.csv"
data = pd.read_csv(file_path, encoding='euc-kr')

# 2. Strip thousands separators from the withdrawal amounts and convert to float.
#    Casting to str first keeps this working when pandas has already parsed the
#    column as numeric (no commas present), where `.str` would raise, and makes
#    the statement safe to re-run on an already-converted column.
data['출금'] = (
    data['출금'].astype(str).str.replace(',', '', regex=False).astype(float)
)

# 3. Total withdrawals overall and per spending category.
#    Each model feature name maps to the '카테고리' value whose withdrawals it sums.
category_by_feature = {
    '이용금액_요식': '요식',
    '이용금액_교육': '교육',
    '여유_Pet이용금액': '반려동물',
    '이용금액_여행': '여행',
    '이용금액_자차': '자차',
    '이용금액_대중교통': '대중교통',
    '이용금액_여가활동': '여가활동',
}
expense_categories = {'이용금액_업종기준': data['출금'].sum()}
for feature_name, category in category_by_feature.items():
    expense_categories[feature_name] = data.loc[data['카테고리'] == category, '출금'].sum()

# NOTE(review): this total is taken over every entry above, including
# '이용금액_업종기준', which is already the sum of all withdrawals - confirm
# this matches how the feature was defined in the training data.
expense_categories['이용금액_일상생활_총합'] = sum(expense_categories.values())

# 4. Divide the totals by the number of months covered and round to two decimals
months_covered = 3
expense_categories = {
    feature_name: round(total / months_covered, 2)
    for feature_name, total in expense_categories.items()
}

# 5. Feature vector in dictionary insertion order (9 values)
consumer_data = np.array(list(expense_categories.values()))

# Two leading singleton axes give shape (1, 1, 9) for the batch_first GRU.
# NOTE(review): no scaling is applied although StandardScaler is imported -
# confirm the model was trained on unscaled inputs.
consumer_data_tensor = torch.tensor(consumer_data, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

# Uncomment to inspect the tensor
# print("Tensor:", consumer_data_tensor)

In [5]:
# Run inference without tracking gradients, then take the index of the largest
# output score along dim 1 as the predicted label (a plain Python int).
with torch.no_grad():
    test_outputs = model(consumer_data_tensor)

predicted_label = test_outputs.argmax(dim=1).item()

# print("당신의 label은 ", predicted_label)

In [6]:
# Directory holding one "<carrier key>.csv" plan table per carrier
_dataset_dir = r"C:\Users\csh16\Desktop\2024-2\졸업프로젝트\dataset"

# Carrier key -> path of that carrier's plan CSV
carrier_file_paths = {
    carrier: _dataset_dir + "\\" + carrier + ".csv"
    for carrier in ("skt_요금제", "kt_요금제", "U+_요금제")
}

In [7]:
# Plan recommendation per carrier
def get_plan_label(label):
    """Return the plan CSV path for a cluster label.

    The label is first mapped to a carrier key, which is then looked up in
    ``carrier_file_paths``. Returns ``None`` when the label has no carrier
    or the carrier has no registered file.
    """
    # Position in the tuple is the cluster label; the trailing comment is the
    # spending category noted for that label.
    carriers_in_label_order = (
        "U+_요금제",   # 0: pets
        "skt_요금제",  # 1: daily life
        "U+_요금제",   # 2: leisure
        "skt_요금제",  # 3: daily life
        "kt_요금제",   # 4: fuel
        "U+_요금제",   # 5: education
        "skt_요금제",  # 6: travel
        "skt_요금제",  # 7: daily life
        "skt_요금제",  # 8: daily life
        "skt_요금제",  # 9: daily life
        "U+_요금제",   # 10: education
        "skt_요금제",  # 11: leisure
        "skt_요금제",  # 12: daily life
        "kt_요금제",   # 13: fuel
        "skt_요금제",  # 14: daily life
        "skt_요금제",  # 15: daily life
        "skt_요금제",  # 16: daily life
        "U+_요금제",   # 17: education
        "skt_요금제",  # 18: daily life
        "skt_요금제",  # 19: leisure
        "skt_요금제",  # 20: pets
        "kt_요금제",   # 21: transportation costs
        "skt_요금제",  # 22: daily life
        "skt_요금제",  # 23: daily life
        "skt_요금제",  # 24: daily life
    )
    carrier_by_label = dict(enumerate(carriers_in_label_order))

    carrier_key = carrier_by_label.get(label, "알 수 없는 요금제")
    return carrier_file_paths.get(carrier_key)

In [18]:
def find_similar_plan(plans, current_payment, top_n=5):
    """Return the names of the plans priced closest to ``current_payment``.

    Args:
        plans: DataFrame with a '월정액' (monthly fee) column and a
            '요금제 이름' (plan name) column. It is left unmodified.
        current_payment: Amount the monthly fees are compared against.
        top_n: Maximum number of plan names to return.

    Returns:
        The '요금제 이름' Series of at most ``top_n`` plans whose monthly fee is
        within 10,000 of ``current_payment``, ordered by increasing price
        difference. The Series is empty when no plan is in range.
    """
    # Coerce the fee to numeric; unparseable values become NaN and are dropped.
    monthly_fee = pd.to_numeric(plans['월정액'], errors='coerce')

    # assign() returns a new frame, so the caller's DataFrame is never written
    # to and no SettingWithCopyWarning is raised on the derived column.
    candidates = plans.assign(**{'월정액': monthly_fee}).dropna(subset=['월정액'])
    candidates = candidates.assign(
        **{'가격_차이': (candidates['월정액'] - current_payment).abs()}
    )

    # Keep plans within the 10,000 won range, then the top_n closest ones.
    within_range = candidates[candidates['가격_차이'] <= 10000]
    closest = within_range.nsmallest(top_n, '가격_차이')

    return closest['요금제 이름']  # only the plan-name column


In [19]:
def recommend_plan(label, user_payment):
    """Print the plans closest in price to ``user_payment`` for a cluster label.

    Args:
        label: Cluster label passed to ``get_plan_label``.
        user_payment: Amount passed to ``find_similar_plan`` as the current
            payment.

    Returns:
        None. The recommendations, or a message explaining why there are
        none, are printed.
    """
    resolved = get_plan_label(label)

    # get_plan_label is defined twice in this notebook: one version returns the
    # CSV path, the other returns the carrier key. Translate a carrier key to
    # its path and leave anything else (a path or None) untouched, so this
    # function works whichever definition is currently live in the kernel.
    file_path = carrier_file_paths.get(resolved, resolved)

    if not file_path:
        # Previously this case returned without printing anything.
        print(f"No plan file is mapped to label {label}.")
        return

    if not os.path.exists(file_path):
        print(f"The file {file_path} does not exist.")
        return

    plans_df = pd.read_csv(file_path, encoding='utf-8')  # plan tables are UTF-8
    recommended_plans = find_similar_plan(plans_df, user_payment)

    if recommended_plans.empty:
        print("추천할 요금제가 없습니다.")
        return

    print("추천 요금제:")
    for plan in recommended_plans:
        print(plan)

In [20]:
# Example usage: ask for the current payment and print the matching plans
payment_text = input("Enter your payment amount: ")
user_payment = float(payment_text)
recommend_plan(predicted_label, user_payment)

Enter your payment amount:  69000


추천 요금제:
다이렉트5G 69(넷플릭스)
0 청년 다이렉트 69(넷플릭스)
5GX 레귤러
0 청년 69
5G 행복누리 레귤러
                                
                                  복지


In [9]:
import pandas as pd
import os

# Directory holding one "<carrier key>.csv" plan table per carrier
_dataset_dir = r"C:\Users\csh16\Desktop\2024-2\졸업프로젝트\dataset"

# Carrier key -> path of that carrier's plan CSV
carrier_file_paths = {
    carrier: _dataset_dir + "\\" + carrier + ".csv"
    for carrier in ("skt_요금제", "kt_요금제", "U+_요금제")
}

# Map labels to carriers
def get_plan_label(label):
    """Return the carrier key assigned to a cluster label.

    Labels 0-24 each map to one of the three carrier keys; any other value
    yields the placeholder string "알 수 없는 요금제".
    """
    labels_by_carrier = {
        "U+_요금제": (0, 2, 5, 10, 17),
        "kt_요금제": (4, 13, 21),
        "skt_요금제": (1, 3, 6, 7, 8, 9, 11, 12, 14, 15, 16, 18, 19, 20, 22, 23, 24),
    }
    # Invert the grouping into a label -> carrier lookup table.
    carrier_by_label = {
        label_id: carrier
        for carrier, label_ids in labels_by_carrier.items()
        for label_id in label_ids
    }
    return carrier_by_label.get(label, "알 수 없는 요금제")

# Find similar plans within a price range
def find_similar_plan(plans, current_payment, top_n=3):
    """Return the names of the plans priced closest to ``current_payment``.

    Args:
        plans: DataFrame with a '월정액' (monthly fee) column and a
            '요금제 이름' (plan name) column. It is left unmodified.
        current_payment: Amount the monthly fees are compared against.
        top_n: Maximum number of plan names to return.

    Returns:
        The '요금제 이름' Series of at most ``top_n`` plans whose monthly fee is
        within 10,000 of ``current_payment``, ordered by increasing price
        difference. The Series is empty when no plan is in range.
    """
    # Coerce the fee to numeric; unparseable values become NaN and are dropped.
    monthly_fee = pd.to_numeric(plans['월정액'], errors='coerce')

    # assign() returns a new frame, so the caller's DataFrame is never written
    # to and no SettingWithCopyWarning is raised on the derived column.
    candidates = plans.assign(**{'월정액': monthly_fee}).dropna(subset=['월정액'])
    candidates = candidates.assign(
        **{'가격_차이': (candidates['월정액'] - current_payment).abs()}
    )

    # Keep plans within the 10,000 won range, then the top_n closest ones.
    within_range = candidates[candidates['가격_차이'] <= 10000]
    closest = within_range.nsmallest(top_n, '가격_차이')

    return closest['요금제 이름']

# Main function to handle multiple users and save results
def recommend_plans_for_all_users(cluster_file_path, output_file_path):
    """Add carrier and plan recommendations to every user row and save the result.

    Args:
        cluster_file_path: UTF-8 CSV of users; the 'Cluster' and
            '납부_통신비이용금액' columns are read from each row.
        output_file_path: Destination CSV (UTF-8, written without the index).

    Returns:
        None. The input rows plus the 'recommend_carrier' and
        'recommend_plans' columns are written to ``output_file_path``.
    """
    users_df = pd.read_csv(cluster_file_path, encoding='utf-8')

    # Result columns, filled in row by row below
    users_df['recommend_carrier'] = ""
    users_df['recommend_plans'] = ""

    # Each carrier's plan table is read from disk once and reused for every
    # user of that carrier (None marks a carrier whose file is unavailable),
    # instead of re-reading the CSV for each row.
    plans_by_carrier = {}

    for idx, row in users_df.iterrows():
        label = row['Cluster']
        user_payment = row['납부_통신비이용금액']

        carrier_label = get_plan_label(label)
        if carrier_label not in plans_by_carrier:
            file_path = carrier_file_paths.get(carrier_label)
            if file_path and os.path.exists(file_path):
                plans_by_carrier[carrier_label] = pd.read_csv(file_path, encoding='utf-8')
            else:
                plans_by_carrier[carrier_label] = None
        plans_df = plans_by_carrier[carrier_label]

        if plans_df is None:
            users_df.at[idx, 'recommend_carrier'] = "파일 없음"
            users_df.at[idx, 'recommend_plans'] = "파일 없음"
            continue

        # Pass a copy so the cached table cannot be altered by the callee.
        recommended_plans = find_similar_plan(plans_df.copy(), user_payment)

        users_df.at[idx, 'recommend_carrier'] = carrier_label
        users_df.at[idx, 'recommend_plans'] = (
            ", ".join(recommended_plans) if not recommended_plans.empty else "추천할 요금제 없음"
        )

    # Save the updated DataFrame
    users_df.to_csv(output_file_path, index=False, encoding='utf-8')
    print(f"Updated file saved to {output_file_path}")

# Run the batch recommendation: read the labelled cluster file and write a copy
# with the recommendation columns added.
_cluster_csv = r"C:\Users\csh16\Desktop\2024-2\졸업프로젝트\dataset\cluster_with_labels_modified.csv"
_recommendations_csv = r"C:\Users\csh16\Desktop\2024-2\졸업프로젝트\dataset\cluster_with_labels_recommendations.csv"

recommend_plans_for_all_users(_cluster_csv, _recommendations_csv)


Updated file saved to C:\Users\csh16\Desktop\2024-2\졸업프로젝트\dataset\cluster_with_labels_recommendations.csv
