# 고객 이탈 예측 및 마케팅 전략 최적화
1. 데이터 수집 및 전처리
2. 상태 및 행동 정의
3. 상태 전이 확률 및 보상 함수 추정
4. MDP 모델링 및 알고리즘 적용
5. 결과 해석 및 적용 방안 제시

## 1. 데이터 로드 및 전처리

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Load the raw transaction records from the Excel workbook.
# NOTE(review): only the first sheet is read (sheet_name=0 is the pandas
# default) -- confirm whether the workbook has further sheets to include.
data = pd.read_excel(
    io='./online+retail+ii/online_retail_II.xlsx',
    sheet_name=0,
)

In [None]:
# Display the loaded DataFrame as this cell's output
data

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.10,13085.0,United Kingdom
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom
...,...,...,...,...,...,...,...,...
525456,538171,22271,FELTCRAFT DOLL ROSIE,2,2010-12-09 20:01:00,2.95,17530.0,United Kingdom
525457,538171,22750,FELTCRAFT PRINCESS LOLA DOLL,1,2010-12-09 20:01:00,3.75,17530.0,United Kingdom
525458,538171,22751,FELTCRAFT PRINCESS OLIVIA DOLL,1,2010-12-09 20:01:00,3.75,17530.0,United Kingdom
525459,538171,20970,PINK FLORAL FELTCRAFT SHOULDER BAG,2,2010-12-09 20:01:00,3.75,17530.0,United Kingdom


In [6]:
# Keep only the columns used by the analysis, then drop every row that has a
# missing value in any of them
data = data[['Customer ID', 'InvoiceDate', 'Invoice', 'Quantity', 'Price']]
data = data.dropna()

# Normalize dtypes: invoice timestamps as datetimes, customer IDs as integers
data = data.assign(
    **{
        'InvoiceDate': pd.to_datetime(data['InvoiceDate']),
        'Customer ID': data['Customer ID'].astype(int),
    }
)

# Most recent invoice date per customer
last_purchase = (
    data.groupby('Customer ID')['InvoiceDate']
    .max()
    .reset_index()
    .rename(columns={'InvoiceDate': 'LastPurchaseDate'})
)

# Reference "today": the latest invoice date present in the dataset
current_date = data['InvoiceDate'].max()

In [7]:
# Customer state classification
def define_customer_state(row, reference_date=None, active_days=30, dormant_days=90):
    """Classify a customer by the number of days since their last purchase.

    Args:
        row: Record supporting ``row['LastPurchaseDate']`` (for example a
            DataFrame row passed by ``DataFrame.apply(..., axis=1)``).
        reference_date: Date the recency is measured against. When omitted,
            the module-level ``current_date`` is used, so existing calls that
            pass only ``row`` behave as before.
        active_days: Largest number of elapsed days still labelled 'Active'.
        dormant_days: Largest number of elapsed days still labelled 'Dormant'.

    Returns:
        'Active' if the elapsed days are <= ``active_days``, 'Dormant' if they
        are <= ``dormant_days``, otherwise 'Churned'.
    """
    if reference_date is None:
        # Fall back to the notebook-level reference date
        reference_date = current_date

    days_since_last_purchase = (reference_date - row['LastPurchaseDate']).days

    if days_since_last_purchase <= active_days:
        return 'Active'
    if days_since_last_purchase <= dormant_days:
        return 'Dormant'
    return 'Churned'
    
# Label every customer by applying the classifier row by row
last_purchase['State'] = last_purchase.apply(define_customer_state, axis=1)

# Report how many customers fall into each state
state_counts = last_purchase['State'].value_counts()
print('상태별 고객 수:')
print(state_counts)

상태별 고객 수:
State
Active     1673
Churned    1449
Dormant    1261
Name: count, dtype: int64


## 2. 상태 전이 확률 추정

In [8]:
# State space and action space of the MDP
states = ['Active', 'Dormant', 'Churned']
actions = ['No Action', 'Email Campaign', 'Offer Discount']

# Transition probabilities, indexed as P[state][action][next_state].
# The values are hand-picked rather than estimated; next states that are not
# listed for a (state, action) pair are simply absent from the inner dict.
P = {
    'Active': {
        'No Action': {
            'Active': 0.6,
            'Dormant': 0.3,
            'Churned': 0.1,
        },
        'Email Campaign': {
            'Active': 0.7,
            'Dormant': 0.25,
            'Churned': 0.05,
        },
        'Offer Discount': {
            'Active': 0.8,
            'Dormant': 0.15,
            'Churned': 0.05,
        },
    },
    'Dormant': {
        'No Action': {
            'Dormant': 0.5,
            'Churned': 0.5,
        },
        'Email Campaign': {
            'Active': 0.3,
            'Dormant': 0.5,
            'Churned': 0.2,
        },
        'Offer Discount': {
            'Active': 0.5,
            'Dormant': 0.4,
            'Churned': 0.1,
        },
    },
    'Churned': {
        'No Action': {
            'Churned': 1.0,
        },
        'Email Campaign': {
            'Dormant': 0.2,
            'Churned': 0.8,
        },
        'Offer Discount': {
            'Active': 0.3,
            'Churned': 0.7,
        },
    },
}

# Reward function R[state][action]: expected revenue for taking an action in
# a state. Each tuple lists the rewards in the same order as `actions`.
R = {
    'Active': dict(zip(actions, (10, 8, 6))),
    'Dormant': dict(zip(actions, (0, 5, 4))),
    'Churned': dict(zip(actions, (-2, 1, 3))),
}

## 3. 가치 반복 알고리즘 구현

In [9]:
# Start from a zero value estimate and an undecided policy for every state
V = dict.fromkeys(states, 0)
policy = dict.fromkeys(states)

# Value iteration settings
gamma = 0.9  # discount factor
theta = 0.0001  # convergence threshold
iteration = 0

# Sweep over all states until the largest value change in a sweep drops
# below theta. V is updated in place, so later states in a sweep already see
# the refreshed values of earlier states.
while True:
    delta = 0
    print(f'Iteration {iteration}')
    for s in states:
        previous_value = V[s]
        action_values = {}
        for a in actions:
            reward = R[s][a]
            # Accumulate term by term to keep the summation order explicit
            q_value = 0
            for s_prime, prob in P[s][a].items():
                q_value += prob * (reward + gamma * V[s_prime])
            action_values[a] = q_value
            print(f'  State {s}, Action {a}: Value {q_value:.2f}')
        # Greedy choice; on ties the action listed first wins
        best_action, best_value = max(
            action_values.items(), key=lambda item: item[1]
        )
        V[s] = best_value
        policy[s] = best_action
        delta = max(delta, abs(previous_value - V[s]))
    iteration += 1
    print(f"    Updated Values: {V}")
    print(f"    Updated Policy: {policy}\n")
    if delta < theta:
        break

Iteration 0
  State Active, Action No Action: Value 10.00
  State Active, Action Email Campaign: Value 8.00
  State Active, Action Offer Discount: Value 6.00
  State Dormant, Action No Action: Value 0.00
  State Dormant, Action Email Campaign: Value 7.70
  State Dormant, Action Offer Discount: Value 8.50
  State Churned, Action No Action: Value -2.00
  State Churned, Action Email Campaign: Value 2.53
  State Churned, Action Offer Discount: Value 5.70
    Updated Values: {'Active': 10.0, 'Dormant': 8.5, 'Churned': 5.699999999999999}
    Updated Policy: {'Active': 'No Action', 'Dormant': 'Offer Discount', 'Churned': 'Offer Discount'}

Iteration 1
  State Active, Action No Action: Value 18.21
  State Active, Action Email Campaign: Value 16.47
  State Active, Action Offer Discount: Value 14.60
  State Dormant, Action No Action: Value 6.39
  State Dormant, Action Email Campaign: Value 14.77
  State Dormant, Action Offer Discount: Value 15.77
  State Churned, Action No Action: Value 3.13
  S

In [10]:
# Report the converged value of each state
print('최종 가치 함수:')
for state in states:
    print(f'State {state}: {V[state]:.2f}')

# Report the greedy action chosen for each state
print('\n최적 정책:')
for state in states:
    print(f'State {state}: {policy[state]}')

최종 가치 함수:
State Active: 71.98
State Dormant: 65.39
State Churned: 60.64

최적 정책:
State Active: No Action
State Dormant: Offer Discount
State Churned: Offer Discount
