In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.font_manager as fm

In [9]:
# Korean font setup: select a Hangul-capable font for the current OS so that
# Korean titles and axis labels render as text instead of empty boxes.
from matplotlib import font_manager, rc
import matplotlib
import platform

system_name = platform.system()
if system_name == 'Windows':
    # Windows: read the family name from the Malgun Gothic font file.
    font_name = font_manager.FontProperties(fname='c:/Windows/Fonts/malgun.ttf').get_name()
elif system_name == 'Darwin':
    # macOS: same family the original setup used for the Mac case.
    font_name = 'AppleGothic'
else:
    # Linux and other systems: AppleGothic is a macOS font, so instead pick the
    # first family from a list of common Korean fonts that matplotlib has
    # actually discovered on this machine.
    korean_candidates = ('NanumGothic', 'NanumBarunGothic', 'Noto Sans CJK KR', 'UnDotum')
    installed_fonts = {entry.name for entry in font_manager.fontManager.ttflist}
    font_name = next((name for name in korean_candidates if name in installed_fonts), None)

if font_name is not None:
    rc('font', family=font_name)
else:
    # No Korean font available: keep matplotlib's default font and say so,
    # rather than configuring a family that does not exist.
    print("⚠️ 한글 폰트를 찾지 못했습니다. 기본 폰트를 사용합니다.")

# Draw negative tick labels with an ASCII hyphen; the Unicode minus sign is
# missing from many Korean fonts.
matplotlib.rcParams['axes.unicode_minus'] = False

In [4]:
# Load the cohort retention pivot table; the first CSV column becomes the row index.
cohort_pivot = pd.read_csv(
    filepath_or_buffer='./cohort_pivot.csv',
    index_col=0,
)

In [10]:
# 1. Cohort heatmap
# Render the retention pivot table as an annotated heatmap and save it as a PNG.
heatmap_options = dict(
    annot=True,           # write the value inside every cell
    fmt='.1f',            # one decimal place
    cmap='RdYlGn',
    vmin=0,               # fix the colour scale to 0-100
    vmax=100,
    linewidths=0.5,
    cbar_kws={'label': 'Retention Rate (%)'},
)
heatmap_label_font = {'fontsize': 12, 'fontweight': 'bold'}

fig, ax = plt.subplots(figsize=(14, 8))
sns.heatmap(cohort_pivot, ax=ax, **heatmap_options)

ax.set_title('고객 Cohort 유지율 Heatmap', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('첫 구매 후 경과 개월 수', **heatmap_label_font)
ax.set_ylabel('Cohort 월', **heatmap_label_font)

fig.tight_layout()
fig.savefig('./cohort_heatmap.png', dpi=300, bbox_inches='tight')
print("✅ Cohort Heatmap 저장: cohort_heatmap.png")
# The figure is only written to disk; close it so it is not kept in memory.
plt.close(fig)


✅ Cohort Heatmap 저장: cohort_heatmap.png


In [11]:
# 2. Retention curves for the major cohorts
# Plot RetentionRate against CohortIndex for a hand-picked set of cohorts and
# save the chart as a PNG.
cohort_data = pd.read_csv('./cohort_analysis.csv')
# NOTE(review): assumes CohortMonth is stored as 'YYYY-MM' strings — confirm against the CSV.
major_cohorts = ['2010-12', '2011-01', '2011-02', '2011-03']

fig, ax = plt.subplots(figsize=(12, 6))
for cohort in major_cohorts:
    # Sort by CohortIndex so each line is drawn left to right even when the
    # CSV rows are not already ordered.
    data = cohort_data[cohort_data['CohortMonth'] == cohort].sort_values('CohortIndex')
    if data.empty:
        # A cohort missing from the file would otherwise add an empty line and
        # a misleading legend entry.
        print(f"⚠️ 데이터에 없는 Cohort 건너뜀: {cohort}")
        continue
    ax.plot(data['CohortIndex'], data['RetentionRate'], marker='o', label=cohort, linewidth=2)

ax.set_title('Cohort별 시간 경과에 따른 유지율', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('첫 구매 후 경과 개월 수', fontsize=12, fontweight='bold')
ax.set_ylabel('유지율 (%)', fontsize=12, fontweight='bold')
ax.legend(title='Cohort 월', fontsize=10)
ax.grid(True, alpha=0.3)
# Upper limit of 110 leaves headroom above the top of the curves.
ax.set_ylim(0, 110)
plt.tight_layout()
plt.savefig('./cohort_retention_curve.png', dpi=300, bbox_inches='tight')
print("✅ Retention Curve 저장: cohort_retention_curve.png")
plt.close()


✅ Retention Curve 저장: cohort_retention_curve.png


In [13]:
# 3. Cohort size distribution
# One row per cohort month, using the first CohortSize value recorded for it.
first_size_by_month = cohort_data.groupby('CohortMonth')['CohortSize'].first()
cohort_size = first_size_by_month.reset_index()

bar_positions = range(len(cohort_size))
bar_heights = cohort_size['CohortSize']

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(bar_positions, bar_heights, color='steelblue', alpha=0.7)

# Label each bar position with its cohort month.
ax.set_xticks(bar_positions)
ax.set_xticklabels(cohort_size['CohortMonth'], rotation=45, ha='right')

ax.set_title('Cohort규모(월별 신규 고객 수)', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Cohort 월', fontsize=12, fontweight='bold')
ax.set_ylabel('신규 고객 수', fontsize=12, fontweight='bold')
ax.grid(True, alpha=0.3, axis='y')

# Write each value slightly above its bar (fixed offset of 10 data units).
for position, height in zip(bar_positions, bar_heights):
    ax.text(position, height + 10, str(height), ha='center', fontsize=10, fontweight='bold')

fig.tight_layout()
fig.savefig('./cohort_size.png', dpi=300, bbox_inches='tight')
print("✅ Cohort Size 차트 저장: cohort_size.png")
plt.close(fig)


✅ Cohort Size 차트 저장: cohort_size.png


In [14]:
# Final status line, printed after all chart cells have run.
completion_message = "모든 시각화 작업이 완료되었습니다."
print(completion_message)

모든 시각화 작업이 완료되었습니다.
