In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.optimize import curve_fit
import warnings
warnings.filterwarnings('ignore')

# LOAD DATA
# NOTE(review): absolute, user-specific Windows path - the script only runs on a
# machine where this exact file exists.
df = pd.read_csv(r'C:\Users\user\Downloads\KMA TestTask Product Analyst - Task2.csv')

print("="*80)
print("LTV PREDICTION ANALYSIS - SHIFTED GOMPERTZ CURVE ‚Äî Hanna Todorova")
print("="*80)

# 1. DATA PREPARATION
print("\n1. –ü–Ü–î–ì–û–¢–û–í–ö–ê –î–ê–ù–ò–•")
print("-"*80)

# Parse the three date-like columns into pandas datetimes.
df['event_timestamp'] = pd.to_datetime(df['event_timestamp'])
df['first_purchase_time'] = pd.to_datetime(df['first_purchase_time'])
df['cohort_week'] = pd.to_datetime(df['cohort_week'])
# The price is the first run of digits (with an optional decimal part) found in
# product_id; rows without any digits get NaN.
# NOTE(review): presumably product_id embeds the price - confirm against the data.
df['price'] = df['product_id'].str.extract(r'(\d+\.?\d*)').astype(float)

# Dataset summary: row count, distinct users, cohort date range, longest lifetime.
print(f"–ó–∞–≥–∞–ª—å–Ω–∞ –∫—ñ–ª—å–∫—ñ—Å—Ç—å —Ç—Ä–∞–Ω–∑–∞–∫—Ü—ñ–π: {len(df):,}")
print(f"–ö—ñ–ª—å–∫—ñ—Å—Ç—å —É–Ω—ñ–∫–∞–ª—å–Ω–∏—Ö –∫–æ—Ä–∏—Å—Ç—É–≤–∞—á—ñ–≤: {df['user_id'].nunique():,}")
print(f"–ü–µ—Ä—ñ–æ–¥ –¥–∞–Ω–∏—Ö: {df['cohort_week'].min()} - {df['cohort_week'].max()}")
print(f"–ú–∞–∫—Å–∏–º–∞–ª—å–Ω–∏–π lifetime_weeks: {df['lifetime_weeks'].max()}")

# 2. ACTUAL LTV CALCULATION
# Builds ltv_df with one row per (cohort, lifetime week): cumulative revenue per
# cohort user (LTV) plus the share of the cohort that transacted in that week.
print("\n\n2. –†–û–ó–†–ê–•–£–ù–û–ö –§–ê–ö–¢–ò–ß–ù–û–ì–û LTV (–ö–£–ú–£–õ–Ø–¢–ò–í–ù–ò–ô)")
print("-"*80)

ltv_data = []

for cohort in df['cohort_week'].unique():
    cohort_users = df.loc[df['cohort_week'] == cohort]
    cohort_size = cohort_users['user_id'].nunique()
    weeks_observed = int(cohort_users['lifetime_weeks'].max())

    for week in range(weeks_observed + 1):
        up_to_week = cohort_users['lifetime_weeks'] <= week
        in_week = cohort_users['lifetime_weeks'] == week

        # Revenue is cumulative (all weeks up to and including this one);
        # activity is counted for this week only.
        revenue_until_week = cohort_users.loc[up_to_week, 'price'].sum()
        active_users = cohort_users.loc[in_week, 'user_id'].nunique()

        ltv_value = revenue_until_week / cohort_size
        if cohort_size > 0:
            retention_value = active_users / cohort_size
        else:
            retention_value = 0

        ltv_data.append(dict(
            cohort_week=cohort,
            lifetime_weeks=week,
            cohort_size=cohort_size,
            cumulative_revenue=revenue_until_week,
            ltv=ltv_value,
            active_users=active_users,
            retention=retention_value,
        ))

ltv_df = pd.DataFrame(ltv_data)

# Preview: first eight weeks of the earliest cohort.
print("\n–ü—Ä–∏–∫–ª–∞–¥ —Ñ–∞–∫—Ç–∏—á–Ω–æ–≥–æ LTV (–ø–µ—Ä—à–∞ –∫–æ–≥–æ—Ä—Ç–∞, –ø–µ—Ä—à—ñ 8 —Ç–∏–∂–Ω—ñ–≤):")
sample_cohort = ltv_df['cohort_week'].min()
earliest_cohort_rows = ltv_df.loc[ltv_df['cohort_week'] == sample_cohort]
sample_data = earliest_cohort_rows.head(8)
preview_columns = ['lifetime_weeks', 'ltv', 'active_users', 'retention']
print(sample_data[preview_columns].to_string(index=False))

# 3. SHIFTED GOMPERTZ CURVE
# Prints a description of the chosen forecasting approach; the printed formula
# is the same expression that shifted_gompertz() evaluates.
# NOTE(review): with c > 0 that expression tends to d as t grows, so the
# "plateau" described in the text only holds when the fitted c is 0.
print("\n\n3. –ú–ï–¢–û–î –ü–†–û–ì–ù–û–ó–£–í–ê–ù–ù–Ø")
print("-"*80)
print("–û–±—Ä–∞–Ω–∏–π –ø—ñ–¥—Ö—ñ–¥: Shifted Gompertz Curve")
print("–§–æ—Ä–º—É–ª–∞: LTV(t) = a * (1 - exp(-b * t)) * exp(-c * t) + d")
print("\n–û–±“ë—Ä—É–Ω—Ç—É–≤–∞–Ω–Ω—è:")
print("- –ú–æ–¥–µ–ª—é—î S-–ø–æ–¥—ñ–±–Ω—É –∫—Ä–∏–≤—É –∑ –≤–∏—Ö–æ–¥–æ–º –Ω–∞ –ø–ª–∞—Ç–æ")
print("- –í—Ä–∞—Ö–æ–≤—É—î —à–≤–∏–¥–∫–µ –∑—Ä–æ—Å—Ç–∞–Ω–Ω—è –Ω–∞ –ø–æ—á–∞—Ç–∫—É —Ç–∞ —É–ø–æ–≤—ñ–ª—å–Ω–µ–Ω–Ω—è –ø—ñ–∑–Ω—ñ—à–µ")
print("- –†–µ–∞–ª—ñ—Å—Ç–∏—á–Ω–æ –≤—ñ–¥–æ–±—Ä–∞–∂–∞—î –ø–æ–≤–µ–¥—ñ–Ω–∫—É –∑ churn")

def shifted_gompertz(t, a, b, c, d):
    """Evaluate ``a * (1 - exp(-b * t)) * exp(-c * t) + d``.

    Args:
        t: Lifetime (weeks). A scalar, a NumPy array / pandas Series, or a
            plain list/tuple of numbers (converted to a float array).
        a: Amplitude of the growth term.
        b: Rate of the saturating factor ``1 - exp(-b * t)``.
        c: Rate of the decaying factor ``exp(-c * t)``.
        d: Constant offset; this is the value of the curve at ``t = 0``.

    Returns:
        The curve value(s), with the same shape as ``t``.

    Note:
        For ``c > 0`` the curve tends to ``d`` as ``t`` grows; it only levels
        off at ``a + d`` when ``c == 0``.
        NOTE(review): the shifted Gompertz CDF is usually written with a
        double-exponential factor ``exp(-eta * exp(-b * t))``; confirm that the
        single-exponential form used here is the intended model.
    """
    # Plain sequences do not support float multiplication, so promote them;
    # arrays, Series and scalars are passed through untouched.
    if isinstance(t, (list, tuple)):
        t = np.asarray(t, dtype=float)
    return a * (1 - np.exp(-b * t)) * np.exp(-c * t) + d

# 4. MODEL TRAINING
# Fits one curve per cohort (cohorts with at least min_weeks_for_training weeks
# of history) and tabulates predicted vs. actual LTV for weeks 0..52.
print("\n\n4. –ü–û–ë–£–î–û–í–ê –ú–û–î–ï–õ–Ü")
print("-"*80)

predictions = []
min_weeks_for_training = 6
model_params = {}
# Declared explicitly so predictions_df keeps these columns even when no cohort
# could be fitted; otherwise the summary print at the end raises KeyError.
prediction_columns = [
    'cohort_week', 'lifetime_weeks', 'ltv_predicted',
    'cohort_size', 'ltv_actual', 'retention_actual',
]

for cohort in ltv_df['cohort_week'].unique():
    cohort_data = ltv_df[ltv_df['cohort_week'] == cohort].sort_values('lifetime_weeks')
    max_week = cohort_data['lifetime_weeks'].max()

    # Written as "not >=" so that a NaN max_week is skipped as well.
    if not (max_week >= min_weeks_for_training):
        continue

    X = cohort_data['lifetime_weeks'].values
    y = cohort_data['ltv'].values

    # Only the fit is guarded: a failed optimisation skips the cohort, while a
    # bug in the record-building code below is no longer reported as a fit error.
    try:
        params, _ = curve_fit(
            shifted_gompertz,
            X, y,
            p0=[y.max(), 0.1, 0.01, 0],
            maxfev=10000,
            bounds=([0, 0, 0, -np.inf], [np.inf, 1, 1, np.inf])
        )
    except (RuntimeError, ValueError) as e:
        # .unique() may yield numpy.datetime64 (no strftime) depending on the
        # pandas version, so normalise to a Timestamp before formatting.
        cohort_label = pd.Timestamp(cohort).strftime('%Y-%m-%d')
        print(f"–ü–æ–º–∏–ª–∫–∞ –¥–ª—è –∫–æ–≥–æ—Ä—Ç–∏ {cohort_label}: {e}")
        continue

    model_params[cohort] = params

    weeks_predict = np.arange(0, 53)
    ltv_predict = shifted_gompertz(weeks_predict, *params)
    fitted_cohort_size = cohort_data['cohort_size'].iloc[0]

    # Pair each week with its prediction instead of indexing by week value.
    for week, predicted in zip(weeks_predict, ltv_predict):
        pred_record = {
            'cohort_week': cohort,
            'lifetime_weeks': week,
            'ltv_predicted': predicted,
            'cohort_size': fitted_cohort_size
        }

        # Weeks beyond the cohort's observed history have no actual values.
        actual_row = cohort_data[cohort_data['lifetime_weeks'] == week]
        if not actual_row.empty:
            pred_record['ltv_actual'] = actual_row['ltv'].values[0]
            pred_record['retention_actual'] = actual_row['retention'].values[0]
        else:
            pred_record['ltv_actual'] = np.nan
            pred_record['retention_actual'] = np.nan

        predictions.append(pred_record)

predictions_df = pd.DataFrame(predictions, columns=prediction_columns)
print(f"\n–ö—ñ–ª—å–∫—ñ—Å—Ç—å –∫–æ–≥–æ—Ä—Ç –∑ –º–æ–¥–µ–ª—è–º–∏: {len(model_params)}")
print(f"–ü—Ä–æ–≥–Ω–æ–∑ –ø–æ–±—É–¥–æ–≤–∞–Ω–æ –¥–æ {predictions_df['lifetime_weeks'].max()} —Ç–∏–∂–Ω—ñ–≤")

# 5. QUALITY EVALUATION
# Compares fitted and observed LTV on every (cohort, week) that has an actual
# value. These are the same points the curves were fitted to, so the metrics
# describe goodness of fit, not out-of-sample forecast accuracy.
print("\n\n5. –û–¶–Ü–ù–ö–ê –Ø–ö–û–°–¢–Ü –ú–û–î–ï–õ–Ü")
print("-"*80)

validation_data = predictions_df.loc[predictions_df['ltv_actual'].notna()].copy()
# Defaults keep the later report sections working when nothing can be scored.
mae, rmse, r2, mape = 0, 0, 0, 0

if validation_data.shape[0] > 0:
    observed_ltv = validation_data['ltv_actual']
    fitted_ltv = validation_data['ltv_predicted']

    mae = mean_absolute_error(observed_ltv, fitted_ltv)
    rmse = np.sqrt(mean_squared_error(observed_ltv, fitted_ltv))
    r2 = r2_score(observed_ltv, fitted_ltv)

    # MAPE is undefined where the actual value is zero, so those rows are excluded.
    non_zero = validation_data[validation_data['ltv_actual'] > 0]
    if non_zero.shape[0] > 0:
        relative_error = (non_zero['ltv_actual'] - non_zero['ltv_predicted']) / non_zero['ltv_actual']
        mape = np.mean(np.abs(relative_error)) * 100

    print(f"Mean Absolute Error (MAE): ${mae:.2f}")
    print(f"Root Mean Square Error (RMSE): ${rmse:.2f}")
    print(f"R¬≤ Score: {r2:.4f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

    # Thresholds are checked from strictest to loosest; the first one that r2
    # exceeds decides the verdict, otherwise the low-quality message is printed.
    quality_verdicts = (
        (0.9, "\n‚úì –í–Ü–î–ú–Ü–ù–ù–ê —è–∫—ñ—Å—Ç—å –º–æ–¥–µ–ª—ñ (R¬≤ > 0.9)"),
        (0.8, "\n‚úì –•–û–†–û–®–ê —è–∫—ñ—Å—Ç—å –º–æ–¥–µ–ª—ñ (R¬≤ > 0.8)"),
        (0.7, "\n‚ö° –ó–ê–î–û–í–Ü–õ–¨–ù–ê —è–∫—ñ—Å—Ç—å –º–æ–¥–µ–ª—ñ (R¬≤ > 0.7)"),
    )
    for r2_threshold, verdict_message in quality_verdicts:
        if r2 > r2_threshold:
            print(verdict_message)
            break
    else:
        print("\n‚ö†Ô∏è –ù–ò–ó–¨–ö–ê —è–∫—ñ—Å—Ç—å –º–æ–¥–µ–ª—ñ (R¬≤ < 0.7)")

# 6. VISUALIZATION
print("\n\n6. –í–Ü–ó–£–ê–õ–Ü–ó–ê–¶–Ü–Ø")
print("-"*80)

# One 2x2 figure shared by the four panels of this section.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('LTV Prediction Analysis - Shifted Gompertz Curve', fontsize=16, fontweight='bold')

# 6.1. LTV curves by cohort (first five modelled cohorts, in table order)
ax1 = axes[0, 0]
cohorts_to_plot = predictions_df['cohort_week'].unique()[:5]

for i, cohort in enumerate(cohorts_to_plot):
    cohort_pred = predictions_df[predictions_df['cohort_week'] == cohort]
    actual_data = cohort_pred[cohort_pred['ltv_actual'].notna()]

    # .unique() may yield numpy.datetime64 (no strftime) depending on the
    # pandas version, so normalise to a Timestamp before formatting the label.
    cohort_label = pd.Timestamp(cohort).strftime("%Y-%m-%d")

    # Same colour for a cohort's observed points and its fitted curve.
    color = plt.cm.Set2(i)
    ax1.scatter(actual_data['lifetime_weeks'], actual_data['ltv_actual'],
               alpha=0.7, s=50, color=color, label=cohort_label)
    ax1.plot(cohort_pred['lifetime_weeks'], cohort_pred['ltv_predicted'],
            '--', alpha=0.8, linewidth=2, color=color)

ax1.set_xlabel('Lifetime (Weeks)', fontweight='bold')
ax1.set_ylabel('LTV ($)', fontweight='bold')
ax1.set_title('LTV Curves: ‚óè Actual  - - Predicted', fontweight='bold')
ax1.legend(fontsize=9, loc='lower right')
ax1.grid(True, alpha=0.3)

# 6.2. Average LTV
# Per-week mean across cohorts. The actual mean only covers cohorts that have
# reached a given week; the predicted mean covers every modelled cohort.
ax2 = axes[0, 1]
weekly_groups = predictions_df.groupby('lifetime_weeks')
avg_actual = weekly_groups['ltv_actual'].mean()
avg_predicted = weekly_groups['ltv_predicted'].mean()

# Weeks with no actual value in any cohort are left out of the "actual" line.
observed_mean = avg_actual.dropna()
weeks_actual = observed_mean.index
ax2.plot(weeks_actual, observed_mean,
        'o-', linewidth=3, markersize=8, label='Actual LTV', color='#2ecc71')
ax2.plot(avg_predicted.index, avg_predicted.values,
        '--', linewidth=3, label='Predicted LTV', color='#e74c3c')

# Vertical marker at the last week that still has actual data.
if len(weeks_actual) > 0:
    last_actual_week = weeks_actual.max()
    ax2.axvline(x=last_actual_week, color='gray', linestyle=':',
               label=f'Last Actual (Week {int(last_actual_week)})', linewidth=2)

ax2.set_xlabel('Lifetime (Weeks)', fontweight='bold')
ax2.set_ylabel('Average LTV ($)', fontweight='bold')
ax2.set_title('Average LTV: Actual vs Predicted', fontweight='bold')
legend = ax2.legend()
for legend_entry in legend.get_texts():
    legend_entry.set_fontweight('bold')
ax2.grid(True, alpha=0.3)

# 6.3. Retention curve
# Mean weekly retention across all cohorts in ltv_df, shown as a percentage.
ax3 = axes[1, 0]
mean_retention = ltv_df.groupby('lifetime_weeks')['retention'].mean()
retention_by_week = mean_retention * 100

ax3.plot(
    retention_by_week.index,
    retention_by_week.values,
    'o-',
    linewidth=3,
    markersize=8,
    color='#9b59b6',
)
ax3.set_xlabel('Lifetime (Weeks)', fontweight='bold')
ax3.set_ylabel('Retention Rate (%)', fontweight='bold')
ax3.set_title('Average Retention Rate by Week', fontweight='bold')
ax3.grid(True, alpha=0.3)
# Leave a little headroom above 100%.
ax3.set_ylim(0, 105)

# 6.4. Actual vs Predicted
# Scatter of fitted against observed LTV, coloured by lifetime week, with the
# y = x diagonal as the reference for a perfect fit.
ax4 = axes[1, 1]
if validation_data.shape[0] > 0:
    observed_values = validation_data['ltv_actual']
    fitted_values = validation_data['ltv_predicted']

    scatter = ax4.scatter(observed_values,
                         fitted_values,
                         c=validation_data['lifetime_weeks'],
                         alpha=0.6, s=50, cmap='viridis')

    # The diagonal spans the combined range of both series.
    min_val = min(observed_values.min(), fitted_values.min())
    max_val = max(observed_values.max(), fitted_values.max())
    diagonal = [min_val, max_val]
    ax4.plot(diagonal, diagonal, 'r--', linewidth=2, label='Perfect Prediction')

    ax4.set_xlabel('Actual LTV ($)', fontweight='bold')
    ax4.set_ylabel('Predicted LTV ($)', fontweight='bold')
    ax4.set_title(f'Actual vs Predicted (R¬≤={r2:.3f})', fontweight='bold')

    cbar = plt.colorbar(scatter, ax=ax4)
    cbar.set_label('Week', fontweight='bold')
    legend = ax4.legend()
    for legend_entry in legend.get_texts():
        legend_entry.set_fontweight('bold')
    ax4.grid(True, alpha=0.3)

# Save before show(): the HTML report embeds this file by name.
plt.tight_layout()
plt.savefig('ltv_analysis_charts.png', dpi=150, bbox_inches='tight')
print("–ì—Ä–∞—Ñ—ñ–∫–∏ –∑–±–µ—Ä–µ–∂–µ–Ω–æ: ltv_analysis_charts.png")
plt.show()

# 7. 52-WEEK FORECAST
# Summarises the predicted LTV at week 52 per cohort and compares its mean with
# the mean actual LTV at the "last actual" week.
print("\n\n7. –ü–†–û–ì–ù–û–ó LTV –ù–ê 52 –¢–ò–ñ–ù–Ü")
print("-"*80)

week_52_data = predictions_df.loc[predictions_df['lifetime_weeks'] == 52]
ltv_52 = week_52_data.groupby('cohort_week')['ltv_predicted'].mean()
# Revenue per cohort = predicted LTV x cohort size, summed over cohorts.
revenue_per_cohort = week_52_data['ltv_predicted'] * week_52_data['cohort_size']
total_revenue_52 = revenue_per_cohort.sum()

print(f"\n–ü—Ä–æ–≥–Ω–æ–∑ –Ω–∞ 52 —Ç–∏–∂–Ω—ñ:")
print(f"  –°–µ—Ä–µ–¥–Ω—ñ–π LTV: ${ltv_52.mean():.2f}")
print(f"  –ú–µ–¥—ñ–∞–Ω–Ω–∏–π LTV: ${ltv_52.median():.2f}")
print(f"  –î—ñ–∞–ø–∞–∑–æ–Ω LTV: ${ltv_52.min():.2f} - ${ltv_52.max():.2f}")
print(f"  –ü—Ä–æ–≥–Ω–æ–∑–Ω–∏–π —Å—É–∫—É–ø–Ω–∏–π –¥–æ—Ö—ñ–¥: ${total_revenue_52:,.2f}")

# NOTE(review): last_valid_index() returns the last ROW that has an actual
# value, i.e. the final observed week of whichever cohort was appended last -
# not necessarily the largest observed week. Confirm this is the intended
# reference week.
last_week = predictions_df['ltv_actual'].last_valid_index()
growth_expected = 0

if last_week is not None:
    last_week_num = predictions_df.loc[last_week, 'lifetime_weeks']
    same_week_rows = predictions_df.loc[predictions_df['lifetime_weeks'] == last_week_num]
    avg_last_ltv = same_week_rows['ltv_actual'].mean()
    growth_expected = ((ltv_52.mean() - avg_last_ltv) / avg_last_ltv) * 100

    # Growth under 30% is reported as a plateau, anything above as moderate growth.
    if growth_expected < 30:
        trend_label = '–≤–∏—Ö—ñ–¥ –Ω–∞ –ø–ª–∞—Ç–æ'
    else:
        trend_label = '–ø–æ–º—ñ—Ä–Ω–µ –∑—Ä–æ—Å—Ç–∞–Ω–Ω—è'

    print(f"\n–ü–æ—Ä—ñ–≤–Ω—è–Ω–Ω—è:")
    print(f"  –û—Å—Ç–∞–Ω–Ω—ñ–π —Ñ–∞–∫—Ç–∏—á–Ω–∏–π LTV (—Ç–∏–∂–¥–µ–Ω—å {int(last_week_num)}): ${avg_last_ltv:.2f}")
    print(f"  –û—á—ñ–∫—É–≤–∞–Ω–∏–π –ø—Ä–∏—Ä—ñ—Å—Ç: ${ltv_52.mean() - avg_last_ltv:.2f} ({growth_expected:+.1f}%)")
    print(f"  –ú–æ–¥–µ–ª—å –ø–µ—Ä–µ–¥–±–∞—á–∞—î {trend_label}")

# 8. HTML REPORT
print("\n\n8. –ó–ë–ï–†–ï–ñ–ï–ù–ù–Ø –£ HTML")
print("-"*80)

# Map R^2 to the CSS class and the label shown in the report, using the same
# 0.9 / 0.8 / 0.7 thresholds as the console verdict in section 5.
if r2 > 0.9:
    quality_class = 'quality-excellent'
    quality_text = '–í–Ü–î–ú–Ü–ù–ù–ê'
elif r2 > 0.8:
    quality_class = 'quality-good'
    quality_text = '–•–û–†–û–®–ê'
elif r2 > 0.7:
    quality_class = 'quality-fair'
    quality_text = '–ó–ê–î–û–í–Ü–õ–¨–ù–ê'
else:
    quality_class = 'quality-poor'
    quality_text = '–ù–ò–ó–¨–ö–ê'

# Opening part of the report: document head with inline CSS, the title banner,
# dataset totals, the model description, and the header row of the per-cohort
# parameter table (its rows are appended by the loop that follows).
# Literal CSS braces are doubled ({{ }}) because this is an f-string.
html_content = f"""<!DOCTYPE html>
<html lang="uk">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LTV Prediction Analysis - Shifted Gompertz ‚Äî Hanna Todorova</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }}
        .container {{ max-width: 1400px; margin: 0 auto; background-color: white; padding: 30px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }}
        h1 {{ color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }}
        h2 {{ color: #34495e; margin-top: 30px; border-left: 4px solid #3498db; padding-left: 10px; }}
        h3 {{ color: #34495e; margin-top: 20px; }}
        table {{ border-collapse: collapse; width: 100%; margin: 20px 0; font-size: 14px; }}
        th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
        th {{ background-color: #3498db; color: white; font-weight: bold; }}
        tr:nth-child(even) {{ background-color: #f9f9f9; }}
        .metric {{ display: inline-block; margin: 10px 20px 10px 0; padding: 15px; background-color: #ecf0f1; border-radius: 5px; min-width: 200px; }}
        .metric-label {{ font-weight: bold; color: #7f8c8d; font-size: 14px; }}
        .metric-value {{ font-size: 24px; color: #2c3e50; margin-top: 5px; }}
        .quality-excellent {{ color: #27ae60; font-weight: bold; }}
        .quality-good {{ color: #2ecc71; font-weight: bold; }}
        .quality-fair {{ color: #f39c12; font-weight: bold; }}
        .quality-poor {{ color: #e74c3c; font-weight: bold; }}
        img {{ max-width: 100%; height: auto; margin: 20px 0; border: 1px solid #ddd; }}
        .info-box {{ background-color: #e3f2fd; border-left: 5px solid #2196f3; padding: 15px; margin: 20px 0; }}
        .method-box {{ background-color: #f3e5f5; border-left: 5px solid #9c27b0; padding: 15px; margin: 20px 0; }}
        .warning-box {{ background-color: #fff3cd; border-left: 5px solid #ffc107; padding: 15px; margin: 20px 0; }}
        .formula {{ background-color: #f8f9fa; padding: 10px; border-radius: 5px; font-family: 'Courier New', monospace; font-size: 14px; margin: 10px 0; text-align: center; }}
        ul {{ line-height: 1.8; }}
        .highlight {{ background-color: #fff9c4; padding: 2px 5px; border-radius: 3px; }}
    </style>
</head>
<body>
    <div class="container">
        <h1>üìä LTV Prediction Analysis - Shifted Gompertz Curve ‚Äî Hanna Todorova</h1>
        <p><strong>–î–∞—Ç–∞ –∞–Ω–∞–ª—ñ–∑—É:</strong> {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
        
        <div class="warning-box">
            <h4>üìà Shifted Gompertz Curve –¥–ª—è –ø—Ä–æ–≥–Ω–æ–∑—É–≤–∞–Ω–Ω—è LTV</h4>
            <p>–¶—è –º–æ–¥–µ–ª—å –ø–µ—Ä–µ–¥–±–∞—á–∞—î:</p>
            <ul>
                <li><strong>S-–ø–æ–¥—ñ–±–Ω—É –∫—Ä–∏–≤—É</strong> - —à–≤–∏–¥–∫–µ –∑—Ä–æ—Å—Ç–∞–Ω–Ω—è –Ω–∞ –ø–æ—á–∞—Ç–∫—É, –ø–æ—Ç—ñ–º —É–ø–æ–≤—ñ–ª—å–Ω–µ–Ω–Ω—è</li>
                <li><strong>–í–∏—Ö—ñ–¥ –Ω–∞ –ø–ª–∞—Ç–æ</strong> - LTV —Å—Ç–∞–±—ñ–ª—ñ–∑—É—î—Ç—å—Å—è —á–µ—Ä–µ–∑ churn</li>
                <li><strong>–†–µ–∞–ª—ñ—Å—Ç–∏—á–Ω—É –ø–æ–≤–µ–¥—ñ–Ω–∫—É</strong> - –≤—ñ–¥–æ–±—Ä–∞–∂–∞—î —Å–ø—Ä–∞–≤–∂–Ω—ñ –ø–∞—Ç–µ—Ä–Ω–∏ retention</li>
            </ul>
        </div>
        
        <h2>1. –ó–∞–≥–∞–ª—å–Ω–∞ —ñ–Ω—Ñ–æ—Ä–º–∞—Ü—ñ—è</h2>
        <div class="metric">
            <div class="metric-label">–¢—Ä–∞–Ω–∑–∞–∫—Ü—ñ–π</div>
            <div class="metric-value">{len(df):,}</div>
        </div>
        <div class="metric">
            <div class="metric-label">–ö–æ—Ä–∏—Å—Ç—É–≤–∞—á—ñ–≤</div>
            <div class="metric-value">{df['user_id'].nunique():,}</div>
        </div>
        <div class="metric">
            <div class="metric-label">–ö–æ–≥–æ—Ä—Ç</div>
            <div class="metric-value">{df['cohort_week'].nunique()}</div>
        </div>
        
        <h2>2. –ú–µ—Ç–æ–¥ –ø—Ä–æ–≥–Ω–æ–∑—É–≤–∞–Ω–Ω—è</h2>
        <div class="method-box">
            <h3>üî¨ Shifted Gompertz Curve</h3>
            <div class="formula">LTV(t) = a √ó (1 - e<sup>-b√ót</sup>) √ó e<sup>-c√ót</sup> + d</div>
            <p><strong>–ü–∞—Ä–∞–º–µ—Ç—Ä–∏ –º–æ–¥–µ–ª—ñ:</strong></p>
            <ul>
                <li><strong>a</strong> - –º–∞–∫—Å–∏–º–∞–ª—å–Ω–∏–π –ø–æ—Ç–µ–Ω—Ü—ñ–π–Ω–∏–π LTV (–ø–ª–∞—Ç–æ)</li>
                <li><strong>b</strong> - —à–≤–∏–¥–∫—ñ—Å—Ç—å –ø–æ—á–∞—Ç–∫–æ–≤–æ–≥–æ –∑—Ä–æ—Å—Ç–∞–Ω–Ω—è</li>
                <li><strong>c</strong> - —à–≤–∏–¥–∫—ñ—Å—Ç—å –≤–∏—Ö–æ–¥—É –Ω–∞ –ø–ª–∞—Ç–æ (churn rate)</li>
                <li><strong>d</strong> - –±–∞–∑–æ–≤–µ –∑–º—ñ—â–µ–Ω–Ω—è</li>
            </ul>
        </div>
        
        <h3>–ü–∞—Ä–∞–º–µ—Ç—Ä–∏ –º–æ–¥–µ–ª–µ–π –ø–æ –∫–æ–≥–æ—Ä—Ç–∞—Ö (—Ç–æ–ø 10)</h3>
        <table>
            <tr>
                <th>–ö–æ–≥–æ—Ä—Ç–∞</th>
                <th>a (–ø–ª–∞—Ç–æ)</th>
                <th>b (–∑—Ä–æ—Å—Ç–∞–Ω–Ω—è)</th>
                <th>c (churn)</th>
                <th>d (–±–∞–∑–∞)</th>
            </tr>
"""

# One table row per fitted cohort, limited to the first ten in insertion order.
for cohort, params in list(model_params.items())[:10]:
    # Dictionary keys may be numpy.datetime64 (no strftime) depending on the
    # pandas version, so normalise to a Timestamp before formatting.
    cohort_label = pd.Timestamp(cohort).strftime('%Y-%m-%d')
    html_content += f"""
            <tr>
                <td>{cohort_label}</td>
                <td>{params[0]:.2f}</td>
                <td>{params[1]:.4f}</td>
                <td>{params[2]:.4f}</td>
                <td>{params[3]:.2f}</td>
            </tr>
"""

# Close the parameter table, then add the quality-metrics table, the verdict
# box and the four 52-week forecast tiles. A "$" directly before a replacement
# field is a literal dollar sign in the output.
html_content += f"""
        </table>
        
        <h2>3. –Ø–∫—ñ—Å—Ç—å –º–æ–¥–µ–ª—ñ</h2>
        <table>
            <tr>
                <th>–ú–µ—Ç—Ä–∏–∫–∞</th>
                <th>–ó–Ω–∞—á–µ–Ω–Ω—è</th>
                <th>–û–ø–∏—Å</th>
            </tr>
            <tr>
                <td><strong>R¬≤ Score</strong></td>
                <td class="{quality_class}">{r2:.4f}</td>
                <td>–ö–æ–µ—Ñ—ñ—Ü—ñ—î–Ω—Ç –¥–µ—Ç–µ—Ä–º—ñ–Ω–∞—Ü—ñ—ó</td>
            </tr>
            <tr>
                <td><strong>MAE</strong></td>
                <td>${mae:.2f}</td>
                <td>–°–µ—Ä–µ–¥–Ω—è –∞–±—Å–æ–ª—é—Ç–Ω–∞ –ø–æ–º–∏–ª–∫–∞</td>
            </tr>
            <tr>
                <td><strong>RMSE</strong></td>
                <td>${rmse:.2f}</td>
                <td>–ö–æ—Ä—ñ–Ω—å —Å–µ—Ä–µ–¥–Ω—å–æ–∫–≤–∞–¥—Ä–∞—Ç–∏—á–Ω–æ—ó –ø–æ–º–∏–ª–∫–∏</td>
            </tr>
            <tr>
                <td><strong>MAPE</strong></td>
                <td>{mape:.2f}%</td>
                <td>–°–µ—Ä–µ–¥–Ω—è –∞–±—Å–æ–ª—é—Ç–Ω–∞ –ø—Ä–æ—Ü–µ–Ω—Ç–Ω–∞ –ø–æ–º–∏–ª–∫–∞</td>
            </tr>
        </table>
        
        <div class="info-box">
            <h4 class="{quality_class}">‚úì {quality_text} –Ø–ö–Ü–°–¢–¨ –ú–û–î–ï–õ–Ü (R¬≤ = {r2:.4f})</h4>
            <p>–ú–æ–¥–µ–ª—å –ø–æ—è—Å–Ω—é—î {r2*100:.1f}% –≤–∞—Ä—ñ–∞—Ü—ñ—ó –≤ –¥–∞–Ω–∏—Ö LTV.</p>
        </div>
        
        <h2>4. –ü—Ä–æ–≥–Ω–æ–∑ LTV –Ω–∞ 52 —Ç–∏–∂–Ω—ñ</h2>
        <div class="metric">
            <div class="metric-label">–°–µ—Ä–µ–¥–Ω—ñ–π LTV (52 —Ç–∏–∂–Ω—ñ)</div>
            <div class="metric-value">${ltv_52.mean():.2f}</div>
        </div>
        <div class="metric">
            <div class="metric-label">–ú–µ–¥—ñ–∞–Ω–Ω–∏–π LTV</div>
            <div class="metric-value">${ltv_52.median():.2f}</div>
        </div>
        <div class="metric">
            <div class="metric-label">–î—ñ–∞–ø–∞–∑–æ–Ω LTV</div>
            <div class="metric-value">${ltv_52.min():.2f} - ${ltv_52.max():.2f}</div>
        </div>
        <div class="metric">
            <div class="metric-label">–ü—Ä–æ–≥–Ω–æ–∑–Ω–∏–π —Å—É–∫—É–ø–Ω–∏–π –¥–æ—Ö—ñ–¥</div>
            <div class="metric-value">${total_revenue_52:,.0f}</div>
        </div>
"""

# The comparison box is added only when section 7 found a row with an actual
# LTV value; last_week_num and avg_last_ltv are assigned under that same
# condition, so they are defined whenever this branch runs.
if last_week is not None:
    html_content += f"""
        <div class="info-box">
            <h4>üìä –ü–æ—Ä—ñ–≤–Ω—è–Ω–Ω—è –∑ —Ñ–∞–∫—Ç–∏—á–Ω–∏–º–∏ –¥–∞–Ω–∏–º–∏</h4>
            <p><strong>–û—Å—Ç–∞–Ω–Ω—ñ–π —Ñ–∞–∫—Ç–∏—á–Ω–∏–π LTV (—Ç–∏–∂–¥–µ–Ω—å {int(last_week_num)}):</strong> ${avg_last_ltv:.2f}</p>
            <p><strong>–ü—Ä–æ–≥–Ω–æ–∑ –Ω–∞ 52 —Ç–∏–∂–Ω—ñ:</strong> ${ltv_52.mean():.2f}</p>
            <p><strong>–û—á—ñ–∫—É–≤–∞–Ω–∏–π –ø—Ä–∏—Ä—ñ—Å—Ç:</strong> ${ltv_52.mean() - avg_last_ltv:.2f} (<span class="highlight">{growth_expected:+.1f}%</span>)</p>
            <p><strong>–Ü–Ω—Ç–µ—Ä–ø—Ä–µ—Ç–∞—Ü—ñ—è:</strong> –ú–æ–¥–µ–ª—å –ø–µ—Ä–µ–¥–±–∞—á–∞—î {'–≤–∏—Ö—ñ–¥ –Ω–∞ –ø–ª–∞—Ç–æ' if growth_expected < 30 else '–ø–æ–º—ñ—Ä–Ω–µ –∑—Ä–æ—Å—Ç–∞–Ω–Ω—è'}</p>
        </div>
"""

# Per-cohort week-52 summary: predicted LTV, cohort size and the revenue they
# imply, ordered by cohort start week.
week_52_by_cohort = week_52_data.groupby('cohort_week')
cohort_summary = week_52_by_cohort.agg({
    'ltv_predicted': 'mean',
    'cohort_size': 'first'
})
cohort_predictions = cohort_summary.reset_index()
cohort_predictions = cohort_predictions.assign(
    total_revenue=cohort_predictions['ltv_predicted'] * cohort_predictions['cohort_size']
)
cohort_predictions = cohort_predictions.sort_values('cohort_week')

# Header of the per-cohort forecast table.
html_content += f"""
        <h3>–ü—Ä–æ–≥–Ω–æ–∑ –ø–æ –∫–æ–≥–æ—Ä—Ç–∞—Ö (52 —Ç–∏–∂–Ω—ñ)</h3>
        <table>
            <tr>
                <th>–ö–æ–≥–æ—Ä—Ç–∞</th>
                <th>–†–æ–∑–º—ñ—Ä</th>
                <th>LTV (52 —Ç–∏–∂–Ω—ñ)</th>
                <th>–°—É–∫—É–ø–Ω–∏–π revenue</th>
            </tr>
"""

# One row for each of the first fifteen cohorts, appended in a single join.
cohort_rows_html = [
    f"""
            <tr>
                <td>{cohort_row.cohort_week.strftime('%Y-%m-%d')}</td>
                <td>{int(cohort_row.cohort_size):,}</td>
                <td>${cohort_row.ltv_predicted:.2f}</td>
                <td>${cohort_row.total_revenue:,.0f}</td>
            </tr>
"""
    for cohort_row in cohort_predictions.head(15).itertuples(index=False)
]
html_content += "".join(cohort_rows_html)

# Closing part of the report: end of the cohort table, the chart image
# (referenced by the file name used in plt.savefig), conclusions with the
# headline numbers, static recommendations, and the closing tags.
html_content += f"""
        </table>
        
        <h2>5. –í—ñ–∑—É–∞–ª—ñ–∑–∞—Ü—ñ—è</h2>
        <img src="ltv_analysis_charts.png" alt="LTV Analysis Charts">
        
        <h2>6. –í–∏—Å–Ω–æ–≤–∫–∏</h2>
        <div class="info-box">
            <h4>üéì –ö–ª—é—á–æ–≤—ñ –≤–∏—Å–Ω–æ–≤–∫–∏:</h4>
            <ul>
                <li>–ú–æ–¥–µ–ª—å <strong>Shifted Gompertz Curve</strong> –¥–æ—Å—è–≥–ª–∞ —è–∫–æ—Å—Ç—ñ R¬≤ = {r2:.4f}</li>
                <li>–ü—Ä–æ–≥–Ω–æ–∑–Ω–∏–π LTV –Ω–∞ —Ä—ñ–∫: <strong>${ltv_52.mean():.2f}</strong></li>
                <li>–ü—Ä–æ–≥–Ω–æ–∑–Ω–∏–π —Å—É–∫—É–ø–Ω–∏–π –¥–æ—Ö—ñ–¥: <strong>${total_revenue_52:,.0f}</strong></li>
                <li>–ú–æ–¥–µ–ª—å –≤—Ä–∞—Ö–æ–≤—É—î –ø—Ä–∏—Ä–æ–¥–Ω–∏–π churn —Ç–∞ –≤–∏—Ö—ñ–¥ –Ω–∞ –ø–ª–∞—Ç–æ</li>
            </ul>
            
            <h4>üí° –†–µ–∫–æ–º–µ–Ω–¥–∞—Ü—ñ—ó:</h4>
            <ul>
                <li><strong>–ü–æ–∫—Ä–∞—â–∏—Ç–∏ retention</strong> –Ω–∞ —Ä–∞–Ω–Ω—ñ—Ö —Ç–∏–∂–Ω—è—Ö —á–µ—Ä–µ–∑ onboarding</li>
                <li><strong>–ó–±—ñ–ª—å—à–∏—Ç–∏ ARPU</strong> —á–µ—Ä–µ–∑ upselling —Ç–∞ premium features</li>
                <li><strong>–°–µ–≥–º–µ–Ω—Ç–∞—Ü—ñ—è</strong> - –æ–∫—Ä–µ–º—ñ –º–æ–¥–µ–ª—ñ –¥–ª—è —Ä—ñ–∑–Ω–∏—Ö —Å–µ–≥–º–µ–Ω—Ç—ñ–≤</li>
                <li><strong>–ú–æ–Ω—ñ—Ç–æ—Ä–∏–Ω–≥</strong> - –≤—ñ–¥—Å—Ç–µ–∂—É–≤–∞—Ç–∏ LTV –ø–æ –∫–æ–≥–æ—Ä—Ç–∞—Ö —â–æ—Ç–∏–∂–Ω—è</li>
            </ul>
        </div>
    </div>
</body>
</html>
"""

# Write the assembled report to the current working directory. UTF-8 matches
# the <meta charset="UTF-8"> declared in the document head.
with open('ltv_prediction_report_2.html', 'w', encoding='utf-8') as f:
    f.write(html_content)

print("HTML –∑–≤—ñ—Ç –∑–±–µ—Ä–µ–∂–µ–Ω–æ: ltv_prediction_report_2.html")
print("\n–î–ª—è –ø–µ—Ä–µ–≥–ª—è–¥—É –≤—ñ–¥–∫—Ä–∏–π—Ç–µ —Ñ–∞–π–ª ltv_prediction_report_2.html —É –±—Ä–∞—É–∑–µ—Ä—ñ")
print("\n" + "="*80)

LTV PREDICTION ANALYSIS - SHIFTED GOMPERTZ CURVE — Hanna Todorova

1. ПІДГОТОВКА ДАНИХ
--------------------------------------------------------------------------------
Загальна кількість транзакцій: 6,086
Кількість унікальних користувачів: 934
Період даних: 2023-02-26 00:00:00 - 2023-10-08 00:00:00
Максимальний lifetime_weeks: 32


2. РОЗРАХУНОК ФАКТИЧНОГО LTV (КУМУЛЯТИВНИЙ)
--------------------------------------------------------------------------------

Приклад фактичного LTV (перша когорта, перші 8 тижнів):
 lifetime_weeks     ltv  active_users  retention
              0  2.4900            20       1.00
              1  3.9840            12       0.60
              2  5.2290            10       0.50
              3  6.4740            10       0.50
              4  7.4700             8       0.40
              5  8.3