In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
from statsmodels.stats import proportion
from statsmodels.stats.power import NormalIndPower
import warnings
warnings.filterwarnings('ignore')


In [None]:
# Visualization settings: matplotlib style sheet, seaborn palette and default figure parameters.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 12,
})

In [None]:
# Seed NumPy's global random generator so the synthetic data is reproducible.
np.random.seed(seed=42)

# Disabled alternative: load a dataset from disk instead of generating it.
#df = pd.read_csv('ab_test_data.csv')



In [None]:
# Size of the simulated experiment.
n_users = 10_000  # number of simulated users
days = 14  # number of consecutive calendar days in the date range
dates = pd.date_range('2024-01-01', periods=days, freq='D')
user_ids = range(1, n_users + 1)  # consecutive ids starting at 1
# Every user is assigned to group A or B independently with equal probability.
groups = np.random.choice(['A', 'B'], size=n_users, p=[0.5, 0.5])

In [None]:
# Per-user purchase probability: 0.117 for group A, 0.125 for everyone else (group B).
conversion_probs = np.where(groups != 'A', 0.125, 0.117)
# A single Bernoulli trial per user, expressed as a binomial draw with n=1.
conversions = np.random.binomial(n=1, p=conversion_probs)

In [None]:
# Positions of users who converted; only they receive a non-zero revenue.
purchased_idx = np.flatnonzero(conversions == 1)
revenue = np.zeros(n_users)
revenue[purchased_idx] = np.random.lognormal(mean=8.5, sigma=1.2, size=purchased_idx.size)

# Session duration: an exponential draw (scale 30) shifted by a constant 10.
session_duration = 10 + np.random.exponential(scale=30, size=n_users)

In [None]:
# Assemble the per-user table; each row gets one randomly chosen date from `dates`.
df = pd.DataFrame(dict(
    user_id=user_ids,
    date=np.random.choice(dates, n_users),
    group=groups,
    conversion=conversions,
    revenue=revenue,
    session_duration=session_duration,
))

In [None]:
# Traffic source is drawn for every user with fixed channel probabilities,
# independently of the experiment group.
traffic_sources = ['organic', 'direct', 'social', 'email', 'paid']
source_probabilities = [0.4, 0.3, 0.15, 0.1, 0.05]
df['traffic_source'] = np.random.choice(traffic_sources, size=n_users, p=source_probabilities)

In [None]:
# Quick look at the dataset: shape, first rows, column schema and summary statistics.
print("–†–∞–∑–º–µ—Ä –¥–∞—Ç–∞—Å–µ—Ç–∞:", df.shape)
print("\n –ü–µ—Ä–≤—ã–µ 5 —Å—Ç—Ä–æ–∫:")
print(df.head())

print("\n–ë–∞–∑–æ–≤–∞—è –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –æ –¥–∞–Ω–Ω—ã—Ö:")
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
df.info()

print("\n –û–ø–∏—Å–∞—Ç–µ–ª—å–Ω–∞—è —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞:")
print(df.describe())

In [None]:
# Split sanity check: absolute group sizes and each group's share of all rows.
print("–ü—Ä–æ–≤–µ—Ä–∫–∞ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è –ø–æ –≥—Ä—É–ø–ø–∞–º:")
group_distribution = df['group'].value_counts()
print(group_distribution)
total_rows = len(df)
share_a = group_distribution['A'] / total_rows
share_b = group_distribution['B'] / total_rows
print(f"\n–î–æ–ª—è –≥—Ä—É–ø–ø—ã A: {share_a:.2%}")
print(f"–î–æ–ª—è –≥—Ä—É–ø–ø—ã B: {share_b:.2%}")

In [None]:
# Share of each traffic source inside every group (each column sums to 1), shown in percent.
print("\n –ü—Ä–æ–≤–µ—Ä–∫–∞ —Å–ø–ª–∏—Ç–æ–≤–∞–Ω–∏—è –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º —Ç—Ä–∞—Ñ–∏–∫–∞:")
traffic_split = pd.crosstab(
    index=df['traffic_source'],
    columns=df['group'],
    normalize='columns',
)
print(traffic_split.mul(100))

In [None]:
# Chi-square test of independence between traffic source and experiment group,
# computed on the raw (non-normalised) contingency counts.
# The results get check-specific names: the generic name `p_value` is what the
# conversion z-test and the business conclusions rely on, so it must not be
# bound (or overwritten on a re-run) by this balance check.
source_by_group = pd.crosstab(df['traffic_source'], df['group'])
chi2_stat, chi2_p_value = stats.chi2_contingency(source_by_group)[:2]
print(f"\n –•–∏-–∫–≤–∞–¥—Ä–∞—Ç —Ç–µ—Å—Ç –Ω–∞ —Ä–∞–≤–Ω–æ–º–µ—Ä–Ω–æ—Å—Ç—å —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è:")
print(f"œá¬≤ = {chi2_stat:.3f}, p-value = {chi2_p_value:.3f}")
print("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ —Ä–∞–≤–Ω–æ–º–µ—Ä–Ω–æ–µ" if chi2_p_value > 0.05 else "–í–æ–∑–º–æ–∂–Ω—ã –ø—Ä–æ–±–ª–µ–º—ã —Å–æ —Å–ø–ª–∏—Ç–æ–≤–∞–Ω–∏–µ–º")

# %%
print("\n –ü—Ä–æ–≤–µ—Ä–∫–∞ –≤—Ä–µ–º–µ–Ω–Ω–æ–≥–æ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è:")
# Make sure 'date' is a datetime column, then derive calendar features from it.
df['date'] = pd.to_datetime(df['date'])
date_parts = df['date'].dt
df['day_of_week'] = date_parts.day_name()
# dayofweek numbers Monday as 0, so values 5 and 6 are Saturday and Sunday.
df['is_weekend'] = date_parts.dayofweek.ge(5)

In [None]:
# Share of weekend vs. weekday rows inside every group (each column sums to 1), in percent.
weekend_split = pd.crosstab(
    index=df['is_weekend'],
    columns=df['group'],
    normalize='columns',
)
print("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –≤—ã—Ö–æ–¥–Ω—ã–º/–±—É–¥–Ω—è–º (%):")
print(weekend_split.mul(100))

In [None]:
def calculate_ci(data, confidence=0.95):
    """Return the sample mean together with a two-sided Student-t confidence interval.

    Args:
        data: Sized collection of numeric observations.
        confidence: Confidence level of the interval (default 0.95).

    Returns:
        Tuple ``(mean, lower, upper)`` where the bounds are the mean minus/plus
        the standard error of the mean scaled by the t quantile with
        ``len(data) - 1`` degrees of freedom.
    """
    center = np.mean(data)
    degrees_of_freedom = len(data) - 1
    t_quantile = stats.t.ppf((1 + confidence) / 2., degrees_of_freedom)
    half_width = stats.sem(data) * t_quantile
    return center, center - half_width, center + half_width

In [None]:
# Per-group summary metrics, keyed by group label ('A' / 'B').
metrics = {}

for group in ['A', 'B']:
    group_data = df[df['group'] == group]
    # Loop-local names are deliberately distinct from the notebook-level
    # `n_users` (total simulated users) and from `ci_low` / `ci_high` (bounds of
    # the CI for the conversion difference), so this cell never clobbers them.
    group_size = len(group_data)
    n_conversions = group_data['conversion'].sum()
    conversion_rate = n_conversions / group_size
    total_revenue = group_data['revenue'].sum()
    avg_revenue_per_user = total_revenue / group_size
    # Average order value over buyers only; 0 when the group has no purchases.
    avg_revenue_per_paying = total_revenue / n_conversions if n_conversions > 0 else 0

    avg_session_duration = group_data['session_duration'].mean()
    # 95% Wilson score interval for the group's conversion rate.
    conv_ci_low, conv_ci_high = proportion.proportion_confint(
        n_conversions, group_size, alpha=0.05, method='wilson'
    )

    metrics[group] = {
        'n_users': group_size,
        'n_conversions': n_conversions,
        'conversion_rate': conversion_rate,
        'conversion_ci_low': conv_ci_low,
        'conversion_ci_high': conv_ci_high,
        'total_revenue': total_revenue,
        'avg_revenue_per_user': avg_revenue_per_user,
        'avg_revenue_per_paying': avg_revenue_per_paying,
        'avg_session_duration': avg_session_duration
    }

# One row per group, one column per metric.
metrics_df = pd.DataFrame(metrics).T
print("üìä –û—Å–Ω–æ–≤–Ω—ã–µ –º–µ—Ç—Ä–∏–∫–∏ –ø–æ –≥—Ä—É–ø–ø–∞–º:")
print(metrics_df[['n_users', 'n_conversions', 'conversion_rate', 
                  'avg_revenue_per_user', 'avg_session_duration']])


In [None]:
# 2x2 dashboard canvas; the individual panels are drawn by the cells that follow.
# Created exactly once: a second identical plt.subplots() call would only leave
# an orphan empty figure behind.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))


In [None]:
# Panel 1: conversion rate per group with 95% confidence-interval error bars.
ax1 = axes[0, 0]
# A dedicated name is used for the two labels so that the per-user `groups`
# array produced during data generation is not overwritten by this cell.
group_labels = ['A', 'B']
conversion_rates = [metrics[g]['conversion_rate'] for g in group_labels]
ci_lows = [metrics[g]['conversion_ci_low'] for g in group_labels]
ci_highs = [metrics[g]['conversion_ci_high'] for g in group_labels]

bars = ax1.bar(group_labels, conversion_rates, color=['#3498db', '#2ecc71'], alpha=0.7)
# errorbar() expects distances from the plotted value, not absolute CI bounds.
rates_arr = np.array(conversion_rates)
ax1.errorbar(group_labels, conversion_rates,
            yerr=[rates_arr - ci_lows, ci_highs - rates_arr],
            fmt='none', color='black', capsize=10, linewidth=2)


In [None]:
# Write the conversion rate just above each bar, horizontally centred on it.
value_label_style = dict(ha='center', va='bottom', fontsize=12, fontweight='bold')
for bar, rate in zip(bars, conversion_rates):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.001
    ax1.text(label_x, label_y, f'{rate:.2%}', **value_label_style)

ax1.set_ylabel('–ö–æ–Ω–≤–µ—Ä—Å–∏—è (%)', fontsize=12)
ax1.set_title(
    '–ö–æ–Ω–≤–µ—Ä—Å–∏—è –≤ –ø–æ–∫—É–ø–∫—É –ø–æ –≥—Ä—É–ø–ø–∞–º (—Å 95% –¥–æ–≤–µ—Ä–∏—Ç–µ–ª—å–Ω—ã–º–∏ –∏–Ω—Ç–µ—Ä–≤–∞–ª–∞–º–∏)',
    fontsize=14, fontweight='bold', pad=20,
)
# Leave 20% headroom above the tallest bar.
ax1.set_ylim(0, max(conversion_rates) * 1.2)
ax1.grid(True, alpha=0.3)

In [None]:
# Panel 2: mean conversion per calendar day, one line per group.
ax2 = axes[0, 1]
daily_conversion = df.groupby(['date', 'group'])['conversion'].mean().unstack()
daily_conversion.plot(ax=ax2, linewidth=3, marker='o')
ax2.set_ylabel('–ö–æ–Ω–≤–µ—Ä—Å–∏—è (%)', fontsize=12)
ax2.set_title('–î–∏–Ω–∞–º–∏–∫–∞ –∫–æ–Ω–≤–µ—Ä—Å–∏–∏ –ø–æ –¥–Ω—è–º', fontsize=14, fontweight='bold', pad=20)
ax2.legend(title='–ì—Ä—É–ø–ø–∞')
ax2.grid(True, alpha=0.3)
# Rotate the tick labels through tick_params: feeding get_xticklabels() back into
# set_xticklabels() would replace the axis formatter with the label texts captured
# at this moment instead of only changing their angle.
ax2.tick_params(axis='x', labelrotation=45)

In [None]:
# Panel 3: density histogram of revenue among users with non-zero revenue.
ax3 = axes[1, 0]
revenue_data_a = df.loc[df['group'] == 'A', 'revenue']
revenue_data_b = df.loc[df['group'] == 'B', 'revenue']

# Keep only rows with positive revenue.
paying_a = revenue_data_a[revenue_data_a.gt(0)]
paying_b = revenue_data_b[revenue_data_b.gt(0)]

ax3.hist(
    [paying_a, paying_b],
    bins=30,
    alpha=0.7,
    label=['–ì—Ä—É–ø–ø–∞ A', '–ì—Ä—É–ø–ø–∞ B'],
    density=True,
)
ax3.set_xlabel('–î–æ—Ö–æ–¥ —Å –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è (—Ä—É–±.)', fontsize=12)
ax3.set_ylabel('–ü–ª–æ—Ç–Ω–æ—Å—Ç—å', fontsize=12)
ax3.set_title('–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –¥–æ—Ö–æ–¥–∞ —Å –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π', fontsize=14, fontweight='bold', pad=20)
ax3.legend()
ax3.grid(True, alpha=0.3)

In [None]:
# Panel 4 is drawn as a closed polygon over angles spanning 0..2*pi, i.e. a radar
# chart, which only renders correctly on polar axes. Swap the Cartesian subplot
# created by plt.subplots() for a polar one occupying the same grid slot.
if axes[1, 1] in fig.axes:
    axes[1, 1].remove()
ax4 = fig.add_subplot(2, 2, 4, projection='polar')
axes[1, 1] = ax4  # keep the axes grid pointing at the live subplot

# Raw values of the three compared metrics, one row per group.
metrics_comparison = pd.DataFrame({
    '–ö–æ–Ω–≤–µ—Ä—Å–∏—è': [metrics['A']['conversion_rate'], metrics['B']['conversion_rate']],
    '–°—Ä. —á–µ–∫': [metrics['A']['avg_revenue_per_paying'], metrics['B']['avg_revenue_per_paying']],
    '–î–æ—Ö–æ–¥ –Ω–∞ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è': [metrics['A']['avg_revenue_per_user'], metrics['B']['avg_revenue_per_user']]
}, index=['–ì—Ä—É–ø–ø–∞ A', '–ì—Ä—É–ø–ø–∞ B'])



In [None]:
# Scale every metric column by its own maximum so the larger group equals 1.
metrics_normalized = metrics_comparison.div(metrics_comparison.max(), axis='columns')

In [None]:
# One angle per metric, evenly spaced around the full circle.
angles = np.linspace(0, 2*np.pi, len(metrics_normalized.columns), endpoint=False).tolist()
angles += angles[:1]  # close the loop

for group in metrics_normalized.index:
    values = metrics_normalized.loc[group].values.tolist()
    values += values[:1]  # repeat the first value so the polygon closes
    ax4.plot(angles, values, 'o-', linewidth=2, label=group)
    ax4.fill(angles, values, alpha=0.1)

ax4.set_xticks(angles[:-1])
ax4.set_xticklabels(metrics_normalized.columns)
ax4.set_title('–°—Ä–∞–≤–Ω–µ–Ω–∏–µ –º–µ—Ç—Ä–∏–∫ (–Ω–æ—Ä–º–∞–ª–∏–∑–æ–≤–∞–Ω–Ω—ã–µ –∑–Ω–∞—á–µ–Ω–∏—è)', 
              fontsize=14, fontweight='bold', pad=20)
ax4.legend(loc='upper right')
ax4.grid(True)

# The dashboard figure was created in an earlier cell. With the inline backend's
# default of closing figures at the end of every cell, it is no longer pyplot's
# "current" figure here, so plt.tight_layout() / plt.show() alone would act on a
# new empty figure. Address the figure explicitly and make it the cell output.
dashboard_fig = ax4.figure
dashboard_fig.tight_layout()
plt.show()
dashboard_fig

In [None]:
print("–°–¢–ê–¢–ò–°–¢–ò–ß–ï–°–ö–ò–ô –ê–ù–ê–õ–ò–ó –†–ê–ó–õ–ò–ß–ò–ô")
print("=" * 50)

# 6.1 Z-test for the difference in proportions (conversion)
print("\n1. Z-—Ç–µ—Å—Ç –¥–ª—è —Ä–∞–∑–Ω–∏—Ü—ã –∫–æ–Ω–≤–µ—Ä—Å–∏–π:")
# Successes (purchases) and observations (users) for each group.
success_a = metrics['A']['n_conversions']
nobs_a = metrics['A']['n_users']
success_b = metrics['B']['n_conversions']
nobs_b = metrics['B']['n_users']

In [None]:
# One-sided two-sample z-test. With the samples ordered [A, B], alternative='smaller'
# makes the alternative hypothesis "conversion of A is smaller than conversion of B".
z_score, p_value = proportion.proportions_ztest(
    count=[success_a, success_b],
    nobs=[nobs_a, nobs_b],
    alternative='smaller',
)

rate_a = metrics['A']['conversion_rate']
rate_b = metrics['B']['conversion_rate']
print(f"   –ö–æ–Ω–≤–µ—Ä—Å–∏—è A: {rate_a:.3%}")
print(f"   –ö–æ–Ω–≤–µ—Ä—Å–∏—è B: {rate_b:.3%}")
print(f"   –ê–±—Å–æ–ª—é—Ç–Ω–∞—è —Ä–∞–∑–Ω–∏—Ü–∞: {(rate_b - rate_a):.3%}")
print(f"   –û—Ç–Ω–æ—Å–∏—Ç–µ–ª—å–Ω–∞—è —Ä–∞–∑–Ω–∏—Ü–∞: {(rate_b/rate_a - 1):.2%}")
print(f"   Z-—Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞: {z_score:.3f}")
print(f"   P-value: {p_value:.4f}")

In [None]:
# Wald confidence interval for the difference of proportions, B minus A
# (group B is passed as the first sample).
ci_low, ci_high = proportion.confint_proportions_2indep(
    count1=success_b, nobs1=nobs_b,
    count2=success_a, nobs2=nobs_a,
    method='wald',
)
print(f"   95% –î–ò –¥–ª—è —Ä–∞–∑–Ω–∏—Ü—ã: [{ci_low:.4f}, {ci_high:.4f}]")

# The verdict is based on the z-test p-value computed earlier in the notebook.
negation = '–ù–ï ' if p_value > 0.05 else ''
print(f"\n   –í—ã–≤–æ–¥: –†–∞–∑–Ω–∏—Ü–∞ {negation}—è–≤–ª—è–µ—Ç—Å—è —Å—Ç–∞—Ç–∏—Å—Ç–∏—á–µ—Å–∫–∏ –∑–Ω–∞—á–∏–º–æ–π (Œ±=0.05)")

In [None]:
# T-test for the average order value (paying users only)
print("\n2. T-—Ç–µ—Å—Ç –¥–ª—è —Å—Ä–µ–¥–Ω–µ–≥–æ —á–µ–∫–∞:")

converted = df['conversion'] == 1
paying_users_a = df.loc[(df['group'] == 'A') & converted, 'revenue']
paying_users_b = df.loc[(df['group'] == 'B') & converted, 'revenue']

# The test is run only when both groups have at least two buyers.
if min(len(paying_users_a), len(paying_users_b)) <= 1:
    print("   –ù–µ–¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ –¥–∞–Ω–Ω—ã—Ö –¥–ª—è t-—Ç–µ—Å—Ç–∞")
else:
    # equal_var=False selects Welch's t-test (no equal-variance assumption).
    t_stat, p_value_t = stats.ttest_ind(paying_users_a, paying_users_b, equal_var=False)

    check_a = metrics['A']['avg_revenue_per_paying']
    check_b = metrics['B']['avg_revenue_per_paying']
    negation_t = '–ù–ï ' if p_value_t > 0.05 else ''
    print(f"   –°—Ä–µ–¥–Ω–∏–π —á–µ–∫ A: {check_a:.2f} —Ä—É–±.")
    print(f"   –°—Ä–µ–¥–Ω–∏–π —á–µ–∫ B: {check_b:.2f} —Ä—É–±.")
    print(f"   –†–∞–∑–Ω–∏—Ü–∞: {(check_b - check_a):.2f} —Ä—É–±.")
    print(f"   T-—Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞: {t_stat:.3f}")
    print(f"   P-value: {p_value_t:.4f}")
    print(f"    –í—ã–≤–æ–¥: –†–∞–∑–Ω–∏—Ü–∞ –≤ —Å—Ä–µ–¥–Ω–µ–º —á–µ–∫–µ {negation_t}—è–≤–ª—è–µ—Ç—Å—è —Å—Ç–∞—Ç–∏—Å—Ç–∏—á–µ—Å–∫–∏ –∑–Ω–∞—á–∏–º–æ–π")


In [None]:
# Test power calculation
print("\n3. –ê–Ω–∞–ª–∏–∑ –º–æ—â–Ω–æ—Å—Ç–∏ —Ç–µ—Å—Ç–∞:")

# Effect size between the two observed conversion rates.
effect_size = proportion.proportion_effectsize(
    metrics['A']['conversion_rate'], 
    metrics['B']['conversion_rate']
)

# Ratio of group B size to group A size, shared by both power computations.
group_ratio = metrics['B']['n_users']/metrics['A']['n_users']

power_analysis = NormalIndPower()
# Sample size of the first group needed to reach 80% power at alpha = 0.05.
required_n = power_analysis.solve_power(
    effect_size=effect_size,
    power=0.8,
    alpha=0.05,
    ratio=group_ratio
)

# Power achieved with the actual group sizes.
actual_power = power_analysis.solve_power(
    effect_size=effect_size,
    nobs1=metrics['A']['n_users'],
    alpha=0.05,
    ratio=group_ratio
)

# A required sample size must be rounded up: truncating it would report a size
# that falls just short of the requested power.
required_n_rounded = int(np.ceil(required_n))

print(f"   –†–∞–∑–º–µ—Ä —ç—Ñ—Ñ–µ–∫—Ç–∞ (Cohen's h): {effect_size:.3f}")
print(f"   –¢—Ä–µ–±—É–µ–º—ã–π —Ä–∞–∑–º–µ—Ä –≤—ã–±–æ—Ä–∫–∏ –¥–ª—è –º–æ—â–Ω–æ—Å—Ç–∏ 80%: {required_n_rounded} –Ω–∞ –≥—Ä—É–ø–ø—É")
print(f"   –§–∞–∫—Ç–∏—á–µ—Å–∫–∞—è –º–æ—â–Ω–æ—Å—Ç—å —Ç–µ—Å—Ç–∞: {actual_power:.2%}")
print(f"   –§–∞–∫—Ç–∏—á–µ—Å–∫–∏–π —Ä–∞–∑–º–µ—Ä –≤—ã–±–æ—Ä–∫–∏: {metrics['A']['n_users']} (A), {metrics['B']['n_users']} (B)")

if actual_power < 0.8:
    print("     –í–Ω–∏–º–∞–Ω–∏–µ: –ú–æ—â–Ω–æ—Å—Ç—å —Ç–µ—Å—Ç–∞ –Ω–∏–∂–µ —Ä–µ–∫–æ–º–µ–Ω–¥—É–µ–º–æ–≥–æ —É—Ä–æ–≤–Ω—è 80%")
    print("   –†–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏—è: –£–≤–µ–ª–∏—á–∏—Ç—å –¥–ª–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å —Ç–µ—Å—Ç–∞ –∏–ª–∏ —Ä–∞–∑–º–µ—Ä –≤—ã–±–æ—Ä–∫–∏")


In [None]:
# ## Sensitivity analysis and additional checks

In [None]:
# Segment analysis: conversion of A vs. B inside every value of each segment column.
print("üìà –ê–ù–ê–õ–ò–ó –ü–û –°–ï–ì–ú–ï–ù–¢–ê–ú")
print("=" * 50)

segments = ['traffic_source', 'is_weekend']
for segment in segments:
    print(f"\n –ü–æ —Å–µ–≥–º–µ–Ω—Ç—É: {segment}")
    segment_results = []

    for value in df[segment].unique():
        segment_data = df[df[segment] == value]
        # Skip small segments (fewer than 50 rows).
        if len(segment_data) < 50:
            continue

        in_group_a = segment_data['group'] == 'A'
        in_group_b = segment_data['group'] == 'B'
        conv_a = segment_data.loc[in_group_a, 'conversion'].mean()
        conv_b = segment_data.loc[in_group_b, 'conversion'].mean()

        # A NaN mean means one of the groups has no rows in this segment.
        if np.isnan(conv_a) or np.isnan(conv_b):
            continue

        if conv_a > 0:
            rel_diff = conv_b / conv_a - 1
        else:
            rel_diff = 0
        segment_results.append({
            'segment_value': value,
            'conv_a': conv_a,
            'conv_b': conv_b,
            'abs_diff': conv_b - conv_a,
            'rel_diff': rel_diff,
        })

    results_df = pd.DataFrame(segment_results)
    if results_df.empty:
        continue

    print(results_df.sort_values('abs_diff', ascending=False).to_string(index=False))

    # Check whether the effect has opposite signs in different segment values.
    abs_diffs = results_df['abs_diff']
    mixed_effects = (abs_diffs > 0).any() and (abs_diffs < 0).any()
    if mixed_effects:
        print("     –û–±–Ω–∞—Ä—É–∂–µ–Ω—ã —Å–µ–≥–º–µ–Ω—Ç—ã —Å –ø—Ä–æ—Ç–∏–≤–æ–ø–æ–ª–æ–∂–Ω—ã–º —ç—Ñ—Ñ–µ–∫—Ç–æ–º!")


In [None]:
# Behavioural metrics (the original note mentions a funnel analysis if user-action
# data is available; only session duration is compared here).
print("\nüìä –ê–ù–ê–õ–ò–ó –ü–û–í–ï–î–ï–ù–ß–ï–°–ö–ò–• –ú–ï–¢–†–ò–ö")
print("=" * 50)

print("1. –í—Ä–µ–º—è –Ω–∞ —Å–∞–π—Ç–µ:")
duration_a = df.loc[df['group'] == 'A', 'session_duration']
duration_b = df.loc[df['group'] == 'B', 'session_duration']

# equal_var=False selects Welch's t-test (no equal-variance assumption).
t_stat_dur, p_val_dur = stats.ttest_ind(duration_a, duration_b, equal_var=False)
duration_verdict = '–ï—Å—Ç—å' if p_val_dur < 0.05 else '–ù–µ—Ç'
print(f"   –ì—Ä—É–ø–ø–∞ A: {duration_a.mean():.1f} –º–∏–Ω.")
print(f"   –ì—Ä—É–ø–ø–∞ B: {duration_b.mean():.1f} –º–∏–Ω.")
print(f"   P-value: {p_val_dur:.4f}")
print(f"   –í—ã–≤–æ–¥: {duration_verdict} –∑–Ω–∞—á–∏–º–æ–π —Ä–∞–∑–Ω–∏—Ü—ã –≤–æ –≤—Ä–µ–º–µ–Ω–∏ –Ω–∞ —Å–∞–π—Ç–µ")

In [None]:
## 8. Business evaluation of the results

In [None]:

print(" –ë–ò–ó–ù–ï–°-–û–¶–ï–ù–ö–ê –†–ï–ó–£–õ–¨–¢–ê–¢–û–í")
print("=" * 50)

# Inputs for the potential-effect estimate.
baseline_conversion = metrics['A']['conversion_rate']
new_conversion = metrics['B']['conversion_rate']
monthly_users = 100000  # approximate number of users per month
avg_order_value = metrics['A']['avg_revenue_per_paying']

# NOTE(review): both estimates below apply the effect to half of the monthly
# traffic (factor 0.5) — confirm this is intended for a full rollout.
affected_users = monthly_users * 0.5
is_significant = p_value < 0.05  # significance of the conversion z-test

if not is_significant:
    print(f"\n  –°—Ç–∞—Ç–∏—Å—Ç–∏—á–µ—Å–∫–∏ –∑–Ω–∞—á–∏–º–æ–≥–æ —ç—Ñ—Ñ–µ–∫—Ç–∞ –Ω–µ –æ–±–Ω–∞—Ä—É–∂–µ–Ω–æ")
    print(f"   –í–Ω–µ–¥—Ä–µ–Ω–∏–µ –∏–∑–º–µ–Ω–µ–Ω–∏–π –º–æ–∂–µ—Ç –Ω–µ –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ —Ä–æ—Å—Ç—É –≤—ã—Ä—É—á–∫–∏")

    # Loss-risk estimate based on the lower bound of the CI for the difference.
    worst_case_scenario = ci_low
    if worst_case_scenario < 0:
        potential_loss = affected_users * abs(worst_case_scenario) * avg_order_value
        print(f"   –ü–æ—Ç–µ–Ω—Ü–∏–∞–ª—å–Ω—ã–µ –ø–æ—Ç–µ—Ä–∏ (worst-case): {potential_loss:,.0f} —Ä—É–±. –≤ –º–µ—Å—è—Ü")
else:
    conversion_uplift = new_conversion - baseline_conversion
    additional_conversions = affected_users * conversion_uplift
    additional_revenue = additional_conversions * avg_order_value
    yearly_revenue = additional_revenue * 12

    print(f"\n –ü—Ä–∏ –≤–Ω–µ–¥—Ä–µ–Ω–∏–∏ –∏–∑–º–µ–Ω–µ–Ω–∏–π:")
    print(f"   –î–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã—Ö –ø–æ–∫—É–ø–æ–∫ –≤ –º–µ—Å—è—Ü: {additional_conversions:.0f}")
    print(f"   –î–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã–π –¥–æ—Ö–æ–¥ –≤ –º–µ—Å—è—Ü: {additional_revenue:,.0f} —Ä—É–±.")
    print(f"   –î–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã–π –¥–æ—Ö–æ–¥ –≤ –≥–æ–¥: {yearly_revenue:,.0f} —Ä—É–±.")

    # Payback period (requires a known implementation cost).
    implementation_cost = 500000  # approximate cost
    if implementation_cost > 0:
        months_to_roi = implementation_cost / additional_revenue
        print(f"   –°—Ä–æ–∫ –æ–∫—É–ø–∞–µ–º–æ—Å—Ç–∏: {months_to_roi:.1f} –º–µ—Å—è—Ü–µ–≤")


In [None]:
## 9. Conclusions and recommendations


print(" –ò–¢–û–ì–û–í–´–ï –í–´–í–û–î–´ –ò –†–ï–ö–û–ú–ï–ù–î–ê–¶–ò–ò")
print("=" * 50)

# Flags and labels reused by the sections below.
is_significant = p_value < 0.05
relative_uplift_pct = (new_conversion/baseline_conversion - 1)*100
significance_label = '–°—Ç–∞—Ç–∏—Å—Ç–∏—á–µ—Å–∫–∏ –∑–Ω–∞—á–∏–º–æ' if is_significant else '–ù–µ —Å—Ç–∞—Ç–∏—Å—Ç–∏—á–µ—Å–∫–∏ –∑–Ω–∞—á–∏–º–æ'
power_label = '–¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ' if actual_power >= 0.8 else '–Ω–µ–¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ'

print(f"\n –°—Ç–∞—Ç–∏—Å—Ç–∏—á–µ—Å–∫–∏–µ –≤—ã–≤–æ–¥—ã:")
print(f"   1. –ö–æ–Ω–≤–µ—Ä—Å–∏—è –≤ –≥—Ä—É–ø–ø–µ B –≤—ã—à–µ –Ω–∞ {relative_uplift_pct:.1f}%")
print(f"   2. P-value = {p_value:.4f} ‚Üí {significance_label}")
print(f"   3. –ú–æ—â–Ω–æ—Å—Ç—å —Ç–µ—Å—Ç–∞: {actual_power:.1%} ({power_label})")

print(f"\nüìà –ë–∏–∑–Ω–µ—Å-–≤—ã–≤–æ–¥—ã:")
if is_significant:
    print("    –†–ï–ö–û–ú–ï–ù–î–ê–¶–ò–Ø: –í–Ω–µ–¥—Ä–∏—Ç—å –Ω–æ–≤—É—é —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ç–µ–ª—å–Ω—É—é —Å–∏—Å—Ç–µ–º—É")
    print(f"   –û–∂–∏–¥–∞–µ–º—ã–π —ç—Ñ—Ñ–µ–∫—Ç: +{additional_revenue:,.0f} —Ä—É–±./–º–µ—Å.")
else:
    print("     –†–ï–ö–û–ú–ï–ù–î–ê–¶–ò–Ø: –û—Ç–∫–ª–æ–Ω–∏—Ç—å –∏–∑–º–µ–Ω–µ–Ω–∏—è –∏–ª–∏ –ø—Ä–æ–≤–µ—Å—Ç–∏ –Ω–æ–≤—ã–π —Ç–µ—Å—Ç")
    print(f"   –ü—Ä–∏—á–∏–Ω–∞: –ù–µ–¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ –¥–æ–∫–∞–∑–∞—Ç–µ–ª—å—Å—Ç–≤ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏")

print(f"\nüîç –†–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏ –ø–æ —Å–ª–µ–¥—É—é—â–∏–º —à–∞–≥–∞–º:")
if is_significant:
    next_steps = [
        "   1. –ó–∞–ø–ª–∞–Ω–∏—Ä–æ–≤–∞—Ç—å –ø–æ—Å—Ç–µ–ø–µ–Ω–Ω—ã–π rollout (10% ‚Üí 50% ‚Üí 100%)",
        "   2. –ú–æ–Ω–∏—Ç–æ—Ä–∏—Ç—å –∫–ª—é—á–µ–≤—ã–µ –º–µ—Ç—Ä–∏–∫–∏ –ø–æ—Å–ª–µ –≤–Ω–µ–¥—Ä–µ–Ω–∏—è",
        "   3. –ü—Ä–æ–≤–µ—Å—Ç–∏ A/A —Ç–µ—Å—Ç —á–µ—Ä–µ–∑ –º–µ—Å—è—Ü –¥–ª—è –ø—Ä–æ–≤–µ—Ä–∫–∏ —Å—Ç–∞–±–∏–ª—å–Ω–æ—Å—Ç–∏",
    ]
else:
    next_steps = [
        "   1. –ü—Ä–æ–∞–Ω–∞–ª–∏–∑–∏—Ä–æ–≤–∞—Ç—å –ø—Ä–∏—á–∏–Ω—ã –≤–æ–∑–º–æ–∂–Ω–æ–π –Ω–µ—ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ—Å—Ç–∏",
        "   2. –ü—Ä–æ–≤–µ—Å—Ç–∏ –∫–∞—á–µ—Å—Ç–≤–µ–Ω–Ω–æ–µ –∏—Å—Å–ª–µ–¥–æ–≤–∞–Ω–∏–µ (—é–∑–∞–±–∏–ª–∏—Ç–∏-—Ç–µ—Å—Ç—ã)",
        "   3. –†–∞–∑—Ä–∞–±–æ—Ç–∞—Ç—å –Ω–æ–≤—ã–µ –≥–∏–ø–æ—Ç–µ–∑—ã –¥–ª—è —É–ª—É—á—à–µ–Ω–∏—è",
    ]
# An underpowered test adds a fourth step in either case.
if actual_power < 0.8:
    next_steps.append("   4. –ü–æ–≤—Ç–æ—Ä–∏—Ç—å —Ç–µ—Å—Ç —Å –±–æ–ª—å—à–µ–π –≤—ã–±–æ—Ä–∫–æ–π –∏–ª–∏ –¥–ª–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å—é")
for step in next_steps:
    print(step)

# Save the per-user dataset to a file.
df.to_csv('ab_test_data.csv', index=False, encoding='utf-8-sig')


In [None]:
# Build, print and save the summary table of metrics.
def _format_group_column(group_metrics):
    """Return one report column: the group's metrics formatted in report row order."""
    return [
        group_metrics['n_users'],
        group_metrics['n_conversions'],
        f"{group_metrics['conversion_rate']:.3%}",
        f"{group_metrics['conversion_ci_low']:.3%}",
        f"{group_metrics['conversion_ci_high']:.3%}",
        f"{group_metrics['total_revenue']:,.0f} —Ä—É–±.",
        f"{group_metrics['avg_revenue_per_user']:.2f} —Ä—É–±.",
        f"{group_metrics['avg_revenue_per_paying']:.2f} —Ä—É–±.",
    ]


metrics_a, metrics_b = metrics['A'], metrics['B']

# Row labels; every column below follows this order.
report_rows = [
    '–†–∞–∑–º–µ—Ä –≥—Ä—É–ø–ø—ã',
    '–ö–æ–ª–∏—á–µ—Å—Ç–≤–æ –ø–æ–∫—É–ø–æ–∫',
    '–ö–æ–Ω–≤–µ—Ä—Å–∏—è',
    '–ö–æ–Ω–≤–µ—Ä—Å–∏—è (–Ω–∏–∂–Ω—è—è –≥—Ä–∞–Ω–∏—Ü–∞ 95% –î–ò)',
    '–ö–æ–Ω–≤–µ—Ä—Å–∏—è (–≤–µ—Ä—Ö–Ω—è—è –≥—Ä–∞–Ω–∏—Ü–∞ 95% –î–ò)',
    '–û–±—â–∏–π –¥–æ—Ö–æ–¥',
    '–î–æ—Ö–æ–¥ –Ω–∞ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è',
    '–°—Ä–µ–¥–Ω–∏–π —á–µ–∫',
]

# B minus A for every row; the two CI-bound rows have no difference and show '-'.
difference_column = [
    f"{metrics_b['n_users'] - metrics_a['n_users']}",
    f"{metrics_b['n_conversions'] - metrics_a['n_conversions']}",
    f"{(metrics_b['conversion_rate'] - metrics_a['conversion_rate']):.3%}",
    '-',
    '-',
    f"{(metrics_b['total_revenue'] - metrics_a['total_revenue']):,.0f} —Ä—É–±.",
    f"{(metrics_b['avg_revenue_per_user'] - metrics_a['avg_revenue_per_user']):.2f} —Ä—É–±.",
    f"{(metrics_b['avg_revenue_per_paying'] - metrics_a['avg_revenue_per_paying']):.2f} —Ä—É–±.",
]

summary_report = pd.DataFrame({
    '–ú–µ—Ç—Ä–∏–∫–∞': report_rows,
    '–ì—Ä—É–ø–ø–∞ A': _format_group_column(metrics_a),
    '–ì—Ä—É–ø–ø–∞ B': _format_group_column(metrics_b),
    '–†–∞–∑–Ω–∏—Ü–∞ (B - A)': difference_column,
})

print("\n –°–≤–æ–¥–Ω—ã–π –æ—Ç—á–µ—Ç –ø–æ —Ç–µ—Å—Ç—É:")
print(summary_report.to_string(index=False))

# 2. Save to CSV
summary_report.to_csv('ab_test_summary.csv', index=False, encoding='utf-8-sig')

# 3. Draw and save the comparison chart
def _numeric_part(cell_text):
    """Strip the currency and percent suffixes from a report cell and parse it as float."""
    stripped = cell_text.replace(' —Ä—É–±.', '').replace('%', '').replace(' —Ä—É–±', '')
    return float(stripped)


fig, ax = plt.subplots(figsize=(10, 6))
x = np.arange(len(summary_report))  # NOTE(review): not referenced by the code below
width = 0.35

# Chart of the key metrics, read back from the formatted summary table.
key_metrics = ['–ö–æ–Ω–≤–µ—Ä—Å–∏—è', '–î–æ—Ö–æ–¥ –Ω–∞ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è', '–°—Ä–µ–¥–Ω–∏–π —á–µ–∫']
key_data = summary_report[summary_report['–ú–µ—Ç—Ä–∏–∫–∞'].isin(key_metrics)]

for i, metric in enumerate(key_metrics):
    metric_data = key_data[key_data['–ú–µ—Ç—Ä–∏–∫–∞'] == metric]
    if metric_data.empty:
        continue

    # Extract the numeric values (drop the text suffixes).
    val_a = _numeric_part(metric_data['–ì—Ä—É–ø–ø–∞ A'].iloc[0])
    val_b = _numeric_part(metric_data['–ì—Ä—É–ø–ø–∞ B'].iloc[0])

    # Only the first pair of bars carries legend labels, so each group is listed once.
    is_first = i == 0
    ax.bar(i - width/2, val_a, width, label='–ì—Ä—É–ø–ø–∞ A' if is_first else "",
           color='#3498db', alpha=0.7)
    ax.bar(i + width/2, val_b, width, label='–ì—Ä—É–ø–ø–∞ B' if is_first else "",
           color='#2ecc71', alpha=0.7)

ax.set_xlabel('–ú–µ—Ç—Ä–∏–∫–∏', fontsize=12)
ax.set_ylabel('–ó–Ω–∞—á–µ–Ω–∏–µ', fontsize=12)
ax.set_title('–°—Ä–∞–≤–Ω–µ–Ω–∏–µ –∫–ª—é—á–µ–≤—ã—Ö –º–µ—Ç—Ä–∏–∫ A/B —Ç–µ—Å—Ç–∞', fontsize=14, fontweight='bold')
ax.set_xticks(range(len(key_metrics)))
ax.set_xticklabels(key_metrics, rotation=45, ha='right')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('ab_test_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

for saved_line in (
    "\n –†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã:",
    "   - ab_test_summary.csv - —Å–≤–æ–¥–Ω—ã–π –æ—Ç—á–µ—Ç",
    "   - ab_test_comparison.png - –≥—Ä–∞—Ñ–∏–∫ —Å—Ä–∞–≤–Ω–µ–Ω–∏—è",
    "   - –í—Å–µ –≤–∏–∑—É–∞–ª–∏–∑–∞—Ü–∏–∏ –¥–æ—Å—Ç—É–ø–Ω—ã –≤—ã—à–µ –≤ –Ω–æ—É—Ç–±—É–∫–µ",
):
    print(saved_line)