In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# --- 1. 2025 DNA Profiles (with Realistic Counts) ---
# NOTE: The 'count' key defines the number of customers in this segment (Total = 4,000)
DNA_PROFILES = {
    'Whale_FlyIn':      {'mu': 5000.0, 'bankroll': 50000, 'gap': 120, 'hotel_aff': 1.8, 'fb_aff': 2.0, 'show_aff': 1.5, 'churn_prob': 0.02, 'count': 200},
    'Whale_Local':      {'mu': 1200.0, 'bankroll': 15000, 'gap': 7,   'hotel_aff': 0.1, 'fb_aff': 1.2, 'show_aff': 0.5, 'churn_prob': 0.01, 'count': 800},
    'Conference_VIP': {'mu': 100.0,  'bankroll': 1000,  'gap': 180, 'hotel_aff': 2.5, 'fb_aff': 2.2, 'show_aff': 1.0, 'churn_prob': 0.10, 'count': 1000},
    'Tourist_Social': {'mu': 150.0,  'bankroll': 1500,  'gap': 250, 'hotel_aff': 2.0, 'fb_aff': 2.5, 'show_aff': 3.0, 'churn_prob': 0.05, 'count': 2000}
}

all_transactions = [] # Reset transactions list

# --- 2. Simulation Loop (4,000 Total Customers with Realistic Ratios) ---
for dna_name, profile in DNA_PROFILES.items():
    
    # Iterate through the specific number of customers defined in the 'count' key
    for i in range(profile['count']): 
        customer_id = f"RESORT_{dna_name[:3]}_{i:04d}"
        
        # Set start date
        current_date = datetime(2025, 1, 1)
        
        # Loop through the year 2025
        while current_date < datetime(2026, 1, 1):
            
            # Churn check (customer stops visiting)
            if np.random.random() < profile['churn_prob']:
                break

            # Determine if the visit includes an overnight stay
            is_staying_overnight = False if profile['hotel_aff'] < 0.2 else True
            trip_duration = np.random.randint(2, 6) if is_staying_overnight else 1
            
            for d in range(trip_duration):
                visit_date = current_date + timedelta(days=d)
                
                # End condition for 2025
                if visit_date >= datetime(2026, 1, 1): break 
                
                # Seasonality & Weekend logic multiplier
                total_multiplier = (1.3 if visit_date.month in [6, 7, 12] else 1.0) * (1.2 if visit_date.weekday() >= 4 else 1.0)

                # --- Revenue Streams ---
                # Theoretical Win (Gaming Theo)
                theo_win = np.random.gamma(shape=5, scale=profile['mu']/5) * total_multiplier
                
                # Actual Win (Theo + Luck/Variance)
                actual_win = theo_win + np.random.normal(0, profile['bankroll'] * 0.15)
                
                # Non-Gaming Revenue
                hotel_rev = (280.0 * profile['hotel_aff'] * total_multiplier) if is_staying_overnight else 0 
                fb_rev = np.random.normal(175, 50) * profile['fb_aff'] * total_multiplier
                show_rev = 145.0 * profile['show_aff'] if np.random.random() > 0.4 else 0
                
                # --- Reinvestment Logic (Comp Cost) ---
                base_rate = 0.18 if "Whale" in dna_name else 0.12
                
                # Base Comp Cost (based on Theo Win and non-gaming revenue)
                base_comp_cost = (theo_win * base_rate) + (hotel_rev * 0.03) + (fb_rev * 0.05)
                
                # Recovery Comp (Comp given after a significant loss)
                loss_threshold = profile['mu'] * 2
                recovery_comp = abs(actual_win) * 0.03 if actual_win < -loss_threshold else 0
                
                # Final Calculations
                total_comp_cost = base_comp_cost + recovery_comp
                total_resort_rev = theo_win + hotel_rev + fb_rev + show_rev
                
                all_transactions.append({
                    'Customer_ID': customer_id,
                    'Date': visit_date,
                    'DNA': dna_name,
                    'Gaming_Theo': round(theo_win, 2),
                    'Gaming_Actual': round(actual_win, 2),
                    'Total_Resort_Rev': round(total_resort_rev, 2),
                    'Comp_Cost': round(total_comp_cost, 2),
                    'Net_Profit': round(total_resort_rev - total_comp_cost, 2)
                })
            
            # Calculate the next visit date
            current_date += timedelta(days=int(np.random.exponential(profile['gap'])) + trip_duration)

# Final DataFrame for 2025
df_2025 = pd.DataFrame(all_transactions)

# Print a confirmation and basic head of the DataFrame (Optional)
print(f"Simulation Complete. Total transactions: {len(df_2025):,} rows.")
print(df_2025.head())

Simulation Complete. Total transactions: 56,089 rows.
       Customer_ID       Date          DNA  Gaming_Theo  Gaming_Actual  \
0  RESORT_Wha_0000 2025-01-01  Whale_FlyIn      3059.04        3151.08   
1  RESORT_Wha_0000 2025-01-02  Whale_FlyIn      5714.90        5688.65   
2  RESORT_Wha_0000 2025-01-03  Whale_FlyIn      5642.41       -4042.26   
3  RESORT_Wha_0000 2025-01-04  Whale_FlyIn     13946.75       16300.10   
4  RESORT_Wha_0000 2025-02-25  Whale_FlyIn      1513.28       10168.03   

   Total_Resort_Rev  Comp_Cost  Net_Profit  
0           4000.92     587.64     3413.28  
1           6838.08    1063.89     5774.19  
2           6644.61    1053.65     5590.97  
3          15271.75    2553.69    12718.06  
4           2561.51     303.85     2257.67  
