In [None]:
import pandas as pd
import numpy as np
from ctgan import CTGAN

np.random.seed(42)
n_samples = 1000  # Number of visitors

# Define categorical (discrete) columns
discrete_columns = ['Loyalty_Member', 'Weather_Condition', 'Gender']

# Decimal places each numeric column carries in the seed table; the sampled
# values are rounded back to the same precision before export.
column_decimals = {
    'Step_Count': 0,
    'Transaction_Amount': 2,
    'Check_In_Time': 2,
    'Check_Out_Time': 2,
    'Age': 0,
    'Guest_Satisfaction_Score': 1,
}


def build_seed_data(n_samples):
    """Return the randomly generated seed table with ``n_samples`` visitors.

    Values are drawn from NumPy's global random state, so call
    ``np.random.seed`` first when a reproducible table is needed.
    """
    #transaction amount is taken from this website https://www.tampabay.com/fun/how-much-are-people-willing-to-spend-at-theme-parks-more-every-year-20190530/
    return pd.DataFrame({
        'Visitor_ID': np.arange(1, n_samples + 1),
        'Step_Count': np.random.normal(loc=12000, scale=3000, size=n_samples).astype(int),
        'Transaction_Amount': np.random.normal(loc=125, scale=30, size=n_samples).round(2),
        'Check_In_Time': np.random.uniform(9, 13, n_samples).round(2),  # Check-in between 9 AM - 1 PM
        'Check_Out_Time': np.random.uniform(17, 22, n_samples).round(2),  # Check-out between 5 PM - 10 PM
        'Loyalty_Member': np.random.choice(['Yes', 'No'], size=n_samples, p=[0.3, 0.7]),  # 30% are members
        'Weather_Condition': np.random.choice(['Sunny', 'Cloudy', 'Rainy'], size=n_samples, p=[0.6, 0.3, 0.1]),
        'Age': np.random.normal(loc=35, scale=10, size=n_samples).astype(int),  # Avg visitor age ~35
        'Gender': np.random.choice(['Male', 'Female'], size=n_samples, p=[0.5, 0.5]),
        'Guest_Satisfaction_Score': np.random.uniform(1, 5, n_samples).round(1),  # 1-5 rating
    })


def finalize_synthetic(sampled, reference):
    """Return a cleaned copy of ``sampled`` that is ready for export.

    Each numeric column listed in ``column_decimals`` is clipped to the
    minimum/maximum observed in ``reference``, rounded to its seed precision
    and cast back to the reference dtype. ``Visitor_ID`` is then (re)assigned
    as 1..len(sampled) and placed first, so every row has a unique key.

    ``sampled`` is not modified.
    """
    cleaned = sampled.drop(columns=['Visitor_ID'], errors='ignore')
    for column, decimals in column_decimals.items():
        if column not in cleaned.columns or column not in reference.columns:
            continue
        # Clip first: the bounds already sit on the rounded grid, so the
        # subsequent rounding cannot push a value back out of range.
        bounded = cleaned[column].clip(
            lower=reference[column].min(),
            upper=reference[column].max(),
        )
        cleaned[column] = bounded.round(decimals).astype(reference[column].dtype)
    cleaned.insert(0, 'Visitor_ID', np.arange(1, len(cleaned) + 1))
    return cleaned


# Always true when run as a notebook cell or a script; the guard only keeps the
# expensive training and the file write from running on a plain import.
if __name__ == "__main__":
    data = build_seed_data(n_samples)

    # Visitor_ID is a row key, not a feature. Left in, CTGAN would model it as
    # a continuous column and emit duplicate / out-of-range IDs.
    training_data = data.drop(columns=['Visitor_ID'])

    # Train a CTGAN model
    # NOTE(review): np.random.seed only fixes the seed table above; the model
    # training itself is presumably not seeded by it — confirm if the exported
    # file must be reproducible.
    model = CTGAN(epochs=500)  # Increase epochs for better learning
    model.fit(training_data, discrete_columns=discrete_columns)

    # Generate synthetic data, then re-key it and pull values back into the
    # ranges / precision of the seed table
    synthetic_data = finalize_synthetic(model.sample(n_samples), data)

    # Show sample synthetic data
    print(synthetic_data.head())

    # Save synthetic data to CSV
    synthetic_data.to_csv("synthetic_theme_park_data.csv", index=False)


   Visitor_ID  Step_Count  Transaction_Amount  Check_In_Time  Check_Out_Time  \
0        1073       15674           -2.578657      11.247566       18.505297   
1         452       21978           -2.760604       8.417061       16.959721   
2         295       15541           34.221996      12.058301       16.639478   
3         589       16417           11.501552       9.947303       17.410129   
4         127        8249           16.153954      12.315556       19.222081   

  Loyalty_Member Weather_Condition  Age      Gender  Guest_Satisfaction_Score  
0             No             Sunny   16        Male                  3.621753  
1            Yes             Sunny   40  Non-Binary                  3.523501  
2             No             Sunny   11      Female                  4.481646  
3             No             Rainy   36        Male                  3.623221  
4            Yes             Sunny   40      Female                  4.125973  
