In [2]:
import pandas as pd
import numpy as np
import random

# ----------------------------
# Decay Functions (Factor Form)
# ----------------------------

def exponential_decay_factor(t, decay_rate):
    """
    Multiplicative exponential decay: ``exp(-decay_rate * t)``.

    The factor equals 1 at ``t = 0`` (no decay) and, for a positive
    ``decay_rate``, shrinks toward 0 as ``t`` grows.

    Args:
        t (int): Time index (e.g., day count).
        decay_rate (float): Decay rate per unit of ``t``.

    Returns:
        float: Decay factor to multiply a baseline value by.
    """
    exponent = -decay_rate * t
    return np.exp(exponent)

def sigmoid_decay_factor(t, midpoint, steepness):
    """
    Sigmoid (logistic) decay factor: ``1 / (1 + exp(steepness * (t - midpoint)))``.

    The factor is 0.5 at ``t == midpoint`` and, for a positive ``steepness``,
    falls toward 0 as ``t`` increases. At ``t = 0`` the factor is
    ``1 / (1 + exp(-steepness * midpoint))``, which is close to 1 only when
    ``steepness * midpoint`` is large; choose the parameters accordingly if
    the series must start near the undecayed baseline.

    Args:
        t (int): Time index (e.g., day count).
        midpoint (float): The time at which the decay is halfway (factor 0.5).
        steepness (float): Controls how steep the decay is.

    Returns:
        float: Decay factor in the range [0, 1].
    """
    z = steepness * (t - midpoint)
    # 1 / (1 + e^z) == exp(-log(e^0 + e^z)). np.logaddexp evaluates the inner
    # log without ever forming e^z, so a large z yields 0.0 cleanly instead of
    # an "overflow encountered in exp" RuntimeWarning.
    return np.exp(-np.logaddexp(0, z))

# ----------------------------
# User Class Definition
# ----------------------------

class User:
    """
    A simulated user who produces daily Engagement and CTR observations.

    Engagement is drawn from ``np.random.lognormal`` and CTR from
    ``np.random.uniform``, both parameterized by ``base_params``.
    """

    # Column order of every frame this class returns. Passing it explicitly
    # keeps the schema intact even when the requested date range is empty.
    _COLUMNS = ("Date", "UserID", "Group", "Engagement", "CTR")

    def __init__(self, user_id, group, base_params):
        """
        Initializes a user with a unique ID, group, and baseline parameters.

        Args:
            user_id (str): Unique identifier for the user.
            group (str): User group (e.g., 'Baseline', 'Drifters', 'Power Users').
            base_params (dict): Baseline parameters for each metric.
                For example:
                {
                  "engagement": {"mode": 50, "std": 7},
                  "ctr": {"low": 0.01, "high": 0.05}
                }
                ``log(mode)`` is used as the ``mean`` of the underlying normal
                and ``std / 100`` as its ``sigma``. Since ``exp(mean)`` is the
                median of a log-normal distribution, "mode" effectively sets
                the median engagement.
        """
        self.user_id = user_id
        self.group = group
        self.base_params = base_params  # These remain unchanged (Time A)

    def _simulate_days(self, start_date, end_date, factor_for_day=None):
        """
        Build one row per calendar day between ``start_date`` and ``end_date``.

        Args:
            start_date (str): Start date in 'YYYY-MM-DD'.
            end_date (str): End date in 'YYYY-MM-DD'.
            factor_for_day (callable | None): Maps the 0-based day index to a
                multiplier for the engagement mode. ``None`` means no decay.

        Returns:
            DataFrame: Columns Date, UserID, Group, Engagement, CTR.
        """
        engagement_params = self.base_params["engagement"]
        ctr_params = self.base_params["ctr"]
        base_mode = engagement_params["mode"]
        sigma = engagement_params["std"] / 100

        rows = []
        for day_index, day in enumerate(pd.date_range(start=start_date, end=end_date)):
            mode = base_mode
            if factor_for_day is not None:
                mode = base_mode * factor_for_day(day_index)

            # Draw engagement first, then CTR, once per day: this ordering
            # fixes the random stream produced under a given np.random seed.
            engagement = np.random.lognormal(mean=np.log(mode), sigma=sigma)
            ctr = np.random.uniform(ctr_params["low"], ctr_params["high"])
            rows.append({
                "Date": day,
                "UserID": self.user_id,
                "Group": self.group,
                "Engagement": engagement,
                "CTR": ctr
            })
        return pd.DataFrame(rows, columns=list(self._COLUMNS))

    def generate_data(self, start_date, end_date):
        """
        Generate daily data using the base (Time A) parameters.

        Args:
            start_date (str): Start date in 'YYYY-MM-DD'.
            end_date (str): End date in 'YYYY-MM-DD'.

        Returns:
            DataFrame: Simulated data for the user. An empty date range gives
            an empty frame that still carries the expected columns.
        """
        return self._simulate_days(start_date, end_date)

    def generate_data_with_decay(self, start_date, end_date, decay_func, decay_params):
        """
        Generate daily data for Time B using a decay function that gradually changes the baseline.

        For each day, the engagement mode is multiplied by
        ``decay_func(t=i, **decay_params)`` where ``i`` is the 0-based day
        index, so early days look like Time A and later days reflect the decay.
        CTR is not decayed.

        Args:
            start_date (str): Start date in 'YYYY-MM-DD' for Time B.
            end_date (str): End date in 'YYYY-MM-DD' for Time B.
            decay_func (function): Decay factor function (e.g., exponential_decay_factor).
            decay_params (dict): Additional keyword arguments for the decay function.

        Returns:
            DataFrame: Simulated Time B data. An empty date range gives an
            empty frame that still carries the expected columns.
        """
        def factor_for_day(day_index):
            # At t=0 the factor should be close to 1 so Time B starts at the baseline.
            return decay_func(t=day_index, **decay_params)

        return self._simulate_days(start_date, end_date, factor_for_day)

# ----------------------------
# Helper Functions for Data Generation
# ----------------------------

def generate_users(n_users):
    """
    Build ``n_users`` User objects with randomly drawn groups and baselines.

    IDs are zero-padded sequence numbers ("U0000", "U0001", ...). Each user
    gets a random group and independently drawn engagement / CTR parameters
    from the ``random`` module.

    Args:
        n_users (int): Number of users.

    Returns:
        list: List of User objects.
    """
    group_names = ["Baseline", "Drifters", "Power Users"]
    roster = []
    for index in range(n_users):
        # The draw order below (group, mode, std, low, high) determines the
        # values produced under a given `random` seed.
        chosen_group = random.choice(group_names)
        engagement_mode = random.uniform(30, 65)
        engagement_std = random.uniform(5, 10)
        ctr_low = random.uniform(0.01, 0.03)
        ctr_high = random.uniform(0.03, 0.05)

        baseline = {
            "engagement": {"mode": engagement_mode, "std": engagement_std},
            "ctr": {"low": ctr_low, "high": ctr_high},
        }
        roster.append(User(f"U{index:04d}", chosen_group, baseline))
    return roster

def generate_time_series(users, start_date, end_date, use_decay=False, decay_func=None, decay_params=None):
    """
    Combines individual user data into one DataFrame.

    Decay is applied only when ``use_decay`` is True AND both ``decay_func``
    and ``decay_params`` are provided. If ``use_decay`` is True but either is
    missing, baseline (undecayed) data is generated and a ``UserWarning`` is
    emitted so the fallback is not silent.

    Args:
        users (list): List of User objects.
        start_date (str): Start date for the data.
        end_date (str): End date for the data.
        use_decay (bool): If True, generate data with decay (Time B).
        decay_func (function): Decay factor function to use.
        decay_params (dict): Additional parameters for the decay function.

    Returns:
        DataFrame: Combined data with a fresh 0..n-1 index. If ``users`` is
        empty, an empty frame with the columns Date, UserID, Group,
        Engagement, CTR is returned.
    """
    import warnings  # local import: only this function needs it

    apply_decay = use_decay and decay_func is not None and decay_params is not None
    if use_decay and not apply_decay:
        warnings.warn(
            "use_decay=True but decay_func or decay_params is missing; "
            "generating baseline data without decay.",
            UserWarning,
            stacklevel=2,
        )

    frames = [
        user.generate_data_with_decay(start_date, end_date, decay_func, decay_params)
        if apply_decay
        else user.generate_data(start_date, end_date)
        for user in users
    ]

    if not frames:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # with the same columns the per-user generators produce instead.
        return pd.DataFrame(columns=["Date", "UserID", "Group", "Engagement", "CTR"])
    return pd.concat(frames, ignore_index=True)

# ----------------------------
# Example Workflow: Generating Time A and Time B
# ----------------------------

# Seed both random sources so Restart & Run All reproduces the same data:
# `random` drives generate_users (groups and baseline parameters), while
# `np.random` drives the per-day lognormal / uniform draws inside User.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Generate Time A data (baseline)
start_date_a = "2024-01-01"
end_date_a = "2024-06-30"
users = generate_users(n_users=100)
time_a_df = generate_time_series(users, start_date_a, end_date_a)

# For Time B, we want to apply decay gradually.
# Exponential decay gives a factor of exactly 1 at t=0, so Time B starts at the
# Time A baseline. With decay_rate = 0.005 the factor on the last day of the
# 184-day window (t=183) is exp(-0.915), roughly 0.40.
decay_rate = 0.005  # Adjust this value to control the decay speed.
decay_params = {"decay_rate": decay_rate}

# Generate Time B data with decay, reusing the same users as Time A.
start_date_b = "2024-07-01"
end_date_b = "2024-12-31"
time_b_df = generate_time_series(
    users,
    start_date_b,
    end_date_b,
    use_decay=True,
    decay_func=exponential_decay_factor,
    decay_params=decay_params,
)




In [3]:
# Preview the combined Time A (baseline) frame: one row per user per day.
time_a_df

Unnamed: 0,Date,UserID,Group,Engagement,CTR
0,2024-01-01,U0000,Power Users,60.358948,0.027977
1,2024-01-02,U0000,Power Users,64.371170,0.030835
2,2024-01-03,U0000,Power Users,63.724530,0.028078
3,2024-01-04,U0000,Power Users,62.438689,0.024274
4,2024-01-05,U0000,Power Users,61.913182,0.029039
...,...,...,...,...,...
18195,2024-06-26,U0099,Power Users,62.956422,0.028276
18196,2024-06-27,U0099,Power Users,54.907619,0.035501
18197,2024-06-28,U0099,Power Users,55.461092,0.031733
18198,2024-06-29,U0099,Power Users,53.713808,0.032056


In [4]:
# Preview the combined Time B frame, where engagement decays over the window.
time_b_df

Unnamed: 0,Date,UserID,Group,Engagement,CTR
0,2024-07-01,U0000,Power Users,68.758394,0.030613
1,2024-07-02,U0000,Power Users,59.612667,0.023913
2,2024-07-03,U0000,Power Users,57.459183,0.027183
3,2024-07-04,U0000,Power Users,51.405583,0.030775
4,2024-07-05,U0000,Power Users,63.221820,0.024881
...,...,...,...,...,...
18395,2024-12-27,U0099,Power Users,22.816671,0.028978
18396,2024-12-28,U0099,Power Users,20.189788,0.035213
18397,2024-12-29,U0099,Power Users,22.097571,0.024170
18398,2024-12-30,U0099,Power Users,19.059666,0.028747


In [12]:
# Persist the Time A frame as CSV.
# NOTE(review): the destination is a hardcoded absolute Windows path, so this
# cell only works on a machine that has this directory; consider a
# configurable output directory instead.
# to_csv writes the row index as an unnamed first column by default (index=True).
time_a_df.to_csv(r'c:\Users\JPMorgan\Desktop\Data For Drift\time_a_df_test.csv')

In [13]:
# Persist the Time B (decayed) frame as CSV.
# NOTE(review): the destination is a hardcoded absolute Windows path, so this
# cell only works on a machine that has this directory; consider a
# configurable output directory instead.
# to_csv writes the row index as an unnamed first column by default (index=True).
time_b_df.to_csv(r'c:\Users\JPMorgan\Desktop\Data For Drift\time_b_df_test.csv')