# In [2]:
from pathlib import Path

import numpy as np
import pandas as pd

# Generate a synthetic monthly dataset (marketing spend, web traffic,
# customers, revenue, training hours and survey counts) and write it to
# data/optitech_solutions_data.csv.

# Fixed seed so every run produces the same file. The np.random calls below
# consume the global generator in sequence, so reordering them changes the
# generated values.
np.random.seed(42)

# One timestamp per month end, 2019-01-31 .. 2023-12-31 (60 rows).
# An explicit offset object is used instead of a string alias: the month-end
# alias was renamed from 'M' to 'ME' in pandas 2.2, so neither spelling is
# accepted without complaint on every pandas version.
dates = pd.date_range(start="2019-01-01", end="2023-12-31", freq=pd.offsets.MonthEnd())
n_months = len(dates)

# Yearly cosine cycle used as a multiplier: 1.15 in December, 0.85 in June.
seasonal_effect = np.cos(2 * np.pi * dates.month / 12) * 0.15 + 1

# --- Marketing spend -------------------------------------------------------
marketing_expenditures = np.random.normal(loc=25000, scale=5000, size=n_months) * seasonal_effect
# Smooth with a 3-tap kernel. With mode='same' each output month is
# 0.5 * previous + 0.3 * current + 0.2 * next; the series is zero-padded at
# both ends, so the first and last months come out lower than the rest.
marketing_expenditures = np.convolve(marketing_expenditures, [0.2, 0.3, 0.5], mode='same')[:n_months]

df = pd.DataFrame({
    "Date": dates,
    "Marketing_Expenditure": marketing_expenditures.astype(int),
})

# --- Funnel: traffic -> new customers -> customer base -> revenue -----------
# NOTE(review): shift(-1) reads the FOLLOWING month's value, so traffic in
# month t is derived from spend in month t+1 (and new customers in month t
# from traffic in month t+1). If a lagged effect was intended this would be
# shift(1) - confirm before changing, since it alters the generated data.
traffic_noise = np.random.normal(loc=1, scale=0.4, size=n_months)
df['Web_Traffic'] = (
    (df['Marketing_Expenditure'].shift(-1) * traffic_noise)
    .fillna(0)           # last month has no "next" value
    .astype(int)
    .clip(lower=10000)   # floor, also applied to the filled last row
)

conversion_rate = np.random.normal(loc=0.002, scale=0.001, size=n_months)
df['New_Customers'] = (
    (df['Web_Traffic'].shift(-1) * conversion_rate)
    .fillna(0)
    .clip(lower=10)      # floor; also absorbs negative draws of conversion_rate
    .astype(int)
)

# Customer base: running total of new customers minus a churned fraction of
# the previous month's running total.
churn_rate = np.random.uniform(0.05, 0.12, size=n_months)
cumulative_customers = df['New_Customers'].cumsum()
churned_customers = (cumulative_customers.shift(1) * churn_rate).fillna(0)
df['Beginning_Customers'] = (cumulative_customers - churned_customers).fillna(0).astype(int)

revenue_per_customer = np.random.normal(loc=400, scale=80, size=n_months)
df['Monthly_Revenue'] = (
    (df['New_Customers'] + df['Beginning_Customers']) * revenue_per_customer * seasonal_effect
).astype(int)

# --- Employee training and its effect on survey results ---------------------
df['Employee_Training_Hours'] = np.random.normal(loc=120, scale=30, size=n_months).astype(int)

# np.interp maps 80..160 training hours linearly onto the given range and
# clamps values outside 80..160 to the range end points.
immediate_impact = np.interp(df['Employee_Training_Hours'], [80, 160], [0.05, 0.1])
future_impact = np.interp(df['Employee_Training_Hours'].shift(1), [80, 160], [0.1, 0.2])
# The first month has no previous month, so shift(1) yields NaN there;
# treat that as "no carried-over impact".
future_impact = np.nan_to_num(future_impact)

impact_factor = 0.2 + immediate_impact + future_impact

# randint's upper bound is exclusive: 150..299 customer respondents.
df['Total_Customer_Respondents'] = np.random.randint(150, 300, size=n_months)
df['High_Customer_Score_Respondents'] = (
    (df['Total_Customer_Respondents'] * impact_factor)
    .round()
    .clip(0, df['Total_Customer_Respondents'])  # never more than responded
    .astype(int)
)

# 50..99 employee respondents.
df['Total_Employee_Respondents'] = np.random.randint(50, 100, size=n_months)
promotion_rate = 0.2 + future_impact
# More training hours in the current month -> smaller detractor share
# (0.075 at <= 80 hours down to 0.05 at >= 160 hours).
detractor_rate = 0.1 - immediate_impact / 2

df['Promoters_Employee_Score_Respondents'] = (
    (df['Total_Employee_Respondents'] * promotion_rate)
    .round()
    .clip(0, df['Total_Employee_Respondents'])
    .astype(int)
)
df['Detractors_Employee_Score_Respondents'] = (
    (df['Total_Employee_Respondents'] * detractor_rate)
    .round()
    .clip(0, df['Total_Employee_Respondents'])
    .astype(int)
)

# --- Output ------------------------------------------------------------------
output_path = Path("data/optitech_solutions_data.csv")
# to_csv does not create missing directories; make sure data/ exists first.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)