In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Pin NumPy's global RNG so every run of this cell yields the same synthetic sample.
np.random.seed(42)

# Probability that each binary indicator equals 1 on a given day
# (proportions observed in the given data).
observed_data = dict(
    Froid=0.45,
    Regle=0.25,
    Pole=0.30,
    Workout=0.10,
    Stress=0.10,
    Atele=0.05,
    Etirements=0.15,
)

# Approximate probabilities of Pain taking the values 1, 2, 3 and 4, in that order.
pain_weights = [0.25, 0.35, 0.30, 0.10]

# One row per calendar day from 3 Dec 2024 (the day after the last example)
# through 31 Mar 2025, both ends included.
start_date = datetime(year=2024, month=12, day=3)
end_date = datetime(year=2025, month=3, day=31)
date_range = pd.date_range(start_date, end_date, freq='D')

# Length of every generated column.
num_days = len(date_range)

# Draw the 0/1 indicator columns. Iterating over observed_data keeps the draw
# order Froid -> ... -> Etirements, so the seeded RNG stream is consumed in a
# fixed sequence.
data = {
    column: np.random.choice([0, 1], size=num_days, p=[1 - prob, prob])
    for column, prob in observed_data.items()
}

# Pain is sampled after all binary indicators, then the dates are attached.
# Note: despite its name, "day_of_year" stores the full daily timestamps.
data["Pain"] = np.random.choice([1, 2, 3, 4], size=num_days, p=pain_weights)
data["day_of_year"] = date_range

# Assemble the synthetic table and show its first rows as the cell output.
generated_data = pd.DataFrame(data)
generated_data.head()


Unnamed: 0,Froid,Regle,Pole,Workout,Stress,Atele,Etirements,Pain,day_of_year
0,0,0,0,0,0,0,0,1,2024-12-03
1,1,1,0,1,0,0,1,2,2024-12-04
2,1,1,1,0,1,0,0,2,2024-12-05
3,1,0,1,0,0,0,1,4,2024-12-06
4,0,0,1,0,0,0,0,2,2024-12-07


In [4]:
# Persist the synthetic table to CSV in the working directory; index=False
# keeps the positional row index out of the file.
generated_data.to_csv(
    path_or_buf='generated_data.csv',
    index=False,
)