In [4]:
import pandas as pd
import numpy as np
import datetime
import os

# Pin NumPy's global RNG so every run draws the same pseudo-random numbers
np.random.seed(42)

# Daily timestamps covering all of 2023 (2023-01-01 through 2023-12-31)
date_range = pd.date_range('2023-01-01', periods=365, freq='D')

# Max value per exercise at the start of the year; later maxes are scaled
# up from these baselines
initial_max_values = dict([
    ('squat', 150),
    ('deadlift', 200),
    ('bench press', 135),
])

# Define the percentage increase over the year
annual_increase_percentage = 0.10


# Generate periodic increases over the year
def get_periodic_increase(day_of_year, periods=12, days_in_year=365):
    """Return the multiplier applied to an initial max on a given day.

    The year is divided into ``periods`` equal-length buckets (not calendar
    months). The multiplier is 1 in the first bucket and grows by
    ``annual_increase_percentage / periods`` with each later bucket.

    Args:
        day_of_year: 1-based day of the year (1 = January 1st), e.g. the
            ``tm_yday`` field of a time tuple.
        periods: Number of equal buckets the year is split into.
        days_in_year: Length of the year used to size the buckets.

    Returns:
        ``1 + bucket_index * (annual_increase_percentage / periods)`` where
        ``bucket_index`` lies in ``0 .. periods - 1``.
    """
    increase_step = annual_increase_percentage / periods
    # day_of_year is 1-based, so shift it to 0-based before bucketing;
    # otherwise the first bucket is one day short and the last day of the
    # year spills into a bucket that should not exist. Integer arithmetic
    # avoids float rounding exactly at the bucket boundaries.
    current_period = (day_of_year - 1) * periods // days_in_year
    # Clamp so out-of-range days (e.g. day 366 of a leap year) reuse the
    # first/last valid bucket instead of extrapolating.
    current_period = max(0, min(current_period, periods - 1))
    return 1 + (current_period * increase_step)

# Function to generate weight based on exercise, reps, and day of year
def generate_weight(exercise, reps, day_of_year):
    """Return the working weight for one set, rounded to a 0.5 step.

    The exercise's initial max is scaled by the periodic increase for
    ``day_of_year``; a fraction of that scaled max is then chosen from the
    rep count (fewer reps -> larger fraction).

    Args:
        exercise: Key into ``initial_max_values``.
        reps: Number of repetitions in the set.
        day_of_year: Day of the year, forwarded to ``get_periodic_increase``.

    Returns:
        The weight as a float that is a multiple of 0.5.

    Raises:
        KeyError: If ``exercise`` is not a key of ``initial_max_values``.
    """
    current_max = initial_max_values[exercise] * get_periodic_increase(day_of_year)

    # (highest rep count in the band, fraction of the current max)
    rep_bands = ((3, 0.95), (6, 0.85), (9, 0.75))
    for rep_ceiling, fraction in rep_bands:
        if reps <= rep_ceiling:
            break
    else:
        # More than 9 reps falls through every band
        fraction = 0.65

    # Doubling, rounding, then halving snaps the value to the nearest 0.5
    return round(current_max * fraction * 2) / 2

# Generate the training plan
# Each of the 52 weeks gets either 3 or 4 sessions, placed on distinct
# weekdays (offsets 0-6) and shifted to that week's position in the year.
training_days_per_week = np.random.choice([3, 4], size=52, replace=True)
training_days = np.concatenate([
    np.random.choice(np.arange(7), size=days, replace=False) + week * 7
    for week, days in enumerate(training_days_per_week)
])
# Keep only offsets that exist in date_range, in chronological order
training_days = np.sort(training_days[training_days < len(date_range)])

# Introduce 3 phases of 2-3 weeks break
break_periods = np.random.choice(np.arange(len(date_range)), size=3, replace=False)
# Boolean mask over the year: True for every day that falls inside a break
on_break = np.zeros(len(date_range), dtype=bool)
for break_start in break_periods:
    # randint's upper bound is exclusive, so 22 is required for a full
    # 3-week (21-day) break to be possible.
    break_length = np.random.randint(14, 22)
    # Breaks starting near the end of the year are truncated at the last day
    break_days = np.arange(break_start, min(break_start + break_length, len(date_range)))
    on_break[break_days] = True
# Drop all break days in one vectorized pass; a plain list of ints is kept
# so the result can be used for positional indexing and len() as before.
training_days = training_days[~on_break[training_days]].tolist()

# Generate dummy data: every training day contributes one row per exercise
session_exercises = ['squat', 'bench press', 'deadlift']
session_count = len(training_days)
data = {
    'date': date_range[training_days].repeat(3),
    'exercise': session_exercises * session_count,
    # Rep counts are drawn uniformly from 1..14 (upper bound is exclusive)
    'reps': np.random.randint(1, 15, size=session_count * 3),
}

# Create a DataFrame
dummy_data = pd.DataFrame(data)


def _weight_for_row(row):
    """Return the generated weight for one (date, exercise, reps) row."""
    return generate_weight(row['exercise'], row['reps'], row['date'].dayofyear)


# Calculate weights based on exercise, reps, and day of year
dummy_data['weight'] = dummy_data.apply(_weight_for_row, axis=1)

# Calculate total weight lifted
dummy_data['total_weight'] = dummy_data['weight'] * dummy_data['reps']

# Display the first few rows of the dummy data
print(dummy_data.head(5))

# Ensure the directory exists
output_dir = 'data'
os.makedirs(output_dir, exist_ok=True)

# Save the dummy data to a CSV file for later use
csv_path = os.path.join(output_dir, 'dummy_training_data.csv')
dummy_data.to_csv(csv_path, index=False)


        date     exercise  reps  weight  total_weight
0 2023-01-01        squat     5   127.5         637.5
1 2023-01-01  bench press     8   101.0         808.0
2 2023-01-01     deadlift    10   130.0        1300.0
3 2023-01-02        squat    12    97.5        1170.0
4 2023-01-02  bench press     4   115.0         460.0
