<a href="https://colab.research.google.com/github/ZulfiiaDitto/Mixed-effect-linear-models/blob/main/Generating_dataset_for_mixed_effect_linear_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Creating a fake dataset for mixed-effects regression models.

In [None]:
import pandas as pd
import numpy as np

# Build a synthetic long-format table of repeated vital-sign measurements
# (one row per reading) for experimenting with mixed-effects regression.

# Seed for reproducibility
np.random.seed(42)

# Constants
num_patients = 50
measure_types = ["Blood Pressure", "Pulse", "Respiratory Rate"]
place_types = ["Clinic", "Home"]
genders = ["Female", "Male"]
# randint's upper bound is exclusive, so ages fall in 18..67.
ages = np.random.randint(18, 68, size=num_patients)

# Value ranges (low, high) for each measure type; both ends are inclusive
# because the sampling below uses `high + 1` as the exclusive upper bound.
clinic_ranges = {
    "Blood Pressure": (140, 180),
    "Pulse": (90, 110),
    "Respiratory Rate": (22, 30),
}
home_ranges = {
    "Blood Pressure": (120, 135),
    "Pulse": (70, 85),
    "Respiratory Rate": (15, 21),
}

# Gaussian noise parameters
noise_sd_clinic = 5
noise_sd_home = 3

# Per-place lookups so the generation loop is driven by `place_types`
# instead of a second hard-coded list of place names.
ranges_by_place = {"Clinic": clinic_ranges, "Home": home_ranges}
noise_sd_by_place = {"Clinic": noise_sd_clinic, "Home": noise_sd_home}

# Data generation with increased variability
data = []

# Patient IDs must be unique: they are the grouping key of the dataset.
# Sampling them with replacement would merge several simulated patients
# (with different ages, genders and baseline shifts) under one ID, so a
# random permutation of consecutive IDs is used instead.
patient_ids = np.random.permutation(np.arange(1000, 1000 + num_patients))
patient_genders = np.random.choice(genders, size=num_patients)

for patient_id, age, gender in zip(patient_ids, ages, patient_genders):
    # Select exactly two different measures for every patient
    selected_measures = np.random.choice(measure_types, size=2, replace=False)

    # Random patient-specific baseline shift, shared by all of this
    # patient's readings
    patient_shift = np.random.uniform(-5, 5)

    for measure_type in selected_measures:
        for place in place_types:
            value_range = ranges_by_place[place][measure_type]
            noise_sd = noise_sd_by_place[place]

            # Add multiple repetitions for each measure
            repetitions = np.random.randint(2, 4)  # 2 to 3 repetitions per measure
            for _ in range(repetitions):
                base_value = np.random.randint(value_range[0], value_range[1] + 1)

                # Add variability through patient shift and Gaussian noise
                noise = np.random.normal(0, noise_sd)

                # Round to the nearest integer; plain int() would truncate
                # toward zero and bias every reading downward.
                value = int(round(base_value + patient_shift + noise))

                # Readings can never be negative
                value = max(0, value)

                data.append([patient_id, age, gender, measure_type, place, value])

# Create DataFrame
columns = ["Patient ID", "Age", "Gender", "Measure Type", "Place", "Value"]
df = pd.DataFrame(data, columns=columns)

In [None]:
# If you use Colab, mount your Drive first and define the directory where the
# file should be saved. As written, the CSV is saved under a relative path and
# the DataFrame index is not written as a column.
df.to_csv("patients_measurments.csv", index=False)
