pip install pandas numpy tensorflow keras timegan scikit-learn matplotlib


In [4]:
import pandas as pd

# Read the patient records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='dataset.csv')

# Preview the top five rows (the default for head) as plain text
print(df.head(n=5))


  Patient ID         Day  Resting heart rate  Active heart rate   HRV  \
0       P001  01/01/2024                  78                 90  44.7   
1       P001  02/01/2024                  74                127  63.4   
2       P001  03/01/2024                  76                111  42.5   
3       P001  04/01/2024                  71                111  31.0   
4       P001  05/01/2024                  61                113  37.2   

   Sleep duration  Respiratory rate  Body temperature  \
0             6.6              12.4              37.2   
1             6.3              19.8              36.9   
2             7.0              19.1              36.5   
3             8.4              15.1              37.1   
4             6.3              18.1              37.3   

   Blood oxygen saturation  Activity levels  ...  Fetal movement (kick count)  \
0                       95             2084  ...                            0   
1                       97             7041  ...        

In [2]:
# Preprocessing: parse the day-first date strings, order the rows by patient and
# then by day, and force the resting heart rate to a numeric dtype
# (values that cannot be parsed become NaN).
df['Day'] = pd.to_datetime(arg=df['Day'], format='%d/%m/%Y')
df = df.sort_values(['Patient ID', 'Day'])
df['Resting heart rate'] = pd.to_numeric(arg=df['Resting heart rate'], errors='coerce')

NameError: name 'pd' is not defined

Time Series Data Generation Using TimeGAN
---- TimeGAN can be used to generate synthetic time series data. The Jupyter cell below preprocesses the data, initializes TimeGAN, and trains the model:

In [None]:
import numpy as np
from timegan import TimeGAN

# Preprocess data for TimeGAN (reshape to 3D array)
def create_time_series_windows(df, time_steps, feature_columns=None):
    """Slice each patient's rows into overlapping fixed-length windows.

    Rows are grouped by 'Patient ID' (groups are visited in order of first
    appearance) and, within a group, kept in the order they have in ``df``;
    sort by day beforehand if chronological windows are wanted.

    Window starts run over ``range(len(rows) - time_steps)``, so a patient
    with ``time_steps`` rows or fewer contributes no window.

    Args:
        df: DataFrame with a 'Patient ID' column and the feature columns.
        time_steps: Number of consecutive rows in each window.
        feature_columns: Columns to put in the windows. Defaults to
            ['Resting heart rate', 'Active heart rate'].

    Returns:
        np.ndarray of shape (num_windows, time_steps, num_features). When no
        patient yields a window the array is empty but still 3-D, so that
        ``result.shape[2]`` remains valid for callers.
    """
    if feature_columns is None:
        feature_columns = ['Resting heart rate', 'Active heart rate']
    feature_columns = list(feature_columns)

    windows = []
    # One pass over the frame instead of re-filtering it for every patient.
    for _, patient_rows in df.groupby('Patient ID', sort=False):
        patient_data = patient_rows[feature_columns].to_numpy()
        for start in range(len(patient_data) - time_steps):
            windows.append(patient_data[start:start + time_steps])

    if not windows:
        # np.array([]) would be 1-D; keep the documented 3-D layout instead.
        return np.empty((0, time_steps, len(feature_columns)))
    return np.array(windows)

# Window length: each training sample spans this many consecutive rows of one patient
time_steps = 10
time_series_data = create_time_series_windows(df, time_steps)

# Initialize and train the TimeGAN model.
# NOTE(review): TimeGAN comes from the third-party `timegan` import above; its
# constructor, fit() and sample() signatures are not visible in this notebook — confirm.
num_features = time_series_data.shape[2]  # Size of the last axis (requires a 3-D array)
model = TimeGAN(time_steps=time_steps, features=num_features)
model.fit(time_series_data)

# Generate synthetic data (1000 is passed to sample(); presumably the number of
# sequences to draw — TODO confirm against the library)
synthetic_data = model.sample(1000)
print(synthetic_data.shape)


Using LSTM for Time Series Forecasting
---- LSTM (Long Short-Term Memory) networks can be used to predict future values in a time series (e.g., future heart rates).

In [1]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

def prepare_data_for_lstm(df, time_steps,
                          feature_columns=('Resting heart rate', 'Active heart rate')):
    """Build (window, next-row) pairs for one-step-ahead forecasting.

    For every patient, each run of ``time_steps`` consecutive rows becomes one
    input sample and the row immediately after it becomes the target. Rows are
    used in the order they have in ``df``, so sort by patient and day first.

    Args:
        df: DataFrame with a 'Patient ID' column and the feature columns.
        time_steps: Number of rows in each input window.
        feature_columns: Columns used both as inputs and as targets.

    Returns:
        Tuple ``(X, y)`` where X has shape (num_samples, time_steps, num_features)
        and y has shape (num_samples, num_features). Both are empty, but keep
        those ranks, when no patient has more than ``time_steps`` rows.
    """
    feature_columns = list(feature_columns)
    inputs, targets = [], []
    for _, patient_rows in df.groupby('Patient ID', sort=False):
        values = patient_rows[feature_columns].to_numpy(dtype=float)
        for start in range(len(values) - time_steps):
            inputs.append(values[start:start + time_steps])
            targets.append(values[start + time_steps])

    n_features = len(feature_columns)
    X = np.asarray(inputs, dtype=float).reshape(-1, time_steps, n_features)
    y = np.asarray(targets, dtype=float).reshape(-1, n_features)
    return X, y


# Prepare data for LSTM (3D inputs, 2D next-step targets)
X_train, y_train = prepare_data_for_lstm(df, time_steps)

# Keep the layer sizes in step with the data actually fed to the model
num_features = X_train.shape[2]

# Define LSTM model: one recurrent layer followed by a linear read-out per feature
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(time_steps, num_features)))
model.add(Dense(num_features))
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32)

# Predict the next-step values for every training window
synthetic_data = model.predict(X_train)


ModuleNotFoundError: No module named 'keras'

Variational Autoencoder (VAE) for Time Series Generation
----- Variational Autoencoders (VAEs) can also be used. They are deep learning models designed for unsupervised learning, and they are great for generating synthetic data because they learn a compressed representation of your data.



In [None]:
from keras.layers import Input, Dense, LSTM, RepeatVector
from keras.models import Model

# Define the sequence autoencoder.
# NOTE(review): despite the `vae` name, this cell has no sampling layer and no
# KL-divergence term, so what is built here is a deterministic LSTM autoencoder.
input_layer = Input(shape=(time_steps, num_features))
encoded = LSTM(64, activation='relu')(input_layer)
decoded = RepeatVector(time_steps)(encoded)
decoded = LSTM(num_features, activation='sigmoid', return_sequences=True)(decoded)

vae = Model(input_layer, decoded)
vae.compile(optimizer='adam', loss='mse')

# The sigmoid decoder can only emit values in (0, 1), so min-max scale every
# feature into [0, 1] before training and undo the scaling after prediction.
X_values = np.asarray(X_train, dtype=float)
feature_min = X_values.min(axis=(0, 1), keepdims=True)
feature_span = X_values.max(axis=(0, 1), keepdims=True) - feature_min
feature_span[feature_span == 0] = 1.0  # constant features: avoid division by zero
X_scaled = (X_values - feature_min) / feature_span

# Train the autoencoder to reconstruct its own input. The forecasting targets
# in y_train do not have the (time_steps, num_features) shape of the decoder output.
vae.fit(X_scaled, X_scaled, epochs=50, batch_size=32)

# Generate reconstructed sequences and map them back to the original units
synthetic_data = vae.predict(X_scaled) * feature_span + feature_min


The rest of the notebook below is older code.

In [5]:
# Parse the day-first date strings in 'Day' into datetimes
df['Day'] = pd.to_datetime(arg=df['Day'], format='%d/%m/%Y')

# Order the rows by patient and, within each patient, by day
df = df.sort_values(['Patient ID', 'Day'])

# Measurement columns that must be numeric; unparseable entries become NaN
continuous_columns = [
    'Resting heart rate', 'Active heart rate', 'HRV', 'Sleep duration',
    'Respiratory rate', 'Body temperature', 'Blood oxygen saturation',
    'Activity levels', 'Calories burnt', 'Weight gain (kg)',
]
df[continuous_columns] = df[continuous_columns].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)


In [6]:
import numpy as np

# Function to create time series windows (e.g., 10 days of data per patient)
def create_time_series_windows(df, time_steps, feature_columns=None):
    """Slice each patient's rows into overlapping fixed-length windows.

    Rows are grouped by 'Patient ID' (groups are visited in order of first
    appearance) and, within a group, kept in the order they have in ``df``;
    sort by day beforehand if chronological windows are wanted.

    Window starts run over ``range(len(rows) - time_steps)``, so a patient
    with ``time_steps`` rows or fewer contributes no window.

    Args:
        df: DataFrame with a 'Patient ID' column and the feature columns.
        time_steps: Number of consecutive rows in each window.
        feature_columns: Columns to put in the windows. When omitted, the
            notebook-level ``continuous_columns`` list is used, which must
            already be defined at call time.

    Returns:
        np.ndarray of shape (num_windows, time_steps, num_features). When no
        patient yields a window the array is empty but still 3-D.
    """
    if feature_columns is None:
        # Backward-compatible default: fall back to the notebook-level list.
        feature_columns = continuous_columns
    feature_columns = list(feature_columns)

    windows = []
    # One pass over the frame instead of re-filtering it for every patient.
    for _, patient_rows in df.groupby('Patient ID', sort=False):
        patient_data = patient_rows[feature_columns].to_numpy()
        for start in range(len(patient_data) - time_steps):
            windows.append(patient_data[start:start + time_steps])

    if not windows:
        # np.array([]) would be 1-D; keep the documented 3-D layout instead.
        return np.empty((0, time_steps, len(feature_columns)))
    return np.array(windows)

# Each window covers this many consecutive rows (days) of a single patient
time_steps = 10
time_series_data = create_time_series_windows(df=df, time_steps=time_steps)
# Expected layout: (num_samples, 10, num_features)
print(time_series_data.shape)


(270, 10, 10)


In [7]:
from timegan import TimeGAN

# Initialize the TimeGAN model with the appropriate time steps and feature count.
# NOTE(review): the recorded run of this cell stopped at the `timegan` import with
# ModuleNotFoundError, so none of the calls below have been exercised here; the
# TimeGAN constructor, fit() and sample() signatures should be confirmed.
num_features = len(continuous_columns)  # One feature per entry in continuous_columns
model = TimeGAN(time_steps=time_steps, features=num_features)

# Train the model on the windowed time series data
model.fit(time_series_data)

# Generate synthetic time series data (1000 is passed to sample(); presumably the
# number of sequences — TODO confirm)
synthetic_data = model.sample(1000)
print(synthetic_data.shape)  # Should print (1000, time_steps, num_features)


ModuleNotFoundError: No module named 'timegan'

In [None]:
# Flatten the synthetic windows to one row per time step (num_features values
# per row) and label the columns with the continuous feature names
synthetic_df = pd.DataFrame(
    data=synthetic_data.reshape(-1, num_features),
    columns=continuous_columns,
)

# Write the flattened synthetic rows to a new CSV file, without the index column
synthetic_df.to_csv(path_or_buf='synthetic_pregnancy_data.csv', index=False)
