In [None]:
# !pip install git+https://github.com/gretelai/gretel-synthetics.git
!pip install tensorflow==2.11 # for LSTM
!pip install sdv<0.18 # for ACTGAN
!pip install torch==2.0 # for Timeseries DGAN

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.dates as md

import torch

!pip install gretel-synthetics
from gretel_synthetics.timeseries_dgan.dgan import DGAN
from gretel_synthetics.timeseries_dgan.config import DGANConfig, OutputType

In [None]:
# Location of the minute-level activity CSV (placeholder path).
file_path = '...'

# Read the CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=file_path)

# Sanity check: show the first five rows.
print(df.head(n=5))

In [None]:
# Location of the demographics CSV (placeholder path).
file_path2 = '...'
demo = pd.read_csv(filepath_or_buffer=file_path2)
# Sanity check: show the first five rows.
print(demo.head(n=5))

In [None]:
# Distinct SEQN identifiers, in order of first appearance in df.
unique_seqn = pd.unique(df['SEQN'])
print(unique_seqn)

In [None]:
# Number of rows recorded for each SEQN.
counts = df.loc[:, 'SEQN'].value_counts()
print(counts)

In [None]:
# True when every SEQN has exactly 10080 rows.
has_expected_rows = counts == 10080
all_counts_equal = bool(has_expected_rows.all())
print(all_counts_equal)

In [None]:
# Fix the legacy global NumPy seed so the same SEQN values are drawn each run.
np.random.seed(123)
random_num = 50
# Draw `random_num` distinct SEQN values (sampling without replacement).
random_seqn = np.random.choice(unique_seqn, random_num, replace=False)
print(random_seqn)

In [None]:
# Keep only the identifier column and the PAXMTSM signal column.
ts = df.loc[:, ['SEQN', 'PAXMTSM']]

In [None]:
# Rows of ts whose SEQN is one of the randomly drawn identifiers.
in_sample_ts = ts['SEQN'].isin(random_seqn)
subset_df = ts[in_sample_ts]
print(subset_df)

In [None]:
# Rows of demo whose SEQN is one of the randomly drawn identifiers.
in_sample_demo = demo['SEQN'].isin(random_seqn)
subset_demo = demo[in_sample_demo]
print(subset_demo)

In [None]:
# Length of one training slice in rows: 24 * 60 = 1440.
day_len = 24 * 60
# Number of slices grouped together per participant.
day_num = 7

In [None]:
# Number of complete (day_len * day_num)-row blocks in subset_df, i.e. the
# participant count when every participant contributes exactly that many rows.
rows_per_patient = day_len * day_num
num_patients = subset_df.shape[0] // rows_per_patient
print(num_patients)

In [None]:
# DGAN trains on many example sequences, so the single long PAXMTSM column is
# cut into consecutive slices of day_len rows, one example per slice.
minute_values = subset_df[['PAXMTSM']].to_numpy()  # 2-D: (rows, 1 column)
# Number of complete day_len-row slices; trailing rows that do not fill a
# whole slice are dropped.
n = minute_values.shape[0] // day_len
usable_rows = n * day_len
# Resulting shape: (# examples, # time points, # features)
features = minute_values[:usable_rows, :].reshape(-1, day_len, minute_values.shape[1])
#features[:, :, -1] = np.trunc(features[:, :, -1]).astype(np.int64)
print(features.shape)
print(features[0])

In [None]:
# One subplot per sampled participant, with one line per day slice.
# Grid size and slice length come from num_patients / day_num / day_len rather
# than repeated literals, so the figure follows the sample size.
# squeeze=False keeps `ax` two-dimensional even when there is a single subplot.
fig, ax = plt.subplots(
    num_patients, 1, figsize=(10, 2 * num_patients), squeeze=False
)
minutes = np.arange(day_len)

for j in range(num_patients):
    panel = ax[j, 0]
    for i in range(day_num):
        # Slices j*day_num .. j*day_num + day_num - 1 are drawn on panel j.
        # NOTE(review): this assumes each participant's rows are contiguous in
        # subset_df -- confirm against the input file.
        panel.plot(minutes, features[j * day_num + i, :, 0])
    panel.set_ylabel('PAXMTSM')

ax[-1, 0].set_xlabel('Row within slice')

plt.show()  # display the figure

In [None]:
# Attributes: gender, age, race (columns RIAGENDR, RIDAGEYR, RIDRETH1).
# The feature slices are built from subset_df in its row order, so the attribute
# rows must follow the same SEQN order. subset_demo keeps the order of the
# demographics file, which is not guaranteed to match; reorder it explicitly.
# `.loc` raises KeyError if a sampled SEQN has no demographics row, instead of
# silently pairing attributes with the wrong participant.
# NOTE(review): assumes each SEQN's rows are contiguous in subset_df -- confirm.
attribute_cols = ['RIAGENDR', 'RIDAGEYR', 'RIDRETH1']
seqn_in_feature_order = subset_df['SEQN'].unique()
o_a = (
    subset_demo
    .drop_duplicates(subset='SEQN')  # exactly one attribute row per SEQN
    .set_index('SEQN')
    .loc[seqn_in_feature_order, attribute_cols]
    .to_numpy()
)

In [None]:
# Repeat each attribute row day_num times in place (row0, row0, ..., row1, ...),
# giving one attribute row per day_len-row feature slice.
attributes = o_a.repeat(day_num, axis=0)

In [None]:
# DGAN model conditioned on attributes ("wa" presumably = "with attributes").
dgan_config = DGANConfig(
    max_sequence_len=day_len,
    # NOTE(review): sample_len is expected to divide max_sequence_len
    # (1440 / 10 = 144) -- confirm against the DGANConfig documentation.
    sample_len=10,
    batch_size=30,
    # Boolean flag. The previous value, min(1000, features.shape[0]), was an
    # integer that looks misplaced from a batch-size expression; it only
    # enabled scaling because it was truthy.
    apply_feature_scaling=True,
    apply_example_scaling=True,
    use_attribute_discriminator=True,
    generator_learning_rate=1e-4,
    discriminator_learning_rate=1e-4,
    epochs=1,
)
model_wa = DGAN(dgan_config)

# Train on the numpy arrays built above: one attribute row per feature slice.
# NOTE(review): RIDAGEYR (age) is declared DISCRETE like the two categorical
# attributes -- confirm this is intended rather than CONTINUOUS.
model_wa.train_numpy(
    attributes=attributes,
    features=features,
    attribute_types=[OutputType.DISCRETE, OutputType.DISCRETE, OutputType.DISCRETE],
    #feature_types=[OutputType.CONTINUOUS, OutputType.DISCRETE],
    feature_types=[OutputType.CONTINUOUS],
)

In [None]:
# Generate as many synthetic examples as the training set is sized for:
# random_num participants * day_num slices each (50 * 7 = 350).
n_synthetic = random_num * day_num
# generate_numpy returns a 2-tuple; per the gretel-synthetics documentation it
# is (attributes, features). Both parts are kept under descriptive names rather
# than discarding the second into `_`, which also shadows IPython's last-output
# variable.
synthetic_attributes, synthetic_features = model_wa.generate_numpy(n_synthetic)
# Original name, kept so any later cell that reads `aaa` still works.
aaa = synthetic_attributes