## Importing necessary libraries

In [7]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import sklearn.cluster as cluster
from numpy import array, random, sum, unique
from pandas import DataFrame, read_csv

from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
from ydata_synthetic.synthesizers.regular import WGAN_GP
from pandas_profiling import ProfileReport

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn import preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

## Loading the data

In [2]:
# Read the BRFSS 2015 diabetes health-indicators CSV (path is relative to the
# notebook's working directory) into the DataFrame used by the rest of the notebook.
df = pd.read_csv('data/diabetes_binary_health_indicators_BRFSS2015.csv')

## Running pandas-profiling to explore the data

In [12]:
# Build the pandas-profiling report object for the loaded dataset,
# with the explorative option switched on.
profile = ProfileReport(
    df,
    title="Pandas Profiling Report for Diabetes Dataset",
    explorative=True,
)

In [13]:
# Display the profiling report inside the notebook as interactive widgets.
profile.to_widgets()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

  (2 * xtie * ytie) / m + x0 * y0 / (9 * m * (size - 2)))


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

### Saving the report as an HTML file

In [6]:
# Export the profiling report as a standalone HTML file.
# The target directory is created first so the export also works on a fresh
# checkout where "pandas_profiling_reports/" does not exist yet.
report_path = r"pandas_profiling_reports/Diabetes_Pandas_Profiling_Report.html"
os.makedirs(os.path.dirname(report_path), exist_ok=True)
profile.to_file(report_path)

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

### Dropping duplicate rows

In [10]:
# Remove exact duplicate rows. The result is re-bound to `df` rather than
# dropped with inplace=True, so the originally loaded frame object is not
# mutated behind any other reference that may still point at it.
df = df.drop_duplicates()

## Creating train/test splits

In [11]:
# Hold out 20% of the rows as a test set. The split is stratified on the
# 'Diabetes_binary' target and uses a fixed random_state so it is repeatable.
X_train_df, X_test_df, y_train_df, y_test_df = train_test_split(
    df.drop(columns=['Diabetes_binary']),
    df['Diabetes_binary'],
    test_size=0.2,
    stratify=df['Diabetes_binary'],
    random_state=0,
)

In [14]:
# Re-attach the labels to the training features so later cells can work with
# features and target in one frame.
# `assign` returns a new frame (aligned on the index) instead of writing a
# column into the frame produced by train_test_split, which avoids pandas'
# SettingWithCopyWarning for in-place writes on a derived frame.
X_train_df = X_train_df.assign(Diabetes_binary=y_train_df)

## Data Synthesizing

### Creating synthetic data for minority class

In [15]:
#WGAN with Gradient Penalty is chosen as the GAN architecture
# Note: `model` holds the WGAN_GP class itself, not an instance; it is
# instantiated later via model(...).
model = WGAN_GP

In [17]:
# The numerical columns are listed explicitly; every other column of
# X_train_df is treated as categorical.
num_cols = ['BMI', 'MentHlth', 'PhysHlth']
cat_cols = X_train_df.drop(columns=num_cols).columns.tolist()

In [18]:
# Keep only the minority-class rows (Diabetes_binary == 1); the copy makes
# train_data independent of X_train_df.
train_data = X_train_df[X_train_df['Diabetes_binary'].eq(1)].copy()

In [23]:
# Report how many rows and columns the minority-class training frame has.
print(
    f"Dataset info: Number of rows - {train_data.shape[0]}",
    f"Number of columns - {train_data.shape[1]}",
    sep="\n",
)

Dataset info: Number of rows - 28078
Number of columns - 22


In [24]:
#Setting the parameters of the GAN model
# These constants are fed into ModelParameters / TrainParameters below.
# noise_dim -> ModelParameters(noise_dim=...)
noise_dim = 32
# dim -> ModelParameters(layers_dim=...)
dim = 128
batch_size = 128

# log_step -> TrainParameters(sample_interval=...)
log_step = 100
# 201 epochs in total.
# NOTE(review): the "+1" presumably lets the run reach the epoch-200
# sample_interval boundary — confirm against the ydata-synthetic training loop.
epochs = 200+1
learning_rate = 5e-4
# beta_1 / beta_2 are passed together as ModelParameters(betas=(beta_1, beta_2)).
beta_1 = 0.5
beta_2 = 0.9
# NOTE(review): models_dir is not referenced in the cells visible here —
# confirm whether it is still needed.
models_dir = './cache'

In [25]:
# Bundle the hyperparameters defined above into the two parameter objects
# expected by the synthesizer: one for the model, one for the training run.
gan_args = ModelParameters(
    batch_size=batch_size,
    lr=learning_rate,
    betas=(beta_1, beta_2),
    noise_dim=noise_dim,
    layers_dim=dim,
)
train_args = TrainParameters(epochs=epochs, sample_interval=log_step)

In [26]:
#Initializing the GAN model
# `model` is the WGAN_GP class chosen earlier; gan_args carries the model parameters.
# NOTE(review): n_critic is presumably the number of critic updates per
# generator update — confirm against the ydata-synthetic WGAN_GP documentation.
synthesizer = model(gan_args, n_critic = 10)

2022-07-26 18:52:25.222149: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set


In [27]:
# Fit the synthesizer on the minority-class rows, telling it which columns
# are numerical and which are categorical.
synthesizer.train(
    data=train_data,
    train_arguments=train_args,
    num_cols=num_cols,
    cat_cols=cat_cols,
)

2022-07-26 18:52:46.109420: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  0%|                                                   | 0/201 [00:00<?, ?it/s]2022-07-26 18:52:53.493765: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
  0%|▏                                          | 1/201 [00:17<59:09, 17.75s/it]

Epoch: 0 | disc_loss: -0.295232892036438 | gen_loss: 0.21595656871795654


  1%|▍                                          | 2/201 [00:27<42:27, 12.80s/it]

Epoch: 1 | disc_loss: -0.32885152101516724 | gen_loss: 0.24259492754936218


  1%|▋                                          | 3/201 [00:36<37:06, 11.25s/it]

Epoch: 2 | disc_loss: -0.33223965764045715 | gen_loss: 0.25200000405311584


  2%|▊                                          | 4/201 [00:45<34:13, 10.42s/it]

Epoch: 3 | disc_loss: -0.333035945892334 | gen_loss: 0.2423405945301056


  2%|█                                          | 5/201 [00:54<31:52,  9.76s/it]

Epoch: 4 | disc_loss: -0.3298850953578949 | gen_loss: 0.21635237336158752


  3%|█▎                                         | 6/201 [01:02<30:07,  9.27s/it]

Epoch: 5 | disc_loss: -0.31792211532592773 | gen_loss: 0.20940636098384857


  3%|█▍                                         | 7/201 [01:10<29:02,  8.98s/it]

Epoch: 6 | disc_loss: -0.3387649953365326 | gen_loss: 0.2159009873867035


  4%|█▋                                         | 8/201 [01:19<28:22,  8.82s/it]

Epoch: 7 | disc_loss: -0.3431745767593384 | gen_loss: 0.2081538885831833


  4%|█▉                                         | 9/201 [01:28<27:59,  8.75s/it]

Epoch: 8 | disc_loss: -0.34366604685783386 | gen_loss: 0.19913315773010254


  5%|██                                        | 10/201 [01:36<27:43,  8.71s/it]

Epoch: 9 | disc_loss: -0.3434409499168396 | gen_loss: 0.21571102738380432


  5%|██▎                                       | 11/201 [01:45<27:25,  8.66s/it]

Epoch: 10 | disc_loss: -0.350713312625885 | gen_loss: 0.20276305079460144


  6%|██▌                                       | 12/201 [01:53<27:01,  8.58s/it]

Epoch: 11 | disc_loss: -0.3274095952510834 | gen_loss: 0.19942526519298553


  6%|██▋                                       | 13/201 [02:01<26:40,  8.51s/it]

Epoch: 12 | disc_loss: -0.3398491144180298 | gen_loss: 0.2033594250679016


  7%|██▉                                       | 14/201 [02:10<26:24,  8.47s/it]

Epoch: 13 | disc_loss: -0.351159006357193 | gen_loss: 0.21410183608531952


  7%|███▏                                      | 15/201 [02:18<26:13,  8.46s/it]

Epoch: 14 | disc_loss: -0.3566835820674896 | gen_loss: 0.21302154660224915


  8%|███▎                                      | 16/201 [02:26<25:47,  8.36s/it]

Epoch: 15 | disc_loss: -0.3418971002101898 | gen_loss: 0.20737488567829132


  8%|███▌                                      | 17/201 [02:35<25:31,  8.32s/it]

Epoch: 16 | disc_loss: -0.3519056439399719 | gen_loss: 0.20530563592910767


  9%|███▊                                      | 18/201 [02:43<25:18,  8.30s/it]

Epoch: 17 | disc_loss: -0.3474893867969513 | gen_loss: 0.21061262488365173


  9%|███▉                                      | 19/201 [02:51<25:04,  8.27s/it]

Epoch: 18 | disc_loss: -0.35973647236824036 | gen_loss: 0.21899285912513733


 10%|████▏                                     | 20/201 [02:59<25:03,  8.31s/it]

Epoch: 19 | disc_loss: -0.3504369258880615 | gen_loss: 0.20980805158615112


 10%|████▍                                     | 21/201 [03:08<25:29,  8.50s/it]

Epoch: 20 | disc_loss: -0.33161139488220215 | gen_loss: 0.2545534074306488


 11%|████▌                                     | 22/201 [03:17<25:43,  8.63s/it]

Epoch: 21 | disc_loss: -0.3381226360797882 | gen_loss: 0.2504003345966339


 11%|████▊                                     | 23/201 [03:26<25:52,  8.72s/it]

Epoch: 22 | disc_loss: -0.35255759954452515 | gen_loss: 0.24277594685554504


 12%|█████                                     | 24/201 [03:35<26:05,  8.84s/it]

Epoch: 23 | disc_loss: -0.35659125447273254 | gen_loss: 0.25635772943496704


 12%|█████▏                                    | 25/201 [03:44<26:03,  8.89s/it]

Epoch: 24 | disc_loss: -0.3455243408679962 | gen_loss: 0.2528647184371948


 13%|█████▍                                    | 26/201 [03:53<25:53,  8.88s/it]

Epoch: 25 | disc_loss: -0.355316698551178 | gen_loss: 0.26135432720184326


 13%|█████▋                                    | 27/201 [04:02<25:48,  8.90s/it]

Epoch: 26 | disc_loss: -0.3425234258174896 | gen_loss: 0.2654622197151184


 14%|█████▊                                    | 28/201 [04:11<25:47,  8.95s/it]

Epoch: 27 | disc_loss: -0.3411030173301697 | gen_loss: 0.2548590898513794


 14%|██████                                    | 29/201 [04:20<25:44,  8.98s/it]

Epoch: 28 | disc_loss: -0.35386767983436584 | gen_loss: 0.2678129971027374


 15%|██████▎                                   | 30/201 [04:29<25:34,  8.98s/it]

Epoch: 29 | disc_loss: -0.34586432576179504 | gen_loss: 0.28252220153808594


 15%|██████▍                                   | 31/201 [04:39<25:47,  9.10s/it]

Epoch: 30 | disc_loss: -0.34749889373779297 | gen_loss: 0.2751329243183136


 16%|██████▋                                   | 32/201 [04:48<25:37,  9.10s/it]

Epoch: 31 | disc_loss: -0.3476807773113251 | gen_loss: 0.2811010479927063


 16%|██████▉                                   | 33/201 [04:57<25:29,  9.10s/it]

Epoch: 32 | disc_loss: -0.35830599069595337 | gen_loss: 0.27570807933807373


 17%|███████                                   | 34/201 [05:06<25:27,  9.15s/it]

Epoch: 33 | disc_loss: -0.36225995421409607 | gen_loss: 0.2840060591697693


 17%|███████▎                                  | 35/201 [05:15<25:28,  9.21s/it]

Epoch: 34 | disc_loss: -0.3558581471443176 | gen_loss: 0.2853092551231384


 18%|███████▌                                  | 36/201 [05:25<25:19,  9.21s/it]

Epoch: 35 | disc_loss: -0.3411277234554291 | gen_loss: 0.2795737385749817


 18%|███████▋                                  | 37/201 [05:34<25:06,  9.19s/it]

Epoch: 36 | disc_loss: -0.35326895117759705 | gen_loss: 0.27289700508117676


 19%|███████▉                                  | 38/201 [05:43<24:53,  9.16s/it]

Epoch: 37 | disc_loss: -0.3328034281730652 | gen_loss: 0.2768014669418335


 19%|████████▏                                 | 39/201 [05:52<24:49,  9.19s/it]

Epoch: 38 | disc_loss: -0.35061031579971313 | gen_loss: 0.2830958962440491


 20%|████████▎                                 | 40/201 [06:01<24:44,  9.22s/it]

Epoch: 39 | disc_loss: -0.35457202792167664 | gen_loss: 0.27359622716903687


 20%|████████▌                                 | 41/201 [06:11<24:39,  9.25s/it]

Epoch: 40 | disc_loss: -0.3652198910713196 | gen_loss: 0.2803507149219513


 21%|████████▊                                 | 42/201 [06:20<24:29,  9.24s/it]

Epoch: 41 | disc_loss: -0.34784039855003357 | gen_loss: 0.2838902175426483


 21%|████████▉                                 | 43/201 [06:29<24:22,  9.25s/it]

Epoch: 42 | disc_loss: -0.35505813360214233 | gen_loss: 0.27838045358657837


 22%|█████████▏                                | 44/201 [06:39<24:12,  9.25s/it]

Epoch: 43 | disc_loss: -0.34149011969566345 | gen_loss: 0.2913830876350403


 22%|█████████▍                                | 45/201 [06:48<23:58,  9.22s/it]

Epoch: 44 | disc_loss: -0.3475496470928192 | gen_loss: 0.2839084267616272


 23%|█████████▌                                | 46/201 [06:57<23:46,  9.20s/it]

Epoch: 45 | disc_loss: -0.3484121263027191 | gen_loss: 0.2872214615345001


 23%|█████████▊                                | 47/201 [07:06<23:39,  9.22s/it]

Epoch: 46 | disc_loss: -0.3577401638031006 | gen_loss: 0.27618712186813354


 24%|██████████                                | 48/201 [07:16<23:48,  9.33s/it]

Epoch: 47 | disc_loss: -0.35575759410858154 | gen_loss: 0.2724439203739166


 24%|██████████▏                               | 49/201 [07:25<23:56,  9.45s/it]

Epoch: 48 | disc_loss: -0.3565860986709595 | gen_loss: 0.28235575556755066


 25%|██████████▍                               | 50/201 [07:35<23:48,  9.46s/it]

Epoch: 49 | disc_loss: -0.3462488651275635 | gen_loss: 0.2732750177383423


 25%|██████████▋                               | 51/201 [07:44<23:41,  9.48s/it]

Epoch: 50 | disc_loss: -0.34295809268951416 | gen_loss: 0.2852797508239746


 26%|██████████▊                               | 52/201 [07:54<23:29,  9.46s/it]

Epoch: 51 | disc_loss: -0.35204780101776123 | gen_loss: 0.273317813873291


 26%|███████████                               | 53/201 [08:03<23:17,  9.44s/it]

Epoch: 52 | disc_loss: -0.3546845614910126 | gen_loss: 0.27816981077194214


 27%|███████████▎                              | 54/201 [08:13<23:21,  9.53s/it]

Epoch: 53 | disc_loss: -0.3566223978996277 | gen_loss: 0.2838992476463318


 27%|███████████▍                              | 55/201 [08:23<23:15,  9.56s/it]

Epoch: 54 | disc_loss: -0.3576142489910126 | gen_loss: 0.2848673462867737


 28%|███████████▋                              | 56/201 [08:32<23:08,  9.58s/it]

Epoch: 55 | disc_loss: -0.3583110272884369 | gen_loss: 0.2943283021450043


 28%|███████████▉                              | 57/201 [08:42<22:54,  9.55s/it]

Epoch: 56 | disc_loss: -0.3363285958766937 | gen_loss: 0.2849716544151306


 29%|████████████                              | 58/201 [08:51<22:36,  9.49s/it]

Epoch: 57 | disc_loss: -0.3519401252269745 | gen_loss: 0.2924785017967224


 29%|████████████▎                             | 59/201 [09:00<22:24,  9.47s/it]

Epoch: 58 | disc_loss: -0.3577394187450409 | gen_loss: 0.2878255248069763


 30%|████████████▌                             | 60/201 [09:10<22:33,  9.60s/it]

Epoch: 59 | disc_loss: -0.321790874004364 | gen_loss: 0.27799174189567566


 30%|████████████▋                             | 61/201 [09:20<22:35,  9.68s/it]

Epoch: 60 | disc_loss: -0.35322701930999756 | gen_loss: 0.2936353385448456


 31%|████████████▉                             | 62/201 [09:30<22:33,  9.74s/it]

Epoch: 61 | disc_loss: -0.3599361777305603 | gen_loss: 0.2900901436805725


 31%|█████████████▏                            | 63/201 [09:40<22:30,  9.79s/it]

Epoch: 62 | disc_loss: -0.3511075973510742 | gen_loss: 0.30100005865097046


 32%|█████████████▎                            | 64/201 [09:51<23:00, 10.08s/it]

Epoch: 63 | disc_loss: -0.34705454111099243 | gen_loss: 0.2976318299770355


 32%|█████████████▌                            | 65/201 [10:01<23:12, 10.24s/it]

Epoch: 64 | disc_loss: -0.35181725025177 | gen_loss: 0.28886693716049194


 33%|█████████████▊                            | 66/201 [10:12<23:05, 10.26s/it]

Epoch: 65 | disc_loss: -0.35266003012657166 | gen_loss: 0.2909778952598572


 33%|██████████████                            | 67/201 [10:22<22:49, 10.22s/it]

Epoch: 66 | disc_loss: -0.343451589345932 | gen_loss: 0.2956501245498657


 34%|██████████████▏                           | 68/201 [10:32<22:25, 10.12s/it]

Epoch: 67 | disc_loss: -0.3522208034992218 | gen_loss: 0.29690873622894287


 34%|██████████████▍                           | 69/201 [10:42<22:09, 10.07s/it]

Epoch: 68 | disc_loss: -0.3549683690071106 | gen_loss: 0.28292006254196167


 35%|██████████████▋                           | 70/201 [10:52<21:53, 10.02s/it]

Epoch: 69 | disc_loss: -0.35910576581954956 | gen_loss: 0.2973777949810028


 35%|██████████████▊                           | 71/201 [11:01<21:38,  9.98s/it]

Epoch: 70 | disc_loss: -0.35342612862586975 | gen_loss: 0.2982792556285858


 36%|███████████████                           | 72/201 [11:11<21:23,  9.95s/it]

Epoch: 71 | disc_loss: -0.34807145595550537 | gen_loss: 0.29764771461486816


 36%|███████████████▎                          | 73/201 [11:21<21:12,  9.94s/it]

Epoch: 72 | disc_loss: -0.34547823667526245 | gen_loss: 0.293947696685791


 37%|███████████████▍                          | 74/201 [11:31<21:09, 10.00s/it]

Epoch: 73 | disc_loss: -0.35779640078544617 | gen_loss: 0.29390212893486023


 37%|███████████████▋                          | 75/201 [11:42<21:16, 10.13s/it]

Epoch: 74 | disc_loss: -0.3636573553085327 | gen_loss: 0.3085006773471832


 38%|███████████████▉                          | 76/201 [11:52<21:25, 10.28s/it]

Epoch: 75 | disc_loss: -0.3567398488521576 | gen_loss: 0.2970808148384094


 38%|████████████████                          | 77/201 [12:02<20:55, 10.12s/it]

Epoch: 76 | disc_loss: -0.35972586274147034 | gen_loss: 0.3055759072303772


 39%|████████████████▎                         | 78/201 [12:12<20:30, 10.01s/it]

Epoch: 77 | disc_loss: -0.3583064079284668 | gen_loss: 0.3097922205924988


 39%|████████████████▌                         | 79/201 [12:22<20:10,  9.92s/it]

Epoch: 78 | disc_loss: -0.34959861636161804 | gen_loss: 0.3103775978088379


 40%|████████████████▋                         | 80/201 [12:32<20:01,  9.93s/it]

Epoch: 79 | disc_loss: -0.35849374532699585 | gen_loss: 0.29933223128318787


 40%|████████████████▉                         | 81/201 [12:42<20:06, 10.06s/it]

Epoch: 80 | disc_loss: -0.3573637306690216 | gen_loss: 0.30815964937210083


 41%|█████████████████▏                        | 82/201 [12:52<20:04, 10.13s/it]

Epoch: 81 | disc_loss: -0.3570522367954254 | gen_loss: 0.3086245656013489


 41%|█████████████████▎                        | 83/201 [13:02<19:51, 10.10s/it]

Epoch: 82 | disc_loss: -0.3663170039653778 | gen_loss: 0.3088674545288086


 42%|█████████████████▌                        | 84/201 [13:12<19:39, 10.08s/it]

Epoch: 83 | disc_loss: -0.35460007190704346 | gen_loss: 0.30500227212905884


 42%|█████████████████▊                        | 85/201 [13:22<19:26, 10.06s/it]

Epoch: 84 | disc_loss: -0.35839933156967163 | gen_loss: 0.3132109045982361


 43%|█████████████████▉                        | 86/201 [13:33<19:33, 10.21s/it]

Epoch: 85 | disc_loss: -0.35775092244148254 | gen_loss: 0.3118685185909271


 43%|██████████████████▏                       | 87/201 [13:44<19:47, 10.41s/it]

Epoch: 86 | disc_loss: -0.35672956705093384 | gen_loss: 0.31234851479530334


 44%|██████████████████▍                       | 88/201 [13:54<19:39, 10.44s/it]

Epoch: 87 | disc_loss: -0.33847591280937195 | gen_loss: 0.30878013372421265


 44%|██████████████████▌                       | 89/201 [14:04<19:09, 10.26s/it]

Epoch: 88 | disc_loss: -0.3618304431438446 | gen_loss: 0.33267727494239807


 45%|██████████████████▊                       | 90/201 [14:14<18:47, 10.15s/it]

Epoch: 89 | disc_loss: -0.3531113862991333 | gen_loss: 0.30793455243110657


 45%|███████████████████                       | 91/201 [14:24<18:15,  9.96s/it]

Epoch: 90 | disc_loss: -0.3527563512325287 | gen_loss: 0.3155673146247864


 46%|███████████████████▏                      | 92/201 [14:33<18:04,  9.95s/it]

Epoch: 91 | disc_loss: -0.3553127944469452 | gen_loss: 0.30932360887527466


 46%|███████████████████▍                      | 93/201 [14:44<18:01, 10.02s/it]

Epoch: 92 | disc_loss: -0.3620396852493286 | gen_loss: 0.3180046081542969


 47%|███████████████████▋                      | 94/201 [14:54<17:59, 10.09s/it]

Epoch: 93 | disc_loss: -0.35701465606689453 | gen_loss: 0.31489115953445435


 47%|███████████████████▊                      | 95/201 [15:04<17:56, 10.16s/it]

Epoch: 94 | disc_loss: -0.3544953167438507 | gen_loss: 0.3201444447040558


 48%|████████████████████                      | 96/201 [15:15<17:55, 10.25s/it]

Epoch: 95 | disc_loss: -0.34970349073410034 | gen_loss: 0.32504743337631226


 48%|████████████████████▎                     | 97/201 [15:25<17:50, 10.29s/it]

Epoch: 96 | disc_loss: -0.3579467236995697 | gen_loss: 0.32809749245643616


 49%|████████████████████▍                     | 98/201 [15:35<17:34, 10.24s/it]

Epoch: 97 | disc_loss: -0.3584633469581604 | gen_loss: 0.3287595510482788


 49%|████████████████████▋                     | 99/201 [15:45<17:22, 10.22s/it]

Epoch: 98 | disc_loss: -0.3559454679489136 | gen_loss: 0.321548193693161


 50%|████████████████████▍                    | 100/201 [15:56<17:15, 10.25s/it]

Epoch: 99 | disc_loss: -0.3538293242454529 | gen_loss: 0.31389451026916504


 50%|████████████████████▌                    | 101/201 [16:06<17:07, 10.27s/it]

Epoch: 100 | disc_loss: -0.3564746379852295 | gen_loss: 0.3197025954723358


 51%|████████████████████▊                    | 102/201 [16:16<16:49, 10.20s/it]

Epoch: 101 | disc_loss: -0.3468685746192932 | gen_loss: 0.31602320075035095


 51%|█████████████████████                    | 103/201 [16:26<16:35, 10.15s/it]

Epoch: 102 | disc_loss: -0.35787099599838257 | gen_loss: 0.3253038227558136


 52%|█████████████████████▏                   | 104/201 [16:37<16:34, 10.25s/it]

Epoch: 103 | disc_loss: -0.3251531720161438 | gen_loss: 0.3146566152572632


 52%|█████████████████████▍                   | 105/201 [16:47<16:26, 10.27s/it]

Epoch: 104 | disc_loss: -0.35468462109565735 | gen_loss: 0.3373345732688904


 53%|█████████████████████▌                   | 106/201 [16:57<16:07, 10.18s/it]

Epoch: 105 | disc_loss: -0.34534409642219543 | gen_loss: 0.33381345868110657


 53%|█████████████████████▊                   | 107/201 [17:07<15:54, 10.15s/it]

Epoch: 106 | disc_loss: -0.35219067335128784 | gen_loss: 0.32748639583587646


 54%|██████████████████████                   | 108/201 [17:17<15:51, 10.24s/it]

Epoch: 107 | disc_loss: -0.34160956740379333 | gen_loss: 0.33340656757354736


 54%|██████████████████████▏                  | 109/201 [17:29<16:17, 10.63s/it]

Epoch: 108 | disc_loss: -0.35922569036483765 | gen_loss: 0.3394031226634979


 55%|██████████████████████▍                  | 110/201 [17:41<16:34, 10.93s/it]

Epoch: 109 | disc_loss: -0.3574560284614563 | gen_loss: 0.33258870244026184


 55%|██████████████████████▋                  | 111/201 [17:52<16:30, 11.01s/it]

Epoch: 110 | disc_loss: -0.35417506098747253 | gen_loss: 0.32454463839530945


 56%|██████████████████████▊                  | 112/201 [18:03<16:15, 10.96s/it]

Epoch: 111 | disc_loss: -0.35984092950820923 | gen_loss: 0.3327657878398895


 56%|███████████████████████                  | 113/201 [18:14<16:06, 10.99s/it]

Epoch: 112 | disc_loss: -0.33570703864097595 | gen_loss: 0.3309744596481323


 57%|███████████████████████▎                 | 114/201 [18:25<15:57, 11.00s/it]

Epoch: 113 | disc_loss: -0.35629621148109436 | gen_loss: 0.342543363571167


 57%|███████████████████████▍                 | 115/201 [18:36<15:42, 10.96s/it]

Epoch: 114 | disc_loss: -0.35933029651641846 | gen_loss: 0.33548837900161743


 58%|███████████████████████▋                 | 116/201 [18:46<15:28, 10.92s/it]

Epoch: 115 | disc_loss: -0.36250290274620056 | gen_loss: 0.33616918325424194


 58%|███████████████████████▊                 | 117/201 [18:57<15:01, 10.74s/it]

Epoch: 116 | disc_loss: -0.3612898886203766 | gen_loss: 0.34030815958976746


 59%|████████████████████████                 | 118/201 [19:08<15:00, 10.85s/it]

Epoch: 117 | disc_loss: -0.3609199523925781 | gen_loss: 0.33107954263687134


 59%|████████████████████████▎                | 119/201 [19:18<14:35, 10.67s/it]

Epoch: 118 | disc_loss: -0.34469953179359436 | gen_loss: 0.3372840881347656


 60%|████████████████████████▍                | 120/201 [19:29<14:21, 10.63s/it]

Epoch: 119 | disc_loss: -0.34778252243995667 | gen_loss: 0.3433229327201843


 60%|████████████████████████▋                | 121/201 [19:40<14:19, 10.74s/it]

Epoch: 120 | disc_loss: -0.36675581336021423 | gen_loss: 0.34941503405570984


 61%|████████████████████████▉                | 122/201 [19:51<14:14, 10.82s/it]

Epoch: 121 | disc_loss: -0.3619782626628876 | gen_loss: 0.3433099389076233


 61%|█████████████████████████                | 123/201 [20:01<13:50, 10.65s/it]

Epoch: 122 | disc_loss: -0.33779558539390564 | gen_loss: 0.3334653973579407


 62%|█████████████████████████▎               | 124/201 [20:12<14:00, 10.92s/it]

Epoch: 123 | disc_loss: -0.35879218578338623 | gen_loss: 0.3592904806137085


 62%|█████████████████████████▍               | 125/201 [20:24<14:11, 11.20s/it]

Epoch: 124 | disc_loss: -0.35225793719291687 | gen_loss: 0.3410906493663788


 63%|█████████████████████████▋               | 126/201 [20:35<13:50, 11.07s/it]

Epoch: 125 | disc_loss: -0.359304279088974 | gen_loss: 0.3419215679168701


 63%|█████████████████████████▉               | 127/201 [20:46<13:43, 11.12s/it]

Epoch: 126 | disc_loss: -0.3456505537033081 | gen_loss: 0.34038209915161133


 64%|██████████████████████████               | 128/201 [20:57<13:27, 11.06s/it]

Epoch: 127 | disc_loss: -0.35228458046913147 | gen_loss: 0.33469635248184204


 64%|██████████████████████████▎              | 129/201 [21:08<13:17, 11.08s/it]

Epoch: 128 | disc_loss: -0.35605770349502563 | gen_loss: 0.3449271321296692


 65%|██████████████████████████▌              | 130/201 [21:20<13:25, 11.35s/it]

Epoch: 129 | disc_loss: -0.34891653060913086 | gen_loss: 0.3395532965660095


 65%|██████████████████████████▋              | 131/201 [21:35<14:32, 12.47s/it]

Epoch: 130 | disc_loss: -0.3498745262622833 | gen_loss: 0.34658750891685486


 66%|██████████████████████████▉              | 132/201 [21:59<18:09, 15.79s/it]

Epoch: 131 | disc_loss: -0.3613757789134979 | gen_loss: 0.3457269072532654


 66%|███████████████████████████▏             | 133/201 [22:15<18:07, 15.99s/it]

Epoch: 132 | disc_loss: -0.3629513680934906 | gen_loss: 0.35252243280410767


 67%|███████████████████████████▎             | 134/201 [22:31<17:43, 15.87s/it]

Epoch: 133 | disc_loss: -0.36189013719558716 | gen_loss: 0.3517598509788513


 67%|███████████████████████████▌             | 135/201 [22:45<16:47, 15.27s/it]

Epoch: 134 | disc_loss: -0.3517821431159973 | gen_loss: 0.34758031368255615


 68%|███████████████████████████▋             | 136/201 [22:56<15:22, 14.19s/it]

Epoch: 135 | disc_loss: -0.358174592256546 | gen_loss: 0.3370499014854431


 68%|███████████████████████████▉             | 137/201 [23:21<18:20, 17.20s/it]

Epoch: 136 | disc_loss: -0.3485240042209625 | gen_loss: 0.3511155843734741


 69%|████████████████████████████▏            | 138/201 [23:42<19:29, 18.56s/it]

Epoch: 137 | disc_loss: -0.36418068408966064 | gen_loss: 0.34670478105545044


 69%|████████████████████████████▎            | 139/201 [24:02<19:29, 18.87s/it]

Epoch: 138 | disc_loss: -0.3577798902988434 | gen_loss: 0.3576304614543915


 70%|████████████████████████████▌            | 140/201 [24:23<19:50, 19.52s/it]

Epoch: 139 | disc_loss: -0.34979447722435 | gen_loss: 0.339222252368927


 70%|████████████████████████████▊            | 141/201 [24:38<18:16, 18.28s/it]

Epoch: 140 | disc_loss: -0.367305189371109 | gen_loss: 0.34924620389938354


 71%|████████████████████████████▉            | 142/201 [24:53<16:45, 17.04s/it]

Epoch: 141 | disc_loss: -0.35789754986763 | gen_loss: 0.3520704209804535


 71%|█████████████████████████████▏           | 143/201 [25:15<17:57, 18.59s/it]

Epoch: 142 | disc_loss: -0.35523051023483276 | gen_loss: 0.3441638946533203


 72%|█████████████████████████████▎           | 144/201 [25:34<17:46, 18.71s/it]

Epoch: 143 | disc_loss: -0.36067262291908264 | gen_loss: 0.34661102294921875


 72%|█████████████████████████████▌           | 145/201 [25:59<19:21, 20.74s/it]

Epoch: 144 | disc_loss: -0.34500741958618164 | gen_loss: 0.33936768770217896


 73%|█████████████████████████████▊           | 146/201 [26:14<17:20, 18.91s/it]

Epoch: 145 | disc_loss: -0.3520908057689667 | gen_loss: 0.3403763175010681


 73%|█████████████████████████████▉           | 147/201 [26:26<15:06, 16.79s/it]

Epoch: 146 | disc_loss: -0.35906022787094116 | gen_loss: 0.3428090214729309


 74%|██████████████████████████████▏          | 148/201 [26:36<13:08, 14.88s/it]

Epoch: 147 | disc_loss: -0.3640799820423126 | gen_loss: 0.3480057716369629


 74%|██████████████████████████████▍          | 149/201 [26:47<11:47, 13.60s/it]

Epoch: 148 | disc_loss: -0.3581007122993469 | gen_loss: 0.353395938873291


 75%|██████████████████████████████▌          | 150/201 [27:14<14:58, 17.63s/it]

Epoch: 149 | disc_loss: -0.36421769857406616 | gen_loss: 0.34779664874076843


 75%|██████████████████████████████▊          | 151/201 [27:40<16:53, 20.28s/it]

Epoch: 150 | disc_loss: -0.352614164352417 | gen_loss: 0.34693074226379395


 76%|███████████████████████████████          | 152/201 [27:57<15:47, 19.33s/it]

Epoch: 151 | disc_loss: -0.3504592478275299 | gen_loss: 0.34972769021987915


 76%|███████████████████████████████▏         | 153/201 [28:09<13:35, 16.99s/it]

Epoch: 152 | disc_loss: -0.3542378544807434 | gen_loss: 0.3460187315940857


 77%|███████████████████████████████▍         | 154/201 [28:20<12:01, 15.36s/it]

Epoch: 153 | disc_loss: -0.35317832231521606 | gen_loss: 0.33168622851371765


 77%|███████████████████████████████▌         | 155/201 [28:39<12:28, 16.27s/it]

Epoch: 154 | disc_loss: -0.3485085070133209 | gen_loss: 0.3447679877281189


 78%|███████████████████████████████▊         | 156/201 [29:01<13:25, 17.91s/it]

Epoch: 155 | disc_loss: -0.3557274043560028 | gen_loss: 0.3516814410686493


 78%|████████████████████████████████         | 157/201 [29:20<13:28, 18.37s/it]

Epoch: 156 | disc_loss: -0.35552090406417847 | gen_loss: 0.34738004207611084


 79%|████████████████████████████████▏        | 158/201 [29:46<14:42, 20.53s/it]

Epoch: 157 | disc_loss: -0.3599390983581543 | gen_loss: 0.34645068645477295


 79%|████████████████████████████████▍        | 159/201 [30:05<14:11, 20.28s/it]

Epoch: 158 | disc_loss: -0.35912975668907166 | gen_loss: 0.3465859889984131


 80%|████████████████████████████████▋        | 160/201 [30:19<12:24, 18.16s/it]

Epoch: 159 | disc_loss: -0.3676122725009918 | gen_loss: 0.3447017967700958


 80%|████████████████████████████████▊        | 161/201 [30:30<10:45, 16.14s/it]

Epoch: 160 | disc_loss: -0.3580520749092102 | gen_loss: 0.34496575593948364


 81%|█████████████████████████████████        | 162/201 [30:41<09:24, 14.48s/it]

Epoch: 161 | disc_loss: -0.35615625977516174 | gen_loss: 0.3516501784324646


 81%|█████████████████████████████████▏       | 163/201 [30:52<08:29, 13.42s/it]

Epoch: 162 | disc_loss: -0.3562530279159546 | gen_loss: 0.339011549949646


 82%|█████████████████████████████████▍       | 164/201 [31:02<07:42, 12.49s/it]

Epoch: 163 | disc_loss: -0.357049822807312 | gen_loss: 0.3629109263420105


 82%|█████████████████████████████████▋       | 165/201 [31:16<07:49, 13.05s/it]

Epoch: 164 | disc_loss: -0.3365883231163025 | gen_loss: 0.3348325192928314


 83%|█████████████████████████████████▊       | 166/201 [31:28<07:24, 12.69s/it]

Epoch: 165 | disc_loss: -0.3542306423187256 | gen_loss: 0.3469848036766052


 83%|██████████████████████████████████       | 167/201 [31:39<06:57, 12.27s/it]

Epoch: 166 | disc_loss: -0.35918331146240234 | gen_loss: 0.34251195192337036


 84%|██████████████████████████████████▎      | 168/201 [31:50<06:28, 11.78s/it]

Epoch: 167 | disc_loss: -0.35202139616012573 | gen_loss: 0.3512115180492401


 84%|██████████████████████████████████▍      | 169/201 [32:01<06:06, 11.45s/it]

Epoch: 168 | disc_loss: -0.35022810101509094 | gen_loss: 0.3443182706832886


 85%|██████████████████████████████████▋      | 170/201 [32:11<05:44, 11.11s/it]

Epoch: 169 | disc_loss: -0.35275083780288696 | gen_loss: 0.34689414501190186


 85%|██████████████████████████████████▉      | 171/201 [32:22<05:35, 11.17s/it]

Epoch: 170 | disc_loss: -0.34862589836120605 | gen_loss: 0.3378028869628906


 86%|███████████████████████████████████      | 172/201 [32:34<05:33, 11.49s/it]

Epoch: 171 | disc_loss: -0.35999956727027893 | gen_loss: 0.34960824251174927


 86%|███████████████████████████████████▎     | 173/201 [32:45<05:15, 11.28s/it]

Epoch: 172 | disc_loss: -0.36573663353919983 | gen_loss: 0.3530030846595764


 87%|███████████████████████████████████▍     | 174/201 [33:11<07:02, 15.66s/it]

Epoch: 173 | disc_loss: -0.337373286485672 | gen_loss: 0.34011319279670715


 87%|███████████████████████████████████▋     | 175/201 [33:37<08:04, 18.62s/it]

Epoch: 174 | disc_loss: -0.3575408160686493 | gen_loss: 0.34599530696868896


 88%|███████████████████████████████████▉     | 176/201 [33:56<07:52, 18.92s/it]

Epoch: 175 | disc_loss: -0.3524780571460724 | gen_loss: 0.35474520921707153


 88%|████████████████████████████████████     | 177/201 [34:19<08:04, 20.18s/it]

Epoch: 176 | disc_loss: -0.3610341250896454 | gen_loss: 0.3455384373664856


 89%|████████████████████████████████████▎    | 178/201 [34:45<08:19, 21.71s/it]

Epoch: 177 | disc_loss: -0.3516962230205536 | gen_loss: 0.34770798683166504


 89%|████████████████████████████████████▌    | 179/201 [35:04<07:42, 21.02s/it]

Epoch: 178 | disc_loss: -0.3550267219543457 | gen_loss: 0.35218483209609985


 90%|████████████████████████████████████▋    | 180/201 [35:17<06:28, 18.52s/it]

Epoch: 179 | disc_loss: -0.35997283458709717 | gen_loss: 0.34831294417381287


 90%|████████████████████████████████████▉    | 181/201 [35:27<05:22, 16.12s/it]

Epoch: 180 | disc_loss: -0.3591634929180145 | gen_loss: 0.34540843963623047


 91%|█████████████████████████████████████    | 182/201 [35:38<04:35, 14.49s/it]

Epoch: 181 | disc_loss: -0.3550379276275635 | gen_loss: 0.3489157557487488


 91%|█████████████████████████████████████▎   | 183/201 [35:57<04:43, 15.75s/it]

Epoch: 182 | disc_loss: -0.3631512522697449 | gen_loss: 0.3424958288669586


 92%|█████████████████████████████████████▌   | 184/201 [36:24<05:26, 19.18s/it]

Epoch: 183 | disc_loss: -0.3664719760417938 | gen_loss: 0.3447364568710327


 92%|█████████████████████████████████████▋   | 185/201 [36:53<05:55, 22.20s/it]

Epoch: 184 | disc_loss: -0.3612292408943176 | gen_loss: 0.33776235580444336


 93%|█████████████████████████████████████▉   | 186/201 [37:14<05:26, 21.74s/it]

Epoch: 185 | disc_loss: -0.360906183719635 | gen_loss: 0.3374061584472656


 93%|██████████████████████████████████████▏  | 187/201 [37:27<04:28, 19.19s/it]

Epoch: 186 | disc_loss: -0.36039015650749207 | gen_loss: 0.34833234548568726


 94%|██████████████████████████████████████▎  | 188/201 [37:50<04:24, 20.35s/it]

Epoch: 187 | disc_loss: -0.3588378131389618 | gen_loss: 0.3465002775192261


 94%|██████████████████████████████████████▌  | 189/201 [38:09<03:59, 19.98s/it]

Epoch: 188 | disc_loss: -0.36216944456100464 | gen_loss: 0.3599752187728882


 95%|██████████████████████████████████████▊  | 190/201 [38:21<03:13, 17.61s/it]

Epoch: 189 | disc_loss: -0.3411768674850464 | gen_loss: 0.35308587551116943


 95%|██████████████████████████████████████▉  | 191/201 [38:40<03:00, 18.04s/it]

Epoch: 190 | disc_loss: -0.33947131037712097 | gen_loss: 0.342626690864563


 96%|███████████████████████████████████████▏ | 192/201 [38:59<02:44, 18.30s/it]

Epoch: 191 | disc_loss: -0.3604829013347626 | gen_loss: 0.34748440980911255


 96%|███████████████████████████████████████▎ | 193/201 [39:24<02:41, 20.18s/it]

Epoch: 192 | disc_loss: -0.3498295247554779 | gen_loss: 0.3427734375


 97%|███████████████████████████████████████▌ | 194/201 [39:37<02:06, 18.09s/it]

Epoch: 193 | disc_loss: -0.3472133278846741 | gen_loss: 0.3454011082649231


 97%|███████████████████████████████████████▊ | 195/201 [39:49<01:36, 16.11s/it]

Epoch: 194 | disc_loss: -0.35631173849105835 | gen_loss: 0.3494357466697693


 98%|███████████████████████████████████████▉ | 196/201 [40:00<01:13, 14.77s/it]

Epoch: 195 | disc_loss: -0.3579256236553192 | gen_loss: 0.34589409828186035


 98%|████████████████████████████████████████▏| 197/201 [40:11<00:54, 13.58s/it]

Epoch: 196 | disc_loss: -0.3555373251438141 | gen_loss: 0.3431354761123657


 99%|████████████████████████████████████████▍| 198/201 [40:21<00:37, 12.52s/it]

Epoch: 197 | disc_loss: -0.35800907015800476 | gen_loss: 0.34037160873413086


 99%|████████████████████████████████████████▌| 199/201 [40:31<00:23, 11.77s/it]

Epoch: 198 | disc_loss: -0.35798999667167664 | gen_loss: 0.3430030643939972


100%|████████████████████████████████████████▊| 200/201 [40:42<00:11, 11.65s/it]

Epoch: 199 | disc_loss: -0.3540396988391876 | gen_loss: 0.34568312764167786


100%|█████████████████████████████████████████| 201/201 [40:53<00:00, 12.21s/it]

Epoch: 200 | disc_loss: -0.359432190656662 | gen_loss: 0.34782177209854126





#### Generating 100k rows of minority data

In [30]:
#Sampling about 100k synthetic records of diabetic patients from the trained synthesizer
#(the recorded run produced 100096 rows in 782 batches, so the result can slightly exceed the request)
minority_synth_data = synthesizer.sample(100_000)

Synthetic data generation: 100%|██████████████| 782/782 [00:09<00:00, 78.40it/s]


In [31]:
#Checking the dimensions (rows, columns) of the generated minority data
minority_synth_data.shape

(100096, 22)

### Creating synthetic data for majority class: training the GAN and generating 100k rows of majority data

In [None]:
#The GAN architecture used here is WGAN with Gradient Penalty
model = WGAN_GP

#Network and optimizer hyperparameters of the GAN model
batch_size = 128
noise_dim = 32
dim = 128
learning_rate = 0.0005
beta_1, beta_2 = 0.5, 0.9

#Training loop settings
epochs = 201
log_step = 100
#NOTE(review): models_dir is assigned but not referenced anywhere else in this cell
models_dir = './cache'

#Keeping only the majority class rows (Diabetes_binary == 0) of the training data;
#.copy() gives the synthesizer its own frame, independent of X_train_df
train_data = X_train_df[X_train_df['Diabetes_binary'] == 0].copy()

#Bundling the model parameters and the training step parameters
gan_args = ModelParameters(
    batch_size=batch_size,
    lr=learning_rate,
    betas=(beta_1, beta_2),
    noise_dim=noise_dim,
    layers_dim=dim,
)
train_args = TrainParameters(
    epochs=epochs,
    sample_interval=log_step,
)

#Building the GAN model
#NOTE(review): this rebinds `synthesizer`, the same name the minority sampling cell calls;
#re-running that cell after this one would sample from the majority model instead
synthesizer = model(gan_args, n_critic=10)

#Fitting the GAN model on the majority class rows
synthesizer.train(
    data=train_data,
    train_arguments=train_args,
    num_cols=num_cols,
    cat_cols=cat_cols,
)

#Sampling 100k synthetic examples of non-diabetic patients
#NOTE(review): the same request for the minority class returned 100096 rows,
#so expect slightly more than 100000 rows here as well - confirm
majority_synth_data = synthesizer.sample(100000)