In [None]:
!pip install ydata-synthetic
!pip install pandas-profiling

## Importing necessary libraries

In [1]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import sklearn.cluster as cluster
from numpy import array, random, sum, unique
from pandas import DataFrame, read_csv

from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
from ydata_synthetic.synthesizers.regular import WGAN_GP
from pandas_profiling import ProfileReport

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn import preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

## Loading the data

In [2]:
# Load the diabetes health-indicator CSV; the path is relative to the
# notebook's working directory.
DATA_PATH = 'data/diabetes_binary_health_indicators_BRFSS2015.csv'
df = pd.read_csv(DATA_PATH)

## Running pandas-profiling to explore the data

In [3]:
# Build the profiling report object for the loaded dataset, with the
# `explorative` option switched on.
profile = ProfileReport(
    df,
    title="Pandas Profiling Report for Diabetes Dataset",
    explorative=True,
)

In [5]:
# Display the profiling report inside the notebook as interactive widgets.
profile.to_widgets()

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

### Saving the report as an HTML file

In [6]:
# Export the profiling report to an HTML file.
# The target directory is created first (no-op if it already exists) so the
# export does not fail on a fresh checkout where the folder is missing.
report_path = r"pandas_profiling_reports/Diabetes_Pandas_Profiling_Report.html"
os.makedirs(os.path.dirname(report_path), exist_ok=True)
profile.to_file(report_path)

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

### Dropping duplicate rows

In [7]:
# Remove exact duplicate rows.
# The de-duplicated frame is rebound to `df` instead of being modified in
# place, so any object still referencing the originally loaded frame (e.g. the
# profiling report built earlier) is not mutated underneath it.
df = df.drop_duplicates()

## Creating train test splits

In [8]:
# Hold out 20% of the rows as a test set for evaluating the model.
# The split is stratified on the `Diabetes_binary` label and uses a fixed
# random_state so it is repeatable.
X_train_df, X_test_df, y_train_df, y_test_df = train_test_split(
    df.drop(columns=['Diabetes_binary']),
    df['Diabetes_binary'],
    test_size=0.2,
    stratify=df['Diabetes_binary'],
    random_state=0,
)

In [9]:
# Re-attach the labels to the training features so rows can later be filtered
# by class.
# `assign` returns a new frame (values aligned on the shared index) rather than
# writing a column into the object returned by train_test_split, which avoids
# pandas' SettingWithCopyWarning and keeps this cell safe to re-run.
X_train_df = X_train_df.assign(Diabetes_binary=y_train_df)

## Data Synthesizing

### Creating synthetic data for the minority class

In [6]:
#WGAN with Gradient Penalty is chosen as the GAN architecture
# `model` refers to the synthesizer class itself (not an instance); it is
# instantiated with the model parameters further below.
model = WGAN_GP

In [7]:
# Column roles passed to the synthesizer: the three columns listed here are
# treated as numerical, and every remaining column of X_train_df (including
# the label column attached earlier) is treated as categorical.
num_cols = ['BMI', 'MentHlth', 'PhysHlth']
cat_cols = X_train_df.columns.drop(num_cols).tolist()

In [8]:
# Keep only the training rows labelled Diabetes_binary == 1 (the minority
# class); `.copy()` makes the subset an independent frame.
is_minority = X_train_df['Diabetes_binary'] == 1
train_data = X_train_df[is_minority].copy()

In [9]:
# Report the size of the minority-class training subset.
row_count, column_count = train_data.shape
print(f"Dataset info: Number of rows - {row_count}")
print(f"Number of columns - {column_count}")

Dataset info: Number of rows - 28078
Number of columns - 22


In [10]:
# Hyper-parameters for the GAN model and its training run.

# Network shape / batching (forwarded to ModelParameters below).
noise_dim, dim, batch_size = 32, 128, 128

# Optimizer settings: learning rate and the two beta coefficients.
learning_rate = 5e-4
beta_1, beta_2 = 0.5, 0.9

# Training schedule (forwarded to TrainParameters below).
epochs = 200 + 1
log_step = 100

# Directory name for cached model artefacts.
models_dir = './cache'

In [11]:
# Bundle the hyper-parameters defined above into the two parameter objects
# expected by the synthesizer: one for the model, one for the training loop.
gan_args = ModelParameters(
    batch_size=batch_size,
    lr=learning_rate,
    betas=(beta_1, beta_2),
    noise_dim=noise_dim,
    layers_dim=dim,
)

train_args = TrainParameters(
    epochs=epochs,
    sample_interval=log_step,
)

In [12]:
# Instantiate the chosen GAN class with the model parameters.
# NOTE(review): n_critic is presumably the number of critic updates per
# generator update — confirm against the ydata-synthetic documentation.
synthesizer = model(
    gan_args,
    n_critic=10,
)

2022-07-28 08:33:11.501809: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set


In [13]:
# Fit the GAN on the minority-class rows only.
# This is a long-running cell: the captured log shows roughly 9-10 s per epoch
# over 201 epochs.
synthesizer.train(
    data=train_data,
    train_arguments=train_args,
    num_cols=num_cols,
    cat_cols=cat_cols,
)

2022-07-28 08:33:13.257143: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  0%|                                                   | 0/201 [00:00<?, ?it/s]2022-07-28 08:33:20.267585: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
  0%|▏                                          | 1/201 [00:17<58:41, 17.61s/it]

Epoch: 0 | disc_loss: -0.30025714635849 | gen_loss: 0.15275044739246368


  1%|▍                                          | 2/201 [00:26<42:20, 12.77s/it]

Epoch: 1 | disc_loss: -0.3248370885848999 | gen_loss: 0.22272323071956635


  1%|▋                                          | 3/201 [00:36<37:06, 11.24s/it]

Epoch: 2 | disc_loss: -0.3290081024169922 | gen_loss: 0.2583654224872589


  2%|▊                                          | 4/201 [00:45<34:13, 10.42s/it]

Epoch: 3 | disc_loss: -0.3387337028980255 | gen_loss: 0.27260279655456543


  2%|█                                          | 5/201 [00:54<32:25,  9.92s/it]

Epoch: 4 | disc_loss: -0.326295405626297 | gen_loss: 0.28904977440834045


  3%|█▎                                         | 6/201 [01:03<31:05,  9.57s/it]

Epoch: 5 | disc_loss: -0.34140273928642273 | gen_loss: 0.30210259556770325


  3%|█▍                                         | 7/201 [01:11<29:43,  9.19s/it]

Epoch: 6 | disc_loss: -0.3439101278781891 | gen_loss: 0.30643463134765625


  4%|█▋                                         | 8/201 [01:20<29:02,  9.03s/it]

Epoch: 7 | disc_loss: -0.3436889350414276 | gen_loss: 0.29551535844802856


  4%|█▉                                         | 9/201 [01:29<28:34,  8.93s/it]

Epoch: 8 | disc_loss: -0.3441043198108673 | gen_loss: 0.2990148961544037


  5%|██                                        | 10/201 [01:38<28:40,  9.01s/it]

Epoch: 9 | disc_loss: -0.32533854246139526 | gen_loss: 0.28560176491737366


  5%|██▎                                       | 11/201 [01:47<28:43,  9.07s/it]

Epoch: 10 | disc_loss: -0.35206475853919983 | gen_loss: 0.28471845388412476


  6%|██▌                                       | 12/201 [01:56<28:30,  9.05s/it]

Epoch: 11 | disc_loss: -0.34427663683891296 | gen_loss: 0.2822386622428894


  6%|██▋                                       | 13/201 [02:05<28:24,  9.07s/it]

Epoch: 12 | disc_loss: -0.33343568444252014 | gen_loss: 0.2725589871406555


  7%|██▉                                       | 14/201 [02:14<28:13,  9.06s/it]

Epoch: 13 | disc_loss: -0.35371384024620056 | gen_loss: 0.2659175992012024


  7%|███▏                                      | 15/201 [02:23<27:50,  8.98s/it]

Epoch: 14 | disc_loss: -0.3498367369174957 | gen_loss: 0.26960042119026184


  8%|███▎                                      | 16/201 [02:32<27:18,  8.86s/it]

Epoch: 15 | disc_loss: -0.34282001852989197 | gen_loss: 0.2679457664489746


  8%|███▌                                      | 17/201 [02:41<27:16,  8.90s/it]

Epoch: 16 | disc_loss: -0.35613036155700684 | gen_loss: 0.26209521293640137


  9%|███▊                                      | 18/201 [02:50<27:12,  8.92s/it]

Epoch: 17 | disc_loss: -0.3467421531677246 | gen_loss: 0.2539403438568115


  9%|███▉                                      | 19/201 [02:58<26:53,  8.86s/it]

Epoch: 18 | disc_loss: -0.3368847668170929 | gen_loss: 0.2555229663848877


 10%|████▏                                     | 20/201 [03:07<26:42,  8.85s/it]

Epoch: 19 | disc_loss: -0.3554375469684601 | gen_loss: 0.2529699206352234


 10%|████▍                                     | 21/201 [03:16<26:36,  8.87s/it]

Epoch: 20 | disc_loss: -0.3513290584087372 | gen_loss: 0.2635110914707184


 11%|████▌                                     | 22/201 [03:25<26:45,  8.97s/it]

Epoch: 21 | disc_loss: -0.33645495772361755 | gen_loss: 0.2732663154602051


 11%|████▊                                     | 23/201 [03:35<27:17,  9.20s/it]

Epoch: 22 | disc_loss: -0.348248690366745 | gen_loss: 0.2680787146091461


 12%|█████                                     | 24/201 [03:45<27:29,  9.32s/it]

Epoch: 23 | disc_loss: -0.3302712142467499 | gen_loss: 0.27298399806022644


 12%|█████▏                                    | 25/201 [03:54<27:07,  9.25s/it]

Epoch: 24 | disc_loss: -0.3399849832057953 | gen_loss: 0.2612364888191223


 13%|█████▍                                    | 26/201 [04:03<26:42,  9.16s/it]

Epoch: 25 | disc_loss: -0.3614806532859802 | gen_loss: 0.2758869230747223


 13%|█████▋                                    | 27/201 [04:12<26:22,  9.10s/it]

Epoch: 26 | disc_loss: -0.3588751256465912 | gen_loss: 0.2713337540626526


 14%|█████▊                                    | 28/201 [04:21<26:03,  9.04s/it]

Epoch: 27 | disc_loss: -0.35066598653793335 | gen_loss: 0.2700253129005432


 14%|██████                                    | 29/201 [04:30<26:21,  9.20s/it]

Epoch: 28 | disc_loss: -0.35006487369537354 | gen_loss: 0.27022793889045715


 15%|██████▎                                   | 30/201 [04:40<26:45,  9.39s/it]

Epoch: 29 | disc_loss: -0.3661733865737915 | gen_loss: 0.2758219242095947


 15%|██████▍                                   | 31/201 [04:49<26:33,  9.37s/it]

Epoch: 30 | disc_loss: -0.33608749508857727 | gen_loss: 0.26999369263648987


 16%|██████▋                                   | 32/201 [04:59<26:23,  9.37s/it]

Epoch: 31 | disc_loss: -0.3605518639087677 | gen_loss: 0.27147597074508667


 16%|██████▉                                   | 33/201 [05:08<26:13,  9.37s/it]

Epoch: 32 | disc_loss: -0.34414929151535034 | gen_loss: 0.2653048634529114


 17%|███████                                   | 34/201 [05:17<25:42,  9.24s/it]

Epoch: 33 | disc_loss: -0.34851089119911194 | gen_loss: 0.26839399337768555


 17%|███████▎                                  | 35/201 [05:26<25:18,  9.15s/it]

Epoch: 34 | disc_loss: -0.34927791357040405 | gen_loss: 0.28234782814979553


 18%|███████▌                                  | 36/201 [05:35<25:13,  9.17s/it]

Epoch: 35 | disc_loss: -0.3581370711326599 | gen_loss: 0.267605185508728


 18%|███████▋                                  | 37/201 [05:44<25:09,  9.20s/it]

Epoch: 36 | disc_loss: -0.354092001914978 | gen_loss: 0.26802247762680054


 19%|███████▉                                  | 38/201 [05:54<25:01,  9.21s/it]

Epoch: 37 | disc_loss: -0.3557409346103668 | gen_loss: 0.2825958728790283


 19%|████████▏                                 | 39/201 [06:03<24:45,  9.17s/it]

Epoch: 38 | disc_loss: -0.3617915213108063 | gen_loss: 0.27050286531448364


 20%|████████▎                                 | 40/201 [06:12<24:21,  9.08s/it]

Epoch: 39 | disc_loss: -0.3491726219654083 | gen_loss: 0.26678985357284546


 20%|████████▌                                 | 41/201 [06:20<24:01,  9.01s/it]

Epoch: 40 | disc_loss: -0.35356011986732483 | gen_loss: 0.2734450697898865


 21%|████████▊                                 | 42/201 [06:30<23:59,  9.06s/it]

Epoch: 41 | disc_loss: -0.355394572019577 | gen_loss: 0.2783656120300293


 21%|████████▉                                 | 43/201 [06:39<23:59,  9.11s/it]

Epoch: 42 | disc_loss: -0.3574684262275696 | gen_loss: 0.2606889307498932


 22%|█████████▏                                | 44/201 [06:48<23:59,  9.17s/it]

Epoch: 43 | disc_loss: -0.35667482018470764 | gen_loss: 0.26455333828926086


 22%|█████████▍                                | 45/201 [06:57<23:51,  9.18s/it]

Epoch: 44 | disc_loss: -0.353400319814682 | gen_loss: 0.2813122868537903


 23%|█████████▌                                | 46/201 [07:07<23:45,  9.20s/it]

Epoch: 45 | disc_loss: -0.35758113861083984 | gen_loss: 0.2770354151725769


 23%|█████████▊                                | 47/201 [07:16<23:38,  9.21s/it]

Epoch: 46 | disc_loss: -0.34485650062561035 | gen_loss: 0.26886358857154846


 24%|██████████                                | 48/201 [07:25<23:30,  9.22s/it]

Epoch: 47 | disc_loss: -0.3567827641963959 | gen_loss: 0.2697070240974426


 24%|██████████▏                               | 49/201 [07:34<23:20,  9.21s/it]

Epoch: 48 | disc_loss: -0.34977683424949646 | gen_loss: 0.26638349890708923


 25%|██████████▍                               | 50/201 [07:44<23:12,  9.22s/it]

Epoch: 49 | disc_loss: -0.3528328239917755 | gen_loss: 0.27088451385498047


 25%|██████████▋                               | 51/201 [07:53<23:01,  9.21s/it]

Epoch: 50 | disc_loss: -0.36351174116134644 | gen_loss: 0.27608722448349


 26%|██████████▊                               | 52/201 [08:02<22:47,  9.18s/it]

Epoch: 51 | disc_loss: -0.35453492403030396 | gen_loss: 0.26092350482940674


 26%|███████████                               | 53/201 [08:11<22:52,  9.27s/it]

Epoch: 52 | disc_loss: -0.3619823157787323 | gen_loss: 0.28067564964294434


 27%|███████████▎                              | 54/201 [08:21<22:51,  9.33s/it]

Epoch: 53 | disc_loss: -0.35587090253829956 | gen_loss: 0.27245789766311646


 27%|███████████▍                              | 55/201 [08:30<22:37,  9.30s/it]

Epoch: 54 | disc_loss: -0.3600693643093109 | gen_loss: 0.2743615508079529


 28%|███████████▋                              | 56/201 [08:39<22:23,  9.27s/it]

Epoch: 55 | disc_loss: -0.362094521522522 | gen_loss: 0.27930209040641785


 28%|███████████▉                              | 57/201 [08:48<22:07,  9.22s/it]

Epoch: 56 | disc_loss: -0.34720274806022644 | gen_loss: 0.2739450931549072


 29%|████████████                              | 58/201 [08:58<22:00,  9.23s/it]

Epoch: 57 | disc_loss: -0.3558156192302704 | gen_loss: 0.2751762270927429


 29%|████████████▎                             | 59/201 [09:07<21:55,  9.27s/it]

Epoch: 58 | disc_loss: -0.35680249333381653 | gen_loss: 0.2806902527809143


 30%|████████████▌                             | 60/201 [09:16<21:54,  9.32s/it]

Epoch: 59 | disc_loss: -0.34928980469703674 | gen_loss: 0.2731374502182007


 30%|████████████▋                             | 61/201 [09:26<21:44,  9.32s/it]

Epoch: 60 | disc_loss: -0.3509976267814636 | gen_loss: 0.28747522830963135


 31%|████████████▉                             | 62/201 [09:35<21:27,  9.27s/it]

Epoch: 61 | disc_loss: -0.35932910442352295 | gen_loss: 0.2802126407623291


 31%|█████████████▏                            | 63/201 [09:44<21:19,  9.27s/it]

Epoch: 62 | disc_loss: -0.34792375564575195 | gen_loss: 0.283219575881958


 32%|█████████████▎                            | 64/201 [09:54<21:16,  9.32s/it]

Epoch: 63 | disc_loss: -0.3495067358016968 | gen_loss: 0.2806006968021393


 32%|█████████████▌                            | 65/201 [10:03<21:19,  9.41s/it]

Epoch: 64 | disc_loss: -0.34832262992858887 | gen_loss: 0.28550395369529724


 33%|█████████████▊                            | 66/201 [10:13<21:26,  9.53s/it]

Epoch: 65 | disc_loss: -0.35445502400398254 | gen_loss: 0.28396570682525635


 33%|██████████████                            | 67/201 [10:23<21:25,  9.59s/it]

Epoch: 66 | disc_loss: -0.36069342494010925 | gen_loss: 0.29371124505996704


 34%|██████████████▏                           | 68/201 [10:32<21:20,  9.62s/it]

Epoch: 67 | disc_loss: -0.35079580545425415 | gen_loss: 0.28423577547073364


 34%|██████████████▍                           | 69/201 [10:42<21:22,  9.72s/it]

Epoch: 68 | disc_loss: -0.34354740381240845 | gen_loss: 0.2914995551109314


 35%|██████████████▋                           | 70/201 [10:52<21:20,  9.77s/it]

Epoch: 69 | disc_loss: -0.35641348361968994 | gen_loss: 0.2953580319881439


 35%|██████████████▊                           | 71/201 [11:02<21:07,  9.75s/it]

Epoch: 70 | disc_loss: -0.35681843757629395 | gen_loss: 0.294653058052063


 36%|███████████████                           | 72/201 [11:11<20:51,  9.70s/it]

Epoch: 71 | disc_loss: -0.3442985713481903 | gen_loss: 0.29253071546554565


 36%|███████████████▎                          | 73/201 [11:21<20:23,  9.56s/it]

Epoch: 72 | disc_loss: -0.35710906982421875 | gen_loss: 0.29955023527145386


 37%|███████████████▍                          | 74/201 [11:30<20:12,  9.55s/it]

Epoch: 73 | disc_loss: -0.35295361280441284 | gen_loss: 0.2919563949108124


 37%|███████████████▋                          | 75/201 [11:40<20:07,  9.59s/it]

Epoch: 74 | disc_loss: -0.3270823061466217 | gen_loss: 0.29782193899154663


 38%|███████████████▉                          | 76/201 [11:50<20:02,  9.62s/it]

Epoch: 75 | disc_loss: -0.3474401831626892 | gen_loss: 0.2977334260940552


 38%|████████████████                          | 77/201 [12:00<20:04,  9.72s/it]

Epoch: 76 | disc_loss: -0.35760220885276794 | gen_loss: 0.30136680603027344


 39%|████████████████▎                         | 78/201 [12:09<20:02,  9.78s/it]

Epoch: 77 | disc_loss: -0.34503865242004395 | gen_loss: 0.30668771266937256


 39%|████████████████▌                         | 79/201 [12:19<19:47,  9.74s/it]

Epoch: 78 | disc_loss: -0.3504371643066406 | gen_loss: 0.2960353493690491


 40%|████████████████▋                         | 80/201 [12:29<19:43,  9.78s/it]

Epoch: 79 | disc_loss: -0.3372158408164978 | gen_loss: 0.3037274479866028


 40%|████████████████▉                         | 81/201 [12:39<19:28,  9.74s/it]

Epoch: 80 | disc_loss: -0.34804344177246094 | gen_loss: 0.31033799052238464


 41%|█████████████████▏                        | 82/201 [12:48<19:13,  9.69s/it]

Epoch: 81 | disc_loss: -0.35647282004356384 | gen_loss: 0.3164044916629791


 41%|█████████████████▎                        | 83/201 [12:58<19:01,  9.68s/it]

Epoch: 82 | disc_loss: -0.3586474359035492 | gen_loss: 0.3039528727531433


 42%|█████████████████▌                        | 84/201 [13:08<18:51,  9.67s/it]

Epoch: 83 | disc_loss: -0.33996087312698364 | gen_loss: 0.2856243848800659


 42%|█████████████████▊                        | 85/201 [13:17<18:41,  9.67s/it]

Epoch: 84 | disc_loss: -0.3556375205516815 | gen_loss: 0.31168192625045776


 43%|█████████████████▉                        | 86/201 [13:27<18:33,  9.69s/it]

Epoch: 85 | disc_loss: -0.35405343770980835 | gen_loss: 0.3019851744174957


 43%|██████████████████▏                       | 87/201 [13:37<18:25,  9.70s/it]

Epoch: 86 | disc_loss: -0.35113123059272766 | gen_loss: 0.3101649284362793


 44%|██████████████████▍                       | 88/201 [13:46<18:18,  9.72s/it]

Epoch: 87 | disc_loss: -0.3292653560638428 | gen_loss: 0.3066690266132355


 44%|██████████████████▌                       | 89/201 [13:57<18:33,  9.94s/it]

Epoch: 88 | disc_loss: -0.3519291281700134 | gen_loss: 0.3055153489112854


 45%|██████████████████▊                       | 90/201 [14:07<18:24,  9.95s/it]

Epoch: 89 | disc_loss: -0.3682807683944702 | gen_loss: 0.3175618648529053


 45%|███████████████████                       | 91/201 [14:17<18:09,  9.90s/it]

Epoch: 90 | disc_loss: -0.3689976632595062 | gen_loss: 0.3169434666633606


 46%|███████████████████▏                      | 92/201 [14:27<18:14, 10.04s/it]

Epoch: 91 | disc_loss: -0.35447466373443604 | gen_loss: 0.3178948760032654


 46%|███████████████████▍                      | 93/201 [14:38<18:28, 10.26s/it]

Epoch: 92 | disc_loss: -0.36193254590034485 | gen_loss: 0.3073262572288513


 47%|███████████████████▋                      | 94/201 [14:48<18:27, 10.35s/it]

Epoch: 93 | disc_loss: -0.34787437319755554 | gen_loss: 0.31689929962158203


 47%|███████████████████▊                      | 95/201 [14:59<18:12, 10.30s/it]

Epoch: 94 | disc_loss: -0.35731241106987 | gen_loss: 0.32215920090675354


 48%|████████████████████                      | 96/201 [15:08<17:49, 10.19s/it]

Epoch: 95 | disc_loss: -0.36013561487197876 | gen_loss: 0.3064434826374054


 48%|████████████████████▎                     | 97/201 [15:19<17:46, 10.25s/it]

Epoch: 96 | disc_loss: -0.35322579741477966 | gen_loss: 0.3155753016471863


 49%|████████████████████▍                     | 98/201 [15:29<17:30, 10.20s/it]

Epoch: 97 | disc_loss: -0.35690170526504517 | gen_loss: 0.3163266181945801


 49%|████████████████████▋                     | 99/201 [15:39<17:17, 10.17s/it]

Epoch: 98 | disc_loss: -0.3530632555484772 | gen_loss: 0.3091626465320587


 50%|████████████████████▍                    | 100/201 [15:49<17:02, 10.12s/it]

Epoch: 99 | disc_loss: -0.35461267828941345 | gen_loss: 0.310222864151001


 50%|████████████████████▌                    | 101/201 [15:59<16:34,  9.95s/it]

Epoch: 100 | disc_loss: -0.35356125235557556 | gen_loss: 0.32231104373931885


 51%|████████████████████▊                    | 102/201 [16:08<16:20,  9.90s/it]

Epoch: 101 | disc_loss: -0.33436721563339233 | gen_loss: 0.28292375802993774


 51%|█████████████████████                    | 103/201 [16:18<15:57,  9.77s/it]

Epoch: 102 | disc_loss: -0.36721813678741455 | gen_loss: 0.33871734142303467


 52%|█████████████████████▏                   | 104/201 [16:27<15:40,  9.70s/it]

Epoch: 103 | disc_loss: -0.3562518060207367 | gen_loss: 0.3210611343383789


 52%|█████████████████████▍                   | 105/201 [16:37<15:27,  9.66s/it]

Epoch: 104 | disc_loss: -0.3598548173904419 | gen_loss: 0.3320603668689728


 53%|█████████████████████▌                   | 106/201 [16:47<15:16,  9.65s/it]

Epoch: 105 | disc_loss: -0.355227530002594 | gen_loss: 0.33585453033447266


 53%|█████████████████████▊                   | 107/201 [16:56<15:08,  9.67s/it]

Epoch: 106 | disc_loss: -0.36189883947372437 | gen_loss: 0.33699291944503784


 54%|██████████████████████                   | 108/201 [17:06<14:56,  9.64s/it]

Epoch: 107 | disc_loss: -0.3585660457611084 | gen_loss: 0.33678972721099854


 54%|██████████████████████▏                  | 109/201 [17:16<14:47,  9.65s/it]

Epoch: 108 | disc_loss: -0.3546859622001648 | gen_loss: 0.33554092049598694


 55%|██████████████████████▍                  | 110/201 [17:25<14:45,  9.73s/it]

Epoch: 109 | disc_loss: -0.34947431087493896 | gen_loss: 0.3162505030632019


 55%|██████████████████████▋                  | 111/201 [17:35<14:27,  9.64s/it]

Epoch: 110 | disc_loss: -0.3598942160606384 | gen_loss: 0.3352642059326172


 56%|██████████████████████▊                  | 112/201 [17:44<14:09,  9.54s/it]

Epoch: 111 | disc_loss: -0.3524569869041443 | gen_loss: 0.31806936860084534


 56%|███████████████████████                  | 113/201 [17:54<13:58,  9.53s/it]

Epoch: 112 | disc_loss: -0.35505709052085876 | gen_loss: 0.3110310137271881


 57%|███████████████████████▎                 | 114/201 [18:03<13:48,  9.53s/it]

Epoch: 113 | disc_loss: -0.3573094308376312 | gen_loss: 0.31841129064559937


 57%|███████████████████████▍                 | 115/201 [18:13<13:39,  9.52s/it]

Epoch: 114 | disc_loss: -0.35288289189338684 | gen_loss: 0.3207641541957855


 58%|███████████████████████▋                 | 116/201 [18:22<13:29,  9.52s/it]

Epoch: 115 | disc_loss: -0.35175660252571106 | gen_loss: 0.3169664144515991


 58%|███████████████████████▊                 | 117/201 [18:32<13:19,  9.51s/it]

Epoch: 116 | disc_loss: -0.3625016510486603 | gen_loss: 0.317837655544281


 59%|████████████████████████                 | 118/201 [18:41<13:10,  9.52s/it]

Epoch: 117 | disc_loss: -0.3553096354007721 | gen_loss: 0.320465624332428


 59%|████████████████████████▎                | 119/201 [18:51<13:00,  9.51s/it]

Epoch: 118 | disc_loss: -0.35070133209228516 | gen_loss: 0.31151050329208374


 60%|████████████████████████▍                | 120/201 [19:00<12:49,  9.50s/it]

Epoch: 119 | disc_loss: -0.35686182975769043 | gen_loss: 0.33036351203918457


 60%|████████████████████████▋                | 121/201 [19:10<12:40,  9.50s/it]

Epoch: 120 | disc_loss: -0.3242156505584717 | gen_loss: 0.32265082001686096


 61%|████████████████████████▉                | 122/201 [19:19<12:31,  9.51s/it]

Epoch: 121 | disc_loss: -0.3575114905834198 | gen_loss: 0.32237377762794495


 61%|█████████████████████████                | 123/201 [19:29<12:22,  9.52s/it]

Epoch: 122 | disc_loss: -0.3255982995033264 | gen_loss: 0.3141116797924042


 62%|█████████████████████████▎               | 124/201 [19:38<12:13,  9.53s/it]

Epoch: 123 | disc_loss: -0.3489488959312439 | gen_loss: 0.33195656538009644


 62%|█████████████████████████▍               | 125/201 [19:48<12:04,  9.53s/it]

Epoch: 124 | disc_loss: -0.35895800590515137 | gen_loss: 0.31821900606155396


 63%|█████████████████████████▋               | 126/201 [19:57<11:55,  9.53s/it]

Epoch: 125 | disc_loss: -0.3549337685108185 | gen_loss: 0.31100279092788696


 63%|█████████████████████████▉               | 127/201 [20:07<11:54,  9.65s/it]

Epoch: 126 | disc_loss: -0.3514045178890228 | gen_loss: 0.31906747817993164


 64%|██████████████████████████               | 128/201 [20:17<11:44,  9.65s/it]

Epoch: 127 | disc_loss: -0.3449593484401703 | gen_loss: 0.308726042509079


 64%|██████████████████████████▎              | 129/201 [20:27<11:31,  9.61s/it]

Epoch: 128 | disc_loss: -0.3551281690597534 | gen_loss: 0.3147999942302704


 65%|██████████████████████████▌              | 130/201 [20:36<11:20,  9.59s/it]

Epoch: 129 | disc_loss: -0.34968680143356323 | gen_loss: 0.3212398588657379


 65%|██████████████████████████▋              | 131/201 [20:46<11:10,  9.58s/it]

Epoch: 130 | disc_loss: -0.35688668489456177 | gen_loss: 0.33012527227401733


 66%|██████████████████████████▉              | 132/201 [20:55<11:02,  9.60s/it]

Epoch: 131 | disc_loss: -0.3588646650314331 | gen_loss: 0.3247789740562439


 66%|███████████████████████████▏             | 133/201 [21:05<10:58,  9.68s/it]

Epoch: 132 | disc_loss: -0.3498777151107788 | gen_loss: 0.3162727952003479


 67%|███████████████████████████▎             | 134/201 [21:15<10:51,  9.73s/it]

Epoch: 133 | disc_loss: -0.33759990334510803 | gen_loss: 0.3121030628681183


 67%|███████████████████████████▌             | 135/201 [21:25<10:44,  9.76s/it]

Epoch: 134 | disc_loss: -0.3558303713798523 | gen_loss: 0.33216264843940735


 68%|███████████████████████████▋             | 136/201 [21:35<10:45,  9.93s/it]

Epoch: 135 | disc_loss: -0.35440874099731445 | gen_loss: 0.31802645325660706


 68%|███████████████████████████▉             | 137/201 [21:45<10:41, 10.03s/it]

Epoch: 136 | disc_loss: -0.33670440316200256 | gen_loss: 0.3158930242061615


 69%|████████████████████████████▏            | 138/201 [21:55<10:30, 10.00s/it]

Epoch: 137 | disc_loss: -0.3503878712654114 | gen_loss: 0.31732940673828125


 69%|████████████████████████████▎            | 139/201 [22:05<10:13,  9.90s/it]

Epoch: 138 | disc_loss: -0.3420007526874542 | gen_loss: 0.3214828670024872


 70%|████████████████████████████▌            | 140/201 [22:15<10:00,  9.84s/it]

Epoch: 139 | disc_loss: -0.35298195481300354 | gen_loss: 0.32026994228363037


 70%|████████████████████████████▊            | 141/201 [22:24<09:47,  9.79s/it]

Epoch: 140 | disc_loss: -0.3643712103366852 | gen_loss: 0.33228227496147156


 71%|████████████████████████████▉            | 142/201 [22:34<09:36,  9.77s/it]

Epoch: 141 | disc_loss: -0.3546290397644043 | gen_loss: 0.3184846341609955


 71%|█████████████████████████████▏           | 143/201 [22:44<09:26,  9.78s/it]

Epoch: 142 | disc_loss: -0.3574207127094269 | gen_loss: 0.31752240657806396


 72%|█████████████████████████████▎           | 144/201 [22:54<09:16,  9.77s/it]

Epoch: 143 | disc_loss: -0.34488365054130554 | gen_loss: 0.32325083017349243


 72%|█████████████████████████████▌           | 145/201 [23:04<09:09,  9.81s/it]

Epoch: 144 | disc_loss: -0.3483644127845764 | gen_loss: 0.32140207290649414


 73%|█████████████████████████████▊           | 146/201 [23:14<09:08,  9.98s/it]

Epoch: 145 | disc_loss: -0.35488077998161316 | gen_loss: 0.32688745856285095


 73%|█████████████████████████████▉           | 147/201 [23:24<09:07, 10.14s/it]

Epoch: 146 | disc_loss: -0.3567943572998047 | gen_loss: 0.3305119276046753


 74%|██████████████████████████████▏          | 148/201 [23:35<09:00, 10.20s/it]

Epoch: 147 | disc_loss: -0.35734066367149353 | gen_loss: 0.32229602336883545


 74%|██████████████████████████████▍          | 149/201 [23:45<08:51, 10.23s/it]

Epoch: 148 | disc_loss: -0.3550950884819031 | gen_loss: 0.32643938064575195


 75%|██████████████████████████████▌          | 150/201 [23:55<08:37, 10.15s/it]

Epoch: 149 | disc_loss: -0.35799238085746765 | gen_loss: 0.33824092149734497


 75%|██████████████████████████████▊          | 151/201 [24:05<08:23, 10.07s/it]

Epoch: 150 | disc_loss: -0.34634295105934143 | gen_loss: 0.31187358498573303


 76%|███████████████████████████████          | 152/201 [24:15<08:10, 10.01s/it]

Epoch: 151 | disc_loss: -0.3619929254055023 | gen_loss: 0.32192036509513855


 76%|███████████████████████████████▏         | 153/201 [24:25<07:59,  9.99s/it]

Epoch: 152 | disc_loss: -0.35523515939712524 | gen_loss: 0.3115116357803345


 77%|███████████████████████████████▍         | 154/201 [24:35<07:52, 10.06s/it]

Epoch: 153 | disc_loss: -0.34357723593711853 | gen_loss: 0.3150649070739746


 77%|███████████████████████████████▌         | 155/201 [24:45<07:44, 10.11s/it]

Epoch: 154 | disc_loss: -0.3493503928184509 | gen_loss: 0.317818820476532


 78%|███████████████████████████████▊         | 156/201 [24:55<07:33, 10.08s/it]

Epoch: 155 | disc_loss: -0.3556354343891144 | gen_loss: 0.31329649686813354


 78%|████████████████████████████████         | 157/201 [25:05<07:22, 10.05s/it]

Epoch: 156 | disc_loss: -0.3316900134086609 | gen_loss: 0.32470038533210754


 79%|████████████████████████████████▏        | 158/201 [25:15<07:10, 10.00s/it]

Epoch: 157 | disc_loss: -0.35492345690727234 | gen_loss: 0.3150497376918793


 79%|████████████████████████████████▍        | 159/201 [25:25<06:54,  9.87s/it]

Epoch: 158 | disc_loss: -0.35515353083610535 | gen_loss: 0.31660568714141846


 80%|████████████████████████████████▋        | 160/201 [25:34<06:43,  9.83s/it]

Epoch: 159 | disc_loss: -0.3596925437450409 | gen_loss: 0.3215765058994293


 80%|████████████████████████████████▊        | 161/201 [25:45<06:37,  9.94s/it]

Epoch: 160 | disc_loss: -0.3526199758052826 | gen_loss: 0.3075782060623169


 81%|█████████████████████████████████        | 162/201 [25:55<06:31, 10.03s/it]

Epoch: 161 | disc_loss: -0.3477787673473358 | gen_loss: 0.31840333342552185


 81%|█████████████████████████████████▏       | 163/201 [26:05<06:22, 10.06s/it]

Epoch: 162 | disc_loss: -0.34153762459754944 | gen_loss: 0.31004461646080017


 82%|█████████████████████████████████▍       | 164/201 [26:15<06:09,  9.98s/it]

Epoch: 163 | disc_loss: -0.3559386134147644 | gen_loss: 0.31817561388015747


 82%|█████████████████████████████████▋       | 165/201 [26:25<05:56,  9.92s/it]

Epoch: 164 | disc_loss: -0.3575979471206665 | gen_loss: 0.3120875954627991


 83%|█████████████████████████████████▊       | 166/201 [26:34<05:46,  9.90s/it]

Epoch: 165 | disc_loss: -0.3383139669895172 | gen_loss: 0.3140687942504883


 83%|██████████████████████████████████       | 167/201 [26:44<05:36,  9.89s/it]

Epoch: 166 | disc_loss: -0.3571482300758362 | gen_loss: 0.3159966468811035


 84%|██████████████████████████████████▎      | 168/201 [26:55<05:31, 10.03s/it]

Epoch: 167 | disc_loss: -0.3347640335559845 | gen_loss: 0.3164505958557129


 84%|██████████████████████████████████▍      | 169/201 [27:05<05:23, 10.11s/it]

Epoch: 168 | disc_loss: -0.35537081956863403 | gen_loss: 0.3161095976829529


 85%|██████████████████████████████████▋      | 170/201 [27:15<05:14, 10.16s/it]

Epoch: 169 | disc_loss: -0.3532426655292511 | gen_loss: 0.32176655530929565


 85%|██████████████████████████████████▉      | 171/201 [27:25<05:02, 10.08s/it]

Epoch: 170 | disc_loss: -0.3513672649860382 | gen_loss: 0.310901403427124


 86%|███████████████████████████████████      | 172/201 [27:35<04:50, 10.02s/it]

Epoch: 171 | disc_loss: -0.3494969308376312 | gen_loss: 0.321180522441864


 86%|███████████████████████████████████▎     | 173/201 [27:45<04:39,  9.97s/it]

Epoch: 172 | disc_loss: -0.36078980565071106 | gen_loss: 0.305586576461792


 87%|███████████████████████████████████▍     | 174/201 [27:55<04:28,  9.96s/it]

Epoch: 173 | disc_loss: -0.3478212058544159 | gen_loss: 0.3113842010498047


 87%|███████████████████████████████████▋     | 175/201 [28:05<04:21, 10.05s/it]

Epoch: 174 | disc_loss: -0.3547293245792389 | gen_loss: 0.3225541412830353


 88%|███████████████████████████████████▉     | 176/201 [28:15<04:13, 10.12s/it]

Epoch: 175 | disc_loss: -0.3612802028656006 | gen_loss: 0.3246152997016907


 88%|████████████████████████████████████     | 177/201 [28:26<04:04, 10.18s/it]

Epoch: 176 | disc_loss: -0.35592326521873474 | gen_loss: 0.32385751605033875


 89%|████████████████████████████████████▎    | 178/201 [28:36<03:53, 10.17s/it]

Epoch: 177 | disc_loss: -0.3377261161804199 | gen_loss: 0.3135078549385071


 89%|████████████████████████████████████▌    | 179/201 [28:46<03:42, 10.10s/it]

Epoch: 178 | disc_loss: -0.3291967511177063 | gen_loss: 0.31905272603034973


 90%|████████████████████████████████████▋    | 180/201 [28:56<03:30, 10.03s/it]

Epoch: 179 | disc_loss: -0.3488912582397461 | gen_loss: 0.31110280752182007


 90%|████████████████████████████████████▉    | 181/201 [29:05<03:19,  9.97s/it]

Epoch: 180 | disc_loss: -0.3359912931919098 | gen_loss: 0.31082504987716675


 91%|█████████████████████████████████████    | 182/201 [29:15<03:09, 10.00s/it]

Epoch: 181 | disc_loss: -0.3403835892677307 | gen_loss: 0.3173799514770508


 91%|█████████████████████████████████████▎   | 183/201 [29:26<03:03, 10.19s/it]

Epoch: 182 | disc_loss: -0.34595128893852234 | gen_loss: 0.3166693449020386


 92%|█████████████████████████████████████▌   | 184/201 [29:36<02:53, 10.23s/it]

Epoch: 183 | disc_loss: -0.35733020305633545 | gen_loss: 0.31719666719436646


 92%|█████████████████████████████████████▋   | 185/201 [29:47<02:44, 10.26s/it]

Epoch: 184 | disc_loss: -0.34422212839126587 | gen_loss: 0.32111281156539917


 93%|█████████████████████████████████████▉   | 186/201 [29:57<02:34, 10.27s/it]

Epoch: 185 | disc_loss: -0.3540023863315582 | gen_loss: 0.3196861743927002


 93%|██████████████████████████████████████▏  | 187/201 [30:07<02:23, 10.24s/it]

Epoch: 186 | disc_loss: -0.35466229915618896 | gen_loss: 0.310715913772583


 94%|██████████████████████████████████████▎  | 188/201 [30:17<02:12, 10.16s/it]

Epoch: 187 | disc_loss: -0.3547765016555786 | gen_loss: 0.3221346139907837


 94%|██████████████████████████████████████▌  | 189/201 [30:27<02:01, 10.09s/it]

Epoch: 188 | disc_loss: -0.35526126623153687 | gen_loss: 0.32207363843917847


 95%|██████████████████████████████████████▊  | 190/201 [30:37<01:51, 10.17s/it]

Epoch: 189 | disc_loss: -0.3495720624923706 | gen_loss: 0.31667470932006836


 95%|██████████████████████████████████████▉  | 191/201 [30:48<01:42, 10.21s/it]

Epoch: 190 | disc_loss: -0.3520171046257019 | gen_loss: 0.320884644985199


 96%|███████████████████████████████████████▏ | 192/201 [30:58<01:32, 10.23s/it]

Epoch: 191 | disc_loss: -0.35232049226760864 | gen_loss: 0.31878793239593506


 96%|███████████████████████████████████████▎ | 193/201 [31:08<01:22, 10.26s/it]

Epoch: 192 | disc_loss: -0.3500812351703644 | gen_loss: 0.31596070528030396


 97%|███████████████████████████████████████▌ | 194/201 [31:18<01:11, 10.17s/it]

Epoch: 193 | disc_loss: -0.3523370027542114 | gen_loss: 0.3135164976119995


 97%|███████████████████████████████████████▊ | 195/201 [31:28<01:00, 10.09s/it]

Epoch: 194 | disc_loss: -0.35766157507896423 | gen_loss: 0.32496029138565063


 98%|███████████████████████████████████████▉ | 196/201 [31:38<00:50, 10.06s/it]

Epoch: 195 | disc_loss: -0.3489379584789276 | gen_loss: 0.3212459683418274


 98%|████████████████████████████████████████▏| 197/201 [31:48<00:40, 10.11s/it]

Epoch: 196 | disc_loss: -0.3620857000350952 | gen_loss: 0.3246152997016907


 99%|████████████████████████████████████████▍| 198/201 [31:59<00:30, 10.14s/it]

Epoch: 197 | disc_loss: -0.34353864192962646 | gen_loss: 0.3228464424610138


 99%|████████████████████████████████████████▌| 199/201 [32:09<00:20, 10.19s/it]

Epoch: 198 | disc_loss: -0.34156543016433716 | gen_loss: 0.33427584171295166


100%|████████████████████████████████████████▊| 200/201 [32:19<00:10, 10.20s/it]

Epoch: 199 | disc_loss: -0.3508546054363251 | gen_loss: 0.323633074760437


100%|█████████████████████████████████████████| 201/201 [32:29<00:00,  9.70s/it]

Epoch: 200 | disc_loss: -0.3494935929775238 | gen_loss: 0.32680952548980713





#### Generating 100k rows of minority data

In [14]:
#Generating synthetic data of 100k examples of diabetic patients
#(the recorded run returned 100096 rows rather than exactly 100000 - see the shape check further down)
minority_synth_data = synthesizer.sample(100000)

Synthetic data generation: 100%|██████████████| 782/782 [00:11<00:00, 67.44it/s]


In [15]:
#Counting missing values per column in the synthetic minority data
minority_synth_data.isna().sum()

HighBP                  0
HighChol                0
CholCheck               0
BMI                     0
Smoker                  0
Stroke                  0
HeartDiseaseorAttack    0
PhysActivity            0
Fruits                  0
Veggies                 0
HvyAlcoholConsump       0
AnyHealthcare           0
NoDocbcCost             0
GenHlth                 0
MentHlth                0
PhysHlth                0
DiffWalk                0
Sex                     0
Age                     0
Education               0
Income                  0
Diabetes_binary         0
dtype: int64

In [16]:
#Previewing the synthetic minority rows (the display is truncated to the first and last rows)
minority_synth_data

Unnamed: 0,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,Veggies,...,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income,Diabetes_binary
0,1.0,0.0,0.0,32.280758,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,2.0,0.308237,20.622051,1.0,1.0,7.0,1.0,1.0,1.0
1,0.0,1.0,0.0,29.729445,1.0,0.0,1.0,0.0,1.0,1.0,...,0.0,5.0,0.447694,6.936053,1.0,0.0,5.0,3.0,4.0,1.0
2,1.0,0.0,0.0,33.646648,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,3.0,-0.253246,10.605074,1.0,0.0,7.0,2.0,2.0,1.0
3,0.0,0.0,0.0,30.537703,0.0,1.0,0.0,0.0,1.0,1.0,...,1.0,1.0,1.033439,5.332734,1.0,0.0,8.0,2.0,7.0,1.0
4,1.0,1.0,1.0,30.573626,0.0,0.0,1.0,1.0,0.0,1.0,...,0.0,2.0,2.031053,14.967583,0.0,1.0,2.0,4.0,7.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100091,0.0,1.0,0.0,32.422222,1.0,1.0,1.0,1.0,0.0,0.0,...,1.0,1.0,0.527633,11.749678,1.0,0.0,1.0,3.0,4.0,1.0
100092,1.0,0.0,1.0,32.185829,1.0,0.0,1.0,0.0,1.0,1.0,...,1.0,2.0,1.175031,10.448512,1.0,0.0,7.0,5.0,1.0,1.0
100093,1.0,1.0,1.0,34.277805,1.0,0.0,0.0,0.0,0.0,1.0,...,1.0,1.0,-2.275481,25.303354,1.0,0.0,9.0,5.0,5.0,1.0
100094,0.0,0.0,1.0,35.670673,0.0,1.0,0.0,0.0,1.0,0.0,...,1.0,4.0,2.090659,5.257822,1.0,1.0,5.0,2.0,8.0,1.0


### Generating 100k rows of majority data

In [27]:
#The majority class is the non-diabetic population (label 0); train on a copy of those rows only
is_majority = X_train_df['Diabetes_binary'] == 0
train_data = X_train_df.loc[is_majority].copy()

#Same GAN architecture as for the minority class: WGAN with Gradient Penalty
model = WGAN_GP

#Network and optimiser settings of the GAN model
batch_size, dim, noise_dim = 128, 128, 32
learning_rate = 5e-4
beta_1, beta_2 = 0.5, 0.9

#Training schedule settings (models_dir is defined here but not referenced again in this cell)
epochs = 100+1
log_step = 100
models_dir = './cache'

#Bundling the settings into the model parameters and the training step parameters
gan_args = ModelParameters(
    batch_size=batch_size,
    lr=learning_rate,
    betas=(beta_1, beta_2),
    noise_dim=noise_dim,
    layers_dim=dim,
)
train_args = TrainParameters(epochs=epochs, sample_interval=log_step)

#Initializing and training the GAN model
#NOTE: this rebinds `synthesizer`, so the minority-class synthesizer is no longer reachable afterwards;
#the recorded run of this cell took a little over two hours
synthesizer = model(gan_args, n_critic = 20)
synthesizer.train(data = train_data, train_arguments = train_args, num_cols = num_cols, cat_cols = cat_cols)

#Generating synthetic data of 100k examples of non-diabetic patients
majority_synth_data = synthesizer.sample(100000)

  1%|          | 1/101 [01:43<2:52:01, 103.22s/it]

Epoch: 0 | disc_loss: -0.3417395055294037 | gen_loss: 0.17075596749782562


  2%|▏         | 2/101 [03:00<2:24:52, 87.80s/it] 

Epoch: 1 | disc_loss: -0.35966137051582336 | gen_loss: 0.21103143692016602


  3%|▎         | 3/101 [04:16<2:14:41, 82.46s/it]

Epoch: 2 | disc_loss: -0.35169023275375366 | gen_loss: 0.2684181332588196


  4%|▍         | 4/101 [05:31<2:08:56, 79.76s/it]

Epoch: 3 | disc_loss: -0.35828518867492676 | gen_loss: 0.3123449981212616


  5%|▍         | 5/101 [06:48<2:05:40, 78.54s/it]

Epoch: 4 | disc_loss: -0.35699689388275146 | gen_loss: 0.30517855286598206


  6%|▌         | 6/101 [08:04<2:03:12, 77.82s/it]

Epoch: 5 | disc_loss: -0.34813302755355835 | gen_loss: 0.3065980076789856


  7%|▋         | 7/101 [09:20<2:00:43, 77.06s/it]

Epoch: 6 | disc_loss: -0.34181034564971924 | gen_loss: 0.32059037685394287


  8%|▊         | 8/101 [10:36<1:59:01, 76.79s/it]

Epoch: 7 | disc_loss: -0.3628128468990326 | gen_loss: 0.3349193036556244


  9%|▉         | 9/101 [11:51<1:57:02, 76.33s/it]

Epoch: 8 | disc_loss: -0.352329820394516 | gen_loss: 0.32703015208244324


 10%|▉         | 10/101 [13:08<1:55:51, 76.39s/it]

Epoch: 9 | disc_loss: -0.359203577041626 | gen_loss: 0.3319256603717804


 11%|█         | 11/101 [14:23<1:54:09, 76.10s/it]

Epoch: 10 | disc_loss: -0.33353739976882935 | gen_loss: 0.33884909749031067


 12%|█▏        | 12/101 [15:40<1:53:07, 76.26s/it]

Epoch: 11 | disc_loss: -0.3563274145126343 | gen_loss: 0.3281415104866028


 13%|█▎        | 13/101 [16:56<1:51:54, 76.31s/it]

Epoch: 12 | disc_loss: -0.34225285053253174 | gen_loss: 0.3269023895263672


 14%|█▍        | 14/101 [18:14<1:51:08, 76.65s/it]

Epoch: 13 | disc_loss: -0.3569873571395874 | gen_loss: 0.3339621424674988


 15%|█▍        | 15/101 [19:31<1:50:11, 76.88s/it]

Epoch: 14 | disc_loss: -0.3590335547924042 | gen_loss: 0.3381456136703491


 16%|█▌        | 16/101 [20:48<1:48:43, 76.75s/it]

Epoch: 15 | disc_loss: -0.3510229289531708 | gen_loss: 0.3291483521461487


 17%|█▋        | 17/101 [22:04<1:47:27, 76.75s/it]

Epoch: 16 | disc_loss: -0.3518991470336914 | gen_loss: 0.34827885031700134


 18%|█▊        | 18/101 [23:21<1:46:09, 76.74s/it]

Epoch: 17 | disc_loss: -0.3524854779243469 | gen_loss: 0.3325046896934509


 19%|█▉        | 19/101 [24:37<1:44:44, 76.64s/it]

Epoch: 18 | disc_loss: -0.34942835569381714 | gen_loss: 0.3299225866794586


 20%|█▉        | 20/101 [25:55<1:43:57, 77.00s/it]

Epoch: 19 | disc_loss: -0.3535231947898865 | gen_loss: 0.3392050862312317


 21%|██        | 21/101 [27:11<1:42:09, 76.62s/it]

Epoch: 20 | disc_loss: -0.3600747287273407 | gen_loss: 0.33762550354003906


 22%|██▏       | 22/101 [28:27<1:40:46, 76.53s/it]

Epoch: 21 | disc_loss: -0.3318978548049927 | gen_loss: 0.33457523584365845


 23%|██▎       | 23/101 [29:43<1:39:11, 76.30s/it]

Epoch: 22 | disc_loss: -0.356433629989624 | gen_loss: 0.33237627148628235


 24%|██▍       | 24/101 [31:00<1:38:01, 76.38s/it]

Epoch: 23 | disc_loss: -0.34467825293540955 | gen_loss: 0.3207027316093445


 25%|██▍       | 25/101 [32:16<1:36:43, 76.36s/it]

Epoch: 24 | disc_loss: -0.3512178957462311 | gen_loss: 0.3313784897327423


 26%|██▌       | 26/101 [33:32<1:35:11, 76.16s/it]

Epoch: 25 | disc_loss: -0.3576817810535431 | gen_loss: 0.3245852589607239


 27%|██▋       | 27/101 [34:48<1:34:02, 76.25s/it]

Epoch: 26 | disc_loss: -0.33946818113327026 | gen_loss: 0.3099546432495117


 28%|██▊       | 28/101 [36:04<1:32:34, 76.09s/it]

Epoch: 27 | disc_loss: -0.35053378343582153 | gen_loss: 0.3139893710613251


 29%|██▊       | 29/101 [37:20<1:31:21, 76.13s/it]

Epoch: 28 | disc_loss: -0.33998364210128784 | gen_loss: 0.31969764828681946


 30%|██▉       | 30/101 [38:36<1:29:51, 75.93s/it]

Epoch: 29 | disc_loss: -0.3622645139694214 | gen_loss: 0.33073800802230835


 31%|███       | 31/101 [39:52<1:28:36, 75.95s/it]

Epoch: 30 | disc_loss: -0.3638520836830139 | gen_loss: 0.33068257570266724


 32%|███▏      | 32/101 [41:08<1:27:30, 76.10s/it]

Epoch: 31 | disc_loss: -0.35849541425704956 | gen_loss: 0.3285537362098694


 33%|███▎      | 33/101 [42:23<1:26:00, 75.89s/it]

Epoch: 32 | disc_loss: -0.35041776299476624 | gen_loss: 0.32389187812805176


 34%|███▎      | 34/101 [43:42<1:25:45, 76.80s/it]

Epoch: 33 | disc_loss: -0.3503778576850891 | gen_loss: 0.324806809425354


 35%|███▍      | 35/101 [45:01<1:25:00, 77.28s/it]

Epoch: 34 | disc_loss: -0.34773749113082886 | gen_loss: 0.33053579926490784


 36%|███▌      | 36/101 [46:19<1:24:11, 77.72s/it]

Epoch: 35 | disc_loss: -0.3572191596031189 | gen_loss: 0.33549049496650696


 37%|███▋      | 37/101 [47:38<1:23:07, 77.93s/it]

Epoch: 36 | disc_loss: -0.351124107837677 | gen_loss: 0.3615914285182953


 38%|███▊      | 38/101 [48:56<1:21:45, 77.86s/it]

Epoch: 37 | disc_loss: -0.32464709877967834 | gen_loss: 0.33520087599754333


 39%|███▊      | 39/101 [50:14<1:20:32, 77.94s/it]

Epoch: 38 | disc_loss: -0.3520853817462921 | gen_loss: 0.329795777797699


 40%|███▉      | 40/101 [51:31<1:19:07, 77.83s/it]

Epoch: 39 | disc_loss: -0.35168084502220154 | gen_loss: 0.3358234167098999


 41%|████      | 41/101 [52:50<1:17:57, 77.96s/it]

Epoch: 40 | disc_loss: -0.36177167296409607 | gen_loss: 0.3473645746707916


 42%|████▏     | 42/101 [54:08<1:16:41, 77.99s/it]

Epoch: 41 | disc_loss: -0.3536114990711212 | gen_loss: 0.33173632621765137


 43%|████▎     | 43/101 [55:25<1:15:14, 77.84s/it]

Epoch: 42 | disc_loss: -0.3580881655216217 | gen_loss: 0.325894832611084


 44%|████▎     | 44/101 [56:43<1:14:04, 77.97s/it]

Epoch: 43 | disc_loss: -0.35188138484954834 | gen_loss: 0.3325440287590027


 45%|████▍     | 45/101 [58:00<1:12:17, 77.46s/it]

Epoch: 44 | disc_loss: -0.3602774441242218 | gen_loss: 0.3349372446537018


 46%|████▌     | 46/101 [59:18<1:11:11, 77.66s/it]

Epoch: 45 | disc_loss: -0.35363712906837463 | gen_loss: 0.33707568049430847


 47%|████▋     | 47/101 [1:00:36<1:09:59, 77.77s/it]

Epoch: 46 | disc_loss: -0.3511183261871338 | gen_loss: 0.3197973370552063


 48%|████▊     | 48/101 [1:01:53<1:08:35, 77.65s/it]

Epoch: 47 | disc_loss: -0.3586497902870178 | gen_loss: 0.3415604531764984


 49%|████▊     | 49/101 [1:03:11<1:07:25, 77.79s/it]

Epoch: 48 | disc_loss: -0.3525041341781616 | gen_loss: 0.32856202125549316


 50%|████▉     | 50/101 [1:04:29<1:06:07, 77.80s/it]

Epoch: 49 | disc_loss: -0.36180728673934937 | gen_loss: 0.3448362946510315


 50%|█████     | 51/101 [1:05:46<1:04:39, 77.60s/it]

Epoch: 50 | disc_loss: -0.35355132818222046 | gen_loss: 0.32087308168411255


 51%|█████▏    | 52/101 [1:07:04<1:03:28, 77.73s/it]

Epoch: 51 | disc_loss: -0.34700194001197815 | gen_loss: 0.33832383155822754


 52%|█████▏    | 53/101 [1:08:21<1:02:01, 77.53s/it]

Epoch: 52 | disc_loss: -0.35602864623069763 | gen_loss: 0.3323778212070465


 53%|█████▎    | 54/101 [1:09:39<1:00:47, 77.60s/it]

Epoch: 53 | disc_loss: -0.3459472358226776 | gen_loss: 0.3682972192764282


 54%|█████▍    | 55/101 [1:10:57<59:34, 77.70s/it]  

Epoch: 54 | disc_loss: -0.34131860733032227 | gen_loss: 0.36641639471054077


 55%|█████▌    | 56/101 [1:12:14<58:09, 77.55s/it]

Epoch: 55 | disc_loss: -0.36600497364997864 | gen_loss: 0.3589633107185364


 56%|█████▋    | 57/101 [1:13:32<56:55, 77.63s/it]

Epoch: 56 | disc_loss: -0.3613569438457489 | gen_loss: 0.3721551299095154


 57%|█████▋    | 58/101 [1:14:49<55:34, 77.54s/it]

Epoch: 57 | disc_loss: -0.3566440939903259 | gen_loss: 0.3565489649772644


 58%|█████▊    | 59/101 [1:16:07<54:20, 77.62s/it]

Epoch: 58 | disc_loss: -0.34404850006103516 | gen_loss: 0.35904815793037415


 59%|█████▉    | 60/101 [1:17:25<53:04, 77.68s/it]

Epoch: 59 | disc_loss: -0.3527061939239502 | gen_loss: 0.3524354100227356


 60%|██████    | 61/101 [1:18:42<51:41, 77.53s/it]

Epoch: 60 | disc_loss: -0.3621724843978882 | gen_loss: 0.34930282831192017


 61%|██████▏   | 62/101 [1:20:00<50:28, 77.66s/it]

Epoch: 61 | disc_loss: -0.3296316862106323 | gen_loss: 0.3556821048259735


 62%|██████▏   | 63/101 [1:21:18<49:12, 77.69s/it]

Epoch: 62 | disc_loss: -0.34523770213127136 | gen_loss: 0.3487168550491333


 63%|██████▎   | 64/101 [1:22:34<47:37, 77.23s/it]

Epoch: 63 | disc_loss: -0.3542047142982483 | gen_loss: 0.34639811515808105


 64%|██████▍   | 65/101 [1:23:51<46:15, 77.11s/it]

Epoch: 64 | disc_loss: -0.346452921628952 | gen_loss: 0.35214200615882874


 65%|██████▌   | 66/101 [1:25:08<44:59, 77.13s/it]

Epoch: 65 | disc_loss: -0.36160317063331604 | gen_loss: 0.3415398597717285


 66%|██████▋   | 67/101 [1:26:26<43:51, 77.39s/it]

Epoch: 66 | disc_loss: -0.3555296063423157 | gen_loss: 0.3386250138282776


 67%|██████▋   | 68/101 [1:27:44<42:39, 77.56s/it]

Epoch: 67 | disc_loss: -0.3488486409187317 | gen_loss: 0.3543930649757385


 68%|██████▊   | 69/101 [1:29:01<41:16, 77.38s/it]

Epoch: 68 | disc_loss: -0.35490095615386963 | gen_loss: 0.3564991354942322


 69%|██████▉   | 70/101 [1:30:17<39:50, 77.11s/it]

Epoch: 69 | disc_loss: -0.3586244285106659 | gen_loss: 0.34383392333984375


 70%|███████   | 71/101 [1:31:36<38:46, 77.56s/it]

Epoch: 70 | disc_loss: -0.35623618960380554 | gen_loss: 0.3313450813293457


 71%|███████▏  | 72/101 [1:32:54<37:30, 77.61s/it]

Epoch: 71 | disc_loss: -0.3681429624557495 | gen_loss: 0.34827151894569397


 72%|███████▏  | 73/101 [1:34:11<36:06, 77.39s/it]

Epoch: 72 | disc_loss: -0.33402007818222046 | gen_loss: 0.3390551507472992


 73%|███████▎  | 74/101 [1:35:29<34:58, 77.72s/it]

Epoch: 73 | disc_loss: -0.3489326238632202 | gen_loss: 0.33294612169265747


 74%|███████▍  | 75/101 [1:36:49<33:58, 78.41s/it]

Epoch: 74 | disc_loss: -0.3440345823764801 | gen_loss: 0.333057701587677


 75%|███████▌  | 76/101 [1:38:09<32:53, 78.94s/it]

Epoch: 75 | disc_loss: -0.3525370657444 | gen_loss: 0.346027672290802


 76%|███████▌  | 77/101 [1:39:28<31:29, 78.72s/it]

Epoch: 76 | disc_loss: -0.3296760320663452 | gen_loss: 0.327872097492218


 77%|███████▋  | 78/101 [1:40:47<30:15, 78.93s/it]

Epoch: 77 | disc_loss: -0.353699266910553 | gen_loss: 0.3480694591999054


 78%|███████▊  | 79/101 [1:42:06<28:56, 78.95s/it]

Epoch: 78 | disc_loss: -0.3545827567577362 | gen_loss: 0.35641422867774963


 79%|███████▉  | 80/101 [1:43:24<27:32, 78.67s/it]

Epoch: 79 | disc_loss: -0.35548463463783264 | gen_loss: 0.35050728917121887


 80%|████████  | 81/101 [1:44:43<26:13, 78.69s/it]

Epoch: 80 | disc_loss: -0.3496447503566742 | gen_loss: 0.3466152846813202


 81%|████████  | 82/101 [1:46:01<24:55, 78.69s/it]

Epoch: 81 | disc_loss: -0.34414756298065186 | gen_loss: 0.3481123447418213


 82%|████████▏ | 83/101 [1:47:18<23:27, 78.17s/it]

Epoch: 82 | disc_loss: -0.33683842420578003 | gen_loss: 0.334022581577301


 83%|████████▎ | 84/101 [1:48:37<22:12, 78.40s/it]

Epoch: 83 | disc_loss: -0.3470902144908905 | gen_loss: 0.33738768100738525


 84%|████████▍ | 85/101 [1:49:56<20:57, 78.57s/it]

Epoch: 84 | disc_loss: -0.34694960713386536 | gen_loss: 0.3366965055465698


 85%|████████▌ | 86/101 [1:51:14<19:34, 78.33s/it]

Epoch: 85 | disc_loss: -0.3598842918872833 | gen_loss: 0.3427453935146332


 86%|████████▌ | 87/101 [1:52:32<18:16, 78.30s/it]

Epoch: 86 | disc_loss: -0.3516960144042969 | gen_loss: 0.32184290885925293


 87%|████████▋ | 88/101 [1:53:51<17:00, 78.52s/it]

Epoch: 87 | disc_loss: -0.3446378707885742 | gen_loss: 0.3286157250404358


 88%|████████▊ | 89/101 [1:55:09<15:40, 78.39s/it]

Epoch: 88 | disc_loss: -0.35030075907707214 | gen_loss: 0.3209429383277893


 89%|████████▉ | 90/101 [1:56:28<14:23, 78.52s/it]

Epoch: 89 | disc_loss: -0.3460026681423187 | gen_loss: 0.32536113262176514


 90%|█████████ | 91/101 [1:57:47<13:04, 78.45s/it]

Epoch: 90 | disc_loss: -0.34264087677001953 | gen_loss: 0.3366703987121582


 91%|█████████ | 92/101 [1:59:05<11:46, 78.52s/it]

Epoch: 91 | disc_loss: -0.3435715436935425 | gen_loss: 0.345994234085083


 92%|█████████▏| 93/101 [2:00:24<10:28, 78.57s/it]

Epoch: 92 | disc_loss: -0.3535369336605072 | gen_loss: 0.36218929290771484


 93%|█████████▎| 94/101 [2:01:42<09:09, 78.45s/it]

Epoch: 93 | disc_loss: -0.33106353878974915 | gen_loss: 0.36268481612205505


 94%|█████████▍| 95/101 [2:03:01<07:51, 78.60s/it]

Epoch: 94 | disc_loss: -0.35146284103393555 | gen_loss: 0.34258320927619934


 95%|█████████▌| 96/101 [2:04:20<06:33, 78.70s/it]

Epoch: 95 | disc_loss: -0.35144317150115967 | gen_loss: 0.363172322511673


 96%|█████████▌| 97/101 [2:05:38<05:14, 78.65s/it]

Epoch: 96 | disc_loss: -0.3541819751262665 | gen_loss: 0.35763972997665405


 97%|█████████▋| 98/101 [2:06:57<03:56, 78.74s/it]

Epoch: 97 | disc_loss: -0.3332362771034241 | gen_loss: 0.364384263753891


 98%|█████████▊| 99/101 [2:08:16<02:37, 78.83s/it]

Epoch: 98 | disc_loss: -0.34932687878608704 | gen_loss: 0.35988837480545044


 99%|█████████▉| 100/101 [2:09:34<01:18, 78.30s/it]

Epoch: 99 | disc_loss: -0.3502044081687927 | gen_loss: 0.3703605532646179


100%|██████████| 101/101 [2:10:53<00:00, 77.75s/it]


Epoch: 100 | disc_loss: -0.3563690781593323 | gen_loss: 0.3735984265804291


Synthetic data generation: 100%|██████████| 782/782 [00:16<00:00, 48.75it/s]


In [29]:
#Counting missing values per column in the synthetic majority data
majority_synth_data.isna().sum()

HighBP                  0
HighChol                0
CholCheck               0
BMI                     0
Smoker                  0
Stroke                  0
HeartDiseaseorAttack    0
PhysActivity            0
Fruits                  0
Veggies                 0
HvyAlcoholConsump       0
AnyHealthcare           0
NoDocbcCost             0
GenHlth                 0
MentHlth                0
PhysHlth                0
DiffWalk                0
Sex                     0
Age                     0
Education               0
Income                  0
Diabetes_binary         0
dtype: int64

In [31]:
#Checking the (rows, columns) of the synthetic majority data
majority_synth_data.shape

(100096, 22)

In [34]:
#Checking the (rows, columns) of the synthetic minority data
minority_synth_data.shape

(100096, 22)

In [32]:
#Stacking the majority rows on top of the minority rows and renumbering the index from 0
synth_df = pd.concat(
    objs = [majority_synth_data, minority_synth_data],
    axis = 0,
    ignore_index = True,
)

In [35]:
#Checking the (rows, columns) of the combined synthetic data
synth_df.shape

(200192, 22)

In [33]:
#Saving the combined synthetic data as a CSV file (without the index column)
#to_csv does not create missing parent folders, so the output folder is created first
synth_data_path = r'./generated_data/diabetes_binary_health_synthetic_data.csv'
os.makedirs(os.path.dirname(synth_data_path), exist_ok = True)
synth_df.to_csv(synth_data_path, index = False)

### Generating pandas-profiling report for synthetic data

In [10]:
#Reloading the saved synthetic data from the CSV file written above
#NOTE(review): the execution counter restarts here, so this presumably ran in a fresh kernel - confirm
synth_df = pd.read_csv(r'./generated_data/diabetes_binary_health_synthetic_data.csv')

In [11]:
#Building the pandas-profiling report for the synthetic data (this rebinds `profile` from the real-data report)
profile = ProfileReport(synth_df, title="Pandas Profiling Report for Synthetic Diabetes Dataset", explorative=True)

In [12]:
#Rendering the synthetic-data report as notebook widgets
profile.to_widgets()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

  (2 * xtie * ytie) / m + x0 * y0 / (9 * m * (size - 2)))


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

In [13]:
#Saving the profile report of the synthetic data as an HTML file
profile.to_file(r"pandas_profiling_reports/Synthetic_Diabetes_Pandas_Profiling_Report.html")

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## Modelling

### Replacing the negative values in columns "MentHlth" and "PhysHlth" with zero

In [78]:
#The GAN emits some negative values for "MentHlth" and "PhysHlth"; replace them with zero.
#A single .loc assignment is used instead of chained indexing (df[col][mask] = 0), which
#raises SettingWithCopyWarning and does not modify synth_df at all under pandas Copy-on-Write.
for days_col in ['MentHlth', 'PhysHlth']:
    synth_df.loc[synth_df[days_col] < 0, days_col] = 0

In [79]:
#Summary statistics of "MentHlth" after the replacement (the minimum should now be 0)
synth_df.MentHlth.describe()

count    200192.000000
mean          1.868842
std           4.080144
min           0.000000
25%           0.000000
50%           0.000000
75%           1.407046
max          29.953527
Name: MentHlth, dtype: float64

In [80]:
#Summary statistics of "PhysHlth" after the replacement (the minimum should now be 0)
synth_df.PhysHlth.describe()

count    200192.000000
mean         15.033990
std          12.740442
min           0.000000
25%           2.229514
50%          19.804619
75%          27.963614
max          29.140305
Name: PhysHlth, dtype: float64

### Splitting the X and y from the synthetic data for modelling

In [81]:
#Target: the "Diabetes_binary" label; features: every other column of the synthetic data
y = synth_df['Diabetes_binary']
X = synth_df.drop(columns = ['Diabetes_binary'])

### Building an ML model (using LightGBM)

In [127]:
from lightgbm import LGBMClassifier
from sklearn.svm import SVC

In [136]:
#LightGBM classifier with only random_state set (pinned to 0); this rebinds `model`, which earlier held WGAN_GP
model = LGBMClassifier(random_state = 0)

In [137]:
#Training the classifier on the synthetic data only
model.fit(X, y)

LGBMClassifier(random_state=0)

### Evaluating the model on the test data

In [138]:
#Predicting on the held-out real test split.
#The test columns are selected in the training feature order so the features cannot be
#silently misaligned (the synthetic training frame went through the GAN and a CSV round trip);
#a missing feature column raises a KeyError instead of producing wrong predictions.
y_pred = model.predict(X_test_df[X.columns])

In [140]:
#Per-class precision / recall / f1 of the predictions against the real test labels
from sklearn.metrics import classification_report
print(classification_report(y_test_df, y_pred))

              precision    recall  f1-score   support

         0.0       0.86      0.89      0.88     38876
         1.0       0.27      0.23      0.25      7019

    accuracy                           0.79     45895
   macro avg       0.57      0.56      0.56     45895
weighted avg       0.77      0.79      0.78     45895

