In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Input, Dense, LeakyReLU, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Descriptor table; its rows are the "real" samples shown to the discriminator.
X = pd.read_csv('computedDescriptors.csv')

# Length of the random noise vector the generator consumes.
latent_dim = 128

# Generator network: noise vector -> two hidden stages, each
# Dense(128) -> LeakyReLU -> Dropout(0.5) -> linear Dense layer with one
# output unit per column of X.
generator_input = Input(shape=(latent_dim,))
x = generator_input
for hidden_stage in range(2):
  x = Dense(128)(x)
  x = LeakyReLU(alpha=0.01)(x)
  x = Dropout(0.5)(x)
generator_output = Dense(X.shape[1])(x)
x = generator_output  # `x` stays bound to the most recent tensor
generator = Model(generator_input, generator_output)

# Discriminator network: one row with X.shape[1] features -> two hidden
# stages, each Dense(128) -> LeakyReLU -> Dropout(0.5) -> a single sigmoid
# unit.
discriminator_input = Input(shape=(X.shape[1],))
x = discriminator_input
for hidden_stage in range(2):
  x = Dense(128)(x)
  x = LeakyReLU(alpha=0.01)(x)
  x = Dropout(0.5)(x)
discriminator_output = Dense(1, activation='sigmoid')(x)
x = discriminator_output  # `x` stays bound to the most recent tensor
discriminator = Model(discriminator_input, discriminator_output)

# Compiled on its own (binary cross-entropy, accuracy metric) so it can be
# trained directly with train_on_batch.
discriminator.compile(
  optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
  loss='binary_crossentropy',
  metrics=['accuracy'],
)

# Mark the discriminator as non-trainable before wiring it into the stacked
# model. The discriminator itself was already compiled before this flag was
# flipped; the intent is that training `gan` only adjusts the generator.
discriminator.trainable = False

# Stacked model: noise -> generator -> discriminator -> sigmoid score.
gan_input = Input(shape=(latent_dim,))
generated_sample = generator(gan_input)
gan_output = discriminator(generated_sample)
gan = Model(inputs=gan_input, outputs=gan_output)

gan.compile(
  optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
  loss='binary_crossentropy',
)

# Training configuration.
num_epochs = 1000
batch_size = 30
# Only full batches are used; trailing rows that do not fill a batch are skipped.
num_batches = X.shape[0] // batch_size
if num_batches == 0:
  # Without this guard nothing would be trained and the reported loss would
  # silently be NaN (mean of an empty list).
  raise ValueError(
    f'Need at least batch_size={batch_size} rows to train, got {X.shape[0]}'
  )

# One entry per epoch: mean over batches of (discriminator loss + generator loss).
losses = []
for epoch in range(num_epochs):
  epoch_loss = []
  for batch in range(num_batches):
    # Real half of the discriminator batch: consecutive rows of X.
    real_data = X[batch*batch_size:(batch+1)*batch_size]

    # Fake half: generator output for fresh noise. verbose=0 keeps predict()
    # from printing a progress bar on every batch.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    synthetic_data = generator.predict(noise, verbose=0)

    # Discriminator step: real rows labelled 1, generated rows labelled 0.
    data = np.concatenate((real_data, synthetic_data))
    labels = np.concatenate((np.ones(batch_size), np.zeros(batch_size)))
    d_loss, d_acc = discriminator.train_on_batch(data, labels)

    # Generator step through the stacked model: new noise, all targets 1.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, np.ones(batch_size))

    epoch_loss.append(d_loss + g_loss)

  # Summarise and report once per epoch (inside the epoch loop, so `losses`
  # ends up with num_epochs entries).
  epoch_loss = np.mean(epoch_loss)
  losses.append(epoch_loss)
  print(f'Epoch {epoch+1}/{num_epochs}: loss {epoch_loss:.4f}')

In [None]:
# Number of synthetic rows to generate and append to the real data.
num_synthetic_data = 187
noise = np.random.normal(0, 1, (num_synthetic_data, latent_dim))
synthetic_data = generator.predict(noise)

# Keras models emit float32 by default. Cast to float64 *before* rounding:
# rounding in float32 and then upcasting (which happens when concatenating
# with X) turns 0.123 into 0.12300000339746475, and that long form is what
# would be written for every column that is not re-formatted below.
synthetic_data = np.round(synthetic_data.astype(np.float64), decimals=3)

# Stack real rows first, synthetic rows after, under X's column names.
data = np.concatenate((X, synthetic_data))

df = pd.DataFrame(data, columns=X.columns)

# Everything is written as text; columns that are float64 in X are
# additionally forced to exactly three decimals.
df = df.astype(str)

for col in X.columns:
    if X[col].dtype == 'float64':
        df[col] = df[col].apply(lambda x: '{:.3f}'.format(float(x)))

df.to_csv('resultFile.csv', index=False)

In [None]:
# Overlay the first two columns of the real rows (blue) and of the generated
# rows (red) in a single scatter plot.
scatter_series = (
    (X.values, 'blue', 'Real data'),
    (synthetic_data, 'red', 'Synthetic data'),
)
for points, colour, series_label in scatter_series:
    plt.scatter(points[:, 0], points[:, 1], color=colour, alpha=0.5, label=series_label)

plt.legend()
plt.title('###')

# Write the figure to disk, then display it.
plt.savefig('fig.png', dpi=600)
plt.show()