# Augmentation_9

Use Generative Adversarial Network to synthesize data

## Import libraries

In [1]:
pip install numpy pandas matplotlib tensorflow



In [2]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Reshape, Flatten, Input
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler

In [4]:
merged=pd.read_csv('/content/gdrive/MyDrive/GAN/merged.csv')

In [5]:
petting_d= merged[merged['Petting'] == 1]
non_petting_d= merged[merged['Petting'] == 0]

## GAN - Petting

In [6]:
data = petting_d

# Normalize data
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)

# Generator model
def build_generator(latent_dim, output_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(output_dim, activation='tanh'))
    return model

# Discriminator model
def build_discriminator(input_dim):
    model = Sequential()
    model.add(Dense(256, input_dim=input_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(128))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

# GAN model
def build_gan(generator, discriminator):
    discriminator.trainable = False
    gan_input = Input(shape=(latent_dim,))
    x = generator(gan_input)
    gan_output = discriminator(x)
    gan = Model(gan_input, gan_output)
    gan.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')
    return gan

# Training parameters
epochs = 1000
batch_size = 32
latent_dim = 100
output_dim = normalized_data.shape[1]

# Build and compile models
generator = build_generator(latent_dim, output_dim)
discriminator = build_discriminator(output_dim)
gan = build_gan(generator, discriminator)

# Compile models
discriminator.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')
gan.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')

# Training loop
for epoch in range(epochs):
    idx = np.random.randint(0, normalized_data.shape[0], batch_size)
    real_samples = normalized_data[idx]

    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    generated_samples = generator.predict(noise)

    real_labels = np.ones((batch_size, 1))
    fake_labels = np.zeros((batch_size, 1))

    d_loss_real = discriminator.train_on_batch(real_samples, real_labels)
    d_loss_fake = discriminator.train_on_batch(generated_samples, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    valid_labels = np.ones((batch_size, 1))

    g_loss = gan.train_on_batch(noise, valid_labels)

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, D Loss: {d_loss}, G Loss: {g_loss}")

# Generate synthetic samples
num_samples = 85 # Create 85 more Petting == 1 data
noise = np.random.normal(0, 1, (num_samples, latent_dim))
generated_data = generator.predict(noise)

# Denormalize the generated data
generated_data = scaler.inverse_transform(generated_data)

# Save the generated data
generated_data_df = pd.DataFrame(generated_data, columns=data.columns)

Epoch 0, D Loss: 0.6392971277236938, G Loss: 0.7238576412200928


In [7]:
generated_data_df['Petting']=1
petting_data=generated_data_df

## GAN - Non_Petting

In [8]:
data = non_petting_d

# Normalize data
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)

# Generator model
def build_generator(latent_dim, output_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(output_dim, activation='tanh'))
    return model

# Discriminator model
def build_discriminator(input_dim):
    model = Sequential()
    model.add(Dense(256, input_dim=input_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(128))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

# GAN model
def build_gan(generator, discriminator):
    discriminator.trainable = False
    gan_input = Input(shape=(latent_dim,))
    x = generator(gan_input)
    gan_output = discriminator(x)
    gan = Model(gan_input, gan_output)
    gan.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')
    return gan

# Training parameters
epochs = 1000
batch_size = 32
latent_dim = 100
output_dim = normalized_data.shape[1]

# Build and compile models
generator = build_generator(latent_dim, output_dim)
discriminator = build_discriminator(output_dim)
gan = build_gan(generator, discriminator)

# Compile models
discriminator.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')
gan.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')

# Training loop
for epoch in range(epochs):
    idx = np.random.randint(0, normalized_data.shape[0], batch_size)
    real_samples = normalized_data[idx]

    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    generated_samples = generator.predict(noise)

    real_labels = np.ones((batch_size, 1))
    fake_labels = np.zeros((batch_size, 1))

    d_loss_real = discriminator.train_on_batch(real_samples, real_labels)
    d_loss_fake = discriminator.train_on_batch(generated_samples, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    valid_labels = np.ones((batch_size, 1))

    g_loss = gan.train_on_batch(noise, valid_labels)

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, D Loss: {d_loss}, G Loss: {g_loss}")

# Generate synthetic samples
num_samples = 80  # Create 80 more Petting == 0 Data
noise = np.random.normal(0, 1, (num_samples, latent_dim))
generated_data = generator.predict(noise)

# Denormalize the generated data
generated_data = scaler.inverse_transform(generated_data)

# Save the generated data
generated_data_df = pd.DataFrame(generated_data, columns=data.columns)

Epoch 0, D Loss: 0.7355219125747681, G Loss: 0.6401695013046265


In [9]:
generated_data_df['Petting']=0
non_petting_data=generated_data_df

In [10]:
# Concatenate Petting and Non-petting data
merged_data=pd.concat([merged,petting_data,non_petting_data])

In [11]:
# Save to csv file
merged_data.to_csv('/content/gdrive/MyDrive/GAN/generated_data.csv', index=False)

# Random Forest ACC,AUC

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

In [14]:
data=merged_data

X = data.drop('Petting', axis=1)
y = data['Petting']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_classifier.predict(X_test)

# Calculate ACC
accuracy = accuracy_score(y_test, y_pred)
print(f"ACC: {accuracy:.4f}")

# Calculate AUC
y_prob = rf_classifier.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_prob)
print(f"AUC: {roc_auc:.4f}")

ACC: 0.9815
AUC: 0.9959
