In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import tensorflow as tf
from keras.layers import Input, Dense, Lambda
from keras.models import Model

# Load data
df = pd.read_csv("dataset.csv")
X = df.drop(columns=["Unnamed: 0", "label"])
y = df["label"]

# Normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define sampling function using TensorFlow ops
def sampling(args):
    z_mean, z_log_var = args
    epsilon = tf.random.normal(shape=tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# VAE architecture
input_dim = X_scaled.shape[1]
inputs = Input(shape=(input_dim,))
h = Dense(64, activation='relu')(inputs)
z_mean = Dense(2)(h)
z_log_var = Dense(2)(h)
z = Lambda(sampling)([z_mean, z_log_var])

decoder_h = Dense(64, activation='relu')(z)
outputs = Dense(input_dim)(decoder_h)

vae = Model(inputs, outputs)
vae.compile(optimizer='adam', loss='mse')

# Train VAE
vae.fit(X_scaled, X_scaled, epochs=50, batch_size=128)

# Generate synthetic data
X_synthetic = vae.predict(X_scaled)

# Combine real and synthetic data
X_balanced = np.concatenate([X_scaled, X_synthetic])
y_balanced = np.concatenate([y, y])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42)

# Train classifier
rf = RandomForestClassifier()
rf.fit(X_train, y_train)

# Evaluate
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred))



Epoch 1/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.5463
Epoch 2/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.2308
Epoch 3/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.1859
Epoch 4/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.1947
Epoch 5/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.1724
Epoch 6/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.1430
Epoch 7/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.1312
Epoch 8/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 0.1875
Epoch 9/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 0.1358
Epoch 10/50
[1m3341/3341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1