In [None]:
%pip install -U --upgrade tensorflow

In [None]:

import sys
import sklearn
import tensorflow as tf
import cv2
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from plotly.offline import iplot
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt


In [None]:
# Notebook-wide matplotlib text sizes: 14pt for the base font, axis labels,
# axis titles and legends; 10pt for the tick labels on both axes.
plt.rcParams.update({
    'font.size': 14,
    'axes.labelsize': 14,
    'axes.titlesize': 14,
    'legend.fontsize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
})

In [None]:

import os

def get_data(path='../input/deepfake-faces/metadata.csv'):
    """Read the dataset's metadata table from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the metadata CSV. The default is the path this notebook
        has always read, so ``get_data()`` behaves exactly as before.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, one row per line of the file.
    """
    return pd.read_csv(path)
# Load the full metadata table; everything below is derived from it.
meta = get_data()

# A notebook cell only renders its *last* expression, so intermediate
# inspections have to be printed explicitly or their output is lost.
print(meta.head())
print('metadata shape:', meta.shape)
print('(FAKE, REAL) rows:',
      (int((meta['label'] == 'FAKE').sum()), int((meta['label'] == 'REAL').sum())))

# Balance the classes: draw the same number of rows from each label, with a
# fixed seed so the sample is reproducible. `DataFrame.sample` raises a
# ValueError if a class holds fewer than `sample_size` rows.
sample_size = 8000
real_df = meta[meta["label"] == "REAL"].sample(sample_size, random_state=42)
fake_df = meta[meta["label"] == "FAKE"].sample(sample_size, random_state=42)

# REAL rows first, then FAKE rows; the original index labels are kept.
sample_meta = pd.concat([real_df, fake_df])


In [None]:


from sklearn.model_selection import train_test_split

# First hold out 20% of the balanced sample as the test set, then carve 30%
# of the remainder off as the validation set. Both splits are stratified on
# the label so every subset keeps the same REAL/FAKE ratio.
Train_set, Test_set = train_test_split(
    sample_meta, test_size=0.2, random_state=42, stratify=sample_meta['label'])
Train_set, Val_set = train_test_split(
    Train_set, test_size=0.3, random_state=42, stratify=Train_set['label'])

# A previous run recorded ((8960, 5), (3840, 5), (3200, 5)) for these shapes.
print('train / validation / test shapes:', Train_set.shape, Val_set.shape, Test_set.shape)

# Per-split class counts in the same order as the x-axis categories below:
# y[0] holds the REAL counts and y[1] the FAKE counts.
y = {
    0: [int((split['label'] == 'REAL').sum()) for split in (Train_set, Val_set, Test_set)],
    1: [int((split['label'] == 'FAKE').sum()) for split in (Train_set, Val_set, Test_set)],
}

trace0 = go.Bar(
    x=['Train Set', 'Validation Set', 'Test Set'],
    y=y[0],
    name='REAL',
    marker=dict(color='#33cc33'),
    opacity=0.7,
    text=y[0],  # Adding hover text
    hoverinfo='text+y'
)

trace1 = go.Bar(
    x=['Train Set', 'Validation Set', 'Test Set'],
    y=y[1],
    name='FAKE',
    marker=dict(color='#ff3300'),
    opacity=0.7,
    text=y[1],  # Adding hover text
    hoverinfo='text+y'
)

data = [trace0, trace1]
layout = go.Layout(
    title='Count of classes in each set',
    xaxis={'title': 'Set'},
    yaxis={'title': 'Count'},
    barmode='group'  # Optional: specify the bar mode
)

# Render the grouped bar chart; without this call the traces and the layout
# are built but nothing is ever shown.
iplot(go.Figure(data=data, layout=layout))


In [None]:
import matplotlib.pyplot as plt
# Preview 25 training faces (rows 25-49 of the training split) in a 5x5 grid,
# each captioned with its label.
preview_rows = Train_set.iloc[25:50]

plt.figure(figsize=(15,15))
for cur, (video_name, label) in enumerate(zip(preview_rows['videoname'], preview_rows['label'])):
    plt.subplot(5,5,cur+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)

    # The face crop shares the video's file name, with the last four
    # characters (the extension) swapped for '.jpg'.
    image_path = '../input/deepfake-faces/faces_224/' + video_name[:-4] + '.jpg'
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    # OpenCV decodes to BGR while matplotlib expects RGB; convert so the
    # colours are not channel-swapped on screen.
    plt.imshow(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))

    plt.xlabel('FAKE Image' if label == 'FAKE' else 'REAL Image')

plt.show()


In [None]:

def retreive_dataset(set_name):
    """Load the face images and binary labels for one data split.

    Parameters
    ----------
    set_name : pandas.DataFrame
        Rows to load. Needs a ``videoname`` column (the image file is that
        name with its last four characters replaced by ``.jpg``) and a
        ``label`` column.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)``: the arrays returned by ``cv2.imread`` gathered
        with ``np.array``, and an array holding 1 for ``'FAKE'`` rows and 0
        for every other label. Both are empty arrays for an empty split.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot read one of the image files.
    """
    images, labels = [], []
    for img, imclass in zip(set_name['videoname'], set_name['label']):
        image_path = '../input/deepfake-faces/faces_224/' + img[:-4] + '.jpg'
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of letting None slip into the image array.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        images.append(image)
        labels.append(1 if imclass == 'FAKE' else 0)

    return np.array(images), np.array(labels)

# Materialise every split as an (images, labels) pair of arrays, loading them
# in train -> validation -> test order.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    retreive_dataset(split) for split in (Train_set, Val_set, Test_set)
)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose
from tensorflow.keras.models import Model

def build_generator(input_shape=(224, 224, 3), output_shape=(224, 224, 3), latent_dim=100):
    """Build the encoder / sampler / decoder image generator.

    The encoder applies two stride-2 convolutions, flattens, and projects to a
    mean and a log-variance vector. A latent vector is sampled from them and
    the decoder upsamples it through two stride-2 transposed convolutions.

    Parameters
    ----------
    input_shape : tuple of int, optional
        ``(height, width, channels)`` of the input image.
    output_shape : tuple of int, optional
        ``(height, width, channels)`` of the generated image. Height and
        width must be multiples of 4 because the decoder doubles the spatial
        size twice. The default reproduces the former fixed 56x56 decoder
        seed and 3 output channels.
    latent_dim : int, optional
        Size of the sampled latent vector.

    Returns
    -------
    Model
        Maps an input image to a generated image with sigmoid-activated
        values.

    Raises
    ------
    ValueError
        If the height or width in ``output_shape`` is not divisible by 4.
    """
    out_height, out_width, out_channels = output_shape
    if out_height % 4 or out_width % 4:
        raise ValueError(
            f'output_shape height and width must be multiples of 4, got {tuple(output_shape)}')
    # Each of the two transposed convolutions doubles the spatial size, so the
    # decoder has to start from a quarter of the requested resolution.
    seed_height, seed_width = out_height // 4, out_width // 4

    input_image = Input(shape=input_shape, name='input_image')

    # Encoder
    x = Conv2D(64, kernel_size=4, strides=2, padding='same')(input_image)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2D(128, kernel_size=4, strides=2, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    # Flatten for latent representation
    x = Flatten()(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # Sampler: z = mean + exp(0.5 * log_var) * epsilon with epsilon ~ N(0, 1)
    def sampling(args):
        mean, log_var = args
        epsilon = tf.random.normal(shape=tf.shape(mean))
        return mean + tf.exp(0.5 * log_var) * epsilon

    z = tf.keras.layers.Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    # Decoder
    x = Dense(seed_height * seed_width * 128)(z)
    x = Reshape((seed_height, seed_width, 128))(x)
    x = Conv2DTranspose(64, kernel_size=4, strides=2, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2DTranspose(out_channels, kernel_size=4, strides=2, padding='same', activation='sigmoid')(x)

    return Model(input_image, x)

def build_discriminator(input_shape=(224, 224, 3)):
    """Build the convolutional discriminator.

    Two stride-2 convolutions (64 then 128 filters), each followed by a
    LeakyReLU, feed a flattened single-unit sigmoid output.

    Parameters
    ----------
    input_shape : tuple of int, optional
        ``(height, width, channels)`` of the images to score.

    Returns
    -------
    Model
        Maps an image to one sigmoid-activated value.
    """
    image_in = Input(shape=input_shape, name='input_image')

    features = image_in
    for filters in (64, 128):
        features = Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = LeakyReLU(alpha=0.2)(features)

    flat = Flatten()(features)
    score = Dense(1, activation='sigmoid')(flat)

    return Model(image_in, score)

def build_bicyclegan(generator, discriminator, input_shape=(224, 224, 3)):
    """Wire a generator and a discriminator into one three-output model.

    Parameters
    ----------
    generator : callable
        Maps the input image tensor to a generated image tensor.
    discriminator : callable
        Maps an image tensor to a validity score. It is applied to both the
        input image and the generated image.
    input_shape : tuple of int, optional
        ``(height, width, channels)`` of the combined model's input. The
        default is the shape that used to be hard-coded here and matches the
        defaults of ``build_generator`` and ``build_discriminator``.

    Returns
    -------
    Model
        Takes an image and outputs, in this order,
        ``[validity_real, validity_fake, fake_image]``.
    """
    input_image = Input(shape=input_shape, name='input_image')

    fake_image = generator(input_image)
    validity_real = discriminator(input_image)
    validity_fake = discriminator(fake_image)

    return Model(input_image, [validity_real, validity_fake, fake_image])
# Instantiate both networks with their default shapes and wire them into the
# combined three-output model.
generator = build_generator()
discriminator = build_discriminator()
bicycle_gan = build_bicyclegan(generator, discriminator)

# Losses and weights are positional, one per model output:
# [validity_real, validity_fake, fake_image]. The reconstruction (mse) term
# is weighted 10x relative to each of the two validity terms.
bicycle_gan.compile(
    loss=['binary_crossentropy', 'binary_crossentropy', 'mse'],
    loss_weights=[1, 1, 10],  # Adjust weights for different losses
    optimizer='adam',
    metrics=['accuracy'],
)
bicycle_gan.summary()

In [None]:
import numpy as np

# Optimizers: one Adam instance per network so each keeps its own moment
# estimates and step counter. Recent Keras releases also reject variables an
# optimizer was not built with, so a single shared instance cannot serve both.
discriminator_optimizer = tf.keras.optimizers.Adam()
generator_optimizer = tf.keras.optimizers.Adam()
optimizer = discriminator_optimizer  # original name kept bound for any later cell

# Loss functions. The discriminator ends in a sigmoid, so the cross-entropy
# must consume probabilities (from_logits=False), not logits.
binary_crossentropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)
mse_loss = tf.keras.losses.MeanSquaredError()

# Metrics: discriminator accuracy on the dataset images plus running means of
# both losses, so the per-epoch report covers every batch, not just the last.
accuracy = tf.keras.metrics.BinaryAccuracy()
discriminator_loss_mean = tf.keras.metrics.Mean()
generator_loss_mean = tf.keras.metrics.Mean()

# Number of epochs and batch size
epochs = 10
batch_size = 32

# Per-epoch history, read by the plotting cell that follows.
discriminator_losses, generator_losses, accuracies = [], [], []

# Training loop
for epoch in range(epochs):
    for batch_start in range(0, len(X_train), batch_size):
        batch_end = min(batch_start + batch_size, len(X_train))

        # Scale pixels to [0, 1] so they are comparable with the generator's
        # sigmoid output. Converting per batch avoids holding a float copy of
        # the whole training set in memory.
        real_images_batch = X_train[batch_start:batch_end].astype('float32') / 255.0
        real_labels_batch = y_train[batch_start:batch_end, np.newaxis].astype('float32')

        # Label 1 encodes FAKE (see retreive_dataset), so generated images,
        # which are treated as fake, are labelled with ones.
        fake_labels_batch = np.ones_like(real_labels_batch)

        with tf.GradientTape() as disc_tape, tf.GradientTape() as gen_tape:
            # Generate images from the current batch
            generated_images = generator(real_images_batch, training=True)

            real_predictions = discriminator(real_images_batch, training=True)
            fake_predictions = discriminator(generated_images, training=True)

            # Discriminator loss: classify dataset images by their label and
            # generated images as fake.
            real_loss = binary_crossentropy(real_labels_batch, real_predictions)
            fake_loss = binary_crossentropy(fake_labels_batch, fake_predictions)
            total_discriminator_loss = real_loss + fake_loss

            # Generator loss: the discriminator should give the generated image
            # the label of its source image, and the image should reconstruct
            # its input.
            generator_loss = binary_crossentropy(real_labels_batch, fake_predictions)
            reconstruction_loss = mse_loss(real_images_batch, generated_images)
            total_generator_loss = generator_loss + 10 * reconstruction_loss  # Adjust weights as needed

        # Calculate gradients
        discriminator_gradients = disc_tape.gradient(total_discriminator_loss, discriminator.trainable_variables)
        generator_gradients = gen_tape.gradient(total_generator_loss, generator.trainable_variables)

        # Apply gradients
        discriminator_optimizer.apply_gradients(zip(discriminator_gradients, discriminator.trainable_variables))
        generator_optimizer.apply_gradients(zip(generator_gradients, generator.trainable_variables))

        # Update metrics, reusing the predictions computed inside the tape
        accuracy.update_state(real_labels_batch, real_predictions)
        discriminator_loss_mean.update_state(total_discriminator_loss)
        generator_loss_mean.update_state(total_generator_loss)

    # Record and print the epoch averages
    epoch_discriminator_loss = float(discriminator_loss_mean.result())
    epoch_generator_loss = float(generator_loss_mean.result())
    epoch_accuracy = float(accuracy.result())
    discriminator_losses.append(epoch_discriminator_loss)
    generator_losses.append(epoch_generator_loss)
    accuracies.append(epoch_accuracy)

    print(f'Epoch {epoch+1}/{epochs}, Discriminator Loss: {epoch_discriminator_loss}, '
          f'Generator Loss: {epoch_generator_loss}, Accuracy: {epoch_accuracy}')

    # Reset metrics at the end of each epoch (`reset_states` no longer exists
    # in Keras 3; `reset_state` is the supported name).
    accuracy.reset_state()
    discriminator_loss_mean.reset_state()
    generator_loss_mean.reset_state()

In [None]:
# Plot the per-epoch discriminator loss, generator loss and accuracy on one
# shared axis, with epochs numbered from 1.
fig, ax = plt.subplots(figsize=(10, 5))
epoch_axis = range(1, epochs+1)
for curve, curve_label in (
    (discriminator_losses, 'Discriminator Loss'),
    (generator_losses, 'Generator Loss'),
    (accuracies, 'Accuracy'),
):
    ax.plot(epoch_axis, curve, label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss/Accuracy')
ax.set_title('Training Results')
ax.legend()
ax.grid(True)
plt.show()