# **Identificación de Imágenes Auténticas y Sintéticas: Abordando los Desafíos de las Imágenes Sintéticas en la Sociedad Actual**

**Contenido**
1. [Configuración del Entorno](#title1)
2. [Construcción del Conjunto de Datos](#title2)
3. [Creación del Modelo](#title3)

## Configuración del Entorno<a name="title1"></a>

In [1]:
import gc
import os

# Export the allow-growth flag before TensorFlow is imported so it is already
# present in the environment when TensorFlow starts up.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import tensorflow as tf
from keras import backend as K

# Report the visible GPUs and the TensorFlow version in use.
gpus = tf.config.list_physical_devices('GPU')
print(gpus)
print(tf.__version__)

# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all.
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Configuración de GPU completada")
    except RuntimeError as e:
        # Reported rather than re-raised so the rest of the notebook still runs.
        print(e)

# Confirm that the GPU is still visible after configuration.
print("¿GPU está disponible?:", tf.config.list_physical_devices('GPU'))

# Start from a clean Keras backend state and run a garbage-collection pass;
# the value returned by gc.collect() is the cell's displayed output.
K.clear_session()
gc.collect()

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
2.10.0
Configuración de GPU completada
¿GPU está disponible?: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


0

In [2]:
# Housekeeping cell: reset the Keras backend state and run the garbage collector.
# Clear the backend session
K.clear_session()
# Force a garbage-collection pass (its return value is shown as the cell output)
gc.collect()

105

In [2]:
import numpy as np
import pandas as pd 
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import shutil
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.utils import class_weight
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight


## Construcción y Preprocesamiento del Conjunto de Datos<a name="title2"></a>

Fuentes: https://www.kaggle.com/datasets/kaustubhdhote/human-faces-dataset y https://www.kaggle.com/datasets/hamzaboulahia/hardfakevsrealfaces/code

In [3]:


# Source dataset: one sub-folder per class.
dataset_path = './Human Faces Dataset'
real_images_path = os.path.join(dataset_path, 'Real Images')
ai_generated_images_path = os.path.join(dataset_path, 'AI-Generated Images')

# Working area that receives the train/val/test copies.
output_path = './kaggle/working/'
train_path, val_path, test_path = (
    os.path.join(output_path, split_name) for split_name in ('train', 'val', 'test')
)

# Create both class folders inside every split directory (no error if present).
for path in (train_path, val_path, test_path):
    for class_folder in ('Real Images', 'AI-Generated Images'):
        os.makedirs(os.path.join(path, class_folder), exist_ok=True)

# Fraction of each class assigned to train, validation and test.
train_split, val_split, test_split = 0.7, 0.15, 0.15

def split_and_copy_images(source_dir, dest_dirs, split_ratios, seed=42):
    """Copy the files of ``source_dir`` into train/validation/test folders.

    File names are sorted and then shuffled with a private, seeded random
    generator, so the same inputs always produce the same partition. Because
    existing copies are never removed, a reproducible partition is what keeps
    a re-run of the notebook from placing one image in two different splits.

    Args:
        source_dir: Directory whose files are distributed. Entries that are
            not regular files (e.g. sub-directories) are skipped.
        dest_dirs: Three existing directories, ordered (train, validation, test).
        split_ratios: Fractions for (train, validation, test). Only the first
            two size the splits; every remaining file goes to the test folder.
        seed: Seed for the shuffle. Pass ``None`` to obtain a different random
            partition on every call.
    """
    # Sort before shuffling: os.listdir gives no ordering guarantee, and a
    # seeded shuffle is only reproducible when it starts from a fixed order.
    images = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    # A private generator leaves the global `random` state untouched.
    random.Random(seed).shuffle(images)

    train_size = int(len(images) * split_ratios[0])
    val_size = int(len(images) * split_ratios[1])

    for i, img in enumerate(images):
        if i < train_size:
            dest_dir = dest_dirs[0]
        elif i < train_size + val_size:
            dest_dir = dest_dirs[1]
        else:
            dest_dir = dest_dirs[2]

        shutil.copy(os.path.join(source_dir, img), os.path.join(dest_dir, img))

# Distribute each class folder over the train/val/test directories,
# real images first and AI-generated images second.
for class_source, class_folder in (
    (real_images_path, 'Real Images'),
    (ai_generated_images_path, 'AI-Generated Images'),
):
    split_and_copy_images(
        class_source,
        [os.path.join(split_root, class_folder) for split_root in (train_path, val_path, test_path)],
        [train_split, val_split, test_split],
    )

In [4]:


# Input resolution fed to the network and number of images per batch.
img_height, img_width = 224, 224
batch_size = 32

# NOTE(review): pixels are scaled to [0, 1] here, while the ImageNet ResNet50
# weights used later are normally paired with resnet50.preprocess_input —
# confirm that the 1/255 rescaling is intentional.

# Training images are rescaled and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

# Validation and test images are only rescaled.
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary'
)

val_generator = val_datagen.flow_from_directory(
    val_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    test_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    # flow_from_directory shuffles by default. Keep the test files in directory
    # order so that predictions produced from this generator line up with
    # test_generator.classes when metrics are computed.
    shuffle=False
)

Found 6741 images belonging to 2 classes.
Found 1444 images belonging to 2 classes.
Found 1445 images belonging to 2 classes.


## Creación del Modelo<a name="title3"></a>



In [5]:
# ResNet50 backbone with ImageNet weights and without its classification head.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Binary classification head: pooled features -> 128 ReLU units -> 1 sigmoid unit.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Keras layers are trainable by default, so marking only the last 10 layers as
# trainable changes nothing on its own. Freeze the earlier backbone layers
# explicitly so that only the last 10 (plus the new head) are fine-tuned.
for layer in base_model.layers[:-10]:
    layer.trainable = False
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Compile after changing the trainable flags so the setting takes effect.
model.compile(optimizer=Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                              

In [None]:
# Integer label of every training image, as exposed by the generator.
true_classes = train_generator.classes

# One 'balanced' weight per distinct label, then keyed by position so it can be
# passed to fit() as a {class_index: weight} mapping.
balanced_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(true_classes),
    y=true_classes,
)
class_weights = {class_index: weight for class_index, weight in enumerate(balanced_weights)}

# Show the weights that will be applied.
print("Pesos de las clases:", class_weights)

# Train for 15 epochs, monitoring the validation generator after each epoch.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    class_weight=class_weights,
    epochs=15,
)

Pesos de las clases: {0: 1.0399568034557236, 1: 0.963}
Epoch 1/15
Epoch 2/15

In [None]:
# Evaluate the model on the test set.
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

# Predict batch by batch and keep each batch's labels next to its outputs.
# test_generator.classes is in directory order, whereas a shuffling generator
# yields images in a different order; pairing labels and predictions per batch
# keeps them aligned whatever the generator's shuffle setting is.
test_generator.reset()  # restart the generator
batch_probabilities, batch_labels = [], []
for batch_index in range(len(test_generator)):
    images, labels = test_generator[batch_index]
    batch_probabilities.append(model.predict_on_batch(images))
    batch_labels.append(labels)
predictions = np.concatenate(batch_probabilities)
# Sigmoid outputs above 0.5 are assigned to class 1.
predicted_classes = (predictions > 0.5).astype(int).reshape(-1)

# True labels (in the same order as the predictions) and the class names.
true_classes_test = np.concatenate(batch_labels).astype(int)
class_labels = list(test_generator.class_indices.keys())

# Classification report.
report = classification_report(true_classes_test, predicted_classes, target_names=class_labels)
print("Classification Report:")
print(report)

# Confusion matrix.
cm = confusion_matrix(true_classes_test, predicted_classes)
print("Confusion Matrix")
print(cm)

# Draw the confusion matrix as an annotated heatmap with class names on both axes.
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
heatmap_ax.set_ylabel('Actual')
heatmap_ax.set_xlabel('Predicted')
heatmap_ax.set_title('Matriz de Confusión')
plt.show()

# Plot the training history: accuracy on the left panel, loss on the right.
plt.figure(figsize=(14, 5))

history_panels = (
    ('accuracy', 'val_accuracy', 'Precisión del Modelo', 'Precisión'),
    ('loss', 'val_loss', 'Pérdida del Modelo', 'Pérdida'),
)
for panel_number, (train_key, val_key, panel_title, y_label) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_number)
    # Training curve first, validation curve second.
    plt.plot(history.history[train_key], label='Entrenamiento')
    plt.plot(history.history[val_key], label='Validación')
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Época')
    plt.legend(loc='upper left')

plt.show()