In [1]:
from PIL import Image
from torch.utils.data import Dataset
import os
import random
from torchvision import transforms
import torch
import torch.nn as nn
import torch.optim as optim
import timm
import numpy as np
import torcheval


In [17]:
# Chemin vers le dossier principal contenant les données
data_folder_train = './project_data/train'
data_folder_validation = './project_data/val'
# Chemin vers le fichier texte de sortie
train_file = './train.txt'
val_file = "./val.txt"
test_file = "./test.txt"

def file_exist(path) :
    if os.path.exists(path):
        os.remove(path)
        print(f"Le fichier {path} a été supprimé avec succès.")
    else:
        print(f"Le fichier {path} n'existe pas.")

def get_labels_from_path(image_path, base_folder):
    # Obtenez le chemin relatif par rapport au dossier principal
    relative_path = os.path.relpath(image_path, start=base_folder)
    # Séparez le chemin relatif en éléments
    path_elements = os.path.dirname(relative_path).split(os.path.sep)
    label = path_elements[0]
    if label == "FakeManipulation-1" or label == "FakeManipulation-2" :
        return "0.5"
    elif label == "FakeManipulation-3" or label == "FakeManipulation-4" :
        return "0.75"
    elif label == "FakeManipulation-5" :
        return "1"
    elif label == "Real-1" or label == "Real-2" or label == "Real-3" or label == "Real-4" :
        return "0"
    else :
        print("error")



# Ouvrir le fichier en mode écriture
def create_txt(data_folder, train) :
    if (train) :
        output_txt_file = train_file
        txt_file = open(output_txt_file, 'a')
        # Parcourir les dossiers et sous-dossiers
        for root, dirs, files in os.walk(data_folder):
            for file in files:
                # Vérifier si le fichier est une image (vous pouvez ajuster les extensions selon votre cas)
                if file.endswith('.jpg') :
                    # Chemin complet du fichier
                    image_path = os.path.join(root, file)

                    # Obtenir les labels à partir du chemin du fichier ou de toute autre méthode appropriée
                    labels = get_labels_from_path(image_path, data_folder)

                    # Écrire dans le fichier texte
                    txt_file.write(f'{image_path} {" ".join(map(str, labels))}\n')
                else :
                    print("Error")
        txt_file.close()
    else :
        indice = 0
        output_txt_file_val = val_file
        output_txt_file_test = test_file
        txt_file_val = open(output_txt_file_val, 'a')
        txt_file_test = open(output_txt_file_test, 'a')

        for root, dirs, files in os.walk(data_folder):
            for file in files:
                indice += 1
                # Vérifier si le fichier est une image (vous pouvez ajuster les extensions selon votre cas)
                if file.endswith('.jpg') :
                    # Chemin complet du fichier
                    image_path = os.path.join(root, file)

                    # Obtenir les labels à partir du chemin du fichier ou de toute autre méthode appropriée
                    labels = get_labels_from_path(image_path, data_folder)
                    # Écrire dans le fichier texte
                    if indice % 2 == 0 :
                        txt_file_val.write(f'{image_path} {labels}\n')     
                    else :               
                        txt_file_test.write(f'{image_path} {labels}\n')     
                else :
                    print("Error")
        text_file_val.close()
        text_file_test.close()


file_exist(train_file)
file_exist(val_file)
file_exist(test_file)

create_txt(data_folder_train, 1)
create_txt(data_folder_validation, 0)

Le fichier ./train.txt a été supprimé avec succès.
Le fichier ./val.txt a été supprimé avec succès.
Le fichier ./test.txt a été supprimé avec succès.


In [3]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Définir les transformations
transform_augmented = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])



In [18]:
"""
Author: Honggu Liu
"""
class MyDataset(Dataset):
    def __init__(self, txt_path, transform=None, target_transform=None):
        fh = open(txt_path, 'r')
        imgs = []
        for line in fh:
            line = line.rstrip()
            words = line.split(sep=' ')
            print(words)
            imgs.append((words[0], float(words[1])))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.classes = ['Real', 'FakeManipulation-1', 'FakeManipulation-2', 'FakeManipulation-3']
        


    def __getitem__(self, index):
        fn, label = self.imgs[index]
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label
    

    def __len__(self):
        return len(self.imgs)

In [19]:


# Créer des ensembles de données
train_data = MyDataset(txt_path=train_file, transform=transform_augmented)
val_data = MyDataset(txt_path=val_file, transform=transform)
test_data = MyDataset(txt_path=test_file, transform=transform)

# Créer des chargeurs de données
batch_size = 32
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(dataset=val_data, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(dataset=test_data, batch_size=32, shuffle=True, num_workers=4)

#Vérifier si la data augmentation fonctionne
print(len(train_data))
print(len(train_loader.dataset))

total_samples_after_augmentation = len(train_loader) * batch_size
print("Nombre total d'échantillons après augmentation :", total_samples_after_augmentation)





['./project_data/train/FakeManipulation-2/acd59eaa27acc007792336467741fee5/b6259f63c82293a4fc665de4b89c5261/frame00079.jpg', '0', '.', '5']
['./project_data/train/FakeManipulation-2/acd59eaa27acc007792336467741fee5/b6259f63c82293a4fc665de4b89c5261/frame00099.jpg', '0', '.', '5']
['./project_data/train/FakeManipulation-2/acd59eaa27acc007792336467741fee5/b6259f63c82293a4fc665de4b89c5261/frame00010.jpg', '0', '.', '5']
['./project_data/train/FakeManipulation-2/acd59eaa27acc007792336467741fee5/b6259f63c82293a4fc665de4b89c5261/frame00101.jpg', '0', '.', '5']
['./project_data/train/FakeManipulation-2/acd59eaa27acc007792336467741fee5/b6259f63c82293a4fc665de4b89c5261/frame00115.jpg', '0', '.', '5']
['./project_data/train/FakeManipulation-2/acd59eaa27acc007792336467741fee5/b6259f63c82293a4fc665de4b89c5261/frame00062.jpg', '0', '.', '5']
['./project_data/train/FakeManipulation-2/acd59eaa27acc007792336467741fee5/b6259f63c82293a4fc665de4b89c5261/frame00055.jpg', '0', '.', '5']
['./project_data/tra

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Obtenez quelques échantillons du DataLoader
sample_images, _ = next(iter(train_loader))

# Définir la fonction d'affichage
def show_images(images, title):
    num_images = images.size(0)
    fig, axs = plt.subplots(1, num_images, figsize=(12, 4))

    for i in range(num_images):
        axs[i].imshow(np.transpose(images[i].numpy(), (1, 2, 0)))
        axs[i].axis('off')
        axs[i].set_title(title)

    plt.show()

# Afficher quelques images avant et après l'augmentation
show_images(sample_images, title='Original Images')
show_images(transform_augmented(sample_images), title='Augmented Images')


In [None]:

def is_correct(pred, label):
    if label == 0 and pred == 0:
        return True
    elif (label == 0 and pred != 0) :                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
        return False
    elif (label != 0 and pred == 0) :
        return False
    else :
        return True


# Charger EfficientNet B3
model = timm.create_model('efficientnet_b3', pretrained=True)


# Modifier la couche de sortie pour correspondre au nombre de classes de votre tâche
num_classes = len(train_data.classes)
model.classifier = nn.Linear(model.classifier.in_features, num_classes)

# Définir le périphérique
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Définir le critère de perte et l'optimiseur
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)



# Entraînement du modèle (à adapter en fonction de vos besoins)
num_epochs = 10
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward et optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()    

        if (i+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')


# Enregistrer le modèle
torch.save(model.state_dict(), 'model.ckpt')

# Charger le modèle
model.load_state_dict(torch.load('model.ckpt'))
model.eval()

# Test le model
with torch.no_grad():
    correct = 0
    total = 0
    missed = 0
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        if is_correct(predicted, labels) :
            correct += 1
        else :
            missed += 1
        



print(f'Accuracy of the model on the test images: {correct / total}%')
print(f'Recall of the model on the test images: {correct / (correct + missed)}%')
print(f'Precision of the model on the test images: {correct / (correct + (total - correct))}%')



In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.optim import lr_scheduler
import argparse
import os
import cv2

from network.models import model_selection
from network.mesonet import Meso4, MesoInception4
from dataset.transform import xception_default_data_transforms
from dataset.mydataset import MyDataset
def main():
	args = parse.parse_args()
	name = args.name
	continue_train = args.continue_train
	train_list = args.train_list
	val_list = args.val_list
	epoches = args.epoches
	batch_size = args.batch_size
	model_name = args.model_name
	model_path = args.model_path
	output_path = os.path.join('./output', name)
	if not os.path.exists(output_path):
		os.mkdir(output_path)
	torch.backends.cudnn.benchmark=True
	train_dataset = MyDataset(txt_path=train_list, transform=xception_default_data_transforms['train'])
	val_dataset = MyDataset(txt_path=val_list, transform=xception_default_data_transforms['val'])
	train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=8)
	val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=8)
	train_dataset_size = len(train_dataset)
	val_dataset_size = len(val_dataset)
	model = model_selection(modelname='xception', num_out_classes=2, dropout=0.5)
	if continue_train:
		model.load_state_dict(torch.load(model_path))
	model = model.cuda()
	criterion = nn.CrossEntropyLoss()
	optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08)
	scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
	model = nn.DataParallel(model)
	best_model_wts = model.state_dict()
	best_acc = 0.0
	iteration = 0
	for epoch in range(epoches):
		print('Epoch {}/{}'.format(epoch+1, epoches))
		print('-'*10)
		model.train()
		train_loss = 0.0
		train_corrects = 0.0
		val_loss = 0.0
		val_corrects = 0.0
		for (image, labels) in train_loader:
			iter_loss = 0.0
			iter_corrects = 0.0
			image = image.cuda()
			labels = labels.cuda()
			optimizer.zero_grad()
			outputs = model(image)
			_, preds = torch.max(outputs.data, 1)
			loss = criterion(outputs, labels)
			loss.backward()
			optimizer.step()
			iter_loss = loss.data.item()
			train_loss += iter_loss
			iter_corrects = torch.sum(preds == labels.data).to(torch.float32)
			train_corrects += iter_corrects
			iteration += 1
			if not (iteration % 20):
				print('iteration {} train loss: {:.4f} Acc: {:.4f}'.format(iteration, iter_loss / batch_size, iter_corrects / batch_size))
		epoch_loss = train_loss / train_dataset_size
		epoch_acc = train_corrects / train_dataset_size
		print('epoch train loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

		model.eval()
		with torch.no_grad():
			for (image, labels) in val_loader:
				image = image.cuda()
				labels = labels.cuda()
				outputs = model(image)
				_, preds = torch.max(outputs.data, 1)
				loss = criterion(outputs, labels)
				val_loss += loss.data.item()
				val_corrects += torch.sum(preds == labels.data).to(torch.float32)
			epoch_loss = val_loss / val_dataset_size
			epoch_acc = val_corrects / val_dataset_size
			print('epoch val loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
			if epoch_acc > best_acc:
				best_acc = epoch_acc
				best_model_wts = model.state_dict()
		scheduler.step()
		#if not (epoch % 40):
		torch.save(model.module.state_dict(), os.path.join(output_path, str(epoch) + '_' + model_name))
	print('Best val Acc: {:.4f}'.format(best_acc))
	model.load_state_dict(best_model_wts)
	torch.save(model.module.state_dict(), os.path.join(output_path, "best.pkl"))




if __name__ == '__main__':
	parse = argparse.ArgumentParser(
		formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parse.add_argument('--name', '-n', type=str, default='fs_xception_c0_299')
	parse.add_argument('--train_list', '-tl' , type=str, default = './data_list/FaceSwap_c0_train.txt')
	parse.add_argument('--val_list', '-vl' , type=str, default = './data_list/FaceSwap_c0_val.txt')
	parse.add_argument('--batch_size', '-bz', type=int, default=64)
	parse.add_argument('--epoches', '-e', type=int, default='20')
	parse.add_argument('--model_name', '-mn', type=str, default='fs_c0_299.pkl')
	parse.add_argument('--continue_train', type=bool, default=False)
	parse.add_argument('--model_path', '-mp', type=str, default='./output/df_xception_c0_299/1_df_c0_299.pkl')
	main()

In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.optim import lr_scheduler
import argparse
import os
import cv2
from network.models import model_selection
from dataset.transform import xception_default_data_transforms
from dataset.mydataset import MyDataset
def main():
	args = parse.parse_args()
	test_list = args.test_list
	batch_size = args.batch_size
	model_path = args.model_path
	torch.backends.cudnn.benchmark=True
	test_dataset = MyDataset(txt_path=test_list, transform=xception_default_data_transforms['test'])
	test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=8)
	test_dataset_size = len(test_dataset)
	corrects = 0
	acc = 0
	#model = torchvision.models.densenet121(num_classes=2)
	model = model_selection(modelname='xception', num_out_classes=2, dropout=0.5)
	model.load_state_dict(torch.load(model_path))
	if isinstance(model, torch.nn.DataParallel):
		model = model.module
	model = model.cuda()
	model.eval()
	with torch.no_grad():
		for (image, labels) in test_loader:
			image = image.cuda()
			labels = labels.cuda()
			outputs = model(image)
			_, preds = torch.max(outputs.data, 1)
			corrects += torch.sum(preds == labels.data).to(torch.float32)
			print('Iteration Acc {:.4f}'.format(torch.sum(preds == labels.data).to(torch.float32)/batch_size))
		acc = corrects / test_dataset_size
		print('Test Acc: {:.4f}'.format(acc))



if __name__ == '__main__':
	parse = argparse.ArgumentParser(
		formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parse.add_argument('--batch_size', '-bz', type=int, default=32)
	parse.add_argument('--test_list', '-tl', type=str, default='./data_list/Deepfakes_c0_test.txt')
	parse.add_argument('--model_path', '-mp', type=str, default='./pretrained_model/df_c0_best.pkl')
	main()
	print('Hello world!!!')

In [None]:
def test(model, data_path):
    Accuracy, Recall, Precision, AUC = 0, 0, 0
    """
    You need to finish this function.
    """
    return Accuracy, Recall, Precision, AUC