# Photo Organiser

This notebook is meant to run as an uninterrupted, server-style job: it finds all the faces in the photos of a folder and groups them.


In [None]:
# With pip:
!pip install facenet-pytorch



In [None]:
import os
from PIL import Image
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from torchvision.transforms import ToPILImage
import matplotlib.pyplot as plt
import numpy as np
# Initialize the MTCNN face detector and the InceptionResnetV1 embedding model.
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Detector configured for 160-pixel crops with no margin, a 20-pixel minimum face size
# and detection thresholds of 0.5 / 0.6 / 0.6.
# NOTE(review): keep_all is not passed, so the library default applies — presumably only
# one face per image is returned; confirm against the facenet-pytorch documentation.
mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20, thresholds=[0.5, 0.6, 0.6], factor=0.709, device=device)
# Embedding network loaded with the 'vggface2' pretrained weights, switched to eval mode
# and moved to the same device as the detector.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

In [None]:
def save_filenames_to_txt(folder_path, output_file):
    """Write the path of every file found under ``folder_path`` to ``output_file``.

    The folder is walked recursively with ``os.walk``; each file path is written
    on its own line, in the order the walk yields it. No filtering by file type
    is applied. ``output_file`` is overwritten if it already exists.

    Args:
        folder_path: Root directory to walk.
        output_file: Path of the text file that receives the listing.
    """
    with open(output_file, 'w') as listing:
        for current_dir, _subdirs, names in os.walk(folder_path):
            # One joined path per line, same order as os.walk reports the names.
            listing.writelines(os.path.join(current_dir, name) + '\n' for name in names)
    print(f"All filenames saved to {output_file}")




In [None]:
def get_embedding(image_path):
    """Detect faces in an image and compute an embedding for each confident one.

    The image is opened, converted to RGB and passed to the module-level
    ``mtcnn`` detector. Every detected face whose probability is above 0.9 is
    converted to a PIL thumbnail and embedded with the module-level ``resnet``.

    The detector output is normalised to a batch before iterating, so both a
    single ``[3, H, W]`` face (scalar probability) and a stacked
    ``[N, 3, H, W]`` batch (one probability per face) are handled. Iterating a
    single 3-D face directly would walk over its colour channels instead of
    over faces.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A tuple ``(embeddings, face_images)`` of two equally long lists: the
        CPU embedding tensors returned by ``resnet`` (one per accepted face,
        each with a leading batch dimension of 1) and the matching PIL face
        crops. Both lists are empty when nothing was detected or accepted.
    """
    img = Image.open(image_path).convert('RGB')
    img_cropped_list, probs = mtcnn(img, return_prob=True)

    embeddings = []
    face_images = []

    if img_cropped_list is None:
        print(f"No faces detected or low probability for image: {image_path}")
        return embeddings, face_images

    # A single face comes back as [C, H, W]; promote it to a batch of one so the
    # loop below always iterates over faces, never over colour channels.
    if img_cropped_list.ndimension() == 3:
        img_cropped_list = img_cropped_list.unsqueeze(0)
    # Probabilities may be a Python float, a numpy scalar or an array.
    probs = np.atleast_1d(probs)

    # NOTE(review): facenet-pytorch standardises crops as (x - 127.5) / 128 when
    # post_process is enabled (its default) — confirm against the library docs.
    standardized = getattr(mtcnn, 'post_process', True)

    for img_cropped, prob in zip(img_cropped_list, probs):
        if prob is None or not prob > 0.9:
            continue

        if img_cropped.ndimension() != 3 or img_cropped.shape[0] != 3:
            print(f"Unexpected tensor shape: {img_cropped.shape}")
            continue

        # Undo the standardisation to get displayable 0-255 pixel values.
        pixels = img_cropped.detach().float().cpu()
        if standardized:
            pixels = pixels * 128.0 + 127.5
        img_cropped_np = (
            pixels.clamp(0, 255)
            .round()
            .byte()
            .permute(1, 2, 0)  # [C, H, W] -> [H, W, C]
            .contiguous()
            .numpy()
        )
        face_images.append(Image.fromarray(img_cropped_np))

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            embedding = resnet(img_cropped.unsqueeze(0).to(device)).detach().cpu()
        embeddings.append(embedding)

    return embeddings, face_images


In [None]:
def loading_existing_embedding(face_folder):
    """Load every saved embedding found directly inside ``face_folder``.

    Args:
        face_folder: Directory that is listed (non-recursively) for ``.pt`` files.

    Returns:
        A dict mapping each ``.pt`` file name without its extension to the
        object returned by ``torch.load`` for that file. Entries follow the
        order in which ``os.listdir`` reports the files.
    """
    print("Loading existing embeddings...")
    tensor_files = (name for name in os.listdir(face_folder) if name.endswith('.pt'))
    # NOTE(review): torch.load may unpickle arbitrary objects — only point this at
    # folders whose .pt files were written by this notebook.
    return {
        os.path.splitext(name)[0]: torch.load(os.path.join(face_folder, name))
        for name in tensor_files
    }


def is_new_face(face_folder,new_embedding, threshold=0.6):
    """Decide whether ``new_embedding`` belongs to a face that is already stored.

    All embeddings saved in ``face_folder`` are loaded and compared with
    ``new_embedding`` using the Euclidean norm of their difference. The closest
    stored embedding is selected, so the result does not depend on the order in
    which the files are listed.

    Args:
        face_folder: Directory holding the saved ``.pt`` embeddings.
        new_embedding: Embedding tensor of the face being checked.
        threshold: Distance below which two embeddings count as the same face.

    Returns:
        ``(False, key)`` when the nearest stored embedding is closer than
        ``threshold``, where ``key`` is that embedding's file name without
        extension; otherwise ``(True, count)`` where ``count`` is the number of
        stored embeddings.
    """
    existing_embeddings = loading_existing_embedding(face_folder)

    best_key, best_distance = None, None
    for key, existing_embedding in existing_embeddings.items():
        distance = torch.norm(new_embedding - existing_embedding).item()
        # Track the nearest neighbour rather than stopping at the first hit.
        if best_distance is None or distance < best_distance:
            best_key, best_distance = key, distance

    if best_distance is not None and best_distance < threshold:
        print(f'Distance with {best_key}: {best_distance:.4f}')
        return False, best_key  # Known face: report the most similar key

    return True, len(existing_embeddings)  # New face: report the current count





In [None]:
def process_embeddings(file_list_path,face_folder, done_scan_path="done_scan"):
    """Scan the listed images and group them by the faces they contain.

    For every path in ``file_list_path`` that is not yet recorded in the
    done-scan file, the faces are extracted with ``get_embedding`` and each one
    is checked with ``is_new_face``:

    * a new face gets three files in ``face_folder``: ``face_N.jpg`` (the
      crop), ``face_N.pt`` (the embedding) and ``face_N.txt`` (the list of
      images it appears in, starting with the current one);
    * a known face gets the current image path appended to its existing
      ``<key>.txt`` file.

    After an image has been handled its path is appended to the done-scan
    file, so an interrupted run can resume where it stopped. Paths that do not
    exist are reported and skipped without being recorded.

    Args:
        file_list_path: Text file with one image path per line.
        face_folder: Directory where face crops, embeddings and tracking files
            are stored.
        done_scan_path: File recording the images already processed. Defaults
            to ``"done_scan"`` in the current working directory.
    """
    # A set keeps the "already processed?" lookup constant-time per file.
    already_scanned = set()
    if os.path.exists(done_scan_path):
        with open(done_scan_path, 'r') as done_file:
            already_scanned = {line.strip() for line in done_file}

    with open(file_list_path, 'r') as list_file:
        candidates = (line.strip() for line in list_file)
        # Blank lines are not paths; drop them together with finished files.
        filenames = [name for name in candidates if name and name not in already_scanned]

    for image_path in filenames:
        if not os.path.exists(image_path):
            print(f"File not found: {image_path}")
            continue

        print(f"Processing: {image_path}")
        embeddings, faces = get_embedding(image_path)

        for face_embedding, img_cropped_pil in zip(embeddings, faces):
            is_new, count_or_key = is_new_face(face_folder, face_embedding)

            if is_new:
                print("New face detected, saving...")

                # Start from count + 1 and skip numbers already on disk, so a gap
                # in the numbering never overwrites an existing face.
                face_index = count_or_key + 1
                while os.path.exists(os.path.join(face_folder, f'face_{face_index}.pt')):
                    face_index += 1

                # Save the cropped face image
                face_path = os.path.join(face_folder, f'face_{face_index}.jpg')
                img_cropped_pil.save(face_path)
                print(f'New face detected and saved as: {face_path}')

                # Save the embedding
                embedding_path = os.path.join(face_folder, f'face_{face_index}.pt')
                torch.save(face_embedding, embedding_path)
                print(f'Embedding saved as: {embedding_path}')

                # Create a new file to track occurrences of this face
                tracking_file = os.path.join(face_folder, f'face_{face_index}.txt')
                with open(tracking_file, 'w') as tracking:
                    tracking.write(image_path + '\n')
                print(f'Tracking file created for face: {tracking_file}')
            else:
                # Append the current image path to the file of the most similar face
                tracking_file = os.path.join(face_folder, f'{count_or_key}.txt')
                with open(tracking_file, 'a') as tracking:
                    tracking.write(image_path + '\n')
                print(f'Appended image path to tracking file: {tracking_file}')

        # Record progress after each image so a restart skips finished work.
        with open(done_scan_path, 'a') as done_file:
            done_file.write(image_path + '\n')



In [None]:
# Directory that receives the face crops, embeddings and per-face tracking files;
# created on first run, reused afterwards.
face_folder = './faces'
os.makedirs(face_folder, exist_ok=True)
# NOTE(review): hard-coded absolute input path (looks like a Colab location) —
# change it to the photo folder of the current environment.
folder_path = '/content/sample_data/photos'
# Text file that will list every file found under folder_path, one path per line.
output_file = 'folder1file.txt'
save_filenames_to_txt(folder_path, output_file)

# Feed the listing that was just written into the face-grouping pass.
file_list_path = output_file
process_embeddings(file_list_path,face_folder)

All filenames saved to folder1file.txt
Processing: /content/sample_data/photos/luc-van-loon-aiaNuzedKkE-unsplash.jpg
hello
Loading existing embeddings...
Distance with face_1: 0.0000
Appended image path to tracking file: ./faces/face_1.txt
Processing: /content/sample_data/photos/audrey-m-jackson-cAFpd2vqnPE-unsplash.jpg
hello
Loading existing embeddings...
Distance with face_2: 0.0000
Appended image path to tracking file: ./faces/face_2.txt
Processing: /content/sample_data/photos/omid-armin-yZwrmzKGKZA-unsplash.jpg
hello
Loading existing embeddings...
Distance with face_3: 0.0000
Appended image path to tracking file: ./faces/face_3.txt
Processing: /content/sample_data/photos/alexis-chloe-TYDkKEgc0Fg-unsplash.jpg
hello
Loading existing embeddings...
Distance with face_4: 0.0000
Appended image path to tracking file: ./faces/face_4.txt
Processing: /content/sample_data/photos/zulmaury-saavedra-Z9GKfYmWOAM-unsplash.jpg
hello
Loading existing embeddings...
Distance with face_5: 0.0000
Append