In [None]:
!pip install opencv-python

In [None]:
!pip install face-recognition

In [None]:
!pip install unrar
!unrar x "/content/videos.rar"

In [None]:
import cv2
import face_recognition
import os

def extract_unique_faces(video_path, output_dir, frame_step=10, tolerance=0.6):
    """
    Extracts one cropped image per unique face found in a video.

    Every ``frame_step``-th frame is scanned for faces. A face whose encoding
    matches none of the encodings already collected from this video is treated
    as new, cropped out of the frame and written to
    ``<output_dir>/face_<n>.jpg``. Progress is reported with ``print``.

    Args:
        video_path (str): Path of the video file to scan.
        output_dir (str): Directory for the face crops; created if missing.
        frame_step (int): Process every ``frame_step``-th frame. Defaults to 10.
        tolerance (float): Threshold forwarded to
            ``face_recognition.compare_faces``. Defaults to 0.6.

    Raises:
        ValueError: If ``frame_step`` is smaller than 1.
    """
    if frame_step < 1:
        raise ValueError("frame_step must be a positive integer")

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load the video
    video_capture = cv2.VideoCapture(video_path)
    known_face_encodings = []
    frame_count = 0
    face_count = 0

    try:
        # A missing or unreadable file does not raise on construction, so it
        # must be detected explicitly; otherwise it looks like an empty video.
        if not video_capture.isOpened():
            print(f"Warning: could not open video {video_path}; skipping.")
            return

        while True:
            # Read a frame from the video
            ret, frame = video_capture.read()
            if not ret:
                break

            frame_count += 1
            # Only every frame_step-th frame is processed to reduce computational load
            if frame_count % frame_step != 0:
                continue

            # Convert the frame from BGR (OpenCV format) to RGB (face_recognition format)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Find all face locations and encodings in the frame
            face_locations = face_recognition.face_locations(rgb_frame)
            face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

            for face_encoding, face_location in zip(face_encodings, face_locations):
                # Skip faces that were already captured from this video
                matches = face_recognition.compare_faces(
                    known_face_encodings, face_encoding, tolerance=tolerance
                )
                if any(matches):
                    continue

                # Crop from the original BGR frame because cv2.imwrite expects BGR
                top, right, bottom, left = face_location
                face_image = frame[top:bottom, left:right]
                if face_image.size == 0:
                    # A degenerate box cannot be encoded as an image
                    continue

                face_file_name = os.path.join(output_dir, f"face_{face_count + 1}.jpg")
                if not cv2.imwrite(face_file_name, face_image):
                    # Leave the face unregistered so a later frame can retry it
                    print(f"Warning: failed to write {face_file_name}")
                    continue

                # Register the face only once its crop is safely on disk
                known_face_encodings.append(face_encoding)
                face_count += 1
                print(f"Saved: {face_file_name}")
    finally:
        # Release the video capture object even if processing failed midway
        video_capture.release()

    print(f"Face extraction completed for {video_path}.")

def process_all_videos(input_folder, output_folder, video_count):
    """
    Runs face extraction on the videos ``1.mp4`` .. ``<video_count>.mp4``.

    Each video's faces are written to a subfolder of ``output_folder`` named
    after the video number. Numbers with no matching file are skipped with a
    message instead of being passed to the extractor.

    Args:
        input_folder (str): Folder containing the numbered ``.mp4`` files.
        output_folder (str): Base folder receiving one subfolder per video.
        video_count (int): Highest video number to process (inclusive).
    """
    for i in range(1, video_count + 1):
        video_path = os.path.join(input_folder, f"{i}.mp4")  # Assuming videos are named 1.mp4, 2.mp4, etc.
        if not os.path.isfile(video_path):
            # Avoid creating an empty output folder for a video that is absent
            print(f"Skipping missing video {video_path}.")
            continue

        video_output_dir = os.path.join(output_folder, str(i))  # Subfolder for each video
        print(f"Processing video {video_path}...")
        extract_unique_faces(video_path, video_output_dir)

# Example usage: extract unique faces from every numbered video.
# NOTE(review): the absolute /content path presumably targets Google Colab --
# confirm before running in another environment.
input_folder = "/content/videos"      # Folder containing the videos named 1.mp4, 2.mp4, ...
output_folder = "output_faces"     # Base folder for output face captures (relative to the working directory)
video_count = 259                   # Total number of videos; 1.mp4 through 259.mp4 are processed

# Writes one subfolder per video (named after the video number) under output_folder.
process_all_videos(input_folder, output_folder, video_count)


In [None]:

import shutil
import os

def zip_folder(folder_path, output_filename):
    """
    Zips a folder and its contents into ``<output_filename>.zip``.

    Failures are reported with ``print`` instead of being raised, so a failed
    archive does not abort the rest of the notebook.

    Args:
        folder_path (str): Directory whose contents are archived.
        output_filename (str): Archive path without the ``.zip`` extension.
    """
    # Validate up front: depending on the Python version, make_archive may
    # write an empty archive for a missing root directory instead of raising.
    if not os.path.isdir(folder_path):
        if os.path.exists(folder_path):
            print(f"An error occurred: '{folder_path}' is not a directory")
        else:
            print(f"Error: Folder '{folder_path}' not found.")
        return

    try:
        shutil.make_archive(output_filename, 'zip', folder_path)
    except FileNotFoundError:
        print(f"Error: Folder '{folder_path}' not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    else:
        print(f"Successfully zipped '{folder_path}' to '{output_filename}.zip'")


# Example usage: archive the 'output_faces' folder (the output_folder used by
# the extraction cell) as 'output_faces_zipped.zip'.
zip_folder("output_faces", "output_faces_zipped")

In [None]:
import face_recognition
import os

def deduplicate_faces(base_dir, tolerance=0.6):
    """
    Deduplicates faces across multiple video subfolders.

    Subfolders are visited in numeric order (non-numeric names follow in
    alphabetical order) and the images inside each one in sorted order, so the
    assigned IDs are reproducible. Hidden entries and plain files directly
    under ``base_dir`` are ignored. Each image is assigned to the closest
    already-known face within ``tolerance``; otherwise it starts a new
    influencer. Images in which no face is found are skipped.

    Args:
        base_dir (str): Directory containing subfolders with face images.
        tolerance (float): Maximum face distance for two images to count as
            the same person. Defaults to 0.6.

    Returns:
        dict: Mapping of Influencer ID to associated face image paths.
    """
    def _numeric_first(name):
        # Numeric folder names sort by value; anything else goes last by name
        try:
            return (0, int(name), name)
        except ValueError:
            return (1, 0, name)

    known_encodings = []
    influencer_mapping = {}

    # Filter before sorting so stray entries (e.g. .ipynb_checkpoints or an
    # archive file) can never reach the numeric conversion.
    subfolders = [
        name for name in os.listdir(base_dir)
        if not name.startswith(".") and os.path.isdir(os.path.join(base_dir, name))
    ]

    for subfolder in sorted(subfolders, key=_numeric_first):
        subfolder_path = os.path.join(base_dir, subfolder)

        for image_file in sorted(os.listdir(subfolder_path)):
            image_path = os.path.join(subfolder_path, image_file)
            if not os.path.isfile(image_path):
                continue

            # Load face image and compute encoding
            image = face_recognition.load_image_file(image_path)
            encodings = face_recognition.face_encodings(image)
            if len(encodings) == 0:
                continue
            encoding = encodings[0]

            # Pick the closest known face rather than the first one within
            # tolerance, which could attach the image to the wrong influencer.
            influencer_id = None
            if known_encodings:
                distances = face_recognition.face_distance(known_encodings, encoding)
                best = min(range(len(distances)), key=lambda i: distances[i])
                if distances[best] <= tolerance:
                    influencer_id = best

            if influencer_id is None:
                # Assign a new Influencer ID; IDs equal the index in known_encodings
                influencer_id = len(known_encodings)
                known_encodings.append(encoding)

            # Map influencer to their image
            influencer_mapping.setdefault(influencer_id, []).append(image_path)

    return influencer_mapping


In [None]:
def map_influencers_to_videos(influencer_mapping):
    """
    Maps influencers to their corresponding videos based on the face images.

    The video ID of an image is the name of the folder that directly contains
    it. Each influencer's IDs are de-duplicated and returned in a stable
    order: numeric IDs ascending, followed by any non-numeric IDs
    alphabetically.

    Args:
        influencer_mapping (dict): Mapping of Influencer ID to face image paths.

    Returns:
        dict: Mapping of Influencer ID to a list of video IDs (strings).
    """
    def _video_order(video_id):
        # Sort "2" before "10"; non-numeric names are placed after the numbers
        try:
            return (0, int(video_id), video_id)
        except ValueError:
            return (1, 0, video_id)

    influencer_to_videos = {}
    for influencer_id, image_paths in influencer_mapping.items():
        video_ids = {os.path.basename(os.path.dirname(path)) for path in image_paths}
        # Sorting removes the arbitrary ordering of set iteration
        influencer_to_videos[influencer_id] = sorted(video_ids, key=_video_order)
    return influencer_to_videos


In [None]:
import pandas as pd

def calculate_average_performance(influencer_to_videos, performance_data):
    """
    Calculates the average performance for each influencer.

    Video IDs are compared by their string form, so IDs taken from folder
    names (strings such as ``"12"``) match a numeric ``Video ID`` column.
    An influencer with no matching videos gets ``NaN``.

    Args:
        influencer_to_videos (dict): Mapping of Influencer ID to video IDs.
        performance_data (pd.DataFrame): DataFrame containing the columns
            'Video ID' and 'Performance'.

    Returns:
        pd.DataFrame: DataFrame with the columns 'Influencer ID' and
            'Average Performance'; both columns are present even when
            ``influencer_to_videos`` is empty.
    """
    # Normalise once, outside the loop. Without this, string IDs never equal
    # integer column values and every average silently becomes NaN.
    # NOTE(review): a float-typed 'Video ID' column would stringify as "1.0"
    # and still not match "1" -- confirm the column dtype.
    video_id_keys = performance_data['Video ID'].astype(str)
    performance = performance_data['Performance']

    results = []
    for influencer_id, video_ids in influencer_to_videos.items():
        # Filter performance data for videos associated with this influencer
        wanted = {str(video_id) for video_id in video_ids}
        video_performances = performance[video_id_keys.isin(wanted)]

        # Calculate average performance
        avg_performance = video_performances.mean()
        results.append({'Influencer ID': influencer_id, 'Average Performance': avg_performance})

    return pd.DataFrame(results, columns=['Influencer ID', 'Average Performance'])


In [None]:
from PIL import Image

def save_influencer_faces_with_performance(influencer_mapping, performance_summary, output_dir):
    """
    Save influencer faces with their average performance in the output directory.

    For every row of ``performance_summary`` the influencer's first face image
    is re-saved as ``<influencer_id>_avg_<average, 2 decimals>.jpg``.

    Args:
        influencer_mapping (dict): Mapping of Influencer ID to face image paths.
        performance_summary (pd.DataFrame): DataFrame with Influencer ID and average performance.
        output_dir (str): Directory to save the results.

    Raises:
        KeyError: If an Influencer ID from the summary is not in the mapping.
        IndexError: If an influencer has an empty image list.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Iterate the two columns directly: iterrows() would build one Series per
    # row and upcast integer IDs to float, giving names like "0.0_avg_1.23.jpg".
    rows = zip(
        performance_summary['Influencer ID'],
        performance_summary['Average Performance'],
    )
    for influencer_id, avg_performance in rows:
        # Use the first face image of the influencer
        image_path = influencer_mapping[influencer_id][0]

        # Save with average performance in filename
        output_path = os.path.join(output_dir, f"{influencer_id}_avg_{avg_performance:.2f}.jpg")

        # The context manager closes the source file once the copy is written
        with Image.open(image_path) as img:
            img.save(output_path)
        print(f"Saved: {output_path}")
