## Downloading videos listed in the spreadsheet to Google Drive

In [None]:
import os
import pandas as pd
import requests

# Step 1: Load Excel file
excel_path = "/content/Assignment Data.xlsx"  # Replace with your file path
data = pd.read_excel(excel_path)

# Step 2: Create a directory to store the videos
dataset_folder = "/content/drive/MyDrive/assi_Data_video/video_dataset"
os.makedirs(dataset_folder, exist_ok=True)

# Step 3: Prepare a list for metadata (one record per successfully downloaded video)
metadata = []

# Seconds to wait for the server to connect / send data before giving up,
# so a single unresponsive URL cannot stall the whole loop.
request_timeout_seconds = 30

# Step 4: Loop through the data and download videos
for index, row in data.iterrows():
    score = row['Performance']  # Adjust column name to match Excel
    video_url = row['Video URL']  # Adjust column name to match Excel

    # The file name is derived from the DataFrame index of the row
    video_filename = f"{index}_video.mp4"
    video_path = os.path.join(dataset_folder, video_filename)
    # Download into a temporary sibling file first; it is only renamed to the
    # final path once the whole stream has been written.
    partial_path = video_path + ".part"

    try:
        # Stream and save video
        with requests.get(video_url, stream=True, timeout=request_timeout_seconds) as response:
            response.raise_for_status()  # Raise an error for bad status codes
            with open(partial_path, "wb") as video_file:
                for chunk in response.iter_content(chunk_size=8192):  # Download in chunks
                    video_file.write(chunk)

        # Publish the finished download under its final name
        os.replace(partial_path, video_path)

        # Append to metadata
        metadata.append({"video_path": video_path, "score": score})
        print(f"Downloaded: {video_filename}")

    except Exception as e:
        # Best effort: report the failure and continue with the next row.
        # Remove the incomplete temp file so a truncated download is never
        # mistaken for a usable video.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print(f"Error downloading video from {video_url}: {e}")

# Step 5: Save metadata to a CSV file
metadata_file = os.path.join(dataset_folder, "metadata.csv")
pd.DataFrame(metadata).to_csv(metadata_file, index=False)
print(f"Metadata saved at: {metadata_file}")


## Creating a metadata CSV file containing video names and scores

In [None]:
import pandas as pd

# Read back the metadata CSV written by the download step
metadata_path = "/content/drive/MyDrive/assi_Data_video/video_dataset/metadata.csv"
metadata = pd.read_csv(metadata_path)

# Derive the bare file name (last path component) of every stored video path
metadata['video_name'] = metadata['video_path'].map(os.path.basename)

# Keep just the name and score columns, in that order; 'video_path' is dropped
metadata = metadata[['video_name', 'score']]

# Write the trimmed table next to the original metadata file
updated_metadata_path = "/content/drive/MyDrive/assi_Data_video/video_dataset/updated_metadata.csv"
metadata.to_csv(updated_metadata_path, index=False)
print(f"Updated metadata saved at: {updated_metadata_path}")


Updated metadata saved at: /content/drive/MyDrive/assi_Data_video/video_dataset/updated_metadata.csv


In [None]:
pip install face_recognition opencv-python-headless pandas deepface moviepy imagehash scikit-learn


Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting imagehash
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K  

### Importing Required Libraries

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from moviepy.editor import VideoFileClip
import face_recognition
from sklearn.cluster import DBSCAN
from collections import defaultdict
from PIL import Image
import imagehash


  if event.key is 'enter':



## Removing Duplicate Videos from the Dataset using a video hash

In [None]:
def extract_video_hash(video_path, sample_every=30):
    """Generate a perceptual hash for a video based on its frames.

    Frames are read sequentially; every ``sample_every``-th frame is resized
    to 200x200, hashed with ``imagehash.phash`` and the resulting hash strings
    are concatenated in frame order.

    Args:
        video_path: Path of the video file passed to ``cv2.VideoCapture``.
        sample_every: Sampling interval in frames. Defaults to 30, the value
            that was previously hard-coded. Must be a positive integer.

    Returns:
        str: The concatenated per-frame hash strings. The result is an empty
        string when no frame was sampled, i.e. fewer than ``sample_every``
        frames could be read from the file.

    Raises:
        ValueError: If ``sample_every`` is smaller than 1.
    """
    if sample_every < 1:
        # A zero interval would otherwise surface as a ZeroDivisionError
        # from the modulo below.
        raise ValueError("sample_every must be a positive integer")

    video = cv2.VideoCapture(video_path)
    frame_hashes = []
    frame_count = 0

    try:
        while True:
            ret, frame = video.read()
            if not ret:
                break
            frame_count += 1

            # Sample one frame every `sample_every` frames
            if frame_count % sample_every == 0:
                frame = cv2.resize(frame, (200, 200))  # Resize for uniform hashing
                frame_hashes.append(str(imagehash.phash(Image.fromarray(frame))))
    finally:
        # Always release the capture handle, even if resizing/hashing raised
        video.release()

    return "".join(frame_hashes)

def remove_duplicates(video_paths, hash_func=None):
    """Remove duplicate videos based on hash comparison.

    Videos are processed in the given order; the first video seen for each
    distinct hash is kept and later videos with the same hash are dropped.

    A video whose hash is empty could not be fingerprinted (no frame was
    sampled from it). Such videos are always kept and never compared with each
    other; otherwise every unreadable or very short video would collide on the
    empty string and all but the first would be discarded as "duplicates".

    Args:
        video_paths: Iterable of video file paths.
        hash_func: Optional callable mapping a path to a hashable fingerprint.
            Defaults to ``extract_video_hash``.

    Returns:
        list: The paths of the videos that were kept, in input order.
    """
    if hash_func is None:
        hash_func = extract_video_hash

    seen_hashes = set()
    unique_videos = []

    for video_path in video_paths:
        video_hash = hash_func(video_path)

        if not video_hash:
            # No fingerprint available: keep the video rather than guess.
            print(f"Warning: could not compute a hash for {video_path}; keeping it")
            unique_videos.append(video_path)
            continue

        if video_hash not in seen_hashes:
            seen_hashes.add(video_hash)
            unique_videos.append(video_path)

    return unique_videos


In [None]:
# Reload the per-video metadata produced by the download step
metadata_file = "/content/drive/MyDrive/assi_Data_video/video_dataset/metadata.csv"
metadata = pd.read_csv(metadata_file)

# Pull the two columns out as plain Python lists
scores = metadata['score'].tolist()
video_paths = metadata['video_path'].tolist()

# Step 1: Remove duplicates (keeps the first video seen for each hash)
unique_videos = remove_duplicates(video_paths)

# Step 2: Extract faces and recognize unique influencers
# Both accumulators start out empty here.
influencer_data, all_encodings = [], []



## Saving the unique videos to Google Drive

In [None]:
import os
import shutil

# Destination directory on Drive that receives the de-duplicated videos
unique_videos_folder = "/content/drive/MyDrive/assi_Data_video/unique_videos"
# exist_ok=True makes this a no-op when the folder is already present
os.makedirs(unique_videos_folder, exist_ok=True)

# Mirror every unique video into the destination under its original file name
for video_path in unique_videos:
    # The tail of the split is the file name component of the path
    _, video_name = os.path.split(video_path)
    destination_path = os.path.join(unique_videos_folder, video_name)
    shutil.copy(video_path, destination_path)
    print(f"Copied {video_name} to {unique_videos_folder}")

print("All unique videos have been saved to the new folder.")

Copied 0_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 1_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 2_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 3_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 5_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 6_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 7_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 11_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 12_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 14_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 15_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 18_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 23_video.mp4 to /content/drive/MyDrive/assi_Data_video/unique_videos
Copied 24_video.mp4