# Prime Video Backend

## This code processes a video by first segmenting it into fixed-length clips and then running YOLOv5 object detection on each segment. It reports duplicate images among the detected crops (they are listed, not deleted) and deletes the crop folders named 'person'. It then keeps only one image per detected category in each segment and moves the remaining images into per-segment folders. This workflow analyzes and organizes the objects detected in the video before the final version is prepared for uploading to Prime Video.

In [None]:
import cv2
import os
import subprocess
from PIL import Image
import imagehash
import shutil

def segment_video(video_path, segment_duration, output_folder):
    """Split a video into consecutive fixed-length MP4 segments.

    Frames are read sequentially from ``video_path`` and written to files
    named ``segment_<n>.mp4`` (numbered from 1) inside ``output_folder``. A new
    file is started every ``segment_duration`` seconds' worth of frames.

    Args:
        video_path: Path of the video file to read.
        segment_duration: Length of each segment in seconds; must be positive.
        output_folder: Directory for the segment files; created if missing.

    Returns:
        The segment file paths in the order they were started, or an empty
        list when the video cannot be opened or its frame rate / the requested
        duration is not positive.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error opening video file.")
        cap.release()
        return []

    segment_paths = []
    out = None
    try:
        # Keep the reported frame rate as a float: truncating e.g. 29.97 to 29
        # changes the playback speed of the segments, and truncating a rate
        # below 1 to 0 would make the modulo below divide by zero.
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # "not x > 0" (rather than "x <= 0") also rejects NaN.
        if not frame_rate > 0 or not segment_duration > 0:
            print("Error: frame rate and segment duration must be positive.")
            return []

        frames_per_segment = max(1, round(frame_rate * segment_duration))
        os.makedirs(output_folder, exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_index % frames_per_segment == 0:
                # Segment boundary: close the previous writer, open the next.
                if out is not None:
                    out.release()
                segment_index = len(segment_paths) + 1
                output_path = os.path.join(
                    output_folder, f"segment_{segment_index}.mp4"
                )
                out = cv2.VideoWriter(
                    output_path, fourcc, frame_rate, (frame_width, frame_height)
                )
                print(f"Starting segment {segment_index} at frame {frame_index}")
                segment_paths.append(output_path)

            out.write(frame)
            frame_index += 1
    finally:
        # Release the handles even if reading or writing raised.
        if out is not None:
            out.release()
        cap.release()

    print("Video segmentation completed.")

    return segment_paths

def find_duplicate_images(root_folder):
    """Find images below ``root_folder`` that duplicate an earlier image.

    Every file whose name ends in ``.jpeg``, ``.png``, ``.jpg`` or ``.bmp``
    (case-insensitive) is hashed with ``imagehash.average_hash``. A file whose
    hash was already seen is compared against the earlier files with that hash
    via ``are_images_similar``; the first match is recorded.

    Directories and files are visited in sorted order so that the choice of
    "original" versus "duplicate" does not depend on filesystem listing order.

    Args:
        root_folder: Directory to scan recursively.

    Returns:
        A list of ``(duplicate_path, original_path)`` tuples. Files that cannot
        be opened or hashed are reported on stdout and skipped.
    """
    # Dotted suffixes: a bare 'png' would also match names such as 'archivepng'.
    image_extensions = ('.jpeg', '.png', '.jpg', '.bmp')
    image_hashes = {}
    duplicates = []

    for root, dirs, files in os.walk(root_folder):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for file in sorted(files):
            if not file.lower().endswith(image_extensions):
                continue
            file_path = os.path.join(root, file)

            try:
                with Image.open(file_path) as img:
                    hash_value = imagehash.average_hash(img)
            except (OSError, ValueError) as e:
                print(f"Skipping {file_path}: {e}")
                continue

            known_files = image_hashes.setdefault(hash_value, [])
            for original_file in known_files:
                if are_images_similar(file_path, original_file):
                    duplicates.append((file_path, original_file))
                    break
            known_files.append(file_path)

    return duplicates

def are_images_similar(file1, file2):
    """Return True when both image files have the same average hash.

    Args:
        file1: Path of the first image.
        file2: Path of the second image.

    Returns:
        True if the two average hashes are equal; False if they differ or if
        either file cannot be opened or hashed.
    """
    try:
        with Image.open(file1) as first_image:
            with Image.open(file2) as second_image:
                first_hash = imagehash.average_hash(first_image)
                second_hash = imagehash.average_hash(second_image)
    except (OSError, IOError, ValueError):
        # An unreadable image is treated as "not similar" rather than an error.
        return False
    return first_hash == second_hash

def remove_folder(folder_path):
    """Delete ``folder_path`` and everything inside it, reporting the outcome.

    Failures are printed rather than raised, so the caller's loop keeps going.

    Args:
        folder_path: Directory to remove recursively.
    """
    try:
        shutil.rmtree(folder_path)
    except OSError as error:
        print(f"Error removing folder {folder_path}: {error}")
    else:
        print(f"Removed folder {folder_path} and its contents")

# Pipeline configuration: source video, segment length in seconds, and the
# folder that receives the segment files.
video_path = 'godzilla.mp4'
segment_duration = 20
output_folder = 'output_segments'

segmented_paths = segment_video(video_path, segment_duration, output_folder)

# Run YOLOv5's detect.py once per segment.
for segment_path in segmented_paths:
    # Pass the arguments as a list (no shell) so a segment path containing
    # spaces or shell metacharacters reaches detect.py as a single argument.
    command = [
        "python", "detect.py",
        "--weights", "yolov5s.pt",
        "--source", segment_path,
        "--view-img",
        "--save-crop",
        "--vid-stride", "10",
        "--save-conf",
    ]
    result = subprocess.run(command)
    if result.returncode == 0:
        print(f"Processed {segment_path} with YOLOv5 detection")
    else:
        # Do not claim success when the detector exited with an error.
        print(
            f"YOLOv5 detection failed for {segment_path} "
            f"(exit code {result.returncode})"
        )

# Report (but do not delete) duplicate crops found under this crops folder.
root_folder = 'runs/detect/exp/crops'
duplicates = find_duplicate_images(root_folder)

if not duplicates:
    print("No duplicate images found.")
else:
    print("Duplicate images found:")
    for dup in duplicates:
        # Each entry is a (duplicate_path, original_path) pair.
        duplicate_path, original_path = dup
        print(f"{duplicate_path} is a duplicate of {original_path}")

# Drop the 'person' crops from every detection run directory ('exp*').
base_folder = 'runs/detect'
for folder in os.listdir(base_folder):
    if not folder.startswith('exp'):
        continue
    person_folder = os.path.join(base_folder, folder, 'crops', 'person')
    if not os.path.exists(person_folder):
        print(f"The folder {person_folder} does not exist.")
        continue
    remove_folder(person_folder)

root_folder = 'runs/detect'
# (sic) The misspelled name is kept because the call further down passes it.
rooot_folder = 'runs/detect'


def keep_one_image_per_folder(rooot_folder):
    """Keep only the first file in every crop category folder.

    For each ``exp*`` directory directly under ``rooot_folder``, every
    sub-folder of its ``crops`` directory is reduced to a single entry: the
    file whose name sorts first is kept and the rest are deleted. Deletion
    failures are printed and do not stop the run.

    This function used to be defined twice in a row with equivalent bodies;
    the second definition silently replaced the first. The two are merged
    here, and the parameter keeps the spelling of the definition that was
    actually in effect.

    Args:
        rooot_folder: Directory containing the ``exp*`` run directories.
    """
    for base_folder in os.listdir(rooot_folder):
        if not base_folder.startswith('exp'):
            continue

        crops_folder = os.path.join(rooot_folder, base_folder, 'crops')
        if not os.path.isdir(crops_folder):
            # A run directory without a 'crops' folder has nothing to thin
            # out; listing it would raise and abort the remaining runs.
            continue

        for folder_name in os.listdir(crops_folder):
            folder_path = os.path.join(crops_folder, folder_name)
            if not os.path.isdir(folder_path):
                continue

            # Everything after the first name in sorted order is surplus.
            for file in sorted(os.listdir(folder_path))[1:]:
                file_path = os.path.join(folder_path, file)
                try:
                    os.remove(file_path)
                    print(f"Removed {file_path}")
                except OSError as e:
                    print(f"Error removing file {file_path}: {e}")

def move_images_to_segment_folder(rot_folder, output_folder):
    """Move the cropped images of each detection run into a segment folder.

    For every ``exp*`` directory directly under ``rot_folder``, all image
    files found below its ``crops`` directory are moved into
    ``<output_folder>/segment<N>`` and renamed ``image1.jpg``,
    ``image2.jpg``, ... in sorted traversal order. ``N`` is the suffix that
    follows ``exp`` in the run directory name.

    Args:
        rot_folder: Directory containing the ``exp*`` run directories.
        output_folder: Directory that receives the ``segment<N>`` folders;
            created if missing.
    """
    # Dotted suffixes: a bare 'jpg' would also match names such as 'listjpg'.
    image_extensions = ('.jpeg', '.png', '.jpg', '.bmp')
    os.makedirs(output_folder, exist_ok=True)

    for base_folder in sorted(os.listdir(rot_folder)):
        if not base_folder.startswith('exp'):
            continue

        crops_folder = os.path.join(rot_folder, base_folder, 'crops')

        # A bare 'exp' has no numeric suffix and previously produced a folder
        # called just 'segment'; treat it as run number 1.
        # NOTE(review): assumes the detector names runs 'exp', 'exp2',
        # 'exp3', ... (no 'exp1') - confirm against the YOLOv5 version in use.
        segment_index = base_folder[len('exp'):] or '1'
        segment_folder = os.path.join(output_folder, f'segment{segment_index}')
        os.makedirs(segment_folder, exist_ok=True)

        image_counter = 1
        for root, dirs, files in os.walk(crops_folder):
            dirs.sort()  # stable descent order gives reproducible numbering
            for file in sorted(files):
                if not file.lower().endswith(image_extensions):
                    continue
                src_file_path = os.path.join(root, file)
                dest_file_path = os.path.join(
                    segment_folder, f"image{image_counter}.jpg"
                )

                shutil.move(src_file_path, dest_file_path)
                print(f"Moved {src_file_path} to {dest_file_path}")
                image_counter += 1

# Final stage: thin the crops down to one image per category, then collect
# what is left into one folder per segment.
rot_folder = 'runs/detect'
output_folder = 'segments'

keep_one_image_per_folder(rooot_folder)
move_images_to_segment_folder(rot_folder, output_folder)



## Find the product description on Amazon


In [None]:
import textwrap
import google.generativeai as genai
from PIL import Image
import os

# Read the Gemini API key from the environment instead of embedding it in the
# notebook: a key stored in source is exposed to everyone who can read the file.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and revoked.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running this cell."
    )
genai.configure(api_key=api_key)
model = genai.GenerativeModel('gemini-pro-vision')

# Function to convert text to Markdown format
def to_markdown(text):
    """Return ``text`` as a Markdown block quote.

    Each ' • ' sequence is replaced by '*', then every line (blank lines
    included) is prefixed with '>'.
    """
    starred = text.replace(' • ', '*')
    quoted_lines = ['>' + line for line in starred.splitlines(True)]
    return ''.join(quoted_lines)

# Function to process images in a directory and save Amazon URLs
def process_images(directory):
    """Write an Amazon search URL next to every image below ``directory``.

    Each ``.jpg`` / ``.jpeg`` / ``.png`` file (case-insensitive) is sent to
    the module-level ``model`` with a prompt asking for a two-word search
    term. The resulting search URL is saved as ``<image name>.txt`` in the
    same folder as the image.

    Args:
        directory: Folder to scan recursively for images.
    """
    # Local import so this cell does not rely on an import made elsewhere.
    from urllib.parse import quote_plus

    prompt = "Analyse the image and give me the searchword term to search in amazon.only 2 words no need sentence or explanation i repeat only searchword no sentence like The image is a photo of a flower. The flower is white and has a yello...."

    for root, _, files in os.walk(directory):
        for filename in files:
            if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
                continue
            img_path = os.path.join(root, filename)

            # Ask the model for a search term; the context manager closes the
            # image file once the response has been fully received.
            with Image.open(img_path) as img:
                response = model.generate_content([prompt, img], stream=True)
                response.resolve()

            # Build the Amazon URL. quote_plus encodes spaces as '+' and also
            # escapes characters such as '&' or newlines that would otherwise
            # corrupt the query string.
            amazon_search_term = response.text.strip()
            amazon_url = f"https://www.amazon.com/s?k={quote_plus(amazon_search_term)}"

            # Save the Amazon URL to a text file next to the image
            url_file_path = os.path.join(root, f"{os.path.splitext(filename)[0]}.txt")
            with open(url_file_path, 'w', encoding='utf-8') as url_file:
                url_file.write(amazon_url)

            print(f"Processed {img_path} and saved Amazon URL to {url_file_path}")

# Directory containing one subfolder of images per segment
cracked_parts_dir = "./segments/"

# Process each subfolder; plain files directly inside the directory are skipped
for subdir in os.listdir(cracked_parts_dir):
    subdir_path = os.path.join(cracked_parts_dir, subdir)
    if not os.path.isdir(subdir_path):
        continue
    process_images(subdir_path)


## Generate Search Term


In [None]:
import textwrap
import google.generativeai as genai
from IPython.display import display, Markdown
import PIL.Image
import os

# Read the Gemini API key from the environment instead of embedding it in the
# notebook: a key stored in source is exposed to everyone who can read the file.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and revoked.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running this cell."
    )
genai.configure(api_key=api_key)
model = genai.GenerativeModel('gemini-pro-vision')

# Function to convert text to Markdown format
def to_markdown(text):
    """Wrap ``text`` as a block-quoted ``Markdown`` display object.

    Each ' • ' sequence is replaced by '*', then every line (blank lines
    included) is prefixed with '>' before being passed to ``Markdown``.
    """
    starred = text.replace(' • ', '*')
    quoted = ''.join('>' + line for line in starred.splitlines(True))
    return Markdown(quoted)

# Function to process images in a directory
def process_images(directory):
    """Save and display a search term for every image below ``directory``.

    Each ``.jpg`` / ``.jpeg`` / ``.png`` file (case-insensitive) is sent to
    the module-level ``model`` with a prompt asking for a two-word Amazon
    search term. The stripped response is written to
    ``<image name>_search_term.txt`` next to the image, and the raw response
    is rendered with ``display``.

    Args:
        directory: Folder to scan recursively for images.
    """
    prompt = "Analyse the image and give me the searchword term to search in amazon.only 2 words no need sentence or explanation i repeat only searchword no sentence like The image is a photo of a flower. The flower is white and has a yello...."

    for root, _, files in os.walk(directory):
        for filename in files:
            if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
                continue
            img_path = os.path.join(root, filename)

            # Ask the model for a search term; the context manager closes the
            # image file once the response has been fully received.
            with PIL.Image.open(img_path) as img:
                response = model.generate_content([prompt, img], stream=True)
                response.resolve()

            search_term = response.text.strip()

            # Save the search term in a text file next to the image
            search_term_file = os.path.join(root, f"{os.path.splitext(filename)[0]}_search_term.txt")
            with open(search_term_file, 'w', encoding='utf-8') as f:
                f.write(search_term)

            # Display the model's answer in the notebook
            display(to_markdown(response.text))

            # The Amazon URL that used to be assembled here was never stored,
            # returned or printed, so that dead computation has been removed.

# Directory containing one subfolder of images per segment
cracked_parts_dir = "./segments/"

# Process each subfolder; plain files directly inside the directory are skipped
for subdir in os.listdir(cracked_parts_dir):
    subdir_path = os.path.join(cracked_parts_dir, subdir)
    if not os.path.isdir(subdir_path):
        continue
    process_images(subdir_path)
