### Collect and store images

In [10]:
import cv2
import mediapipe as mp
import numpy as np
import os

# Ask who is being recorded; the annotated frames are stored in a folder
# named after that person inside the current working directory.
name = input("Enter your name: ").strip()
if not name:
    # os.path.join(cwd, "") is the working directory itself, so an empty name
    # would scatter the frames among the notebook's own files.
    raise ValueError("A non-empty name is required to create the output folder.")

folder_path = os.path.join(os.getcwd(), name)
os.makedirs(folder_path, exist_ok=True)
print(f"Saving frames to: {folder_path}")

mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# when a frame fails to process or the cell is interrupted.
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the default camera (index 0).")

    # Use both Pose and Hands (NO segmentation needed)
    with mp_pose.Pose(static_image_mode=False,
                      model_complexity=1,
                      enable_segmentation=False,  # ✅ Segmentation turned off
                      min_detection_confidence=0.5,
                      min_tracking_confidence=0.5) as pose, \
         mp_hands.Hands(static_image_mode=False,
                        max_num_hands=2,
                        min_detection_confidence=0.5,
                        min_tracking_confidence=0.5) as hands:

        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # The models are fed an RGB copy of the BGR camera frame; marking
            # it read-only mirrors the original processing setup.
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image_rgb.flags.writeable = False

            # Step 1: Pose estimation
            pose_results = pose.process(image_rgb)

            # Step 2: Hand detection
            hand_results = hands.process(image_rgb)

            image_rgb.flags.writeable = True

            # No background removal here: draw straight onto the captured BGR
            # frame (it is not reused after this iteration), which avoids
            # converting the RGB copy back to BGR.
            output_image = frame

            # Draw pose landmarks
            if pose_results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    output_image,
                    pose_results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS)

            # Draw hand landmarks
            if hand_results.multi_hand_landmarks:
                for hand_landmarks in hand_results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        output_image,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS)

            # Save frame to folder
            filename = os.path.join(folder_path, f"frame_{frame_count:04d}.jpg")
            cv2.imwrite(filename, output_image)
            frame_count += 1

            # Show preview
            cv2.imshow('Pose + Hands (No Background Removed)', output_image)

            # Press "q" in the preview window to stop recording.
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()


Enter your name:  ankit


Saving frames to: E:\removeBackground\ankit


In [1]:
import os
import cv2
import numpy as np
from PIL import Image
import mediapipe as mp

# --- Get folder path ---
folder = input("path: ").strip()
if not os.path.isdir(folder):
    print("no folder")
    # exit() is an interactive-shell helper and is not meant for halting
    # program code; NOTE(review): in a notebook kernel it appears to only
    # request shutdown while the rest of the cell keeps running - confirm.
    # Raising SystemExit stops this cell right here.
    raise SystemExit(1)

def pillow_compress(image_path, quality):
    """Re-encode the image at ``image_path`` in place as an RGB JPEG.

    Args:
        image_path: Path of the image file; it is overwritten with the result.
        quality: JPEG quality setting forwarded to Pillow's ``save``.

    Returns:
        None. Any failure is reported on stdout and otherwise ignored
        (best effort), so a bad file does not abort the caller's loop.
    """
    try:
        # Close the source handle before the same path is written again.
        with Image.open(image_path) as source:
            img = source.convert("RGB")
        img.save(image_path, format='JPEG', quality=quality, optimize=True)
    except Exception as e:
        print(f"Compression error on {image_path}: {e}")

# --- Background removal + lenient cropping ---
def remove_background_and_crop(image):
    """Black out everything but the detected person and crop around them.

    A MediaPipe Pose model (static-image mode, segmentation enabled) is run
    on the image. Pixels outside the person mask are set to 0 and the result
    is cropped to the bounding box of the pose landmarks, enlarged by 25% of
    the image width/height on each side and clipped to the image.

    Args:
        image: BGR image array as returned by ``cv2.imread``.

    Returns:
        The masked and cropped image. The original ``image`` is returned
        unchanged when no pose is detected; the masked but uncropped image is
        returned when the landmark box falls entirely outside the frame.

    Raises:
        ValueError: if ``image`` is None (e.g. ``cv2.imread`` could not read
            the file).
    """
    if image is None:
        raise ValueError("image is None; was the file readable by cv2.imread?")

    h, w = image.shape[:2]
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    mp_pose = mp.solutions.pose
    # NOTE(review): a model is built for every call; reuse one instance if
    # per-image cost becomes a problem.
    with mp_pose.Pose(static_image_mode=True,
                      model_complexity=2,
                      enable_segmentation=True,
                      min_detection_confidence=0.0001) as pose:

        results = pose.process(rgb_image)

        if not results.pose_landmarks:
            return image  # Return original if no person detected

        # Landmark coordinates are scaled by the image size to get pixels;
        # computed once and shared by the fallback mask and the crop box.
        landmarks = results.pose_landmarks.landmark
        x_coords = [int(lm.x * w) for lm in landmarks]
        y_coords = [int(lm.y * h) for lm in landmarks]

        # Get segmentation mask
        if results.segmentation_mask is not None:
            # Very low threshold: keep any pixel with a non-negligible score.
            mask = results.segmentation_mask > 0.0001
            output = np.where(mask[..., None], image, 0).astype(np.uint8)
        else:
            # Fallback: dilated convex hull of the landmarks as person mask.
            points = list(zip(x_coords, y_coords))
            hull = cv2.convexHull(np.array(points, dtype=np.int32))
            mask = np.zeros((h, w), dtype=np.uint8)
            cv2.fillConvexPoly(mask, hull, 255)
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25))
            mask = cv2.dilate(mask, kernel, iterations=1)
            mask_3ch = cv2.merge([mask] * 3)
            output = np.where(mask_3ch == 255, image, 0).astype(np.uint8)

        # --- Calculate bounding box ---
        x_min, x_max = max(min(x_coords), 0), min(max(x_coords), w)
        y_min, y_max = max(min(y_coords), 0), min(max(y_coords), h)

        # More lenient padding (25% of width/height)
        pad_x = int(0.25 * w)
        pad_y = int(0.25 * h)
        x_min = max(0, x_min - pad_x)
        y_min = max(0, y_min - pad_y)
        x_max = min(w, x_max + pad_x)
        y_max = min(h, y_max + pad_y)

        # Landmarks lying wholly outside the frame give an empty box, and an
        # empty array cannot be written to disk; keep the uncropped result.
        if x_min >= x_max or y_min >= y_max:
            return output

        # Crop image
        cropped_output = output[y_min:y_max, x_min:x_max]

        return cropped_output

# --- Process images ---
# Every .jpg in the folder is compressed, background-stripped and cropped,
# written back over itself, then compressed once more. Files that cannot be
# read or written are reported and collected instead of crashing the run.
skipped = []
for filename in os.listdir(folder):
    if not filename.lower().endswith(".jpg"):
        continue

    image_path = os.path.join(folder, filename)
    print(f"Processing: {filename}")

    pillow_compress(image_path, quality=65)

    # cv2.imread signals failure by returning None rather than raising.
    original_img = cv2.imread(image_path)
    if original_img is None:
        print(f"Skipping unreadable image: {filename}")
        skipped.append(filename)
        continue

    final_img = remove_background_and_crop(original_img)

    # An empty result cannot be encoded; leave the file as it is.
    if final_img is None or final_img.size == 0:
        print(f"Skipping empty result for: {filename}")
        skipped.append(filename)
        continue

    if not cv2.imwrite(image_path, final_img):
        print(f"Could not write result for: {filename}")
        skipped.append(filename)
        continue

    pillow_compress(image_path, quality=55)

if skipped:
    print(f"⚠️ Finished with {len(skipped)} image(s) skipped: {', '.join(skipped)}")
else:
    print("✅ All images processed and replaced successfully.")


path:  h


Processing: frame_0121.jpg
Processing: frame_0030.jpg
Processing: frame_0031.jpg
Processing: frame_0032.jpg
Processing: frame_0033.jpg
Processing: frame_0034.jpg
Processing: frame_0035.jpg
Processing: frame_0036.jpg
Processing: frame_0037.jpg
Processing: frame_0038.jpg
Processing: frame_0039.jpg
Processing: frame_0040.jpg
Processing: frame_0041.jpg
Processing: frame_0042.jpg
Processing: frame_0043.jpg
Processing: frame_0044.jpg
Processing: frame_0045.jpg
Processing: frame_0046.jpg
Processing: frame_0047.jpg
Processing: frame_0048.jpg
Processing: frame_0049.jpg
Processing: frame_0050.jpg
Processing: frame_0051.jpg
Processing: frame_0052.jpg
Processing: frame_0053.jpg
Processing: frame_0054.jpg
Processing: frame_0055.jpg
Processing: frame_0056.jpg
Processing: frame_0057.jpg
Processing: frame_0058.jpg
Processing: frame_0059.jpg
Processing: frame_0060.jpg
Processing: frame_0061.jpg
Processing: frame_0062.jpg
Processing: frame_0063.jpg
Processing: frame_0064.jpg
Processing: frame_0065.jpg
P

In [1]:
!pip install rembg

Collecting rembg
  Downloading rembg-2.0.61-py3-none-any.whl.metadata (18 kB)
Collecting opencv-python-headless (from rembg)
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting pooch (from rembg)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting pymatting (from rembg)
  Downloading pymatting-1.1.14-py3-none-any.whl.metadata (7.7 kB)
Collecting scikit-image (from rembg)
  Downloading scikit_image-0.24.0-cp39-cp39-win_amd64.whl.metadata (14 kB)
Collecting numpy (from rembg)
  Using cached numpy-2.0.2-cp39-cp39-win_amd64.whl.metadata (59 kB)
Collecting numba!=0.49.0 (from pymatting->rembg)
  Downloading numba-0.60.0-cp39-cp39-win_amd64.whl.metadata (2.8 kB)
Collecting imageio>=2.33 (from scikit-image->rembg)
  Downloading imageio-2.37.0-py3-none-any.whl.metadata (5.2 kB)
Collecting tifffile>=2022.8.12 (from scikit-image->rembg)
  Downloading tifffile-2024.8.30-py3-none-any.whl.metadata (31 kB)
Collecting lazy-loader>=0.4 

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
facenet-pytorch 2.6.0 requires numpy<2.0.0,>=1.24.0, but you have numpy 2.0.2 which is incompatible.
facenet-pytorch 2.6.0 requires torch<2.3.0,>=2.2.0, but you have torch 2.7.1+cu118 which is incompatible.
facenet-pytorch 2.6.0 requires torchvision<0.18.0,>=0.17.0, but you have torchvision 0.22.1+cu118 which is incompatible.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.0.2 which is incompatible.
label-studio 1.15.0 requires numpy<2.0.0,>=1.26.4, but you have numpy 2.0.2 which is incompatible.
label-studio-sdk 1.0.8 requires nltk<4.0.0,>=3.9.1, but you have nltk 3.8.1 which is incompatible.
label-studio-sdk 1.0.8 requires numpy<2.0.0, but you have numpy 2.0.2 which is incompatible.
mediapipe 0.10.21 requires numpy<2, but you have numpy 2.0.2 which is incompatible.
tensorflow-intel 2.12.1 requires

In [2]:
pip install numpy==1.26.4 --force-reinstall


Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp39-cp39-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp39-cp39-win_amd64.whl (15.8 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
Successfully installed numpy-1.26.4
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
facenet-pytorch 2.6.0 requires torch<2.3.0,>=2.2.0, but you have torch 2.7.1+cu118 which is incompatible.
facenet-pytorch 2.6.0 requires torchvision<0.18.0,>=0.17.0, but you have torchvision 0.22.1+cu118 which is incompatible.
label-studio-sdk 1.0.8 requires nltk<4.0.0,>=3.9.1, but you have nltk 3.8.1 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
tensorflow-intel 2.12.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.26.4 which is incompatible.
tensorflow-intel 2.12.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.13.1 which is incompatible.
torchtext 0.14.1 requires torch==1.13.1, but you have torch 2.7.1+cu118 which is incompatible.

[notice] A new release of pip is availa

In [3]:
import torch

# Ask PyTorch whether a CUDA runtime is usable in this environment.
cuda_available = torch.cuda.is_available()
print("CUDA Available:", cuda_available)

# Device details are only queried when CUDA is actually present.
if cuda_available:
    print("CUDA Device Count:", torch.cuda.device_count())
    print("Current Device Index:", torch.cuda.current_device())
    active_device = torch.cuda.current_device()
    print("Current Device Name:", torch.cuda.get_device_name(active_device))


CUDA Available: True
CUDA Device Count: 1
Current Device Index: 0
Current Device Name: NVIDIA GeForce RTX 3050 Laptop GPU


In [5]:
from rembg import remove
from PIL import Image

# 🔹 Replace this with your image path
input_path = r"4825_Prasanna K.B_189_20250802185335 - Copy/frame_0213.jpg"
output_path = r"person_nobg.png"

# Open the input image in a context manager so its file handle is released
# as soon as the background has been removed.
with Image.open(input_path) as input_image:
    # Remove background
    output_image = remove(input_image)

# Save result (PNG keeps transparency)
output_image.save(output_path)

print(f"✅ Background removed and saved to {output_path}")


*************** EP Error ***************
EP Error D:\a\_work\1\s\onnxruntime\python\onnxruntime_pybind_state.cc:490 onnxruntime::python::RegisterTensorRTPluginsAsCustomOps Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported.
 when using ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
Falling back to ['CUDAExecutionProvider', 'CPUExecutionProvider'] and retrying.
****************************************
✅ Background removed and saved to person_nobg.png
