Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug]: Memory Leakage with YouTube video #292

Closed
ioangatop opened this issue Feb 23, 2022 · 24 comments
Closed

[Bug]: Memory Leakage with YouTube video #292

ioangatop opened this issue Feb 23, 2022 · 24 comments
Labels
QUESTION ❓ User asked about the working/usage of VidGear APIs. SOLVED 🏁 This issue/PR is resolved now. Goal Achieved!

Comments

@ioangatop
Copy link

Hi! These days I came across this issue, where it appears that the memory usage keeps increasing as I stream a video.
In this example, it increases even more, but in a microserver setting, like launching this script in a REST-API server, there is a steady but small increase over time.

In this scenario, I run the script with docker;

# main.py

from memory_profiler import profile
from vidgear.gears import VideoGear
from typing import Callable
import logging
import time
import cv2


# Log file passed as the `stream` argument of the @profile decorator on main();
# it is opened at import time in "w+" mode and is not explicitly closed anywhere in this script.
fp = open("/workspace/memory_profiler.log","w+")


class VideoStream:
    """Iterable wrapper around a VidGear ``VideoGear`` stream.

    Iterating over an instance starts the underlying stream, samples frames
    at roughly ``stream_framerate`` frames per second (relative to an assumed
    30 FPS source), JPEG-encodes each sampled frame and yields it as a dict
    (or as the result of ``func(**dict)`` when ``func`` is given).

    Args:
        stream_source: Device index (all digits) or a URL/path. Anything that
            is not all digits is opened with ``stream_mode=True``.
        stream_resolution: Stored on the instance; NOTE(review): it is not
            forwarded to VideoGear, which is always asked for ``"best"``.
        stream_framerate: Target number of yielded frames per second; must be
            positive.
        stream_max_frames: Maximum number of frames to yield; ``-1`` or
            ``None`` means unlimited.
        stream_backend: OpenCV capture backend passed to VideoGear.
        func: Optional callable invoked as ``func(**inputs)`` for each frame.
        reconnect_frequency: Stored on the instance; not used by this class.
        *args, **kwargs: Accepted and ignored, so callers may pass extra keys.

    Raises:
        ValueError: If ``stream_framerate`` is not positive.
    """

    # Frame rate the sampling stride is computed against.
    # NOTE(review): the real source FPS is not consulted — confirm 30 is intended.
    _ASSUMED_SOURCE_FPS = 30

    def __init__(
        self,
        stream_source: str,
        stream_resolution: str = "sd",
        stream_framerate: int = 4,
        stream_max_frames: int = float("inf"),
        stream_backend: int = cv2.CAP_GSTREAMER,
        func: Callable = None,
        reconnect_frequency: int = 3600,
        *args, **kwargs
    ):
        # Fail early with a clear message instead of a ZeroDivisionError
        # (or a nonsensical negative stride) once iteration starts.
        if stream_framerate <= 0:
            raise ValueError(
                f"stream_framerate must be positive, got {stream_framerate!r}"
            )

        # stream params
        self.stream_source = stream_source
        self.stream_resolution = stream_resolution
        self.stream_framerate = stream_framerate
        # -1 and None both mean "no limit"
        self.stream_max_frames = (
            float("inf") if stream_max_frames in (-1, None) else stream_max_frames
        )
        self.stream_backend = stream_backend
        self.reconnect_frequency = reconnect_frequency

        self.stream = self.init_stream()
        self.func = func

        # utils
        self.n_frame = 0
        self.last_reconnection = time.time()
        # For YouTube videos, it is better to reduce the number of requests
        self.stream_sleep = 0.05 if self.is_stream_mode(self.stream_source) else 0

    def get_stream_info(self):
        """Return height, width, total frame count and FPS of the capture.

        The values are read via ``self.stream.stream.get(...)`` using the
        OpenCV ``CAP_PROP_*`` property ids.
        """
        capture = self.stream.stream
        return {
            "height": int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            "width": int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            "total": int(capture.get(cv2.CAP_PROP_FRAME_COUNT)),
            "fps": capture.get(cv2.CAP_PROP_FPS),
        }

    @staticmethod
    def is_stream_mode(source):
        """Return ``True`` unless ``source`` consists solely of digits."""
        return not str(source).isdigit()

    def init_stream(self):
        """Create (but do not start) the ``VideoGear`` stream for the source."""
        stream_mode = self.is_stream_mode(self.stream_source)

        if stream_mode:
            options = {
                "STREAM_RESOLUTION": "best",
                "STREAM_PARAMS": {"nocheckcertificate": True},
            }
        else:
            options = {"CAP_PROP_FPS": 30}

        stream = VideoGear(
            source=self.stream_source,
            backend=self.stream_backend,
            stream_mode=stream_mode,
            logging=False,
            **options
        )
        logging.info("Stream with source '%s' initialized.", self.stream_source)
        return stream

    def start_stream(self):
        """Start the underlying stream."""
        self.stream.start()
        logging.info("Stream with source '%s' started.", self.stream_source)

    def stop_stream(self):
        """Stop the underlying stream."""
        self.stream.stop()
        logging.info("Stream with source '%s' stopped.", self.stream_source)

    def stop(self):
        """Alias for :meth:`stop_stream`."""
        self.stop_stream()

    def fetch_frame(self):
        """Read and return the next frame from the underlying stream."""
        return self.stream.read()

    def generator(self):
        """Start the stream and yield sampled, JPEG-encoded frames.

        Each yielded item is a dict with keys ``id``, ``image`` (JPEG bytes),
        ``source`` and ``timestamp``, or ``func(**that_dict)`` when ``func``
        was supplied. Iteration ends when a read returns ``None`` or when
        ``stream_max_frames`` frames have been yielded.

        The stream is stopped in a ``finally`` clause, so it is also released
        when the consumer stops iterating early or an exception propagates.
        """
        self.start_stream()

        # Yield every `stride`-th frame; clamp to 1 so rates above the assumed
        # source FPS do not produce a zero stride (modulo by zero).
        stride = max(1, self._ASSUMED_SOURCE_FPS // self.stream_framerate)

        total_frames = 0
        try:
            while True:
                frame = self.fetch_frame()

                if frame is None or self.n_frame >= self.stream_max_frames:
                    break

                if total_frames % stride == 0:
                    inputs = {
                        "id": self.n_frame,
                        "image": cv2.imencode(".jpg", frame)[1].tobytes(),
                        "source": str(self.stream_source),
                        "timestamp": time.time(),
                    }
                    yield self.func(**inputs) if self.func else inputs
                    self.n_frame += 1

                if self.stream_sleep:
                    time.sleep(self.stream_sleep)
                total_frames += 1
        finally:
            self.stop_stream()

    def __iter__(self):
        """Iterate over the frames produced by :meth:`generator`."""
        yield from self.generator()


@profile(stream=fp)
def main():
    """Stream a YouTube source through VideoStream and print each frame id.

    The stream is capped at 100 yielded frames; every id is printed followed
    by a carriage return.
    """
    videostreamer = VideoStream(
        name="live1",
        stream_source="http://youtube.com/watch?v=y7QiNgui5Tg",
        stream_framerate=4,
        stream_resolution="best",
        stream_max_frames=100,
    )

    for item in videostreamer:
        print(item["id"], end='\r')


# Run the profiled entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
# Dockerfile

FROM python:3.8-slim

# utils -------------------------------------------
# ENV uses the key=value form; the space-separated "ENV key value" form is legacy syntax.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV OPENCV_VERSION="4.5.1"
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# init virtualenv ---------------------------------
# Prepending the venv's bin directory to PATH makes later python/pip calls use the venv.
ENV VIRTUAL_ENV=/opt/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# install OpenCV ----------------------------------
# Installs build and GStreamer/FFmpeg packages, downloads the OpenCV sources for
# ${OPENCV_VERSION}, builds them with GStreamer and FFmpeg enabled, installs into the
# active Python prefix, then removes the build tree and apt lists in the same layer.
WORKDIR /opt/build
RUN apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        wget \
        unzip \
        yasm \
        pkg-config \
        libswscale-dev \
        libtbb2 \
        libtbb-dev \
        libjpeg-dev \
        libpng-dev \
        libtiff-dev \
        libopenjp2-7-dev \
        libavformat-dev \
        libpq-dev \
        libgstreamer1.0-0 \
        ffmpeg \
        gstreamer1.0-plugins-base \
        gstreamer1.0-plugins-good \
        gstreamer1.0-plugins-bad \
        gstreamer1.0-plugins-ugly \
        gstreamer1.0-libav \
        gstreamer1.0-tools \
        libgstreamer1.0-dev \
        libgstreamer-plugins-base1.0-dev \
        protobuf-compiler \
        libgtk2.0-dev \
        ocl-icd-opencl-dev \
    && pip install --no-cache-dir numpy \
    && wget -q https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip -O opencv.zip \
    && unzip -qq opencv.zip -d /opt \
    && rm -rf opencv.zip \
    && cmake \
        -D BUILD_TIFF=ON \
        -D BUILD_opencv_python2=OFF \
        -D BUILD_opencv_java=OFF \
        -D CMAKE_BUILD_TYPE=RELEASE \
        -D WITH_CUDA=OFF \
        -D WITH_OPENGL=ON \
        -D WITH_OPENCL=ON \
        -D WITH_TBB=ON \
        -D WITH_EIGEN=ON \
        -D WITH_V4L=ON \
        -D BUILD_TESTS=OFF \
        -D BUILD_PERF_TESTS=OFF \
        -D CMAKE_INSTALL_PREFIX=$(python -c "import sys; print(sys.prefix)") \
        -D PYTHON3_EXECUTABLE=$(which python3) \
        -D PYTHON3_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \
        -D PYTHON3_PACKAGES_PATH=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") \
        -D WITH_GSTREAMER=ON \
        -D WITH_FFMPEG=ON \
        /opt/opencv-${OPENCV_VERSION} \
    && make -j$(nproc) \
    && make install \
    && rm -rf /opt/build/* \
    && rm -rf /opt/opencv-${OPENCV_VERSION} \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get -qq autoremove \
    && apt-get -qq clean

# install pip packages ----------------------------
# The requirement is quoted: unquoted, the shell parses ">=0.2.4" as an output
# redirection (creating a file named "=0.2.4") and the version constraint is lost.
RUN pip3 install --upgrade pip \
    && pip3 install --no-cache-dir \
        "vidgear[core]>=0.2.4"

RUN pip3 install --no-cache-dir memory_profiler
# run.sh
# Build the image, then run main.py with the current directory mounted at /workspace.
docker build . -t venv:latest
# "$PWD" is quoted so that a working directory containing spaces is passed as one argument.
docker run \
    -v "$PWD":/workspace \
    venv:latest python3 /workspace/main.py
@welcome
Copy link

welcome bot commented Feb 23, 2022

Thanks for opening this issue, a maintainer will get back to you shortly!

In the meantime:

  • Read our Issue Guidelines, and update your issue accordingly. Please note that your issue will be fixed much faster if you spend about half an hour preparing it, including the exact reproduction steps and a demo.
  • Go comprehensively through our dedicated FAQ & Troubleshooting section.
  • For any quick questions and typos, please refrain from opening an issue, as you can reach us on Gitter community channel.

@abhiTronix
Copy link
Owner

@ioangatop Few questions:

  • This is specific to youtube video or any video?
  • Are there multiple threads involved in your code? Is code running in a multi-threaded environment?
  • What happens when you use OpenCV directly instead of vidgear with normal video?

@abhiTronix abhiTronix added MISSING : INFORMATION 🔍 Requested proper information/evidence about the Issue/PR in context. WAITING FOR RESPONSE ⏳ Waiting for the user response. labels Feb 23, 2022
@ioangatop
Copy link
Author

  • Any youtube long/live videos will do
  • Single thread runs this script in the background (e.g. threading.Thread(target=main, daemon=True).start())
  • No, because I'm interested in fetching from a USB camera, live video youtube, or remote wireless camera.

@no-response no-response bot removed the MISSING : INFORMATION 🔍 Requested proper information/evidence about the Issue/PR in context. label Feb 23, 2022
@abhiTronix
Copy link
Owner

No, because I'm interested in fetching from a USB camera, live video youtube, or remote wireless camera.

@ioangatop Please try it with a USB camera feed; I want to eliminate the possibility that OpenCV itself is responsible.

@abhiTronix
Copy link
Owner

Single thread runs this script in the background (e.g. threading.Thread(target=main, daemon=True).start())

@ioangatop Wait isn't this multi-threading?

@ioangatop
Copy link
Author

ioangatop commented Feb 23, 2022

No, because I'm interested in fetching from a USB camera, live video youtube, or remote wireless camera.

@ioangatop Do with USB camera feed, I want to eliminate possibility that OpenCV itself is responsible.

it does look like this: [opencv](opencv/opencv#13255)

Single thread runs this script in the background (e.g. threading.Thread(target=main, daemon=True).start())

@ioangatop Wait isn't this multi-threading?

Hmmm not sure

@ioangatop
Copy link
Author

this is how it looks like in the server


# 12:43
CONTAINER ID   NAME           CPU %     MEM USAGE / LIMIT     MEM %     NET I/O           BLOCK I/O        PIDS
7e59150d95ca   datastreamer   21.10%    229.8MiB / 15.57GiB   1.44%     14.6MB / 88.5MB   0B / 995kB       28

# 12:44
CONTAINER ID   NAME           CPU %     MEM USAGE / LIMIT     MEM %     NET I/O         BLOCK I/O        PIDS
7e59150d95ca   datastreamer   14.32%    230.3MiB / 15.57GiB   1.44%     17MB / 110MB    0B / 995kB       28

# 12:46
CONTAINER ID   NAME           CPU %     MEM USAGE / LIMIT     MEM %     NET I/O          BLOCK I/O        PIDS
7e59150d95ca   datastreamer   16.61%    230.7MiB / 15.57GiB   1.45%     22.2MB / 149MB   0B / 995kB       28

# 12:48
CONTAINER ID   NAME           CPU %     MEM USAGE / LIMIT     MEM %     NET I/O          BLOCK I/O        PIDS
7e59150d95ca   datastreamer   17.18%    232.6MiB / 15.57GiB   1.46%     27.5MB / 189MB   0B / 995kB       28

# 12:50
CONTAINER ID   NAME           CPU %     MEM USAGE / LIMIT     MEM %     NET I/O          BLOCK I/O        PIDS
7e59150d95ca   datastreamer   14.33%    232.7MiB / 15.57GiB   1.46%     29.8MB / 206MB   0B / 995kB       28

@abhiTronix
Copy link
Owner

Single thread runs this script in the background (e.g. threading.Thread(target=main, daemon=True).start())

@ioangatop Wait isn't this multi-threading?

Hmmm not sure

Yes it is. This is a case of the infamous GIL, which makes it impossible to use the same resource on multiple threads; hence, however high-end your PC specs are or whatever optimizations you perform, it is going to throttle the performance or cause leaks.

@abhiTronix
Copy link
Owner

@ioangatop The GIL is per Python process; a Docker container may have one or many Python processes, each with its own GIL.

@ioangatop
Copy link
Author

@abhiTronix Thank you for the pointer, I'll take a look at it. However, I would like to mention that I have multiple other microservers that work with Thread (ML model, tracking, etc.) and do not have any memory leak.

@abhiTronix
Copy link
Owner

@ioangatop The problem here is that vidgear itself uses Multi-Threading in its CamGear API:

def start(self):
    """
    Launches the internal *Threaded Frames Extractor* daemon.
    **Returns:** A reference to the CamGear class object.
    """
    # The frame-extraction loop runs in its own daemon thread named "CamGear".
    self.__thread = Thread(target=self.__update, name="CamGear", args=())
    self.__thread.daemon = True
    self.__thread.start()
    return self

And using threading over threading is bad.

@abhiTronix
Copy link
Owner

@ioangatop Good news is that I'm working on new library that will eliminate use of threading and will produce high performance frames with FFmpeg. Follow issue #148

@abhiTronix abhiTronix added QUESTION ❓ User asked about the working/usage of VidGear APIs. WON'T FIXED 🚫 This issue will not be worked on labels Feb 23, 2022
@ioangatop
Copy link
Author

@abhiTronix ah right, that makes sense! Does VidGear have any generator, like main.py, that I can take advantage of without starting another process? Any idea is super welcome!

I track the issue, super exciting!

@abhiTronix
Copy link
Owner

Does Vidgear has any generator, like main.py

You mean starting CamGear directly from terminal? No, but can be added easily.

@ioangatop
Copy link
Author

ioangatop commented Feb 23, 2022

No, I want to start vidgear to fetch frames from a source and put them in a message broker (like redis) in the background.

Like sending a POST request, process_video, and it will start in the background

@abhiTronix
Copy link
Owner

@ioangatop Doesn't the read() function act as an iterator?

@abhiTronix
Copy link
Owner

Ok you meant python generator, no.

@ioangatop
Copy link
Author

tbh it doesn't matter, just to run in the background

@abhiTronix
Copy link
Owner

you mean smthing like threading.Thread(target=videostreamer.read, daemon=True).start()?

No this is invalid. You want your python script to run in background or just vidgear?

@abhiTronix
Copy link
Owner

Because vidgear's start(), once called, will keep read() running/alive in the background until the stop() function is called manually by the user.

@ioangatop
Copy link
Author

script that runs vidgear in the background.

Highlevel:

def start_process(video_source):
    """Kick off processing of ``video_source`` and report that it was started.

    High-level sketch: ``process`` is meant to run in the background so that
    this function continues immediately (see the inline comment).
    """
    process(video_source)          # <-- it will execute but continue
    print("started processing")
    return True

and the process should be smthing like

def process(source):
    """Read frames from ``vid`` until the stream ends, then stop it.

    High-level sketch: ``vid`` is the already-started stream object for
    ``source`` and is not defined in this snippet.
    """
    while True:
        frame = vid.read()
        # Leave the loop when no frame is returned; without this exit the
        # vid.stop() call below could never be reached.
        if frame is None:
            break
        # ...
    vid.stop()

@abhiTronix
Copy link
Owner

@ioangatop Why don't you use asyncio python? It is thread safe and you could easily create custom generator something like https://abhitronix.github.io/vidgear/latest/gears/webgear/advanced/#using-webgear-with-a-custom-sourceopencv

@ioangatop
Copy link
Author

@abhiTronix I think thats a good solution, I'll take a look at it! Thank you!

@abhiTronix
Copy link
Owner

abhiTronix commented Feb 23, 2022

Yes, I was going to say this that you can run asyncio generator with multi-threaded CamGear in the background without worrying about GIL. I'm using it already in WebGear:

async def __producer(self):
    """
    WebGear's default asynchronous frame producer/generator.
    """
    # NOTE(review): the indentation of this snippet was lost when it was pasted;
    # the nesting below is reconstructed from the control-flow keywords —
    # verify against the upstream WebGear source.
    # loop over frames
    while self.__isrunning:
        # read frame
        frame = self.__stream.read()
        # display blank if NoneType
        if frame is None:
            frame = (
                self.blank_frame
                if self.blank_frame is None
                else self.blank_frame[:]
            )
            if not self.__enable_inf:
                self.__isrunning = False
        else:
            # create blank
            if self.blank_frame is None:
                self.blank_frame = create_blank_frame(
                    frame=frame,
                    text="No Input" if self.__enable_inf else "The End",
                    logging=self.__logging,
                )
        # reducer frames size if specified
        if self.__frame_size_reduction:
            frame = await reducer(
                frame,
                percentage=self.__frame_size_reduction,
                interpolation=self.__interpolation,
            )
        # handle JPEG encoding
        if self.__jpeg_compression_colorspace == "GRAY":
            if frame.ndim == 2:
                # patch for https://gitlab.com/jfolz/simplejpeg/-/issues/11
                frame = np.expand_dims(frame, axis=2)
            encodedImage = simplejpeg.encode_jpeg(
                frame,
                quality=self.__jpeg_compression_quality,
                colorspace=self.__jpeg_compression_colorspace,
                fastdct=self.__jpeg_compression_fastdct,
            )
        else:
            encodedImage = simplejpeg.encode_jpeg(
                frame,
                quality=self.__jpeg_compression_quality,
                colorspace=self.__jpeg_compression_colorspace,
                colorsubsampling="422",
                fastdct=self.__jpeg_compression_fastdct,
            )
        # yield frame in byte format
        yield (
            b"--frame\r\nContent-Type:image/jpeg\r\n\r\n" + encodedImage + b"\r\n"
        )
        # sleep for sometime.
        await asyncio.sleep(0)

@abhiTronix abhiTronix added SOLVED 🏁 This issue/PR is resolved now. Goal Achieved! and removed WON'T FIXED 🚫 This issue will not be worked on WAITING FOR RESPONSE ⏳ Waiting for the user response. labels Feb 23, 2022
@abhiTronix abhiTronix removed their assignment Feb 25, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
QUESTION ❓ User asked about the working/usage of VidGear APIs. SOLVED 🏁 This issue/PR is resolved now. Goal Achieved!
Projects
None yet
Development

No branches or pull requests

2 participants