From c150ad59a439779df2a17216c408d7aa155c6d38 Mon Sep 17 00:00:00 2001 From: Larry Yan Date: Mon, 12 Aug 2019 20:51:03 +0800 Subject: [PATCH] fix(preprocessor): modify ffmpeg video pre add video cutting method --- gnes/preprocessor/helper.py | 2 +- gnes/preprocessor/video/ffmpeg.py | 12 ++++++++++++ tests/test_video_preprocessor.py | 19 +++++++++++++++++++ tests/yaml/preprocessor-ffmpeg4.yml | 8 ++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 tests/yaml/preprocessor-ffmpeg4.yml diff --git a/gnes/preprocessor/helper.py b/gnes/preprocessor/helper.py index 7bb66482..4398b6a0 100644 --- a/gnes/preprocessor/helper.py +++ b/gnes/preprocessor/helper.py @@ -248,7 +248,7 @@ def hsv_histogram(image: 'np.ndarray') -> 'np.ndarray': def phash_descriptor(image: 'np.ndarray'): - image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + image = Image.fromarray(image) import imagehash return imagehash.phash(image) diff --git a/gnes/preprocessor/video/ffmpeg.py b/gnes/preprocessor/video/ffmpeg.py index dccb02d2..ea53e3e3 100644 --- a/gnes/preprocessor/video/ffmpeg.py +++ b/gnes/preprocessor/video/ffmpeg.py @@ -127,6 +127,7 @@ def __init__(self, def apply(self, doc: 'gnes_pb2.Document') -> None: super().apply(doc) + from sklearn.cluster import KMeans if doc.raw_bytes: if self.use_image_input: frames = split_video_frames(doc.raw_bytes, self.splitter) @@ -155,6 +156,17 @@ def apply(self, doc: 'gnes_pb2.Document') -> None: else: sub_videos = [frames] + # cut by clustering: params required + # segment_num + elif self.segment_method == 'cut_by_clustering': + if self.segment_num >= 2: + hash_v = [phash_descriptor(_).hash for _ in frames] + label_v = KMeans(n_clusters=self.segment_num + ).fit_predict(np.array(hash_v, dtype=np.int32)) + sub_videos = [[frames[i] for i, j in enumerate(label_v) if j == _] for _ in range(self.segment_num)] + else: + sub_videos = [frames] + for ci, chunk in enumerate(sub_videos): c = doc.chunks.add() c.doc_id = doc.doc_id diff 
--git a/tests/test_video_preprocessor.py b/tests/test_video_preprocessor.py index c35b7af8..08a28cb4 100644 --- a/tests/test_video_preprocessor.py +++ b/tests/test_video_preprocessor.py @@ -14,6 +14,7 @@ def setUp(self): self.yml_path = os.path.join(self.dirname, 'yaml', 'preprocessor-ffmpeg.yml') self.yml_path_2 = os.path.join(self.dirname, 'yaml', 'preprocessor-ffmpeg2.yml') self.yml_path_3 = os.path.join(self.dirname, 'yaml', 'preprocessor-ffmpeg3.yml') + self.yml_path_4 = os.path.join(self.dirname, 'yaml', 'preprocessor-ffmpeg4.yml') self.video_path = os.path.join(self.dirname, 'videos') self.video_bytes = [open(os.path.join(self.video_path, _), 'rb').read() for _ in os.listdir(self.video_path)] @@ -87,3 +88,21 @@ def test_video_cut_by_num(self): r = client.recv_message() for d in r.request.index.docs: self.assertEqual(len(d.chunks), 6) + + def test_video_cut_by_clustering(self): + args = set_preprocessor_service_parser().parse_args([ + '--yaml_path', self.yml_path_4 + ]) + c_args = _set_client_parser().parse_args([ + '--port_in', str(args.port_out), + '--port_out', str(args.port_in) + ]) + + with PreprocessorService(args), ZmqClient(c_args) as client: + for req in RequestGenerator.index(self.video_bytes): + msg = gnes_pb2.Message() + msg.request.index.CopyFrom(req.index) + client.send_message(msg) + r = client.recv_message() + for d in r.request.index.docs: + self.assertEqual(len(d.chunks), 6) diff --git a/tests/yaml/preprocessor-ffmpeg4.yml b/tests/yaml/preprocessor-ffmpeg4.yml new file mode 100644 index 00000000..1763f56d --- /dev/null +++ b/tests/yaml/preprocessor-ffmpeg4.yml @@ -0,0 +1,8 @@ +!FFmpegVideoSegmentor +parameter: + segment_method: cut_by_clustering + segment_num: 6 + s: "192*168" + r: 1 +gnes_config: + is_trained: true \ No newline at end of file