/
encoder.pyx
551 lines (470 loc) · 21.2 KB
/
encoder.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
# This file is part of Xpra.
# Copyright (C) 2012-2014 Antoine Martin <antoine@devloop.org.uk>
# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
# later version. See the file COPYING for details.
import time
import os
from xpra.log import Logger
#module-level logger, created with the "encoder" and "x264" arguments
#(presumably logging categories - confirm against xpra.log.Logger):
log = Logger("encoder", "x264")
#thread count handed to x264 as param.i_threads in Encoder.init_encoder(),
#read once at import time from the XPRA_X264_THREADS environment variable (default "0"):
X264_THREADS = int(os.environ.get("XPRA_X264_THREADS", "0"))
from xpra.codecs.codec_constants import get_subsampling_divs, video_codec_spec
from collections import deque
#C memory helpers from <string.h>
#(memset is used by Encoder.compress_image to zero the x264 picture structs):
cdef extern from "string.h":
    void * memcpy ( void * destination, void * source, size_t num )
    void * memset ( void * ptr, int value, size_t num )
from libc.stdint cimport int64_t, uint64_t, uint8_t
#make the size_t name known to Cython (declared here as unsigned long):
cdef extern from *:
    ctypedef unsigned long size_t
#empty extern blocks: nothing is declared from these two headers here
cdef extern from "stdint.h":
    pass
cdef extern from "inttypes.h":
    pass
#project helper giving C level access to the memory of a Python buffer object,
#callers in this file treat a return value of 0 as success:
cdef extern from "../buffers/buffers.h":
    int object_as_buffer(object obj, const void ** buffer, Py_ssize_t * buffer_len)
#Declarations for the parts of the x264 C API (constants, structs and functions)
#that this module references. The structs below are not complete copies of the
#ones in "x264.h": some members are declared as empty placeholder structs.
cdef extern from "x264.h":
    int X264_BUILD
    int X264_LOG_ERROR
    #colorspace constants, stored in x264_param_t.i_csp and x264_image_t.i_csp:
    int X264_CSP_I420
    int X264_CSP_I422
    int X264_CSP_I444
    int X264_CSP_BGR
    int X264_CSP_BGRA
    int X264_CSP_RGB
    #array of preset names, indexed with the preset number (see get_preset_for_speed):
    const char * const *x264_preset_names
    #rate control settings, embedded in x264_param_t as the "rc" member:
    ctypedef struct rc:
        int         i_rc_method
        int         i_qp_constant   #0 to (51 + 6*(x264_bit_depth-8)). 0=lossless
        int         i_qp_min        #min allowed QP value
        int         i_qp_max        #max allowed QP value
        int         i_qp_step       #max QP step between frames
        int         i_bitrate
        float       f_rf_constant   #1pass VBR, nominal QP
        float       f_rf_constant_max   #In CRF mode, maximum CRF as caused by VBV
        float       f_rate_tolerance
        int         i_vbv_max_bitrate
        int         i_vbv_buffer_size
        float       f_vbv_buffer_init   #<=1: fraction of buffer_size. >1: kbit
        float       f_ip_factor
        float       f_pb_factor
        int         i_aq_mode       #psy adaptive QP. (X264_AQ_*)
        float       f_aq_strength
        int         b_mb_tree       #Macroblock-tree ratecontrol
        int         i_lookahead
        # 2pass
        int         b_stat_write    #Enable stat writing in psz_stat_out
        char        *psz_stat_out   #output filename (in UTF-8) of the 2pass stats file
        int         b_stat_read     #Read stat from psz_stat_in and use it
        char        *psz_stat_in    #input filename (in UTF-8) of the 2pass stats file
        # 2pass params (same as ffmpeg ones)
        float       f_qcompress     #0.0 => cbr, 1.0 => constant qp
        float       f_qblur         #temporally blur quants
        float       f_complexity_blur   #temporally blur complexity
        #x264_zone_t *zones         #ratecontrol overrides
        int         i_zones         #number of zone_t's
        char        *psz_zones      #alternate method of specifying zones
    #encoder configuration, filled in by Encoder.init_encoder():
    ctypedef struct x264_param_t:
        unsigned int cpu
        int i_threads           #encode multiple frames in parallel
        int i_lookahead_threads #multiple threads for lookahead analysis
        int b_sliced_threads    #Whether to use slice-based threading
        int b_deterministic     #whether to allow non-deterministic optimizations when threaded
        int b_cpu_independent   #force canonical behavior rather than cpu-dependent optimal algorithms
        int i_sync_lookahead    #threaded lookahead buffer
        int i_width
        int i_height
        int i_csp               #CSP of encoded bitstream
        int i_level_idc
        int i_frame_total       #number of frames to encode if known, else 0
        int i_log_level
        #Bitstream parameters
        int i_frame_reference   #Maximum number of reference frames
        int i_dpb_size          #Force a DPB size larger than that implied by B-frames and reference frames
                                #Useful in combination with interactive error resilience.
        int i_keyint_max        #Force an IDR keyframe at this interval
        int i_keyint_min        #Scenecuts closer together than this are coded as I, not IDR.
        int i_scenecut_threshold#how aggressively to insert extra I frames
        int b_intra_refresh     #Whether or not to use periodic intra refresh instead of IDR frames.
        int i_bframe            #how many b-frame between 2 references pictures
        int i_bframe_adaptive
        int i_bframe_bias
        int i_bframe_pyramid    #Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal
        int b_open_gop
        int b_bluray_compat
        rc  rc                  #rate control
    #opaque encoder handle:
    ctypedef struct x264_t:
        pass
    ctypedef struct x264_nal_t:
        int i_ref_idc
        int i_type
        int b_long_startcode
        int i_first_mb
        int i_last_mb
        int i_payload
        uint8_t *p_payload
    ctypedef struct x264_image_t:
        int i_csp           #Colorspace
        int i_plane         #Number of image planes
        int i_stride[4]     #Strides for each plane
        uint8_t *plane[4]   #Pointers to each plane
    #the next three structs are only declared as placeholders, no fields are accessed from here:
    ctypedef struct x264_image_properties_t:
        pass
    ctypedef struct x264_hrd_t:
        pass
    ctypedef struct x264_sei_t:
        pass
    ctypedef struct x264_picture_t:
        int i_type          #In: force picture type (if not auto)
        int i_qpplus1       #In: force quantizer for != X264_QP_AUTO
        int i_pic_struct    #In: pic_struct, for pulldown/doubling/etc...used only if b_pic_struct=1.
                            #use pic_struct_e for pic_struct inputs
                            #Out: pic_struct element associated with frame
        int b_keyframe      #Out: whether this frame is a keyframe. Important when using modes that result in
                            #SEI recovery points being used instead of IDR frames.
        int64_t i_pts       #In: user pts, Out: pts of encoded picture (user)
                            #Out: frame dts. When the pts of the first frame is close to zero,
                            #initial frames may have a negative dts which must be dealt with by any muxer
                            #(NOTE: the dts comment above seems to describe a field which is not declared here)
        x264_param_t *param #In: custom encoding parameters to be set from this frame forwards (..)
        x264_image_t img    #In: raw image data
                            #Out: reconstructed image data
        x264_image_properties_t prop    #In: optional information to modify encoder decisions for this frame
                            #Out: information about the encoded frame
        x264_hrd_t hrd_timing   #Out: HRD timing information. Output only when i_nal_hrd is set.
        x264_sei_t extra_sei#In: arbitrary user SEI (e.g subtitles, AFDs)
        void *opaque        #private user data. copied from input to output frames.
    #parameter setup:
    int x264_param_default_preset(x264_param_t *param, const char *preset, const char *tune)
    int x264_param_apply_profile(x264_param_t *param, const char *profile)
    #encoder lifecycle and (re)configuration:
    void x264_encoder_parameters(x264_t *context, x264_param_t *param)
    int x264_encoder_reconfig(x264_t *context, x264_param_t *param)
    x264_t *x264_encoder_open(x264_param_t *param)
    void x264_encoder_close(x264_t *context)
    #declared nogil so that compress_image can call it without holding the GIL:
    int x264_encoder_encode(x264_t *context, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ) nogil
cdef set_f_rf(x264_param_t *param, float q):
    """ Write q into the rate control "f_rf_constant" field of the given x264 parameters. """
    param[0].rc.f_rf_constant = q
cdef const char * const *get_preset_names():
    """ Accessor for the x264_preset_names array declared in "x264.h". """
    return x264_preset_names
#speeds from 0 to 99 select presets from 7 down to 1,
#preset 0 ("ultrafast") is only used for speeds above 99
#(we exclude placebo)
cdef int get_preset_for_speed(int speed):
    """
    Convert a speed percentage into an index into x264_preset_names.

    speed: the requested speed, values below 0 are treated like 0.
    Returns 0 when speed is above 99, otherwise 7 minus one step
    for every 15 points of speed (never less than 1).
    """
    if speed > 99:
        #only allow "ultrafast" if pct > 99
        return 0
    #use explicit floor division: the result must be an integer step
    #no matter which division semantics the module is compiled with
    return 7 - max(0, min(6, speed // 15))
#the x264 quality option ranges from 0 (best) to 51 (lowest)
cdef float get_x264_quality(int pct):
    """
    Convert a quality percentage into the value used for f_rf_constant.
    The percentage is clamped to the 0 to 100 range first:
    0 gives 50.0 and 100 gives 1.0.
    """
    cdef int clamped = pct
    if clamped < 0:
        clamped = 0
    elif clamped > 100:
        clamped = 100
    return <float> (50.0 - (clamped * 49.0 / 100.0))
#names of the x264 profiles, passed to x264_param_apply_profile:
cdef char *PROFILE_BASELINE = "baseline"
cdef char *PROFILE_MAIN     = "main"
cdef char *PROFILE_HIGH     = "high"
cdef char *PROFILE_HIGH10   = "high10"
cdef char *PROFILE_HIGH422  = "high422"
cdef char *PROFILE_HIGH444_PREDICTIVE = "high444"
#the profiles that Encoder.init_context will accept for each type of input:
I420_PROFILES = [PROFILE_BASELINE, PROFILE_MAIN, PROFILE_HIGH, PROFILE_HIGH10, PROFILE_HIGH422, PROFILE_HIGH444_PREDICTIVE]
I422_PROFILES = [PROFILE_HIGH422, PROFILE_HIGH444_PREDICTIVE]
I444_PROFILES = [PROFILE_HIGH444_PREDICTIVE]
RGB_PROFILES = [PROFILE_HIGH444_PREDICTIVE]

#maps each input pixel format to a tuple:
#(x264 colorspace constant, default profile, list of valid profiles)
#note: "BGRX" uses the same x264 colorspace constant as "BGRA"
COLORSPACE_FORMATS = {
    "YUV420P"   : (X264_CSP_I420,   PROFILE_HIGH,                   I420_PROFILES),
    "YUV422P"   : (X264_CSP_I422,   PROFILE_HIGH422,                I422_PROFILES),
    "YUV444P"   : (X264_CSP_I444,   PROFILE_HIGH444_PREDICTIVE,     I444_PROFILES),
    "BGR"       : (X264_CSP_BGR,    PROFILE_HIGH444_PREDICTIVE,     RGB_PROFILES),
    "BGRA"      : (X264_CSP_BGRA,   PROFILE_HIGH444_PREDICTIVE,     RGB_PROFILES),
    "BGRX"      : (X264_CSP_BGRA,   PROFILE_HIGH444_PREDICTIVE,     RGB_PROFILES),
    "RGB"       : (X264_CSP_RGB,    PROFILE_HIGH444_PREDICTIVE,     RGB_PROFILES),
    }
#maps each input pixel format to the tuple of output colorspaces it produces
#(always just the input format itself):
COLORSPACES = {
    "YUV420P"   : ("YUV420P",),
    "YUV422P"   : ("YUV422P",),
    "YUV444P"   : ("YUV444P",),
    "BGR"       : ("BGR",),
    "BGRA"      : ("BGRA",),
    "BGRX"      : ("BGRX",),
    "RGB"       : ("RGB",),
    }
def init_module():
    """ Module initialization hook: only logs that it was called. """
    log("enc_x264.init_module()")
def cleanup_module():
    """ Module cleanup hook: only logs that it was called. """
    log("enc_x264.cleanup_module()")
def get_version():
    """ Returns the X264_BUILD value from "x264.h". """
    build = X264_BUILD
    return build
def get_type():
    """ Returns the name identifying this codec: "x264". """
    codec_type = "x264"
    return codec_type
def get_info():
    """ Returns a new dictionary with the x264 build ("version") and the input "formats". """
    info = {}
    info["version"] = get_version()
    info["formats"] = COLORSPACES.keys()
    return info
def get_encodings():
    """ Returns a new list with the encodings this module can produce: just "h264". """
    encodings = ["h264"]
    return encodings
def get_input_colorspaces():
    """ Returns the pixel formats accepted as input: the keys of COLORSPACES. """
    input_colorspaces = COLORSPACES.keys()
    return input_colorspaces
def get_output_colorspaces(input_colorspace):
    """
    Returns the tuple of colorspaces which can be produced for the given input colorspace.

    input_colorspace: must be one of the keys of COLORSPACES (asserted).
    """
    assert input_colorspace in COLORSPACES
    #the output colorspaces live in COLORSPACES (which is also what get_spec uses),
    #COLORSPACE_FORMATS holds the x264 constant and profiles, not colorspace names
    return COLORSPACES[input_colorspace]
def get_spec(encoding, colorspace):
    """
    Returns the video_codec_spec describing this encoder
    for the given encoding and input colorspace.

    encoding: must be one of get_encodings() (asserted).
    colorspace: must be one of the keys of COLORSPACES (asserted).
    """
    assert encoding in get_encodings(), "invalid encoding: %s (must be one of %s)" % (encoding, get_encodings())
    assert colorspace in COLORSPACES, "invalid colorspace: %s (must be one of %s)" % (colorspace, COLORSPACES.keys())
    #we can handle high quality and any speed
    #setup cost is moderate (about 10ms)
    return video_codec_spec(encoding=encoding, output_colorspaces=COLORSPACES[colorspace],
                            codec_class=Encoder, codec_type=get_type(), speed=0, setup_cost=50, width_mask=0xFFFE, height_mask=0xFFFE)
cdef class Encoder:
    """
    H264 encoder using an x264 context.

    init_context() opens the x264 encoder, compress_image() encodes one
    frame per call and clean() closes the encoder and resets all the fields.
    """
    cdef unsigned long frames               #number of frames encoded
    cdef x264_t *context                    #the x264 encoder, NULL when closed
    cdef int width
    cdef int height
    cdef object src_format                  #input pixel format, a key of COLORSPACE_FORMATS
    cdef object profile                     #the x264 profile name in use
    cdef double time                        #total time spent in compress_image, in seconds
    cdef int colorspace                     #the x264 colorspace constant for src_format
    cdef int preset                         #index into the x264 preset names
    cdef int quality                        #quality percentage
    cdef int speed                          #speed percentage
    cdef unsigned long long bytes_in
    cdef unsigned long long bytes_out
    cdef object last_frame_times            #(start, end) times of the most recent frames
    cdef uint64_t first_frame_timestamp

    cdef object __weakref__

    def init_context(self, int width, int height, src_format, dst_formats, encoding, int quality, int speed, scaling, options):    #@DuplicatedSignature
        """
        Configure this encoder and open the x264 context.

        src_format must be a key of COLORSPACE_FORMATS, encoding must be "h264"
        and scaling must be (1,1): all three are asserted.
        The profile is taken from the options or the environment (see _get_profile),
        an invalid or missing profile is replaced with the default for src_format.
        dst_formats is not used.
        """
        global COLORSPACE_FORMATS
        cs_info = COLORSPACE_FORMATS.get(src_format)
        assert cs_info is not None, "invalid source format: %s, must be one of: %s" % (src_format, COLORSPACE_FORMATS.keys())
        assert encoding=="h264", "invalid encoding: %s" % encoding
        assert scaling==(1,1), "x264 does not handle scaling"
        self.width = width
        self.height = height
        self.quality = quality
        self.speed = speed
        self.preset = get_preset_for_speed(speed)
        self.src_format = src_format
        self.colorspace = cs_info[0]
        self.frames = 0
        self.last_frame_times = deque(maxlen=200)
        self.time = 0
        self.first_frame_timestamp = 0
        self.profile = self._get_profile(options, self.src_format)
        if self.profile is not None and self.profile not in cs_info[2]:
            log.warn("invalid profile specified for %s: %s (must be one of: %s)" % (src_format, self.profile, cs_info[2]))
            self.profile = None
        if self.profile is None:
            self.profile = cs_info[1]
        self.init_encoder()

    cdef init_encoder(self):
        """
        Fill in the x264 parameters from the fields set by init_context
        and open the encoder. Asserts that the context could be created.
        """
        cdef x264_param_t param
        cdef const char *preset
        preset = get_preset_names()[self.preset]
        x264_param_default_preset(&param, preset, "zerolatency")
        param.i_threads = X264_THREADS
        if X264_THREADS!=1:
            param.b_sliced_threads = 1
        param.i_width = self.width
        param.i_height = self.height
        param.i_csp = self.colorspace
        set_f_rf(&param, get_x264_quality(self.quality))
        param.i_log_level = X264_LOG_ERROR
        #we never lose frames or use seeking, so no need for regular I-frames:
        param.i_keyint_max = 999999
        #we don't want IDR frames either:
        param.i_keyint_min = 999999
        param.b_intra_refresh = 0   #no intra refresh
        param.b_open_gop = 1        #allow open gop
        x264_param_apply_profile(&param, self.profile)
        self.context = x264_encoder_open(&param)
        assert self.context!=NULL, "context initialization failed for format %s" % self.src_format

    def clean(self):                        #@DuplicatedSignature
        """ Close the x264 context (if there is one) and reset all the fields. """
        if self.context!=NULL:
            x264_encoder_close(self.context)
            self.context = NULL
        self.frames = 0
        self.width = 0
        self.height = 0
        self.src_format = ""
        self.profile = None
        self.time = 0
        self.colorspace = 0
        self.preset = 0
        self.quality = 0
        self.speed = 0
        self.bytes_in = 0
        self.bytes_out = 0
        #drop the frame times: this field holds a deque or nothing, never a number
        self.last_frame_times = None
        self.first_frame_timestamp = 0

    def get_info(self):             #@DuplicatedSignature
        """
        Returns a dictionary describing this encoder and its statistics,
        or an empty dictionary if there is no profile set
        (not initialized yet, or already cleaned).
        """
        cdef double pps
        cdef int f = 0
        cdef double now
        cdef double last_time
        cdef double cut_off
        cdef double ms_per_frame = 0
        if self.profile is None:
            return {}
        info = get_info()
        info.update({"profile"   : self.profile,
                     "preset"    : get_preset_names()[self.preset],
                     "frames"    : self.frames,
                     "width"     : self.width,
                     "height"    : self.height,
                     "speed"     : self.speed,
                     "quality"   : self.quality,
                     "src_format": self.src_format,
                     "version"   : get_version()})
        if self.bytes_in>0 and self.bytes_out>0:
            info["bytes_in"] = self.bytes_in
            info["bytes_out"] = self.bytes_out
            info["ratio_pct"] = int(100.0 * self.bytes_out / self.bytes_in)
        if self.frames>0 and self.time>0:
            pps = float(self.width) * float(self.height) * float(self.frames) / self.time
            info["total_time_ms"] = int(self.time*1000.0)
            info["pixels_per_second"] = int(pps)
        #calculate fps, using only the frames which ended during the last 10 seconds:
        now = time.time()
        last_time = now
        cut_off = now-10.0
        for start,end in list(self.last_frame_times):
            if end>cut_off:
                f += 1
                last_time = min(last_time, end)
                ms_per_frame += (end-start)
        if f>0 and last_time<now:
            info["fps"] = int(0.5+f/(now-last_time))
            info["ms_per_frame"] = int(1000.0*ms_per_frame/f)
        return info

    def __repr__(self):
        if self.src_format is None:
            return "x264_encoder(uninitialized)"
        return "x264_encoder(%s - %sx%s)" % (self.src_format, self.width, self.height)

    def is_closed(self):
        """ True if there is no x264 context. """
        return self.context==NULL

    def get_encoding(self):
        return "h264"

    def __dealloc__(self):
        self.clean()

    def get_width(self):
        return self.width

    def get_height(self):
        return self.height

    def get_type(self):                     #@DuplicatedSignature
        return "x264"

    def get_src_format(self):
        return self.src_format

    cdef _get_profile(self, options, csc_mode):
        """
        Find the profile requested for the given csc_mode, the first match wins:
        the "x264.<csc_mode>.profile" option, the "XPRA_X264_<csc_mode>_PROFILE"
        environment variable, then the same option using the old name for csc_mode.
        Returns None (or an empty value) if no profile was specified.
        """
        #try the environment as a default, fallback to hardcoded default:
        profile = os.environ.get("XPRA_X264_%s_PROFILE" % csc_mode)
        #now see if the client has requested a different value:
        profile = options.get("x264.%s.profile" % csc_mode, profile)
        if not profile:
            #also using the old names:
            old_csc_name = {"YUV420P" : "I420",
                            "YUV422P" : "I422",
                            "YUV444P" : "I444",
                            }.get(csc_mode, csc_mode)
            profile = options.get("x264.%s.profile" % old_csc_name, profile)
        return profile

    def compress_image(self, image, quality=-1, speed=-1, options={}):
        """
        Encode one frame.

        image: must provide get_pixels(), get_rowstride() and get_timestamp().
            For RGB / BGR source formats the pixels are a single buffer and the
            rowstride a single value, otherwise 3 planes and 3 rowstrides are expected.
        quality, speed: percentages, negative values leave the current setting alone.
            The encoder is only re-tuned when the value differs from the current
            one by more than 5.
        options: not used.

        Returns a tuple (compressed data, client options dictionary),
        or None if x264 reports an error or does not return any data.
        """
        cdef x264_nal_t *nals = NULL
        cdef int i_nals = 0
        cdef x264_picture_t pic_out
        cdef x264_picture_t pic_in
        cdef int frame_size = 0
        cdef int buf_err = 0

        cdef uint8_t *pic_buf = NULL
        cdef Py_ssize_t pic_buf_len = 0
        cdef char *out

        cdef int i                        #@DuplicatedSignature
        start = time.time()

        if self.frames==0:
            self.first_frame_timestamp = image.get_timestamp()

        if speed>=0 and abs(self.speed-speed)>5:
            self.set_encoding_speed(speed)
        if quality>=0 and abs(self.quality-quality)>5:
            self.set_encoding_quality(quality)
        assert self.context!=NULL
        pixels = image.get_pixels()
        istrides = image.get_rowstride()

        memset(&pic_out, 0, sizeof(x264_picture_t))
        memset(&pic_in, 0, sizeof(x264_picture_t))

        if self.src_format.find("RGB")>=0 or self.src_format.find("BGR")>=0:
            assert len(pixels)>0
            assert istrides>0
            #the buffer lookup must not live inside the assert statement:
            #it would not run at all when assertions are disabled
            buf_err = object_as_buffer(pixels, <const void**> &pic_buf, &pic_buf_len)
            assert buf_err==0
            for i in range(3):
                pic_in.img.plane[i] = pic_buf
                pic_in.img.i_stride[i] = istrides
            self.bytes_in += pic_buf_len
        else:
            assert len(pixels)==3, "image pixels does not have 3 planes! (found %s)" % len(pixels)
            assert len(istrides)==3, "image strides does not have 3 values! (found %s)" % len(istrides)
            for i in range(3):
                buf_err = object_as_buffer(pixels[i], <const void**> &pic_buf, &pic_buf_len)
                assert buf_err==0
                pic_in.img.plane[i] = pic_buf
                pic_in.img.i_stride[i] = istrides[i]
                #count the input of every plane so that get_info can report the ratio:
                self.bytes_in += pic_buf_len

        pic_in.img.i_csp = self.colorspace
        pic_in.img.i_plane = 3
        pic_in.i_pts = image.get_timestamp()-self.first_frame_timestamp

        with nogil:
            frame_size = x264_encoder_encode(self.context, &nals, &i_nals, &pic_in, &pic_out)
        if frame_size < 0:
            log.error("x264 encoding error: frame_size is invalid!")
            return None
        if frame_size==0 or i_nals<=0 or nals==NULL:
            #nothing was returned, so nals[0] must not be accessed:
            log.error("x264 encoding error: no data was returned for this frame!")
            return None
        out = <char *>nals[0].p_payload
        cdata = out[:frame_size]
        self.bytes_out += frame_size
        #info for client:
        client_options = {
                "frame"     : self.frames,
                "pts"       : pic_out.i_pts,
                "quality"   : min(99, quality),
                "speed"     : speed}
        #accounting:
        end = time.time()
        self.time += end-start
        self.frames += 1
        self.last_frame_times.append((start, end))
        return  cdata, client_options

    def set_encoding_speed(self, int pct):
        """
        Switch the encoder to the preset matching the new speed percentage.
        Does nothing if the preset is unchanged (the stored speed is then also left as it was).
        Raises an Exception if x264 refuses the new parameters.
        """
        cdef x264_param_t param                     #@DuplicatedSignature
        cdef int new_preset
        assert pct>=0 and pct<=100, "invalid percentage: %s" % pct
        assert self.context!=NULL, "context is closed!"
        new_preset = get_preset_for_speed(pct)
        if new_preset == self.preset:
            return
        self.speed = pct
        #retrieve current parameters:
        x264_encoder_parameters(self.context, &param)
        #apply new preset:
        x264_param_default_preset(&param, get_preset_names()[new_preset], "zerolatency")
        #ensure quality remains what it was:
        set_f_rf(&param, get_x264_quality(self.quality))
        #apply it:
        x264_param_apply_profile(&param, self.profile)
        if x264_encoder_reconfig(self.context, &param)!=0:
            raise Exception("x264_encoder_reconfig failed for speed=%s" % pct)
        self.preset = new_preset

    def set_encoding_quality(self, int pct):
        """
        Change the quality percentage used by the encoder.
        Changes of 4 points or less are ignored, unless the new value is 100.
        Raises an Exception if x264 refuses the new parameters.
        """
        cdef x264_param_t param                     #@DuplicatedSignature
        assert pct>=0 and pct<=100, "invalid percentage: %s" % pct
        assert self.context!=NULL, "context is closed!"
        if abs(self.quality - pct)<=4 and pct!=100:
            #not enough of a change to bother
            return
        #only f_rf_constant is changing
        #retrieve current parameters:
        x264_encoder_parameters(self.context, &param)
        #adjust quality, using the new value (not the one we are replacing):
        set_f_rf(&param, get_x264_quality(pct))
        #apply it:
        if x264_encoder_reconfig(self.context, &param)!=0:
            raise Exception("x264_encoder_reconfig failed for quality=%s" % pct)
        self.quality = pct
def selftest():
    """
    Encode one blank 24x16 YUV420P frame with every encoding of this module,
    to verify that the encoder can be initialized and used.
    Fails with an AssertionError if the encoder does not return any data.
    """
    #fake empty buffer:
    w, h = 24, 16
    #use floor division so that the sizes and strides are always integers:
    y = "\0" * (w*h)
    u = "\0" * (w*h//4)
    v = "\0" * (w*h//4)
    for encoding in get_encodings():
        e = Encoder()
        try:
            #note: w and h are also passed in as the quality and speed arguments
            e.init_context(w, h, "YUV420P", ["YUV420P"], encoding, w, h, (1,1), {})
            from xpra.codecs.image_wrapper import ImageWrapper
            image = ImageWrapper(0, 0, w, h, [y, u ,v], "YUV420P", 32, [w, w//2, w//2], planes=ImageWrapper.PACKED, thread_safe=True)
            c = e.compress_image(image)
            assert c is not None, "%s selftest failed: no data was returned for %s" % (get_type(), encoding)
        finally:
            e.clean()