From 9b4d37a78739ec2a10e74b005a594e1e9c19aa60 Mon Sep 17 00:00:00 2001 From: cdhigh Date: Mon, 22 Apr 2024 22:54:46 -0300 Subject: [PATCH] add pymp3cat to work in gae --- application/lib/calibre/web/feeds/news.py | 23 +- application/lib/pymp3cat.py | 353 ++++++++++++++++++++++ application/static/base.css | 4 + application/static/base.js | 25 +- application/templates/base.html | 2 + application/view/setting.py | 1 + application/view/subscribe.py | 33 +- application/work/worker.py | 46 ++- tools/update_req.py | 14 +- 9 files changed, 460 insertions(+), 41 deletions(-) create mode 100644 application/lib/pymp3cat.py diff --git a/application/lib/calibre/web/feeds/news.py b/application/lib/calibre/web/feeds/news.py index be91aeab..1f4a2c23 100644 --- a/application/lib/calibre/web/feeds/news.py +++ b/application/lib/calibre/web/feeds/news.py @@ -764,7 +764,7 @@ def index_to_soup(self, url_or_raw, raw=False, as_tree=False, save_raw=None): return parse(_raw) return BeautifulSoup(_raw, 'lxml') - #提取正文 + #使用自动算法提取正文 def extract_readable_article(self, html, url): try: doc = readability.Document(html, positive_keywords=self.auto_cleanup_keep, url=url) @@ -780,7 +780,7 @@ def extract_readable_article(self, html, url): body_tag = soup.find('body') #如果readability解析失败,则启用备用算法(不够好,但有全天候适应能力) - if not body_tag or len(body_tag.contents) == 0: + if not body_tag or len(body_tag.get_text(strip=True)) < 100: soup = simple_extract(html) body_tag = soup.find('body') if not body_tag or len(body_tag.contents) == 0: #再次失败 @@ -2034,8 +2034,8 @@ def audiofy_html(self, soup, title, job_info): if not self.tts.get('audio_dir'): system_temp_dir = os.environ.get('KE_TEMP_DIR') self.tts['audio_dir'] = PersistentTemporaryDirectory(prefix='tts_', dir=system_temp_dir) - if not self.tts.get('audios'): - self.tts['audios'] = [] + if not self.tts.get('audio_files'): + self.tts['audio_files'] = [] audio_dir = self.tts['audio_dir'] ext = ret['mime'].split('/')[-1] ext = {'mpeg': 'mp3'}.get(ext, ext) @@ -2045,7 +2045,7 @@ def audiofy_html(self, soup, title, job_info): try: with open(filename, 'wb') as f: f.write(audio) - self.tts['audios'].append(filename) + self.tts['audio_files'].append(filename) except Exception as e: self.log.warning(f'Failed to write "{filename}": {e}') else: @@ -2221,8 +2221,17 @@ def preprocess_raw_html(self, raw_html, url): for rules in self.content_extract_rules: newBody.extend(self.get_tags_from_rules(soup, rules)) - oldBody.replace_with(newBody) - return str(soup) + #提取失败,尝试自动提取 + if len(newBody.get_text(strip=True)) < 100: + self.log.warning(f'Failed to extract content using content_extract_rules, try readability algorithm: {url}') + try: + raw_html = self.extract_readable_article(raw_html, url) + except: + self.log.warning(f'Failed to auto cleanup URL: {url}') + return raw_html + else: + oldBody.replace_with(newBody) + return str(soup) #根据一个规则列表,从soup中获取符合条件的tag列表 #rules: 字符串列表或字典列表 diff --git a/application/lib/pymp3cat.py b/application/lib/pymp3cat.py new file mode 100644 index 00000000..b1ac51b0 --- /dev/null +++ b/application/lib/pymp3cat.py @@ -0,0 +1,353 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +#合并mp3文件 +#来源:https://github.com/dmulholl/mp3cat +#将go语言转换为python,方便类似GAE这样不能执行二进制文件的平台合并mp3 +import os, io, struct + +#版本 +MPEGVersion2_5 = 0 +MPEGVersionReserved = 1 +MPEGVersion2 = 2 +MPEGVersion1 = 3 +#层 +MPEGLayerReserved = 0 +MPEGLayerIII = 1 +MPEGLayerII = 2 +MPEGLayerI = 3 +#声道模式 +Stereo = 0 +JointStereo = 1 +DualChannel = 2 +Mono = 3 + +#位率对应表 +v1_br = { + 3: (0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448), #layer1 + 2: (0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384), #layer2 + 1: (0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320), #layer3 +} + +v2_br = { + 3: (0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256), + 2: (0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160), + 1: (0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160), +} + +#采样率对应表[ver][layer] +samplingTable = ((11025, 12000, 8000), (0, 0, 0), (22050, 24000, 16000), (44100, 48000, 32000)) + +#每帧的采样数对应表[ver][layer] +sampleCountTable = ((0, 576, 1152, 384), (0, 0, 0, 0), (0, 576, 1152, 384), (0, 1152, 1152, 384)) + +#通道信息SideInfo大小,相对于帧头偏移 +def GetSideInfoSize(frame): + size = 0 + if frame['layer'] == MPEGLayerIII: + if frame['mpegVer'] == MPEGVersion1: + size = 21 if frame['channelMode'] == Mono else 36 + else: + size = 13 if frame['channelMode'] == Mono else 21 + return size + +#判断一个音乐帧是否是VBR帧头 +def IsVBRHeader(frame): + infoSize = GetSideInfoSize(frame) + if frame['len'] < 4 + infoSize: + return False + + flag = frame['raw'][infoSize:infoSize+4] + if (flag == b'Xing') or (flag == b'Info'): + return True + #再判断是否是VBRI头,固定偏移36字节 + elif (frame['len'] > 4 + 36) and (frame['raw'][36:36+4] == b'VBRI'): + return True + + return False + +#获取流对象里面的下一个对象帧,可能为TAG/ID3/FRAME +#stream: 流对象 +def NextObject(stream): + while True: + start = stream.tell() + header1 = stream.read(4) + if len(header1) != 4: + return None + + #ID3v1标识: 'TAG',包括标签头在内,一共128字节 + if header1[0:3] == b'TAG': + stream.seek(start + 128) + return {'type': 'TAG', 'start': start, 'end': stream.tell(), 'len': 128} + elif header1[0:3] == b'ID3': + #ID3V2头一共10个字节 + #char Header[3]; #ID3 + #char Ver; #版本号ID3V2.3 就记录3 + #char Revision; #副版本号此版本记录为0 + #char Flag; #标志字节,只使用高三位,其它位为0 + #char Size[4]; #标签大小,不包含标签头的10个字节 + #标签大小共四个字节,每个字节只使用低7位,最高位恒为0,计算时将最高位去掉,得到28bit的数据 + header2 = stream.read(6) + if len(header2) != 6: + return None + length = ((header2[2] & 0x7f) * 0x200000 + (header2[3] & 0x7f) * 0x400 + +(header2[4] & 0x7f) * 0x80 + (header2[5] & 0x7f)) + + stream.seek(start) + frame = {'type': 'ID3', 'len': length} + frame['start'] = start + frame['raw'] = stream.read(length + 10) #长度不包含头部10个字节 + frame['end'] = stream.tell() + return frame + elif (header1[0] == 0xff) and ((header1[1] & 0xe0) == 0xe0): #11比特的1,一个音乐数据帧开始 + frame = ParseMusicHeader(header1) + if frame: + stream.seek(start) + frame['start'] = start + frame['raw'] = stream.read(frame['len']) #帧长度包含头部4个字节 + frame['end'] = stream.tell() + return frame + + #出错,往后跳一个字节再重新尝试 + stream.seek(start + 1) + +#只返回有效音乐帧的生成器 +def IterFrame(stream): + while True: + obj = NextObject(stream) + type_ = obj['type'] if obj else '' + if type_ == 'FRAME': + yield obj + elif type_ not in ('TAG', 'ID3'): + return + +#返回输入流的ID3V2标签帧 +def FindID3v2Tag(stream): + while True: + obj = NextObject(stream) + type_ = obj['type'] if obj else '' + if type_ == 'ID3': + return obj + elif type_ not in ('TAG', 'FRAME'): + return None + +#校验音乐数据帧头是否合法,header一共四个字节 +#校验正确返回帧头字典,否则返回None +# typedef FrameHeader +# { +# unsigned int sync:11; //同步信息 +# unsigned int version:2; //版本 +# unsigned int layer: 2; //层 +# unsigned int error protection:1; //CRC校验 +# unsigned int bitrate_index:4; //位率 +# unsigned int sampling_frequency:2; //采样频率 +# unsigned int padding:1; //帧长调节 +# unsigned int private:1; //保留字 +# unsigned int mode:2; //声道模式 +# unsigned int mode extension:2; //扩充模式 +# unsigned int copyright:1; //版权 +# unsigned int original:1; //原版标志 +# unsigned int emphasis:2; //强调模式 +# }HEADER, *LPHEADER; +def ParseMusicHeader(header): + mpegVer = (header[1] & 0x18) >> 3 #2位,0-MPEG2.5,1-未使用,2-MPEG2,3-MPEG1 + if mpegVer == MPEGVersionReserved: + return None + layer = (header[1] & 0x06) >> 1 #2位,层, 0-未使用,1-Layer3, 2-Layer2, 3-Layer3 + if layer == 0: + return None + crcProt = (header[1] & 0x01) == 0x00 #是否有CRC校验,0-校验 + bitRateIndex = (header[2] & 0xf0) >> 4 #位率索引,共4位 + if bitRateIndex == 0 or bitRateIndex == 15: + return None + + #查表得出位率 + if mpegVer == MPEGVersion1: + bitRate = v1_br.get(layer)[bitRateIndex] * 1000 + else: + bitRate = v2_br.get(layer)[bitRateIndex] * 1000 + + samplingRateIndex = (header[2] & 0x0c) >> 2 #采样率索引,2位 + if samplingRateIndex == 3: + return None + + #查表得出采样率 + samplingRate = samplingTable[mpegVer][samplingRateIndex] + + paddingBit = (header[2] & 0x02) == 0x02 #帧长调节 (1 bit) + privateBit = (header[2] & 0x01) == 0x01 #保留字 (1 bit) + channelMode = (header[3] & 0xc0) >> 6 #声道模式 (2 bits) + modeExtension = (header[3] & 0x30) >> 4 #扩充模式,仅用于 Joint Stereo mode. (2 bits) + if (channelMode != JointStereo) and (modeExtension != 0): + return None + + copyrightBit = (header[3] & 0x08) == 0x08 #版权 (1 bit) + originalBit = (header[3] & 0x04) == 0x04 #原版标志 (1 bit) + emphasis = (header[3] & 0x03) #强调标识 (2 bits) + if emphasis == 2: + return None + + #帧大小即每帧的采样数,表示一帧数据中采样的个数 + sampleCount = sampleCountTable[mpegVer][layer] + + #Layer1帧长调节为4字节,其他两层为1字节 + padding = (4 if (layer == MPEGLayerI) else 1) if paddingBit else 0 + + #计算帧长度,下面这段注释是go-lang版本mp3cat的作者原话 + # Calculate the frame length in bytes. There's a lot of confusion online + # about how to do this and definitive documentation is hard to find as + # the official MP3 specification is not publicly available. The + # basic formula seems to boil down to: + # + # bytes_per_sample = (bit_rate / sampling_rate) / 8 + # frame_length = sample_count * bytes_per_sample + padding + # + # In practice we need to rearrange this formula to avoid rounding errors. + # + # I can't find any definitive statement on whether this length is + # supposed to include the 4-byte header and the optional 2-byte CRC. + # Experimentation on mp3 files captured from the wild indicates that it + # includes the header at least. + frameLength = int((sampleCount / 8) * bitRate / samplingRate + padding) + return {'type': 'FRAME', 'len': frameLength, 'bitRate': bitRate, 'samplingRate': samplingRate, + 'sampleCount': sampleCount, 'mpegVer': mpegVer, 'layer': layer, 'channelMode': channelMode} + +#创建一个新的VBR帧 +def NewXingHeader(totalFrames, totalBytes): + data = bytearray(209) + data[0] = 0xFF #前面几个数值是合法的,但是是随便从一个mp3文件里面提取的 + data[1] = 0xFB + data[2] = 0x52 + data[3] = 0xC0 + + frame = ParseMusicHeader(data) + offset = GetSideInfoSize(frame) + data[offset : offset + 4] = b'Xing' + data[offset + 7] = 3 #只是总帧数和总字节数有效 + + # 将 totalFrames 和 totalBytes 以32位大端字节顺序写入 + struct.pack_into('>I', data, offset + 8, totalFrames) + struct.pack_into('>I', data, offset + 12, totalBytes) + return bytes(data) + +#在MP3前面添加一个VBR头 +#output: 要添加的流对象或文件名 +#totalFrames/totalBytes: 总帧数和总字节数 +def AddXingHeader(output, totalFrames, totalBytes): + xingHeader = NewXingHeader(totalFrames, totalBytes) + if isinstance(output, str): + tempFile = output + '.mp3cat.tmp' + with open(output, "rb") as old, open(tempFile, "wb") as new: + new.write(xingHeader) + new.write(old.read()) + try: + os.remove(output) + os.rename(tempFile, output) + except Exception as e: + print(f'Error: {e}') + else: + tempStream = io.BytesIO(output.getvalue()) + output.seek(0) + output.write(xingHeader) + output.write(tempStream.getvalue()) + +#从input_里面将ID3V2标签拷贝到目标文件 +def AddID3v2Tag(output, input_): + tag = FindID3v2Tag(input_) + if not tag: + return + + if isinstance(output, str): + tempFile = output + '.mp3cat.tmp' + with open(output, "rb") as old, open(tempFile, "wb") as new: + new.write(tag['raw']) + new.write(old.read()) + try: + os.remove(output) + os.rename(tempFile, output) + except Exception as e: + print(f'Error: {e}') + else: + tempStream = io.BytesIO(output.getvalue()) + output.seek(0) + output.write(tag['raw']) + output.write(tempStream.getvalue()) + +#合并mp3文件 +#output: 输出文件名或流对象 +#inputs: 输入文件名列表或二进制内容类别 +#tagIndex: 是否需要将第n个文件的ID3拷贝过来 +#force: 是否覆盖目标文件 +#quiet: 是否打印过程 +def merge(output: str, inputs: list, tagIndex: int=None, force: bool=True, quiet: bool=False): + if not force and isinstance(output, str) and os.path.exists(output): + print(f"Error: the file '{output}' already exists.") + return + if inputs and isinstance(inputs[0], str) and output in inputs: + print(f'Error: the list of input files includes the output file.') + return + + printInfo = (lambda x: x) if quiet else (lambda x: print(x)) + + outputStream = open(output, 'wb') if isinstance(output, str) else output + + totalFrames = 0 + totalBytes = 0 + totalFiles = 0 + firstBitRate = 0 + isVBR = False + for idx, input_ in enumerate(inputs): + needClose = False + if isinstance(input_, str): + printInfo(f' + {input_}') + input_ = open(input_, 'rb') + needClose = True + else: + printInfo(f' + ') + + isFirstFrame = True + for frame in IterFrame(input_): + if isFirstFrame: #第一个帧如果是VBR,不包含音乐数据 + isFirstFrame = False + if IsVBRHeader(frame): + continue + + if firstBitRate == 0: + firstBitRate = frame['bitRate'] + elif frame['bitRate'] != firstBitRate: + isVBR = True + + outputStream.write(frame['raw']) + totalFrames += 1 + totalBytes += frame['len'] + totalFiles += 1 + if needClose: + input_.close() + + if isinstance(output, str): + outputStream.close() + + #如果不同的文件的比特率不同,则在前面添加一个VBR头 + if isVBR: + printInfo("• Multiple bitrates detected. Adding VBR header.") + AddXingHeader(output, totalFrames, totalBytes) + if isinstance(output, str): + try: + tempStream.close() + os.remove(output + '.mp3cat.tmp') + except: + pass + + if tagIndex is not None and tagIndex < len(inputs): + input_ = inputs[tagIndex] + needClose = False + if isinstance(input_, str): + printInfo(f"• Copying ID3 tag from: {input_}") + input_ = open(input_, 'rb') + needClose = True + else: + printInfo(f'• Copying ID3 tag from: ') + AddID3v2Tag(output, input_) + if needClose: + input_.close() + + printInfo(f"• {totalFiles} files merged.") diff --git a/application/static/base.css b/application/static/base.css index a8eb5259..3944fb9d 100644 --- a/application/static/base.css +++ b/application/static/base.css @@ -303,6 +303,10 @@ button { color: red; font-weight: normal; font-style: italic; + margin-left: 10px; + padding: 4px; + border: dotted 1px #C785C8; + border-radius: 10px; } .box-list .book .summaryRow { diff --git a/application/static/base.js b/application/static/base.js index c1de2438..513738e8 100644 --- a/application/static/base.js +++ b/application/static/base.js @@ -147,7 +147,7 @@ function AppendRecipeToLibrary(div, id) { var row_str = ['
']; row_str.push(title); if (id.startsWith("upload:")) { - row_str.push(' {0}'.format(i18n.abbrUpl)); + row_str.push('{0}'.format(i18n.abbrUpl)); } row_str.push('
'); row_str.push(recipe.description); @@ -217,7 +217,13 @@ function PopulateMyCustomRss() { var row_str = ['
']; row_str.push(title); if (isfulltext) { - row_str.push(' {0}'.format(i18n.abbrEmb)); + row_str.push('{0}'.format(i18n.abbrEmb)); + } + if (rss.tr_enable) { + row_str.push('{0}'.format(i18n.abbrTr)); + } + if (rss.tts_enable) { + row_str.push('{0}'.format(i18n.abbrTts)); } row_str.push('
'.format(url)); if (url.length > 100) { @@ -254,18 +260,23 @@ function PopulateMySubscribed() { var title = recipe.title; var desc = recipe.description; var need_subs = recipe.needs_subscription; - var separated = recipe.separated; var recipe_id = recipe.recipe_id; var row_str = ['
']; row_str.push(title); if (recipe_id.startsWith("upload:")) { - row_str.push(' {0}'.format(i18n.abbrUpl)); + row_str.push('{0}'.format(i18n.abbrUpl)); } - if (separated) { - row_str.push(' {0}'.format(i18n.abbrSep)); + if (recipe.separated) { + row_str.push('{0}'.format(i18n.abbrSep)); } if (need_subs) { - row_str.push(' {0}'.format(i18n.abbrLog)); + row_str.push('{0}'.format(i18n.abbrLog)); + } + if (recipe.tr_enable) { + row_str.push('{0}'.format(i18n.abbrTr)); + } + if (recipe.tts_enable) { + row_str.push('{0}'.format(i18n.abbrTts)); } row_str.push('
'); if (desc.length > 100) { diff --git a/application/templates/base.html b/application/templates/base.html index b27279f3..e7b2df3a 100644 --- a/application/templates/base.html +++ b/application/templates/base.html @@ -95,6 +95,8 @@ abbrSep: '{{_("Sep")|safe}}', abbrLog: '{{_("Log")|safe}}', abbrEmb: '{{_("Emb")|safe}}', + abbrTr: '{{_("Tr")|safe}}', + abbrTts: '{{_("Tts")|safe}}', testEmailOk: '{{_("The test email has been successfully sent to the following addresses. Please check your inbox or spam folder to confirm its delivery. Depending on your email server, there may be a slight delay.")|safe}}', translating: '{{_("Translating...")|safe}}', configOk: '{{_("The configuration validation is correct.")|safe}}', diff --git a/application/view/setting.py b/application/view/setting.py index 29448d99..b5c3b169 100644 --- a/application/view/setting.py +++ b/application/view/setting.py @@ -150,6 +150,7 @@ def DisplayEnv(): strEnv = [] for d in os.environ: strEnv.append("

" + str(d).rjust(28) + " | " + str(os.environ[d]) + "

") + strEnv.append("

" + 'appDir'.rjust(28) + " | " + appDir + "

") return ''.join(strEnv) #设置国际化语种 diff --git a/application/view/subscribe.py b/application/view/subscribe.py index ece0120e..d68d695b 100644 --- a/application/view/subscribe.py +++ b/application/view/subscribe.py @@ -24,23 +24,32 @@ def MySubscription(tips=None): share_key = user.share_links.get('key', '') title_to_add = request.args.get('title_to_add') #from Bookmarklet url_to_add = request.args.get('url_to_add') - my_custom_rss = [item.to_dict(only=[Recipe.id, Recipe.title, Recipe.url, Recipe.isfulltext]) - for item in user.all_custom_rss()] - my_uploaded_recipes = [item.to_dict(only=[Recipe.id, Recipe.title, Recipe.description, Recipe.needs_subscription, Recipe.language]) - for item in user.all_uploaded_recipe()] - #使用不同的id前缀区分不同的rss类型 + my_custom_rss = [item.to_dict(only=[Recipe.id, Recipe.title, Recipe.url, Recipe.isfulltext, + Recipe.translator, Recipe.tts]) for item in user.all_custom_rss()] + + my_uploaded_recipes = [item.to_dict(only=[Recipe.id, Recipe.title, Recipe.description, Recipe.needs_subscription, + Recipe.language, Recipe.translator, Recipe.tts]) for item in user.all_uploaded_recipe()] + + my_booked_recipes = [item.to_dict(exclude=[BookedRecipe.encrypted_pwd]) + for item in user.get_booked_recipe() if not item.recipe_id.startswith('custom:')] + + #使用不同的id前缀区分不同的rss类型,同时对其他数据进行适当处理 for item in my_custom_rss: item['id'] = 'custom:{}'.format(item['id']) + item['tr_enable'] = item['translator'].get('enable') + item['tts_enable'] = item['tts'].get('enable') for item in my_uploaded_recipes: item['id'] = 'upload:{}'.format(item['id']) item['language'] = item['language'].lower().replace('-', '_').split('_')[0] - - my_booked_recipes = json.dumps([item.to_dict(exclude=[BookedRecipe.encrypted_pwd]) - for item in user.get_booked_recipe() if not item.recipe_id.startswith('custom:')], - separators=(',', ':')) - - my_custom_rss = json.dumps(my_custom_rss) - my_uploaded_recipes=json.dumps(my_uploaded_recipes) + item['tr_enable'] = item['translator'].get('enable') + item['tts_enable'] = item['tts'].get('enable') + for item in my_booked_recipes: + item['tr_enable'] = item['translator'].get('enable') + item['tts_enable'] = item['tts'].get('enable') + + my_custom_rss = json.dumps(my_custom_rss, separators=(',', ':')) + my_uploaded_recipes=json.dumps(my_uploaded_recipes, separators=(',', ':')) + my_booked_recipes = json.dumps(my_booked_recipes, separators=(',', ':')) subscribe_url = urljoin(app.config['APP_DOMAIN'], url_for("bpSubscribe.MySubscription")) url2book_url = urljoin(app.config['APP_DOMAIN'], url_for("bpUrl2Book.Url2BookRoute")) return render_template("my.html", tab="my", **locals()) diff --git a/application/work/worker.py b/application/work/worker.py index b92192a9..943d037c 100644 --- a/application/work/worker.py +++ b/application/work/worker.py @@ -170,7 +170,7 @@ def mp3cat_path(): subprocess.run([mp3Cat, "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, shell=True) default_log.debug('Using app mp3cat') except Exception as e: - default_log.warning(f"Can't execute mp3cat. Please check file exists and permissions: {e}") + #default_log.warning(f"Cannot execute mp3cat. Please check file exists and permissions: {e}") mp3Cat = '' return mp3Cat @@ -182,8 +182,10 @@ def MergeAudioSegment(roList): return ret mp3Cat = mp3cat_path() + pymp3cat = None if not mp3Cat: - return ret + import pymp3cat + default_log.info('Using python version mp3cat') import shutil, subprocess from calibre.ptempfile import PersistentTemporaryDirectory @@ -192,13 +194,23 @@ def MergeAudioSegment(roList): chapters = [] #先合并每个recipe生成的片段 for idx, ro in enumerate(roList): - mp3Files = ' '.join([mp3 for mp3 in (ro.tts.get('audios') or [])]) + mp3Files = [mp3 for mp3 in (ro.tts.get('audio_files') or [])] if not mp3Files: continue outputFile = os.path.join(tempDir, f'output_{idx:04d}.mp3') - runRet = subprocess.run(f'{mp3Cat} {mp3Files} -o {outputFile}', shell=True) - if (runRet.returncode == 0) and os.path.exists(outputFile): - chapters.append(outputFile) + if mp3Cat: + mp3Files = ' '.join(mp3Files) + runRet = subprocess.run(f'{mp3Cat} {mp3Files} -f -q -o {outputFile}', shell=True) + if (runRet.returncode == 0) and os.path.exists(outputFile): + chapters.append(outputFile) + else: + try: + pymp3cat.merge(outputFile, mp3Files, quiet=True) + if os.path.exists(outputFile): + chapters.append(outputFile) + except Exception as e: + default_log.warning('Failed to merge mp3 by pymp3cat: {e}') + #再将所有recipe的音频合并为一个大的文件 if len(chapters) == 1: @@ -209,16 +221,28 @@ def MergeAudioSegment(roList): except Exception as e: default_log.warning(f'Failed to read "{chapters[0]}"') elif chapters: - mp3Files = ' '.join(chapters) outputFile = os.path.join(tempDir, 'final.mp3') - runRet = subprocess.run(f'{mp3Cat} {mp3Files} -o {outputFile}', shell=True) - if (runRet.returncode == 0) and os.path.exists(outputFile): + info = '' + if mp3Cat: + mp3Files = ' '.join(chapters) + runRet = subprocess.run(f'{mp3Cat} {mp3Files} -f -q -o {outputFile}', shell=True) + if runRet.returncode != 0: + info = f'mp3cat return code : {runRet.returncode}' + else: + try: + pymp3cat.merge(outputFile, chapters, quiet=True) + except Exception as e: + info = 'Failed merge mp3 by pymp3cat: {e}' + + if not info and os.path.exists(outputFile): try: with open(outputFile, 'rb') as f: data = f.read() - runRet = ('mp3', data) + ret = ('mp3', data) except Exception as e: - default_log.warning(f'Failed to read "{outputFile}"') + default_log.warning(f'Failed to read "{outputFile}": {e}') + else: + default_log.warning(info if info else 'Failed merge mp3') #清理临时文件 for dir_ in [*audioDirs, tempDir]: diff --git a/tools/update_req.py b/tools/update_req.py index ee608c32..34fdc05a 100644 --- a/tools/update_req.py +++ b/tools/update_req.py @@ -5,9 +5,13 @@ ~=2.31.0 : >=2.31.0,==2.31.* >=0.2.3,<1.0.0 """ -import re, os, sys, shutil, subprocess +import re, os, sys, shutil, subprocess, secrets from itertools import chain +def new_secret_key(length=12): + allchars = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz' + return ''.join([secrets.choice(allchars) for i in range(length)]) + REQ_COMM = [('requests', '~=2.31.0'), ('chardet', '~=5.2.0'), ('pillow', '~=10.3.0'), @@ -49,7 +53,7 @@ } REQ_PLAT = {'gae': [('appengine-python-standard', '~=1.1.6'), - ('#google-cloud-texttospeech', '~=2.16.3')], + ('google-cloud-texttospeech', '~=2.16.3')], 'docker': [('weedata', '>=0.2.5,<1.0.0'),('pymysql', '~=1.1.0'), #docker install all libs ('psycopg2-binary', '~=2.9.9'),('pymongo', '~=4.6.3'),('redis', '~=5.0.3'), ('celery', '~=5.3.6'),('flask-rq2', '~=18.3'),('sqlalchemy', '~=2.0.29')], @@ -98,7 +102,7 @@ def dockerize_config_py(cfgFile, arg): default_cfg = {'APP_ID': 'kindleear', 'DATABASE_URL': 'sqlite:////data/kindleear.db', 'TASK_QUEUE_SERVICE': 'apscheduler', 'TASK_QUEUE_BROKER_URL': 'memory', 'KE_TEMP_DIR': '/tmp', 'DOWNLOAD_THREAD_NUM': '3', 'ALLOW_SIGNUP': 'no', - 'HIDE_MAIL_TO_LOCAL': 'yes', 'LOG_LEVEL': 'warning'} + 'HIDE_MAIL_TO_LOCAL': 'yes', 'LOG_LEVEL': 'warning', 'SECRET_KEY': new_secret_key} ret = [] inDocComment = False pattern = r"^([_A-Z]+)\s*=\s*(.+)$" @@ -118,6 +122,7 @@ def dockerize_config_py(cfgFile, arg): name = match.group(1) if match else None value = default_cfg.get(name, None) if name is not None and value is not None: + value = value() if callable(value) else value ret.append(f'{name} = "{value}"') else: ret.append(line) @@ -156,7 +161,7 @@ def gaeify_config_py(cfgFile): default_cfg = {'APP_ID': appId, 'APP_DOMAIN': domain, 'SERVER_LOCATION': gae_location(), 'DATABASE_URL': 'datastore', 'TASK_QUEUE_SERVICE': 'gae', 'TASK_QUEUE_BROKER_URL': '', 'KE_TEMP_DIR': '', 'DOWNLOAD_THREAD_NUM': '3', 'ALLOW_SIGNUP': 'no', - 'HIDE_MAIL_TO_LOCAL': 'yes', 'LOG_LEVEL': 'warning'} + 'HIDE_MAIL_TO_LOCAL': 'yes', 'LOG_LEVEL': 'warning', 'SECRET_KEY': new_secret_key} ret = [] inDocComment = False pattern = r"^([_A-Z]+)\s*=\s*(.+)$" @@ -176,6 +181,7 @@ def gaeify_config_py(cfgFile): name = match.group(1) if match else None value = default_cfg.get(name, None) if name is not None and value is not None: + value = value() if callable(value) else value ret.append(f'{name} = "{value}"') else: ret.append(line)