Skip to content
This repository has been archived by the owner on Nov 8, 2023. It is now read-only.

Commit

Permalink
Merge pull request #22 from i3thuan5/留分詞起來合成
Browse files Browse the repository at this point in the history
分詞留起來合成
  • Loading branch information
sih4sing5hong5 committed Nov 16, 2018
2 parents f0b12a8 + 0c5085e commit aa61991
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 45 deletions.
6 changes: 4 additions & 2 deletions siunn1ua2ah4/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,15 @@

KIUNN1_KHAU2_TSHAM1_SOO3 = {
'臺語': {
'服務腔口': '閩南語',
'標音欄位': '臺羅閏號調',
'資料來源':'多元書寫',
'服務腔口': '台語',
'標音欄位': '臺羅',
'拼音': 臺灣閩南語羅馬字拼音,
},
}
for 客話 in ['四縣腔', '海陸腔', '大埔腔', '饒平腔', '詔安腔', ]:
KIUNN1_KHAU2_TSHAM1_SOO3[客話.rstrip('腔')] = {
'資料來源': '綜合標音',
'服務腔口': 客話,
'標音欄位': '臺灣客話',
'拼音': 臺灣客家話拼音,
Expand Down
17 changes: 10 additions & 7 deletions 試驗/test收著資料試驗.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,16 @@ def setUp(self):
self.目錄 = join(dirname(abspath(__file__)), '..', '圖')
self. = join(self.目錄, 'ti1a2.jpg')
self. = join(self.目錄, 'huan1ing5.wav')
self. = join(self.目錄, 'huan1ing5.srt')
self.字幕 = join(self.目錄, 'huan1ing5.srt')
self.分詞 = '逐-家|tak8-ke1 做-伙|tso3-hue2 來|lai5- 𨑨-迌|tshit4-tho5 !'

def test_無圖用預設圖(self, 敆做伙mock):
with TemporaryDirectory() as 目錄:
檔案的所在 = join(目錄, 'a.mp4')
做影片.收著資料('閩南語', [], [self.], [self.], 檔案的所在)
做影片.收著資料('閩南語', [], [self.], [self.字幕], [self.分詞], 檔案的所在)
敆做伙mock.assert_called_once_with(
[I7SIAT4_TOO5], [self.], [self.], 檔案的所在)
[I7SIAT4_TOO5], [self.], [self.字幕], 檔案的所在
)

def test_圖無夠愛循環用(self, 敆做伙mock):
with TemporaryDirectory() as 目錄:
Expand All @@ -32,13 +34,14 @@ def test_圖無夠愛循環用(self, 敆做伙mock):
'閩南語',
['a.jpg', 'b.jpg'],
[self., self., self.],
[self., self., self.],
[self.字幕, self.字幕, self.字幕],
[self.分詞, self.分詞, self.分詞],
檔案的所在
)
敆做伙mock.assert_called_once_with(
['a.jpg', 'b.jpg', 'a.jpg'],
[self., self., self.],
[self., self., self.],
[self.字幕, self.字幕, self.字幕],
檔案的所在
)

Expand All @@ -47,7 +50,7 @@ def test_無音用合成音(self, 揣聲音mock, 敆做伙mock):
揣聲音mock.return_value = self.
with TemporaryDirectory() as 目錄:
檔案的所在 = join(目錄, 'a.mp4')
做影片.收著資料('閩南語', [self.], [], [self.], 檔案的所在)
做影片.收著資料('閩南語', [self.], [], [self.字幕], [self.分詞], 檔案的所在)
敆做伙mock.assert_called_once_with(
[self.], [self.], [self.], 檔案的所在
[self.], [self.], [self.字幕], 檔案的所在
)
16 changes: 10 additions & 6 deletions 試驗/test敆檔案試驗.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,34 @@ def setUp(self):
self.目錄 = join(dirname(abspath(__file__)), '..', '圖')
self. = join(self.目錄, 'ti1a2.jpg')
self. = join(self.目錄, 'huan1ing5.wav')
self. = join(self.目錄, 'huan1ing5.srt')
self.字幕 = join(self.目錄, 'huan1ing5.srt')

def test_一組(self):
with TemporaryDirectory() as 目錄:
檔案的所在 = join(目錄, 'a.mp4')
做影片.敆做伙([self.], [self.], [self.], 檔案的所在)
做影片.敆做伙([self.], [self.], [self.字幕], 檔案的所在)
self.assertTrue(isfile(檔案的所在))

def test_兩組(self):
with TemporaryDirectory() as 目錄:
檔案的所在 = join(目錄, 'b.avi')
做影片.敆做伙([self., self.], [self., self.],
[self., self.], 檔案的所在)
做影片.敆做伙(
[self., self.],
[self., self.],
[self.字幕, self.字幕],
檔案的所在
)
self.assertTrue(isfile(檔案的所在))

def test_兩組的檔案佮一組無仝(self):
with TemporaryDirectory() as 目錄:
一組檔案的所在 = join(目錄, 'c.avi')
做影片.敆做伙([self.], [self.], [self.], 一組檔案的所在)
做影片.敆做伙([self.], [self.], [self.字幕], 一組檔案的所在)
兩組檔案的所在 = join(目錄, 'd.avi')
做影片.敆做伙(
[self., self.],
[self., self.],
[self., self.],
[self.字幕, self.字幕],
兩組檔案的所在
)
with open(一組檔案的所在, 'rb') as 一組檔案:
Expand Down
54 changes: 24 additions & 30 deletions 鬥做伙/做影片.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
from siunn1ua2ah4.settings import I7SIAT4_TOO5


from 臺灣言語工具.解析整理.拆文分析器 import 拆文分析器
from 臺灣言語工具.解析整理.文章粗胚 import 文章粗胚

ssl.match_hostname = lambda cert, hostname: True


Expand All @@ -20,8 +17,8 @@ class 做影片(程式腳本):
@classmethod
def 使用者提供的資料(cls, 腔口參數, 圖陣列, 聲陣列, 文字陣列, 影片存檔所在, 縮圖存檔所在):
with TemporaryDirectory() as 目錄:
字陣列 = cls.轉文本資料(腔口參數, 文字陣列, 目錄)
cls.收著資料(腔口參數, 圖陣列, 聲陣列, 字陣列, 影片存檔所在)
字幕檔陣列, 分詞陣列 = cls.轉文本資料(腔口參數, 文字陣列, 目錄)
cls.收著資料(腔口參數, 圖陣列, 聲陣列, 字幕檔陣列, 分詞陣列, 影片存檔所在)
cls._走指令([
'avconv',
'-i', 影片存檔所在,
Expand All @@ -33,29 +30,30 @@ def 使用者提供的資料(cls, 腔口參數, 圖陣列, 聲陣列, 文字陣
])

@classmethod
def 收著資料(cls, 腔口參數, 圖陣列, 聲陣列, 字陣列, 存檔所在):
def 收著資料(cls, 腔口參數, 圖陣列, 聲陣列, 字幕檔陣列, 分詞陣列, 存檔所在):
with TemporaryDirectory() as 目錄:
新聲陣列 = []
for 第幾个, (聲, 字) in enumerate(zip_longest(聲陣列, 字陣列)):
for 第幾个, (聲, 分詞) in enumerate(zip_longest(聲陣列, 分詞陣列)):
if 聲 is not None:
新聲陣列.append(聲)
else:
新聲陣列.append(
cls.揣聲音(腔口參數, 字, join(目錄, '{}.wav'.format(第幾个))))
cls.揣聲音(腔口參數, 分詞, join(目錄, '{}.wav'.format(第幾个)))
)
if len(圖陣列) == 0:
新圖陣列 = [I7SIAT4_TOO5] * len(字陣列)
新圖陣列 = [I7SIAT4_TOO5] * len(字幕檔陣列)
else:
新圖陣列 = []
while len(新圖陣列) < len(字陣列):
while len(新圖陣列) < len(字幕檔陣列):
新圖陣列.append(圖陣列[len(新圖陣列) % len(圖陣列)])
cls.敆做伙(新圖陣列, 新聲陣列, 字陣列, 存檔所在)
cls.敆做伙(新圖陣列, 新聲陣列, 字幕檔陣列, 存檔所在)

@classmethod
def 敆做伙(cls, 圖陣列, 聲陣列, 字陣列, 存檔所在):
def 敆做伙(cls, 圖陣列, 聲陣列, 字幕檔陣列, 存檔所在):
with TemporaryDirectory() as 目錄:
全部結果表 = join(目錄, 'tuan.pio')
with open(全部結果表, 'w') as :
for 第幾个, (圖, 聲, 字) in enumerate(zip(圖陣列, 聲陣列, 字陣列)):
for 第幾个, (圖, 聲, 字幕檔) in enumerate(zip(圖陣列, 聲陣列, 字幕檔陣列)):
結果檔 = join(目錄, 'output{}.mp4'.format(第幾个))
暫時圖 = join(目錄, 'jpg{}.jpg'.format(第幾个))
cls._走指令([
Expand All @@ -65,7 +63,7 @@ def 敆做伙(cls, 圖陣列, 聲陣列, 字陣列, 存檔所在):
])
cls._走指令([
'avconv',
'-i', 暫時圖, '-i', 聲, '-vf', 'subtitles={}'.format(字),
'-i', 暫時圖, '-i', 聲, '-vf', 'subtitles={}'.format(字幕檔),
'-s', 'svga', '-y', 結果檔,
])
print("file '{}'".format(結果檔), file=)
Expand Down Expand Up @@ -107,9 +105,10 @@ def 轉文本資料(cls, 腔口參數, 文字陣列, 目錄):
print(r1.status, r1.reason)
print(漢羅)
raise RuntimeError()
字陣列 = []
字幕檔陣列 = []
分詞陣列 = []
for 第幾筆, 資料 in enumerate(
json.loads(r1.read().decode('utf-8'))['綜合標音']
json.loads(r1.read().decode('utf-8'))[腔口參數['資料來源']]
):
檔名 = join(目錄, '{}.srt'.format(第幾筆))
cls._陣列寫入檔案(
Expand All @@ -121,30 +120,25 @@ def 轉文本資料(cls, 腔口參數, 文字陣列, 目錄):
資料[腔口參數['標音欄位']].strip()
]
)
字陣列.append(檔名)
return 字陣列
字幕檔陣列.append(檔名)
分詞陣列.append(資料['分詞'])
return 字幕檔陣列, 分詞陣列

@classmethod
def 揣聲音(cls, 腔口參數, , 存檔的所在):
with open() as 檔案:
*_, 漢字, 臺羅 = 檔案.read().strip().split('\n')
with open(存檔的所在, 'wb') as 存檔:
存檔.write(cls.掠聲音(腔口參數, 漢字, 臺羅))
return 存檔的所在
def 揣聲音(cls, 腔口參數, 分詞, 存檔的所在):
with open(存檔的所在, 'wb') as 存檔:
存檔.write(cls.掠聲音(腔口參數, 分詞))
return 存檔的所在

@classmethod
def 掠聲音(cls, 腔口參數, 漢字, 臺羅):
句物件 = 拆文分析器.對齊句物件(
文章粗胚.數字英文中央全加分字符號(漢字),
文章粗胚.建立物件語句前處理減號(腔口參數['拼音'], 臺羅)
)
def 掠聲音(cls, 腔口參數, 分詞):
domain = "xn--lhrz38b.xn--v0qr21b.xn--kpry57d"
網址 = "/{}?{}={}&{}={}".format(
quote('語音合成'),
quote('查詢腔口'),
quote(腔口參數['服務腔口']),
quote('查詢語句'),
quote(句物件.看分詞()),
quote(分詞),
)

conn = http.client.HTTPSConnection(domain)
Expand Down

0 comments on commit aa61991

Please sign in to comment.