/
arg_parser_fp.py
91 lines (82 loc) · 5.03 KB
/
arg_parser_fp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import argparse
# NOTE(review): module-level value that is not referenced by any code visible
# in this file; its purpose is unclear — confirm against importers before
# renaming or removing it.
cc = 0.81
def parse_fastpitch_args(parent, add_help=False):
    """Build an argument parser holding the FastPitch model options.

    The function does not parse anything itself: it creates a new
    ``argparse.ArgumentParser`` that inherits every argument of *parent*,
    registers the model hyper-parameter options in named groups, and
    returns the parser so the caller can invoke ``parse_args`` /
    ``parse_known_args`` on it.

    Args:
        parent: Parser whose arguments are inherited through ``parents=``.
            It should have been created with ``add_help=False`` if
            *add_help* is true here, otherwise argparse reports a
            conflicting ``-h/--help`` option.
        add_help: Whether the returned parser gets its own ``-h/--help``.

    Returns:
        The configured ``argparse.ArgumentParser``. Option abbreviations
        are disabled (``allow_abbrev=False``), so every flag has to be
        spelled out in full.

    NOTE(review): the trailing ``#1`` / ``#0`` markers on some options are
    kept from the original code; they presumably record earlier default
    values — TODO confirm.
    """
    parser = argparse.ArgumentParser(parents=[parent], add_help=add_help,
                                     allow_abbrev=False)

    io = parser.add_argument_group('io parameters')
    io.add_argument('--n-mel-channels', default=12, type=int,
                    help='Number of bins in mel-spectrograms')
    # The help text used to be empty, which left the option undocumented
    # in the --help output.
    io.add_argument('--max-seq-len', default=2048, type=int,
                    help='Maximum sequence length')

    symbols = parser.add_argument_group('symbols parameters')
    symbols.add_argument('--n-symbols', default=41, type=int,
                         help='Number of symbols in dictionary')
    symbols.add_argument('--padding-idx', default=40, type=int,
                         help='Index of padding symbol in dictionary')
    symbols.add_argument('--symbols-embedding-dim', default=296, type=int,
                         help='Input embedding dimension')

    in_fft = parser.add_argument_group('input FFT parameters')
    in_fft.add_argument('--in-fft-n-layers', default=6, type=int,
                        help='Number of FFT blocks')
    in_fft.add_argument('--in-fft-n-heads', default=3, type=int,  #1
                        help='Number of attention heads')
    in_fft.add_argument('--in-fft-d-head', default=128, type=int,
                        help='Dim of attention heads')
    in_fft.add_argument('--in-fft-conv1d-kernel-size', default=3, type=int,
                        help='Conv-1D kernel size')
    in_fft.add_argument('--in-fft-conv1d-filter-size', default=960, type=int,
                        help='Conv-1D filter size')
    in_fft.add_argument('--in-fft-output-size', default=296, type=int,
                        help='Output dim')
    in_fft.add_argument('--p-in-fft-dropout', default=0.05, type=float,  #1
                        help='Dropout probability')
    in_fft.add_argument('--p-in-fft-dropatt', default=0.05, type=float,  #1
                        help='Multi-head attention dropout')
    in_fft.add_argument('--p-in-fft-dropemb', default=0.05, type=float,  #0
                        help='Dropout added to word+positional embeddings')

    out_fft = parser.add_argument_group('output FFT parameters')
    out_fft.add_argument('--out-fft-n-layers', default=6, type=int,
                         help='Number of FFT blocks')
    out_fft.add_argument('--out-fft-n-heads', default=4, type=int,  #1
                         help='Number of attention heads')
    out_fft.add_argument('--out-fft-d-head', default=128, type=int,
                         help='Dim of attention head')
    out_fft.add_argument('--out-fft-conv1d-kernel-size', default=3, type=int,
                         help='Conv-1D kernel size')
    out_fft.add_argument('--out-fft-conv1d-filter-size', default=960, type=int,
                         help='Conv-1D filter size')
    out_fft.add_argument('--out-fft-output-size', default=296, type=int,
                         help='Output dim')
    out_fft.add_argument('--p-out-fft-dropout', default=0.05, type=float,
                         help='Dropout probability for out_fft')
    out_fft.add_argument('--p-out-fft-dropatt', default=0.05, type=float,
                         help='Multi-head attention dropout')
    out_fft.add_argument('--p-out-fft-dropemb', default=0.05, type=float,
                         help='Dropout added to word+positional embeddings')

    dur_pred = parser.add_argument_group('duration predictor parameters')
    dur_pred.add_argument('--dur-predictor-kernel-size', default=3, type=int,
                          help='Duration predictor conv-1D kernel size')
    dur_pred.add_argument('--dur-predictor-filter-size', default=64, type=int,
                          help='Duration predictor conv-1D filter size')
    dur_pred.add_argument('--p-dur-predictor-dropout', default=0.15,
                          type=float,
                          help='Dropout probability for duration predictor')
    dur_pred.add_argument('--dur-predictor-n-layers', default=2, type=int,
                          help='Number of conv-1D layers')

    pitch_pred = parser.add_argument_group('pitch predictor parameters')
    pitch_pred.add_argument('--pitch-predictor-kernel-size', default=3,
                            type=int,
                            help='Pitch predictor conv-1D kernel size')
    pitch_pred.add_argument('--pitch-predictor-filter-size', default=256,
                            type=int,
                            help='Pitch predictor conv-1D filter size')
    # Help text previously read "Pitch probability ..." (copy-paste slip);
    # the option is a dropout probability like its duration counterpart.
    pitch_pred.add_argument('--p-pitch-predictor-dropout', default=0.1,
                            type=float,
                            help='Dropout probability for pitch predictor')
    pitch_pred.add_argument('--pitch-predictor-n-layers', default=2, type=int,
                            help='Number of conv-1D layers')

    cond = parser.add_argument_group('conditioning parameters')
    cond.add_argument('--pitch-embedding-kernel-size', default=3, type=int,
                      help='Pitch embedding conv-1D kernel size')
    cond.add_argument('--speaker-emb-weight', type=float, default=1.0,
                      help='Scale speaker embedding')

    return parser