Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[advanced dreambooth lora training script][bug_fix] change token_abstraction type to str #6040

Merged
merged 10 commits into from
Dec 4, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -300,16 +300,18 @@ def parse_args(input_args=None):
)
parser.add_argument(
"--token_abstraction",
type=str,
default="TOK",
help="identifier specifying the instance(or instances) as used in instance_prompt, validation prompt, "
"captions - e.g. TOK",
"captions - e.g. TOK. To use multiple identifiers, please specify them in a comma seperated string - e.g. "
"'TOK,TOK2,TOK3' etc.",
)

parser.add_argument(
"--num_new_tokens_per_abstraction",
type=int,
default=2,
help="number of new tokens inserted to the tokenizers per token_abstraction value when "
help="number of new tokens inserted to the tokenizers per token_abstraction identifier when "
"--train_text_encoder_ti = True. By default, each --token_abstraction (e.g. TOK) is mapped to 2 new "
"tokens - <si><si+1> ",
)
Expand Down Expand Up @@ -660,17 +662,6 @@ def parse_args(input_args=None):
"inversion training check `--train_text_encoder_ti`"
)

if args.train_text_encoder_ti:
if isinstance(args.token_abstraction, str):
args.token_abstraction = [args.token_abstraction]
elif isinstance(args.token_abstraction, List):
args.token_abstraction = args.token_abstraction
else:
raise ValueError(
f"Unsupported type for --args.token_abstraction: {type(args.token_abstraction)}. "
f"Supported types are: str (for a single instance identifier) or List[str] (for multiple concepts)"
)

env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
if env_local_rank != -1 and env_local_rank != args.local_rank:
args.local_rank = env_local_rank
Expand Down Expand Up @@ -1155,9 +1146,14 @@ def main(args):
)

if args.train_text_encoder_ti:
# we parse the provided token identifier (or identifiers) into a list. s.t. - "TOK" -> ["TOK"], "TOK,
# TOK2" -> ["TOK", "TOK2"] etc.
token_abstraction_list = "".join(args.token_abstraction.split()).split(",")
logger.info(f"list of token identifiers: {token_abstraction_list}")

token_abstraction_dict = {}
token_idx = 0
for i, token in enumerate(args.token_abstraction):
for i, token in enumerate(token_abstraction_list):
token_abstraction_dict[token] = [
f"<s{token_idx + i + j}>" for j in range(args.num_new_tokens_per_abstraction)
]
Expand Down