Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion comfy/bg_removal_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,14 @@ def encode_image(self, image):
comfy.model_management.load_model_gpu(self.patcher)
H, W = image.shape[1], image.shape[2]
pixel_values = comfy.clip_model.clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=False)
out = self.model(pixel_values=pixel_values)

if pixel_values.shape[0] > 1:
out = torch.cat([
self.model(pixel_values=pixel_values[i:i+1])
for i in range(pixel_values.shape[0])
], dim=0)
else:
out = self.model(pixel_values=pixel_values)
out = torch.nn.functional.interpolate(out, size=(H, W), mode="bicubic", antialias=False)

mask = out.sigmoid().to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
Expand Down
29 changes: 18 additions & 11 deletions comfy/model_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -1493,38 +1493,45 @@ def clean_hooks(self):
self.unpatch_hooks()
self.clear_cached_hook_weights()

def model_state_dict_for_saving(self, model=None, prefix=""):
    """Build a state dict for ``model`` suitable for saving.

    Entries that are not a ``weight``/``bias`` of a module exposing
    ``comfy_cast_weights`` (or whose module reports
    ``comfy_patched_weights == True``) are copied through unchanged.
    The remaining entries are replaced by ``LazyCastingParam`` /
    ``LazyCastingParamPiece`` wrappers.
    NOTE(review): the wrappers presumably defer producing the final
    tensor until it is read -- confirm against their definitions.

    Args:
        model: Module whose ``state_dict()`` is exported. Defaults to
            ``self.model``.
        prefix: String prepended to every key before the weight is
            looked up on ``self.model``. It therefore has to be the
            dotted attribute path (including the trailing ``"."``) from
            ``self.model`` to ``model``; leave it empty when ``model``
            is ``self.model`` itself.

    Returns:
        A new dict keyed like ``model.state_dict()`` (keys are NOT
        prefixed); values are either the original entries or lazy
        wrappers.
    """
    if model is None:
        model = self.model

    original_state_dict = model.state_dict()
    output_state_dict = {}
    keys = list(original_state_dict)
    while len(keys) > 0:
        k = keys.pop(0)
        v = original_state_dict[k]
        op_keys = k.rsplit('.', 1)
        # Only "<module path>.weight" / "<module path>.bias" entries are
        # candidates for wrapping; everything else is passed through.
        if (len(op_keys) < 2) or op_keys[1] not in ["weight", "bias"]:
            output_state_dict[k] = v
            continue
        try:
            op = comfy.utils.get_attr(model, op_keys[0])
        except Exception:
            # The owning module could not be resolved: keep the raw entry.
            # Narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            output_state_dict[k] = v
            continue
        if not op or not hasattr(op, "comfy_cast_weights") or \
                (hasattr(op, "comfy_patched_weights") and op.comfy_patched_weights == True):
            output_state_dict[k] = v
            continue
        # Weights are always resolved relative to self.model, hence the prefix.
        key = prefix + k
        weight = comfy.utils.get_attr(self.model, key)
        if isinstance(weight, QuantizedTensor) and k in original_state_dict:
            qt_state_dict = weight.state_dict(k)
            caster = LazyCastingQuantizedParam(self, key)
            # Every state-dict entry belonging to this quantized tensor is
            # served by the same caster, so drop those keys from the work
            # list and overwrite anything already emitted for them.
            for group_key in (x for x in qt_state_dict if x in original_state_dict):
                if group_key in keys:
                    keys.remove(group_key)
                output_state_dict.pop(group_key, "")
                output_state_dict[group_key] = LazyCastingParamPiece(caster, prefix + group_key, original_state_dict[group_key])
            continue
        output_state_dict[k] = LazyCastingParam(self, key, weight)
    return output_state_dict

def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
    """Return the full checkpoint state dict for saving.

    Exports ``self.model.diffusion_model`` through
    ``model_state_dict_for_saving`` (with the ``"diffusion_model."``
    prefix) and hands the result, together with the optional CLIP, VAE
    and CLIP-vision state dicts, to ``self.model.state_dict_for_saving``.
    """
    unet_state_dict = self.model_state_dict_for_saving(self.model.diffusion_model, "diffusion_model.")
    return self.model.state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)

def __del__(self):
Expand Down
9 changes: 8 additions & 1 deletion comfy/sd.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,13 @@ def get_sd(self):
sd_clip[k] = sd_tokenizer[k]
return sd_clip

def state_dict_for_saving(self):
    """Return the patcher's save-ready state dict merged with the tokenizer's.

    Tokenizer entries are written into the dict returned by
    ``self.patcher.model_state_dict_for_saving()``; on a key collision
    the tokenizer value wins. That same dict object is returned.
    """
    merged = self.patcher.model_state_dict_for_saving()
    merged.update(self.tokenizer.state_dict())
    return merged

def load_model(self, tokens={}):
memory_used = 0
if hasattr(self.cond_stage_model, "memory_estimation_function"):
Expand Down Expand Up @@ -1908,7 +1915,7 @@ def save_checkpoint(output_path, model, clip=None, vae=None, clip_vision=None, m
load_models = [model]
if clip is not None:
load_models.append(clip.load_model())
clip_sd = clip.get_sd()
clip_sd = clip.state_dict_for_saving()
vae_sd = None
if vae is not None:
vae_sd = vae.get_sd()
Expand Down
101 changes: 101 additions & 0 deletions comfy_api_nodes/apis/bytedance_llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Pydantic models for BytePlus ModelArk Responses API.

See: https://docs.byteplus.com/en/docs/ModelArk/1585128 (request)
https://docs.byteplus.com/en/docs/ModelArk/1783703 (response)
"""

from typing import Literal

from pydantic import BaseModel, Field


class BytePlusInputText(BaseModel):
    """Text content part of an input message."""

    # Discriminator; only the literal "input_text" is accepted.
    type: Literal["input_text"] = "input_text"
    # Required text payload.
    text: str


class BytePlusInputImage(BaseModel):
    """Image content part of an input message."""

    # Discriminator; only the literal "input_image" is accepted.
    type: Literal["input_image"] = "input_image"
    # Required; either a URL or an inline base64 data URI.
    image_url: str = Field(
        default=...,
        description="Image URL or `data:image/...;base64,...` payload",
    )
    # Free-form string here; the allowed values are only documented, not validated.
    detail: str = Field(default="auto", description="One of high, low, auto")


class BytePlusInputVideo(BaseModel):
    """Video content part of an input message."""

    # Discriminator; only the literal "input_video" is accepted.
    type: Literal["input_video"] = "input_video"
    # Required; either a URL or an inline base64 data URI.
    video_url: str = Field(
        default=...,
        description="Video URL or `data:video/...;base64,...` payload",
    )
    # Optional; when given it must lie within [0.2, 5.0].
    # NOTE(review): presumably the frame sampling rate -- confirm against the API docs.
    fps: float | None = Field(default=None, ge=0.2, le=5.0)


# Union of the content-part models allowed in `BytePlusInputMessage.content`.
BytePlusMessageContent = BytePlusInputText | BytePlusInputImage | BytePlusInputVideo


class BytePlusInputMessage(BaseModel):
    """One message in the request's ``input`` list."""

    # Discriminator; only the literal "message" is accepted.
    type: Literal["message"] = "message"
    # Required free-form string; the allowed roles are only documented, not validated.
    role: str = Field(
        default=...,
        description="One of user, system, assistant, developer",
    )
    # Required list of text / image / video content parts.
    content: list[BytePlusMessageContent]


class BytePlusResponseCreateRequest(BaseModel):
    """Request body for creating a response.

    ``model`` and ``input`` are required; every other field is optional.
    """

    model: str
    input: list[BytePlusInputMessage]
    instructions: str | None = None
    # When provided, must be at least 1.
    max_output_tokens: int | None = Field(default=None, ge=1)
    # When provided, must lie within [0.0, 2.0].
    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
    # Both flags default to False.
    store: bool | None = False
    stream: bool | None = False


class BytePlusOutputText(BaseModel):
    """Strictly typed text part of an output message.

    Not referenced by the other models visible in this file, which use
    ``BytePlusOutputContent`` instead.
    """

    # Discriminator; only the literal "output_text" is accepted.
    type: Literal["output_text"] = "output_text"
    # Required generated text.
    text: str


class BytePlusOutputRefusal(BaseModel):
    """Strictly typed refusal part of an output message.

    Not referenced by the other models visible in this file, which use
    ``BytePlusOutputContent`` instead.
    """

    # Discriminator; only the literal "refusal" is accepted.
    type: Literal["refusal"] = "refusal"
    # Required refusal text.
    refusal: str


class BytePlusOutputContent(BaseModel):
    """Loosely typed content part of an output message.

    ``type`` is an unconstrained string; ``text`` and ``refusal`` are
    both optional, so a single model can hold either kind of part.
    """

    type: str
    text: str | None = None
    refusal: str | None = None


class BytePlusOutputMessage(BaseModel):
    """One item of a response's ``output`` list.

    Only ``type`` is required; all remaining fields default to ``None``.
    """

    type: str
    id: str | None = None
    role: str | None = None
    status: str | None = None
    content: list[BytePlusOutputContent] | None = None


class BytePlusInputTokensDetails(BaseModel):
    """Optional breakdown attached to the input-token usage figure."""

    cached_tokens: int | None = None


class BytePlusOutputTokensDetails(BaseModel):
    """Optional breakdown attached to the output-token usage figure."""

    reasoning_tokens: int | None = None


class BytePlusResponseUsage(BaseModel):
    """Token usage reported with a response; every field is optional."""

    input_tokens: int | None = None
    output_tokens: int | None = None
    total_tokens: int | None = None
    input_tokens_details: BytePlusInputTokensDetails | None = None
    output_tokens_details: BytePlusOutputTokensDetails | None = None


class BytePlusResponseError(BaseModel):
    """Error payload of a response; both fields are required."""

    code: str
    message: str


class BytePlusResponseObject(BaseModel):
    """Top-level response object.

    Every field is optional and defaults to ``None``, so a partial
    payload still validates.
    """

    id: str | None = None
    object: str | None = None
    created_at: int | None = None
    model: str | None = None
    status: str | None = None
    error: BytePlusResponseError | None = None
    output: list[BytePlusOutputMessage] | None = None
    usage: BytePlusResponseUsage | None = None
Loading
Loading