In [22]:
import torch

# Проверка наличия GPU
if torch.cuda.is_available():
    print("GPU is available!")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    print(f"Current GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not available.")

GPU is available!
Number of GPUs: 2
Current GPU: Tesla T4


In [25]:
import os
import torch
from transformers import CLIPTextModel, CLIPTokenizer, M2M100ForConditionalGeneration, M2M100Tokenizer
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from diffusers import StableDiffusionPipeline
from PIL import Image

In [26]:
# 1️⃣ Выбираем GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [32]:
# Функция перевода русского текста на английский
def translate_ru_to_en(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# Функция обработки промта
def process_prompt(user_input):
    try:
        lang = detect(user_input)
    except:
        lang = "unknown"

    if lang == "ru":
        translated_text = translate_ru_to_en(user_input)
        print(f"🔄 Переведено: {translated_text}")
        return translated_text
    else:
        print(f"✅ Оставлено без изменений: {user_input}")
        return user_input


In [4]:
pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float16)

model_index.json:   0%|          | 0.00/543 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/807 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/346 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/911 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/553 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.46G [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

In [5]:
# Используем другой сэмплер
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)


In [9]:
# Функция перевода русского текста на английский
def translate_ru_to_en(text):
    model_name = "Helsinki-NLP/opus-mt-ru-en"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# Функция обработки промта
def process_prompt(user_input):
    try:
        lang = detect(user_input)
    except:
        lang = "unknown"
    if lang == "ru":
        translated_text = translate_ru_to_en(user_input)
        print(f"🔄 Переведено: {translated_text}")
        return translated_text
    else:
        print(f"✅ Оставлено без изменений: {user_input}")
        return user_input

In [10]:
# Функция дообучения с LoRA
def fine_tune_lora(pipe, train_dataloader, prompt, num_epochs=3, lr=1e-4, output_dir="lora_weights"):
    print("🔧 Начало дообучения с LoRA...")
    lora_config = LoraConfig(r=16, lora_alpha=32, target_modules=["to_k", "to_q", "to_v", "to_out.0"], lora_dropout=0.05, bias="none")
    pipe.unet = get_peft_model(pipe.unet, lora_config)
    optimizer = torch.optim.AdamW(pipe.unet.parameters(), lr=lr)
    scaler = GradScaler()
    accelerator = Accelerator()
    pipe.unet, optimizer, train_dataloader = accelerator.prepare(pipe.unet, optimizer, train_dataloader)
    for epoch in range(num_epochs):
        pipe.unet.train()
        progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}")
        for batch in progress_bar:
            optimizer.zero_grad()
            noise = torch.randn_like(batch['image']).to(device)
            timesteps = torch.randint(0, 1000, (batch['image'].shape[0],), device=device).long()
            text_input = pipe.tokenizer([prompt] * batch['image'].shape[0], padding="max_length", truncation=True, max_length=77, return_tensors="pt").input_ids.to(device)
            encoder_hidden_states = pipe.text_encoder(text_input).last_hidden_state
            noisy_images = pipe.scheduler.add_noise(batch['image'], noise, timesteps)
            with autocast():
                noise_pred = pipe.unet(noisy_images, timesteps, encoder_hidden_states=encoder_hidden_states).sample
            loss = torch.nn.functional.mse_loss(noise_pred, noise)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            progress_bar.set_postfix(loss=loss.item())
    os.makedirs(output_dir, exist_ok=True)
    pipe.unet.save_pretrained(output_dir)
    print(f"✅ Дообучение завершено. Веса сохранены в '{output_dir}'")

In [11]:
# Функция загрузки весов LoRA
def load_lora_weights(pipe, lora_weights_dir):
    pipe.unet.load_adapter(lora_weights_dir)
    print("🔄 LoRA загружена")

In [12]:
# Функция генерации изображения
def generate_image(prompt, negative_prompt):
    image = pipe(prompt, negative_prompt=negative_prompt, guidance_scale=7.5).images[0]
    image.save("generated_image.png")
    print("✅ Изображение сохранено как 'generated_image.png'")

In [14]:
# Класс для подготовки датасета
class ImageDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_paths = [os.path.join(image_dir, fname) for fname in os.listdir(image_dir) if fname.endswith(('jpg', 'png', 'jpeg'))]
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGBA")  # Преобразуем в формат RGBA
        if self.transform:
            image = self.transform(image)
        return {'image': image}


# Функция подготовки изображений для дообучения
def prepare_images(image_dir):
    transform = transforms.Compose([
        transforms.ToTensor()  # Просто конвертируем в Tensor
    ])
    dataset = ImageDataset(image_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
    return dataloader


In [33]:
if __name__ == "__main__":
    image_dir = "../input/in-img"
    train_dataloader = prepare_images(image_dir)
    prompt_for_finetuning = input("Введите промт для дообучения: ")
    prompt_for_finetuning = process_prompt(prompt_for_finetuning)

    # Дообучение LoRA
    lora_weights_dir = "lora_weights"
    fine_tune_lora(pipe, train_dataloader, prompt_for_finetuning, num_epochs=3, lr=1e-4, output_dir=lora_weights_dir)

    # Загрузка весов LoRA
    load_lora_weights(pipe, lora_weights_dir)
    
    # Генерация изображения после дообучения
    final_prompt = input("Введите промт для генерации изображения: ")
    final_prompt = process_prompt(final_prompt)
    generated_image = pipe(final_prompt).images[0]
    generated_image.save("generated_image.png")

Введите промт для дообучения:  Фотография девушки Ани


✅ Оставлено без изменений: Фотография девушки Ани
🔧 Начало дообучения с LoRA...


  scaler = GradScaler()


Epoch 1/3:   0%|          | 0/6 [00:00<?, ?it/s]

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)