In [22]:
import torch

# Проверка наличия GPU
if torch.cuda.is_available():
    print("GPU is available!")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    print(f"Current GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not available.")

GPU is available!
Number of GPUs: 2
Current GPU: Tesla T4


In [4]:
!pip install transformers diffusers accelerate peft



In [5]:
!pip install langdetect
import os
import torch
from diffusers import StableDiffusionPipeline, EulerAncestralDiscreteScheduler
from transformers import MarianMTModel, MarianTokenizer
from langdetect import detect
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from accelerate import Accelerator
from peft import LoraConfig, get_peft_model
from tqdm.auto import tqdm

# Очистка кэша GPU
torch.cuda.empty_cache()

# Настройки устройства
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993222 sha256=8fef49aa612ebfbb48efe23944ec884d4a2d6858935788345abb059dec466e5f
  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

Using device: cuda


In [10]:
# Загружаем модель Stable Diffusion 2.1
pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float16)
pipe.to(device)

# Используем другой сэмплер для лучшего качества
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)


model_index.json:   0%|          | 0.00/543 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/807 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/911 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/553 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.46G [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/346 [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

In [7]:
def translate_ru_to_en(text):
    model_name = "Helsinki-NLP/opus-mt-ru-en"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name).to(device)  # Переносим модель на GPU

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

In [15]:
class ImageDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_paths = [os.path.join(image_dir, fname) for fname in os.listdir(image_dir) if fname.endswith(('jpg', 'png', 'jpeg'))]
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")  # Преобразуем в формат RGB
        if self.transform:
            image = self.transform(image)
        return {'image': image}

# Функция подготовки изображений для дообучения
def prepare_images(image_dir):
    transform = transforms.Compose([
        transforms.Resize((512, 512)),  # Изменяем размер до 512x512
        transforms.ToTensor()          # Преобразуем в тензор
    ])
    dataset = ImageDataset(image_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
    return dataloader


In [14]:
from langdetect import detect

def process_prompt(user_input):
    try:
        lang = detect(user_input)
    except:
        lang = "unknown"

    if lang == "ru":
        translated_text = translate_ru_to_en(user_input)
        print(f"🔄 Переведено: {translated_text}")
        return translated_text
    else:
        print(f"✅ Оставлено без изменений: {user_input}")
        return user_input

In [13]:
def fine_tune_lora(pipe, train_dataloader, prompt, num_epochs=3, lr=1e-4, output_dir="lora_weights"):
    print("🔧 Начало дообучения с LoRA...")

    # Настройка LoRA
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["to_k", "to_q", "to_v", "to_out.0"],
        lora_dropout=0.05,
        bias="none"
    )

    pipe.unet = get_peft_model(pipe.unet, lora_config)
    pipe.unet.print_trainable_parameters()

    optimizer = torch.optim.AdamW(pipe.unet.parameters(), lr=lr)
    scaler = GradScaler()
    accelerator = Accelerator()
    pipe.unet, optimizer, train_dataloader = accelerator.prepare(pipe.unet, optimizer, train_dataloader)

    for epoch in range(num_epochs):
        pipe.unet.train()
        progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}")
        for batch in progress_bar:
            optimizer.zero_grad()

            noise = torch.randn_like(batch['image']).to(device)
            timesteps = torch.randint(0, 1000, (batch['image'].shape[0],), device=device).long()

            text_input = [prompt] * batch['image'].shape[0]
            text_input = pipe.tokenizer(text_input, padding="max_length", truncation=True, max_length=77, return_tensors="pt").input_ids.to(device)
            encoder_hidden_states = pipe.text_encoder(text_input).last_hidden_state

            noisy_images = pipe.scheduler.add_noise(batch['image'], noise, timesteps)

            with autocast():
                noise_pred = pipe.unet(noisy_images, timesteps, encoder_hidden_states=encoder_hidden_states).sample

            loss = torch.nn.functional.mse_loss(noise_pred, noise)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            progress_bar.set_postfix(loss=loss.item())

    os.makedirs(output_dir, exist_ok=True)
    pipe.unet.save_pretrained(output_dir)
    print(f"✅ Дообучение завершено. Веса сохранены в '{output_dir}'")

In [46]:
def generate_image(prompt, negative_prompt="low quality, blurry, poorly drawn, distorted, deformed, bad anatomy, bad proportions, watermark, text, nsfw"):
    image = pipe(prompt, negative_prompt=negative_prompt, guidance_scale=7.5).images[0]
    image.save("generated_image.png")
    print("✅ Изображение сохранено как 'generated_image.png'")

In [16]:
if __name__ == "__main__":
    # Путь к папке с изображениями
    image_dir = "/kaggle/input/in-img"  # Укажите путь к вашим изображениям
    train_dataloader = prepare_images(image_dir)

    # Промт для дообучения
    prompt_for_finetuning = "A photo of a girl named Anya"

    # Дообучение LoRA
    lora_weights_dir = "lora_weights"
    fine_tune_lora(pipe, train_dataloader, prompt_for_finetuning, num_epochs=3, lr=1e-4, output_dir=lora_weights_dir)

    # Генерация изображения
    user_prompt = "Фотография девушки по имени Аня, на пляже с крабами, хорошеее качество"
    final_prompt = process_prompt(user_prompt)  # Перевод, если нужно
    negative_prompt_default = "low quality, blurry, poorly drawn, distorted, deformed, bad anatomy, bad proportions, watermark, text, nsfw"
    generate_image(final_prompt, negative_prompt_default)

🔧 Начало дообучения с LoRA...
trainable params: 3,319,808 || all params: 869,230,532 || trainable%: 0.3819


NameError: name 'GradScaler' is not defined