In [1]:
# Environment switch: True selects the Google Colab settings, False the Kaggle ones.
use_colab = True

if use_colab:
    from google.colab import drive
    # Mount Google Drive; `path` below points inside the mounted drive.
    drive.mount('/content/drive')
    path = '/content/drive/MyDrive/Github/Product-image-generation-from-text-description'
    # Absolute path (was the relative 'content/...'): the download cell further
    # down fetches the dataset archive into /content.
    # NOTE(review): assumes the archive unpacks to fashion-dataset/images, as the
    # Kaggle input path below suggests - confirm.
    im_path = '/content/fashion-dataset/images'
else:
    path = '/kaggle/input/fashion-data'
    im_path = '/kaggle/input/fashion-product-images-dataset/fashion-dataset/images'

Mounted at /content/drive


In [None]:
# add logging to wandb
if not use_colab:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    key = user_secrets.get_secret("wandb_api")
else:
    with open('wandb_token.txt') as f:
        key = f.read()
        
!pip install --upgrade wandb
import wandb
wandb.login(key=key)
run = wandb.init(project='text-to-image',
                    group='finetune', #resume='must',
                    job_type='train')

In [None]:
!pip install -qq -U diffusers transformers accelerate
!pip install -q bitsandbytes
!pip install torchmetrics[image]

In [None]:
import torch
import pandas as pd
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader, Subset
from transformers import CLIPTokenizer
from sklearn.model_selection import train_test_split
import os
    
# Read the description records from the JSON file located under `path`.
path_to_descriptions = os.path.join(path, 'descriptions_2.json')
descriptions = pd.read_json(path_to_descriptions, orient='records')

# Append a fixed style suffix to every description.
descriptions['description'] = descriptions['description'].map(
    lambda text: text + ' isolated on white background'
)

In [None]:
import sys

# Put the project directory first on the import search path.
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
if path not in sys.path:
    sys.path.insert(0, path)

In [None]:
from transformers import CLIPTokenizer
# `args` is needed here, so import it in this cell: the only other import of it
# is in the training cell further down, which leaves it undefined at this point
# on a fresh kernel run from top to bottom.
from code.config import args

# Load the tokenizer from the "tokenizer" subfolder of the pretrained model
# named in the config.
tokenizer = CLIPTokenizer.from_pretrained(
    args.pretrained_model_name_or_path,
    subfolder="tokenizer",
    revision=args.revision,
)

Downloading (…)tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

Downloading (…)tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/788 [00:00<?, ?B/s]

In [None]:
# download data for colab
if not use_colab:
    from google.colab import files
    files.upload()
    os.environ['KAGGLE_CONFIG_DIR'] = "/content"
    !kaggle datasets download -d paramaggarwal/fashion-product-images-dataset -p '/content'
    !unzip '/content/fashion-product-images-dataset.zip'
    !rm /content/fashion-product-images-dataset.zip

In [None]:
from code.CustomDataset import CustomTensorDataset

RESOLUTION = 256
# Fixed seed so the train/test split is the same on every run.
SEED = 42

# Image preprocessing: resize to RESOLUTION x RESOLUTION, convert to a tensor,
# then normalize with mean 0.5 and std 0.5.
data_transformation_images = transforms.Compose([
            transforms.Resize((RESOLUTION, RESOLUTION)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, ), (0.5, ))
        ])

dataset = CustomTensorDataset(descriptions, tokenizer, im_path, transform_images=data_transformation_images)

# 80/20 split over row positions; random_state makes it reproducible.
indices = np.arange(len(descriptions))
indices_train, indices_test = train_test_split(indices, test_size=0.2, random_state=SEED)

# datasets
train_dataset = Subset(dataset, indices_train)
test_dataset = Subset(dataset, indices_test)

# dataloaders
batch_size = 8
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
from code.config import args
from code.train_eval import train

# Take the image size from the first test sample.
# NOTE(review): assumes element 1 of a dataset item is the image tensor laid out
# as (channels, height, width) - confirm against CustomTensorDataset.
first_test_item = test_dataloader.dataset[0]
args.height, args.width = first_test_item[1].shape[1:3]

# Pass the batch size and the wandb module (used as the logger) through the config.
args.train_batch_size = train_dataloader.batch_size
args.logger = wandb

unet, text_encoder = train(args)

In [None]:
from diffusers import DiffusionPipeline

# Unwrap the models returned by training before handing them to the pipeline.
trained_unet = args.accelerator.unwrap_model(unet)
trained_text_encoder = args.accelerator.unwrap_model(text_encoder)

# Final pipeline for inference: the pretrained base with the unet and
# text encoder replaced by the unwrapped models.
pipeline = DiffusionPipeline.from_pretrained(
    args.pretrained_model_name_or_path,
    revision=args.revision,
    unet=trained_unet,
    text_encoder=trained_text_encoder,
)

# Save the pipeline under <output_dir>/data.
save_dir = os.path.join(args.output_dir, 'data')
pipeline.save_pretrained(save_dir)