In [1]:
from share import *
import numpy as np
import argparse, os, sys
from functools import partial
from omegaconf import OmegaConf

import torch
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl

from cldm.model import create_model, load_state_dict

from ldm.data.base import Txt2ImgIterableBaseDataset
from ldm.util import instantiate_from_config


def get_parser(**parser_kwargs):
    """Build the command-line parser used by the training script.

    Args:
        **parser_kwargs: forwarded unchanged to ``argparse.ArgumentParser``.

    Returns:
        argparse.ArgumentParser: parser with the run-naming, config-path,
        ControlNet and trainer-sizing options registered in a fixed order.
    """
    # One (flags, add_argument keyword arguments) entry per option, listed in
    # registration order so the generated help text keeps the same layout.
    option_table = [
        # `nargs="?"` + `const=True`: a bare `-n` / `-r` stores True, not a string.
        (("-n", "--name"),
         dict(type=str, const=True, default="", nargs="?",
              help="postfix for logdir")),
        (("-r", "--resume"),
         dict(type=str, const=True, default="", nargs="?",
              help="resume from logdir or checkpoint in logdir")),
        (("-b", "--base"),
         dict(type=str,
              help="paths to base configs. Loaded from left-to-right. "
                   "Parameters can be overwritten or added with command-line options of the form `--key value`.",
              default='./models/cldm_v15_2.yaml')),
        (("-l", "--logdir"),
         dict(type=str, default="logs",
              help="directory for logging dat shit")),
        (("--only_mid_control",),
         dict(action="store_true", default=False,
              help="only_mid_control for control net")),
        # store_false: the value is True unless `--sd_locked` is passed.
        (("--sd_locked",),
         dict(action="store_false", default=True,
              help="sd_locked for control net")),
        # Training
        (("--gpus",),
         dict(type=int, default=1,
              help="number of gpus for training")),
        (("--nnode",),
         dict(type=int, default=1,
              help="number of nodes for training")),
        # Prompt Engineering
        (("--data_config",),
         dict(type=str,
              help="path to data config files",
              default='./models/dataset.yaml')),
        (("--sd_v2",),
         dict(action="store_true", default=False,
              help="if use stable diffusion 2.0")),
    ]

    cli = argparse.ArgumentParser(**parser_kwargs)
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli


def worker_init_fn(_):
    """Per-worker DataLoader initialiser: shard iterable datasets and reseed NumPy.

    The positional argument is ignored; worker details are read from
    ``torch.utils.data.get_worker_info()``.

    For a ``Txt2ImgIterableBaseDataset`` the worker's ``sample_ids`` become a
    contiguous slice of ``valid_ids`` of length ``num_records // num_workers``,
    and NumPy's global RNG is reseeded from a randomly chosen entry of the
    current RNG state array plus the worker id. For every other dataset the
    first entry of the state array plus the worker id is used as the seed.

    Returns:
        The result of ``np.random.seed`` (None).
    """
    info = torch.utils.data.get_worker_info()

    ds = info.dataset
    wid = info.id

    if not isinstance(ds, Txt2ImgIterableBaseDataset):
        first_word = np.random.get_state()[1][0]
        return np.random.seed(first_word + wid)

    # Each worker takes its own equally sized slice of the valid ids.
    shard = ds.num_records // info.num_workers
    lo, hi = wid * shard, (wid + 1) * shard
    ds.sample_ids = ds.valid_ids[lo:hi]

    # Draw the index first, then read the state again: the draw itself advances the RNG.
    state_len = len(np.random.get_state()[1])
    pick = np.random.choice(state_len, 1)
    return np.random.seed(np.random.get_state()[1][pick] + wid)


class DataModuleFromConfig(pl.LightningDataModule):
    """Lightning data module that builds its datasets from config objects.

    Each of ``train``/``validation``/``test``/``predict`` is an optional config
    handed to ``instantiate_from_config``. The matching ``*_dataloader``
    attribute is assigned only for splits whose config was supplied.

    Args:
        batch_size: batch size used by every DataLoader.
        train, validation, test, predict: per-split dataset configs, or None.
        num_workers: DataLoader worker count; defaults to ``batch_size * 2``.
        shuffle_test_loader: shuffle the test loader (ignored when the test
            dataset is a ``Txt2ImgIterableBaseDataset``).
        use_worker_init_fn: install ``worker_init_fn`` for every dataset, not
            only for ``Txt2ImgIterableBaseDataset`` instances.
        shuffle_val_dataloader: shuffle the validation loader.
    """

    def __init__(self, batch_size, train=None, validation=None, test=None, predict=None,
                 num_workers=None, shuffle_test_loader=False, use_worker_init_fn=False,
                 shuffle_val_dataloader=False):
        super().__init__()
        self.batch_size = batch_size
        self.dataset_configs = dict()
        self.num_workers = num_workers if num_workers is not None else batch_size * 2
        self.use_worker_init_fn = use_worker_init_fn
        if train is not None:
            self.dataset_configs["train"] = train
            self.train_dataloader = self._train_dataloader
        if validation is not None:
            self.dataset_configs["validation"] = validation
            self.val_dataloader = partial(self._val_dataloader, shuffle=shuffle_val_dataloader)
        if test is not None:
            self.dataset_configs["test"] = test
            self.test_dataloader = partial(self._test_dataloader, shuffle=shuffle_test_loader)
        if predict is not None:
            self.dataset_configs["predict"] = predict
            self.predict_dataloader = self._predict_dataloader

    def prepare_data(self):
        """Instantiate every configured dataset once and discard the result."""
        # presumably this triggers any download/caching done in the dataset constructors — TODO confirm
        for data_cfg in self.dataset_configs.values():
            instantiate_from_config(data_cfg)

    def setup(self, stage=None):
        """Instantiate the configured datasets into ``self.datasets`` (``stage`` is unused)."""
        self.datasets = {
            split: instantiate_from_config(cfg)
            for split, cfg in self.dataset_configs.items()
        }

    def _is_iterable(self, split):
        """Return True when the dataset for ``split`` is a Txt2ImgIterableBaseDataset."""
        return isinstance(self.datasets[split], Txt2ImgIterableBaseDataset)

    def _make_loader(self, split, shuffle):
        """Build the DataLoader for ``split`` with the shared batch/worker settings."""
        needs_init_fn = self._is_iterable(split) or self.use_worker_init_fn
        return DataLoader(
            self.datasets[split],
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=shuffle,
            worker_init_fn=worker_init_fn if needs_init_fn else None,
            # DataLoader raises ValueError for persistent_workers=True with
            # num_workers == 0, so only keep workers alive when there are any.
            persistent_workers=self.num_workers > 0,
        )

    def _train_dataloader(self):
        """Training loader; shuffled unless the dataset is iterable."""
        return self._make_loader("train", shuffle=not self._is_iterable("train"))

    def _val_dataloader(self, shuffle=False):
        """Validation loader; ``shuffle`` is passed through unchanged."""
        return self._make_loader("validation", shuffle=shuffle)

    def _test_dataloader(self, shuffle=False):
        """Test loader; never shuffled when the test dataset is iterable."""
        # Inspect the *test* dataset here. Checking the train split raised
        # KeyError when only a test config was given and could pick the wrong
        # shuffle/init_fn settings otherwise.
        return self._make_loader("test", shuffle=shuffle and not self._is_iterable("test"))

    def _predict_dataloader(self, shuffle=False):
        """Prediction loader; always unshuffled (``shuffle`` is accepted but ignored)."""
        return self._make_loader("predict", shuffle=False)



if __name__ == "__main__":
    # Make modules in the current working directory importable.
    sys.path.append(os.getcwd())

    # Arguments the parser does not recognise are returned separately and dropped.
    opt, _ = get_parser().parse_known_args()

    # Run directory layout: <logdir>/<name>/checkpoints
    nowname = str(opt.name)
    logdir = os.path.join(opt.logdir, nowname)
    ckptdir = os.path.join(logdir, "checkpoints")

    os.makedirs(logdir, exist_ok=True)
    os.makedirs(ckptdir, exist_ok=True)

    # Configs: initial checkpoint depends on the Stable Diffusion version flag.
    if opt.sd_v2:
        resume_path = './models/control_sd21_ini.ckpt'
    else:
        resume_path = './models/control_sd15_ini_2.ckpt'
    learning_rate = 1e-4

logging improved.


In [2]:
# !python tool_add_control.py "C:\\Desktop\\Kanpur\\sem 9\\tushar sir pgp\\Prompt-Diffusion-main\\Prompt-Diffusion-main\\v1-5-pruned-emaonly.ckpt" "./models/control_sd15_ini.ckpt"

In [3]:
    # Build the model from the config file at opt.base and keep it on the CPU.
    model = create_model(opt.base).cpu()

No module 'xformers'. Proceeding without it.
ControlLDM: Running in eps-prediction mode
DiffusionWrapper has 859.52 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels
Loaded model config from [./models/cldm_v15_2.yaml]


In [4]:
    # Load the initial checkpoint at resume_path (read with location='cpu') into the model.
    model.load_state_dict(load_state_dict(resume_path, location='cpu'))
    # Attach the learning rate and the ControlNet switches parsed from the command line.
    model.learning_rate = learning_rate
    model.sd_locked = opt.sd_locked
    model.only_mid_control = opt.only_mid_control

Loaded state_dict from [./models/control_sd15_ini_2.ckpt]


In [26]:
    # Inspect the module tree: the bare expression's repr is the cell output.
    model

ControlLDM(
  (model): DiffusionWrapper(
    (diffusion_model): ControlledUnetModel(
      (time_embed): Sequential(
        (0): Linear(in_features=320, out_features=1280, bias=True)
        (1): SiLU()
        (2): Linear(in_features=1280, out_features=1280, bias=True)
      )
      (input_blocks): ModuleList(
        (0): TimestepEmbedSequential(
          (0): Conv2d(4, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        )
        (1-2): 2 x TimestepEmbedSequential(
          (0): ResBlock(
            (in_layers): Sequential(
              (0): GroupNorm32(32, 320, eps=1e-05, affine=True)
              (1): SiLU()
              (2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            )
            (h_upd): Identity()
            (x_upd): Identity()
            (emb_layers): Sequential(
              (0): SiLU()
              (1): Linear(in_features=1280, out_features=320, bias=True)
            )
            (out_layers): Sequential(
       

In [4]:
    # The dataset must be downloaded before this cell can run.

    data_config = OmegaConf.load(opt.data_config)
    # NOTE(review): `dataloader` has prepare_data()/setup() called on it and is passed to
    # trainer.fit, so it is presumably a data module (e.g. DataModuleFromConfig) rather than
    # a torch DataLoader — confirm against the `data` section of the YAML.
    dataloader = instantiate_from_config(data_config.data)
   # dataloader = instantiate_from_config(ds["train"])
    # Run the data hooks by hand so the datasets exist before training starts.
    dataloader.prepare_data()
    dataloader.setup()
    print("#### Data #####")
    # for k in dataloader.datasets:
        # print(f"{k}, {dataloader.datasets[k].__class__.__name__}, {len(dataloader.datasets[k])}")

    

#### Data #####


In [5]:
    # Callback specs: each entry names a class under `target` and its keyword
    # arguments under `params`, the shape handed to instantiate_from_config below.
    callbacks_cfg = dict(
        checkpoint_callback=dict(
            target="pytorch_lightning.callbacks.ModelCheckpoint",
            params=dict(
                dirpath=ckptdir,
                filename="{epoch:06}-{step:09}",
                verbose=True,
                save_top_k=-1,
                every_n_train_steps=1000,
                save_weights_only=True,
                save_last=True,
            ),
        ),
        image_logger=dict(
            target="cldm.logger.ImageLogger",
            params=dict(
                batch_frequency=500,
                max_images=16,
                clamp=True,
                log_images_kwargs=dict(N=16, unconditional_guidance_scale=9.0),
            ),
        ),
    )

    # Instantiate the callbacks in the order they are declared above.
    callbacks = [instantiate_from_config(spec) for spec in callbacks_cfg.values()]

    

In [6]:
    # Training needs GPUs. NOTE(review): the recorded output below reports
    # "GPU available: False", and opt.gpus is not passed to the active Trainer call.

    tb_logger = pl.loggers.TensorBoardLogger(save_dir=logdir)

    # Earlier multi-GPU (DDP) configuration, kept for reference:
    # trainer = pl.Trainer(gpus=opt.gpus, accelerator='ddp', num_nodes=opt.nnode,
    #                      max_steps=10000, check_val_every_n_epoch=2, accumulate_grad_batches=4,
    #                      precision=32, callbacks=callbacks, logger=tb_logger)
    # Active configuration: no explicit device/accelerator arguments are given.
    trainer = pl.Trainer( num_nodes=opt.nnode,
                         max_steps=10000, check_val_every_n_epoch=2, accumulate_grad_batches=4,
                         precision=32, callbacks=callbacks, logger=tb_logger)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [7]:
    # model.model.diffusion_model.input_blocks

In [8]:
    # TODO: fix the remaining input issues, download the data, get a GPU, then train the model.
    # The validation loader works. Next step: convert the 1000 parquet images to PNG,
    # run the original data loader on them and see what happens.
    # NOTE(review): the recorded run stops at the first training batch with
    #   TypeError: LatentDiffusion.on_train_batch_start() missing 1 required positional argument: 'dataloader_idx'
    # which suggests that hook's signature in LatentDiffusion does not match what the
    # installed pytorch_lightning passes — confirm and fix it in the model class.
    trainer.fit(model, dataloader)


  | Name              | Type               | Params | Mode 
-----------------------------------------------------------------
0 | model             | DiffusionWrapper   | 859 M  | train
1 | first_stage_model | AutoencoderKL      | 83.7 M | eval 
2 | cond_stage_model  | FrozenCLIPEmbedder | 123 M  | eval 
3 | control_model     | ControlNet         | 363 M  | train
-----------------------------------------------------------------
1.2 B     Trainable params
206 M     Non-trainable params
1.4 B     Total params
5,718.092 Total estimated model params size (MB)
1282      Modules in train mode
365       Modules in eval mode


Sanity Checking: |                                                                               | 0/? [00:00<…

C:\Users\Varada Agarwal\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\utilities\data.py:78: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Data shape for DDIM sampling is (1, 4, 32, 32), eta 0.0
Running DDIM Sampling with 50 timesteps


Training: |                                                                                      | 0/? [00:00<…

TypeError: LatentDiffusion.on_train_batch_start() missing 1 required positional argument: 'dataloader_idx'

In [None]:
# !pip install transformers
# !pip install einops
# !pip install open_clip_torch
# !pip install omegaconf
# !pip install pytorch-lightning
# !pip install datasets

# Change made in two notebooks relative to the original code:
# from pytorch_lightning.utilities import rank_zero_only



In [None]:
# from datasets import load_dataset

# ds = load_dataset("lansinuote/diffusion.8.instruct_pix2pix")

In [None]:
# ds["train"]["input"][999]

In [None]:
# import numpy as np
# import torch
# import torchvision
# from einops import rearrange
# from PIL import Image
# import pandas as pd
# import pyarrow.parquet as pq
# import io
# parquet_file_path = 'lansinuote___diffusion.8.instruct_pix2pix/default/0.0.0/f9080eb8f9223440366092de3156757998949cb2//diffusion.8.instruct_pix2pix-train.arrow'
# df = pd.read_parquet(parquet_file_path)

# image_0 = Image.open(io.BytesIO(df['input'][0]))
# image_1 = Image.open(io.BytesIO(df['output'][0]))

In [None]:
# import pyarrow as pa
# import pyarrow.dataset as ds

# # Reading the Arrow file directly
# dataset = ds.dataset(parquet_file_path)
# table = dataset.to_table()
# df = table.to_pandas()

# # Access the first image from the 'input' column
# input_image_data = df['input'][0]  # Adjust the index as needed
