diff --git a/README.md b/README.md
index 450098a0662..928abee3cf4 100644
--- a/README.md
+++ b/README.md
@@ -175,6 +175,13 @@ not supported.
 8. Point your browser to http://localhost:9090 to bring up the web interface.
 9. Type `banana sushi` in the box on the top left and click `Invoke`.
+10. To run the ONNX pipeline:
+    i. Edit `utils.py` in the installed openvino package and add the line `os.environ["PATH"] = os.path.abspath(lib_path) + ";" + os.environ["PATH"]` at line 33.
+    ii. Copy the model weight files into the root directory of InvokeAI.
+    iii. Invoke the ONNX pipeline with the following flag:
+    ```terminal
+    invokeai --modeltype "Onnx"
+    ```
 Be sure to activate the virtual environment each time before re-launching
 InvokeAI, using `source .venv/bin/activate` or `.venv\Scripts\activate`.
diff --git a/invokeai/backend/__init__.py b/invokeai/backend/__init__.py
index dd126a322da..b5826921274 100644
--- a/invokeai/backend/__init__.py
+++ b/invokeai/backend/__init__.py
@@ -1,7 +1,8 @@
 """
 Initialization file for invokeai.backend
 """
-from .generate import Generate
+from .pytorch import Pytorch
+from .onnx import ONNX
 from .generator import (
     InvokeAIGeneratorBasicParams,
     InvokeAIGenerator,
diff --git a/invokeai/backend/args.py b/invokeai/backend/args.py
index eb8b396ee09..09722f323a2 100644
--- a/invokeai/backend/args.py
+++ b/invokeai/backend/args.py
@@ -462,6 +462,13 @@ def _create_arg_parser(self):
         general_group.add_argument(
             "--version", "-V", action="store_true", help="Print InvokeAI version number"
         )
+        model_group.add_argument(
+            "--modeltype",
+            dest="modelType",
+            default="Pytorch",
+            choices=['Pytorch','Onnx'],
+            help="Inference pipeline to use. Defaults to Pytorch; choose Onnx for the ONNX/OpenVINO pipeline",
+        )
         model_group.add_argument(
             "--root_dir",
             default=None,
diff --git a/invokeai/backend/inferencePipeline.py b/invokeai/backend/inferencePipeline.py
new file mode 100644
index 00000000000..8735b704f22
--- /dev/null
+++ b/invokeai/backend/inferencePipeline.py
@@ -0,0 +1,18 @@
+"""
+This file implements the base class for inference pipelines.
+It declares the abstract methods for inference-related operations.
+"""
+from abc import ABC, abstractmethod
+class inferencePipeline(ABC):
+
+    """
+    Abstract base class for inference pipelines.
+    """
+
+    @abstractmethod
+    def prompt2image(self):
+        pass
+
+    @abstractmethod
+    def getCompleter(self):
+        pass
diff --git a/invokeai/backend/onnx.py b/invokeai/backend/onnx.py
new file mode 100644
index 00000000000..cd465b2c7d8
--- /dev/null
+++ b/invokeai/backend/onnx.py
@@ -0,0 +1,102 @@
+"""
+This class derives from the inference pipeline base class.
+It implements the ONNX inference pipeline.
+"""
+import traceback
+import time
+import sys
+import os
+import importlib
+
+from diffusers import OnnxStableDiffusionPipeline
+
+utils = importlib.import_module('openvino.utils')
+utils.add_openvino_libs_to_path()
+
+from .inferencePipeline import inferencePipeline
+from ..frontend.CLI.readline import Completer, get_completer
+
+class ONNX(inferencePipeline):
+    """
+    ONNX inference pipeline model class.
+    """
+    def __init__(
+        self,
+        model=None,
+        sampler_name="k_lms",
+        precision="auto",
+        outdir="outputs/",
+        num_images=1,
+        steps=50,
+    ):
+        self.height = 512
+        self.width = 512
+        self.iterations = 1
+        self.steps = 50
+        self.sampler_name = sampler_name
+        self.precision = precision
+        self.model_type = "Onnx"
+        # Set precision for ONNX inference
+        self.outdir = outdir
+        self.precision = "float32"
+        fallback = "runwayml/stable-diffusion-v1-5"
+        self.model = model or fallback
+        self.model_name = model 
or fallback + self.num_images_per_prompt = num_images + self.num_inference_steps = steps + + def prompt2image( + self, + prompt, + iterations=None, + steps=None, + image_callback=None, + step_callback=None, + outdir=None, + width=None, + height=None, + sampler_name=None, + model=None, + precision=None, + catch_interrupts=False, + **args, + ): + steps = steps or self.steps + width = width or self.width + height = height or self.height + iterations = iterations or self.iterations + outdir = outdir or self.outdir + sampler_name = self.sampler_name or sampler_name + if not os.path.exists(outdir): + os.makedirs(outdir) + print("Output directory: ", outdir) + tic = time.time() + try: + if precision == "cpu": + onnxPipeline = OnnxStableDiffusionPipeline.from_pretrained(self.model, revision="onnx", provider="CPUExecutionProvider") + else: + onnxPipeline = OnnxStableDiffusionPipeline.from_pretrained(self.model, revision="onnx", provider="OpenVINOExecutionProvider") + + image = onnxPipeline(prompt, height=height, width=width, num_images_per_prompt=iterations, num_inference_steps=steps).images[0] + timestamp = int(time.time()) + image.save(f"{outdir}/Inference_{timestamp}.png") + + except KeyboardInterrupt: + + if catch_interrupts: + print("**Interrupted** Partial results will be returned.") + else: + raise KeyboardInterrupt + except RuntimeError: + + print(traceback.format_exc(), file=sys.stderr) + print(">> Could not generate image.") + toc = time.time() + print("\n>> Usage stats:") + print(f">> image(s) generated in", "%4.2fs" % (toc - tic)) + + def getCompleter(self, opt): + """ + Invocation of completer + """ + return get_completer(opt, models=[]) diff --git a/invokeai/backend/generate.py b/invokeai/backend/pytorch.py similarity index 86% rename from invokeai/backend/generate.py rename to invokeai/backend/pytorch.py index 4f3df60f1cc..da651db4404 100644 --- a/invokeai/backend/generate.py +++ b/invokeai/backend/pytorch.py @@ -1,3 +1,7 @@ +""" +This class is derived from Inference Model class +Implements pytorch inference pipeline +""" # Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein) # Derived from source code carrying the following copyrights # Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich @@ -27,7 +31,6 @@ from omegaconf import OmegaConf from pathlib import Path -import invokeai.backend.util.logging as logger from .args import metadata_from_png from .generator import infill_methods from .globals import Globals, global_cache_dir @@ -38,6 +41,9 @@ from .prompting.conditioning import log_tokenization from .stable_diffusion import HuggingFaceConceptsLibrary from .util import choose_precision, choose_torch_device +from ..frontend.CLI.readline import Completer, get_completer +from .globals import Globals, global_config_dir +from .inferencePipeline import inferencePipeline def fix_func(orig): if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): @@ -129,13 +135,10 @@ def new_func(*args, **kw): ) """ - - -class Generate: - """Generate class - Stores default values for multiple configuration items +class Pytorch(inferencePipeline): + """ + Instantiation of Inference model class """ - def __init__( self, model=None, @@ -192,16 +195,17 @@ def __init__( self.karras_max = None self.infill_method = None + #Initiates Pytorch inference # Note that in previous versions, there was an option to pass the # device to Generate(). However the device was then ignored, so # it wasn't actually doing anything. This logic could be reinstated. 
self.device = torch.device(choose_torch_device()) - logger.info(f"Using device_type {self.device.type}") + print(f">> Using device_type {self.device.type}") if full_precision: if self.precision != "auto": raise ValueError("Remove --full_precision / -F if using --precision") - logger.warning("Please remove deprecated --full_precision / -F") - logger.warning("If auto config does not work you can use --precision=float32") + print("Please remove deprecated --full_precision / -F") + print("If auto config does not work you can use --precision=float32") self.precision = "float32" if self.precision == "auto": self.precision = choose_precision(self.device) @@ -209,13 +213,13 @@ def __init__( if is_xformers_available(): if torch.cuda.is_available() and not Globals.disable_xformers: - logger.info("xformers memory-efficient attention is available and enabled") + print(">> xformers memory-efficient attention is available and enabled") else: - logger.info( - "xformers memory-efficient attention is available but disabled" + print( + ">> xformers memory-efficient attention is available but disabled" ) else: - logger.info("xformers not installed") + print(">> xformers not installed") # model caching system for fast switching self.model_manager = ModelManager( @@ -230,8 +234,8 @@ def __init__( fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME model = model or fallback if not self.model_manager.valid_model(model): - logger.warning( - f'"{model}" is not a known model name; falling back to {fallback}.' + print( + f'** "{model}" is not a known model name; falling back to {fallback}.' ) model = None self.model_name = model or fallback @@ -247,10 +251,14 @@ def __init__( # load safety checker if requested if safety_checker: - logger.info("Initializing NSFW checker") + print(">> Initializing NSFW checker") self.safety_checker = SafetyChecker(self.device) else: - logger.info("NSFW checker is disabled") + print(">> NSFW checker is disabled") + + def getCompleter(self, opt): + completer = get_completer(opt, models=self.model_manager.list_models()) + return completer def prompt2png(self, prompt, outdir, **kwargs): """ @@ -281,6 +289,61 @@ def img2img(self, prompt, **kwargs): ), "call to img2img() must include the init_img argument" return self.prompt2png(prompt, outdir, **kwargs) + # This routine performs any patch-ups needed after installation + def run_patches(): + # install ckpt configuration files that may have been added to the + # distro after original root directory configuration + import invokeai.configs as conf + from shutil import copyfile + + root_configs = Path(global_config_dir(), 'stable-diffusion') + repo_configs = Path(conf.__path__[0], 'stable-diffusion') + if not root_configs.exists(): + os.makedirs(root_configs, exist_ok=True) + for src in repo_configs.iterdir(): + dest = root_configs / src.name + if not dest.exists(): + copyfile(src, dest) + + @classmethod + def start(self, opt, args): + if args.laion400m: + print( + "--laion400m flag has been deprecated. Please use --model laion400m instead." + ) + sys.exit(-1) + if args.weights: + print( + "--weights argument has been deprecated. Please edit ./configs/models.yaml, and select the weights using --model instead." 
+ ) + sys.exit(-1) + if args.max_loaded_models is not None: + if args.max_loaded_models <= 0: + print("--max_loaded_models must be >= 1; using 1") + args.max_loaded_models = 1 + + # alert - setting a few globals here + Globals.try_patchmatch = args.patchmatch + Globals.always_use_cpu = args.always_use_cpu + Globals.disable_xformers = not args.xformers + Globals.sequential_guidance = args.sequential_guidance + Globals.ckpt_convert = True # always true now + + # run any post-install patches needed + self.run_patches() + + if opt.seamless: + print(">> changed to seamless tiling mode") + + # normalize the config directory relative to root + if not os.path.isabs(opt.conf): + opt.conf = os.path.normpath(os.path.join(Globals.root, opt.conf)) + + # migrate legacy models + ModelManager.migrate_models() + + return opt + def prompt2image( self, # these are common @@ -568,7 +631,7 @@ def process_image(image,seed): self.clear_cuda_cache() if catch_interrupts: - logger.warning("Interrupted** Partial results will be returned.") + print("**Interrupted** Partial results will be returned.") else: raise KeyboardInterrupt except RuntimeError: @@ -576,11 +639,11 @@ def process_image(image,seed): self.clear_cuda_cache() print(traceback.format_exc(), file=sys.stderr) - logger.info("Could not generate image.") + print(">> Could not generate image.") toc = time.time() - logger.info("Usage stats:") - logger.info(f"{len(results)} image(s) generated in "+"%4.2fs" % (toc - tic)) + print("\n>> Usage stats:") + print(f">> {len(results)} image(s) generated in", "%4.2fs" % (toc - tic)) self.print_cuda_stats() return results @@ -610,16 +673,16 @@ def clear_cuda_stats(self): def print_cuda_stats(self): if self._has_cuda(): self.gather_cuda_stats() - logger.info( - "Max VRAM used for this generation: "+ - "%4.2fG. " % (self.max_memory_allocated / 1e9)+ - "Current VRAM utilization: "+ - "%4.2fG" % (self.memory_allocated / 1e9) + print( + ">> Max VRAM used for this generation:", + "%4.2fG." % (self.max_memory_allocated / 1e9), + "Current VRAM utilization:", + "%4.2fG" % (self.memory_allocated / 1e9), ) - logger.info( - "Max VRAM used since script start: " + - "%4.2fG" % (self.session_peakmem / 1e9) + print( + ">> Max VRAM used since script start: ", + "%4.2fG" % (self.session_peakmem / 1e9), ) # this needs to be generalized to all sorts of postprocessors, which should be wrapped @@ -648,7 +711,7 @@ def apply_postprocessor( seed = random.randrange(0, np.iinfo(np.uint32).max) prompt = opt.prompt or args.prompt or "" - logger.info(f'using seed {seed} and prompt "{prompt}" for {image_path}') + print(f'>> using seed {seed} and prompt "{prompt}" for {image_path}') # try to reuse the same filename prefix as the original file. # we take everything up to the first period @@ -697,8 +760,8 @@ def apply_postprocessor( try: extend_instructions[direction] = int(pixels) except ValueError: - logger.warning( - 'invalid extension instruction. Use ..., as in "top 64 left 128 right 64 bottom 64"' + print( + '** invalid extension instruction. 
Use ..., as in "top 64 left 128 right 64 bottom 64"' ) opt.seed = seed @@ -721,8 +784,8 @@ def apply_postprocessor( # fetch the metadata from the image generator = self.select_generator(embiggen=True) opt.strength = opt.embiggen_strength or 0.40 - logger.info( - f"Setting img2img strength to {opt.strength} for happy embiggening" + print( + f">> Setting img2img strength to {opt.strength} for happy embiggening" ) generator.generate( prompt, @@ -749,12 +812,12 @@ def apply_postprocessor( return restorer.process(opt, args, image_callback=callback, prefix=prefix) elif tool is None: - logger.warning( - "please provide at least one postprocessing option, such as -G or -U" + print( + "* please provide at least one postprocessing option, such as -G or -U" ) return None else: - logger.warning(f"postprocessing tool {tool} is not yet supported") + print(f"* postprocessing tool {tool} is not yet supported") return None def select_generator( @@ -798,8 +861,8 @@ def _make_images( image = self._load_img(img) if image.width < self.width and image.height < self.height: - logger.warning( - f"img2img and inpainting may produce unexpected results with initial images smaller than {self.width}x{self.height} in both dimensions" + print( + f">> WARNING: img2img and inpainting may produce unexpected results with initial images smaller than {self.width}x{self.height} in both dimensions" ) # if image has a transparent area and no mask was provided, then try to generate mask @@ -810,8 +873,8 @@ def _make_images( if (image.width * image.height) > ( self.width * self.height ) and self.size_matters: - logger.info( - "This input is larger than your defaults. If you run out of memory, please use a smaller image." + print( + ">> This input is larger than your defaults. If you run out of memory, please use a smaller image." ) self.size_matters = False @@ -892,11 +955,11 @@ def set_model(self, model_name): try: model_data = cache.get_model(model_name) except Exception as e: - logger.warning(f"model {model_name} could not be loaded: {str(e)}") + print(f"** model {model_name} could not be loaded: {str(e)}") print(traceback.format_exc(), file=sys.stderr) if previous_model_name is None: raise e - logger.warning("trying to reload previous model") + print("** trying to reload previous model") model_data = cache.get_model(previous_model_name) # load previous if model_data is None: raise e @@ -963,15 +1026,15 @@ def upscale_and_reconstruct( if self.gfpgan is not None or self.codeformer is not None: if facetool == "gfpgan": if self.gfpgan is None: - logger.info( - "GFPGAN not found. Face restoration is disabled." + print( + ">> GFPGAN not found. Face restoration is disabled." ) else: image = self.gfpgan.process(image, strength, seed) if facetool == "codeformer": if self.codeformer is None: - logger.info( - "CodeFormer not found. Face restoration is disabled." + print( + ">> CodeFormer not found. Face restoration is disabled." ) else: cf_device = ( @@ -985,7 +1048,7 @@ def upscale_and_reconstruct( fidelity=codeformer_fidelity, ) else: - logger.info("Face Restoration is disabled.") + print(">> Face Restoration is disabled.") if upscale is not None: if self.esrgan is not None: if len(upscale) < 2: @@ -998,10 +1061,10 @@ def upscale_and_reconstruct( denoise_str=upscale_denoise_str, ) else: - logger.info("ESRGAN is disabled. Image not upscaled.") + print(">> ESRGAN is disabled. Image not upscaled.") except Exception as e: - logger.info( - f"Error running RealESRGAN or GFPGAN. 
Your image was not upscaled.\n{e}" + print( + f">> Error running RealESRGAN or GFPGAN. Your image was not upscaled.\n{e}" ) if image_callback is not None: @@ -1067,17 +1130,17 @@ def _set_scheduler(self): if self.sampler_name in scheduler_map: sampler_class = scheduler_map[self.sampler_name] msg = ( - f"Setting Sampler to {self.sampler_name} ({sampler_class.__name__})" + f">> Setting Sampler to {self.sampler_name} ({sampler_class.__name__})" ) self.sampler = sampler_class.from_config(self.model.scheduler.config) else: msg = ( - f" Unsupported Sampler: {self.sampler_name} "+ + f">> Unsupported Sampler: {self.sampler_name} " f"Defaulting to {default}" ) self.sampler = default - logger.info(msg) + print(msg) if not hasattr(self.sampler, "uses_inpainting_model"): # FIXME: terrible kludge! @@ -1086,17 +1149,17 @@ def _set_scheduler(self): def _load_img(self, img) -> Image: if isinstance(img, Image.Image): image = img - logger.info(f"using provided input image of size {image.width}x{image.height}") + print(f">> using provided input image of size {image.width}x{image.height}") elif isinstance(img, str): - assert os.path.exists(img), f"{img}: File not found" + assert os.path.exists(img), f">> {img}: File not found" image = Image.open(img) - logger.info( - f"loaded input image of size {image.width}x{image.height} from {img}" + print( + f">> loaded input image of size {image.width}x{image.height} from {img}" ) else: image = Image.open(img) - logger.info(f"loaded input image of size {image.width}x{image.height}") + print(f">> loaded input image of size {image.width}x{image.height}") image = ImageOps.exif_transpose(image) return image @@ -1184,14 +1247,14 @@ def _check_for_erasure(self, image: Image.Image) -> bool: def _transparency_check_and_warning(self, image, mask, force_outpaint=False): if not mask: - logger.info( - "Initial image has transparent areas. Will inpaint in these regions." + print( + ">> Initial image has transparent areas. Will inpaint in these regions." ) - if (not force_outpaint) and self._check_for_erasure(image): - logger.info( - "Colors underneath the transparent region seem to have been erased.\n" + - "Inpainting will be suboptimal. Please preserve the colors when making\n" + - "a transparency mask, or provide mask explicitly using --init_mask (-M)." + if (not force_outpaint) and self._check_for_erasure(image): + print( + ">> WARNING: Colors underneath the transparent region seem to have been erased.\n", + ">> Inpainting will be suboptimal. Please preserve the colors when making\n", + ">> a transparency mask, or provide mask explicitly using --init_mask (-M).", ) def _squeeze_image(self, image): @@ -1202,11 +1265,11 @@ def _squeeze_image(self, image): def _fit_image(self, image, max_dimensions): w, h = max_dimensions - logger.info(f"image will be resized to fit inside a box {w}x{h} in size.") + print(f">> image will be resized to fit inside a box {w}x{h} in size.") # note that InitImageResizer does the multiple of 64 truncation internally image = InitImageResizer(image).resize(width=w, height=h) - logger.info( - f"after adjusting image dimensions to be multiples of 64, init image is {image.width}x{image.height}" + print( + f">> after adjusting image dimensions to be multiples of 64, init image is {image.width}x{image.height}" ) return image @@ -1217,8 +1280,8 @@ def _resolution_check(self, width, height, log=False): ) # resize to integer multiple of 64 if h != height or w != width: if log: - logger.info( - f"Provided width and height must be multiples of 64. 
Auto-resizing to {w}x{h}" + print( + f">> Provided width and height must be multiples of 64. Auto-resizing to {w}x{h}" ) height = h width = w diff --git a/invokeai/backend/stable_diffusion/diffusion/cross_attention_control.py b/invokeai/backend/stable_diffusion/diffusion/cross_attention_control.py index dfd19ea9641..69386e54952 100644 --- a/invokeai/backend/stable_diffusion/diffusion/cross_attention_control.py +++ b/invokeai/backend/stable_diffusion/diffusion/cross_attention_control.py @@ -10,7 +10,7 @@ import psutil import torch from compel.cross_attention_control import Arguments -from diffusers.models.attention_processor import AttentionProcessor +from diffusers.models.cross_attention import AttnProcessor from torch import nn import invokeai.backend.util.logging as logger @@ -344,7 +344,7 @@ def get_invokeai_attention_mem_efficient(self, q, k, v): def restore_default_cross_attention( model, is_running_diffusers: bool, - restore_attention_processor: Optional[AttentionProcessor] = None, + restore_attention_processor: Optional[AttnProcessor] = None, ): if is_running_diffusers: unet = model @@ -546,7 +546,7 @@ def get_mem_free_total(device): class InvokeAIDiffusersCrossAttention( - diffusers.models.attention.Attention, InvokeAICrossAttentionMixin + diffusers.models.attention.CrossAttention, InvokeAICrossAttentionMixin ): def __init__(self, **kwargs): super().__init__(**kwargs) @@ -597,9 +597,9 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a from dataclasses import dataclass, field import torch -from diffusers.models.attention_processor import ( - Attention, - AttnProcessor, +from diffusers.models.cross_attention import ( + CrossAttention, + CrossAttnProcessor, SlicedAttnProcessor, ) @@ -649,7 +649,7 @@ class SlicedSwapCrossAttnProcesser(SlicedAttnProcessor): def __call__( self, - attn: Attention, + attn: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None, diff --git a/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py b/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py index b0c85e9fd39..1cc9d82f402 100644 --- a/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py +++ b/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py @@ -5,7 +5,7 @@ import numpy as np import torch -from diffusers.models.attention_processor import AttentionProcessor +from diffusers.models.cross_attention import AttnProcessor from typing_extensions import TypeAlias import invokeai.backend.util.logging as logger @@ -102,7 +102,7 @@ def custom_attention_context( def override_cross_attention( self, conditioning: ExtraConditioningInfo, step_count: int - ) -> Dict[str, AttentionProcessor]: + ) -> Dict[str, AttnProcessor]: """ setup cross attention .swap control. for diffusers this replaces the attention processor, so the previous attention processor is returned so that the caller can restore it later. 
@@ -119,7 +119,7 @@ def override_cross_attention( ) def restore_default_cross_attention( - self, restore_attention_processor: Optional["AttentionProcessor"] = None + self, restore_attention_processor: Optional["AttnProcessor"] = None ): self.conditioning = None self.cross_attention_control_context = None diff --git a/invokeai/frontend/CLI/CLI.py b/invokeai/frontend/CLI/CLI.py index aa0c4bea5f9..7a576df5ce7 100644 --- a/invokeai/frontend/CLI/CLI.py +++ b/invokeai/frontend/CLI/CLI.py @@ -18,7 +18,7 @@ import invokeai.version as invokeai import invokeai.backend.util.logging as logger -from ...backend import Generate, ModelManager +from ...backend import ModelManager from ...backend.args import Args, dream_cmd_from_png, metadata_dumps, metadata_from_png from ...backend.globals import Globals, global_config_dir from ...backend.image_util import ( @@ -30,6 +30,8 @@ from ...backend.stable_diffusion import PipelineIntermediateState from ...backend.util import url_attachment_name, write_log from .readline import Completer, get_completer +from ...backend.onnx import ONNX +from ...backend.pytorch import Pytorch # global used in multiple functions (fix) infile = None @@ -41,36 +43,28 @@ def main(): opt = Args() args = opt.parse_args() + modeltype = None if not args: sys.exit(-1) - if args.laion400m: - print( - "--laion400m flag has been deprecated. Please use --model laion400m instead." - ) - sys.exit(-1) - if args.weights: - print( - "--weights argument has been deprecated. Please edit ./configs/models.yaml, and select the weights using --model instead." - ) - sys.exit(-1) - if args.max_loaded_models is not None: - if args.max_loaded_models <= 0: - print("--max_loaded_models must be >= 1; using 1") - args.max_loaded_models = 1 - - # alert - setting a few globals here - Globals.try_patchmatch = args.patchmatch - Globals.always_use_cpu = args.always_use_cpu - Globals.internet_available = args.internet_available and check_internet() - Globals.disable_xformers = not args.xformers - Globals.sequential_guidance = args.sequential_guidance - Globals.ckpt_convert = True # always true now + # load the infile as a list of lines + if opt.infile: + try: + if os.path.isfile(opt.infile): + infile = open(opt.infile, "r", encoding="utf-8") + elif opt.infile == "-": # stdin + infile = sys.stdin + else: + raise FileNotFoundError(f"{opt.infile} not found.") + except (FileNotFoundError, IOError) as e: + print(f"{e}. 
Aborting.") + sys.exit(-1) - # run any post-install patches needed - run_patches() + print(f">> {invokeai.__app_name__}, version {invokeai.__version__}") + print(f'>> InvokeAI runtime directory is "{Globals.root}"') - logger.info(f"Internet connectivity is {Globals.internet_available}") + Globals.internet_available = args.internet_available and check_internet() + print(f">> Internet connectivity is {Globals.internet_available}") if not args.conf: config_file = os.path.join(Globals.root, "configs", "models.yaml") @@ -79,95 +73,101 @@ def main(): opt, FileNotFoundError(f"The file {config_file} could not be found.") ) - logger.info(f"{invokeai.__app_name__}, version {invokeai.__version__}") - logger.info(f'InvokeAI runtime directory is "{Globals.root}"') + # normalize the config directory relative to root + if not os.path.isabs(opt.conf): + opt.conf = os.path.normpath(os.path.join(Globals.root, opt.conf)) - # loading here to avoid long delays on startup - # these two lines prevent a horrible warning message from appearing - # when the frozen CLIP tokenizer is imported - import transformers # type: ignore + if opt.modelType == "Pytorch": + #invocation of pytorch model + opt = Pytorch.start(opt, args) - transformers.logging.set_verbosity_error() - import diffusers + # Loading Face Restoration and ESRGAN Modules + gfpgan, codeformer, esrgan = load_face_restoration(opt) - diffusers.logging.set_verbosity_error() + if opt.embeddings: + if not os.path.isabs(opt.embedding_path): + embedding_path = os.path.normpath( + os.path.join(Globals.root, opt.embedding_path) + ) + else: + embedding_path = opt.embedding_path + else: + embedding_path = None - # Loading Face Restoration and ESRGAN Modules - gfpgan, codeformer, esrgan = load_face_restoration(opt) + try: + modeltype = Pytorch( + conf=opt.conf, + model=opt.model, + sampler_name=opt.sampler_name, + embedding_path=embedding_path, + full_precision=opt.full_precision, + precision=opt.precision, + gfpgan=gfpgan, + codeformer=codeformer, + esrgan=esrgan, + free_gpu_mem=opt.free_gpu_mem, + safety_checker=opt.safety_checker, + max_loaded_models=opt.max_loaded_models, + ) + except (FileNotFoundError, TypeError, AssertionError) as e: + report_model_error(opt, e) + except (IOError, KeyError) as e: + print(f"{e}. 
Aborting.") + sys.exit(-1) - # normalize the config directory relative to root - if not os.path.isabs(opt.conf): - opt.conf = os.path.normpath(os.path.join(Globals.root, opt.conf)) + # preload the model + # loading here to avoid long delays on startup + try: + modeltype.load_model() + except KeyError: + pass + except Exception as e: + report_model_error(opt, e) - if opt.embeddings: - if not os.path.isabs(opt.embedding_path): - embedding_path = os.path.normpath( - os.path.join(Globals.root, opt.embedding_path) + # try to autoconvert new models + if path := opt.autoimport: + modeltype.model_manager.heuristic_import( + str(path), convert=False, commit_to_conf=opt.conf ) - else: - embedding_path = opt.embedding_path - else: - embedding_path = None - # migrate legacy models - ModelManager.migrate_models() + if path := opt.autoconvert: + modeltype.model_manager.heuristic_import( + str(path), convert=True, commit_to_conf=opt.conf + ) - # load the infile as a list of lines - if opt.infile: + # Loading Face Restoration and ESRGAN Modules + gfpgan, codeformer, esrgan = load_face_restoration(opt) + + # web server loops forever + print("web and gui options: ", opt.gui, opt.web) + if opt.web or opt.gui: + invoke_ai_web_server_loop(modeltype, gfpgan, codeformer, esrgan) + sys.exit(0) + + elif opt.modelType == "Onnx": + #Invocation of onnx pipeline try: - if os.path.isfile(opt.infile): - infile = open(opt.infile, "r", encoding="utf-8") - elif opt.infile == "-": # stdin - infile = sys.stdin - else: - raise FileNotFoundError(f"{opt.infile} not found.") - except (FileNotFoundError, IOError) as e: - logger.critical('Aborted',exc_info=True) + modeltype = ONNX( + model=opt.model, + precision=opt.precision, + ) + except (FileNotFoundError, TypeError, AssertionError) as e: + report_model_error(opt, e) + except (IOError, KeyError) as e: + print(f"{e}. Aborting.") sys.exit(-1) - - # creating a Generate object: - try: - gen = Generate( - conf=opt.conf, - model=opt.model, - sampler_name=opt.sampler_name, - embedding_path=embedding_path, - full_precision=opt.full_precision, - precision=opt.precision, - gfpgan=gfpgan, - codeformer=codeformer, - esrgan=esrgan, - free_gpu_mem=opt.free_gpu_mem, - safety_checker=opt.safety_checker, - max_loaded_models=opt.max_loaded_models, - ) - except (FileNotFoundError, TypeError, AssertionError) as e: - report_model_error(opt, e) - except (IOError, KeyError): - logger.critical("Aborted",exc_info=True) + else: + print(" Aborting. 
modelType chosen is not defined.") sys.exit(-1) - if opt.seamless: - logger.info("Changed to seamless tiling mode") - - # preload the model - try: - gen.load_model() - except KeyError: - pass - except Exception as e: - report_model_error(opt, e) + # these two lines prevent a horrible warning message from appearing + # when the frozen CLIP tokenizer is imported + import transformers # type: ignore - # try to autoconvert new models - if path := opt.autoconvert: - gen.model_manager.heuristic_import( - str(path), commit_to_conf=opt.conf - ) + transformers.logging.set_verbosity_error() + import diffusers - # web server loops forever - if opt.web or opt.gui: - invoke_ai_web_server_loop(gen, gfpgan, codeformer, esrgan) - sys.exit(0) + diffusers.logging.set_verbosity_error() if not infile: print( @@ -175,7 +175,7 @@ def main(): ) try: - main_loop(gen, opt) + main_loop(modeltype, opt) except KeyboardInterrupt: print( f'\nGoodbye!\nYou can start InvokeAI again by running the "invoke.bat" (or "invoke.sh") script from {Globals.root}' @@ -192,12 +192,12 @@ def main_loop(gen, opt): path_filter = re.compile(r'[<>:"/\\|?*]') last_results = list() + completer = gen.getCompleter(opt) # The readline completer reads history from the .dream_history file located in the # output directory specified at the time of script launch. We do not currently support # changing the history file midstream when the output directory is changed. - completer = get_completer(opt, models=gen.model_manager.list_models()) set_default_output_dir(opt, completer) - if gen.model: + if gen.model and isinstance(gen, Pytorch): add_embedding_terms(gen, completer) output_cntr = completer.get_current_history_length() + 1 @@ -437,8 +437,10 @@ def image_writer( catch_interrupts=catch_ctrl_c, **vars(opt), ) - except (PromptParser.ParsingException, pyparsing.ParseException): - logger.error("An error occurred while processing your prompt",exc_info=True) + sys.exit(-1) + except (PromptParser.ParsingException, pyparsing.ParseException) as e: + print("** An error occurred while processing your prompt **") + print(f"** {str(e)} **") elif operation == "postprocess": logger.info(f"fixing {opt.prompt}") opt.last_operation = do_postprocess(gen, opt, image_writer) @@ -484,6 +486,19 @@ def image_writer( f'\nGoodbye!\nYou can start InvokeAI again by running the "invoke.bat" (or "invoke.sh") script from {Globals.root}' ) +def check_internet() -> bool: + """ + Return true if the internet is reachable. + It does this by pinging huggingface.co. + """ + import urllib.request + + host = "http://huggingface.co" + try: + urllib.request.urlopen(host, timeout=1) + return True + except: + return False # TO DO: remove repetitive code and the awkward command.replace() trope # Just do a simple parse of the command! @@ -1058,7 +1073,7 @@ def get_next_command(infile=None, model_name="no model") -> str: # command stri return command -def invoke_ai_web_server_loop(gen: Generate, gfpgan, codeformer, esrgan): +def invoke_ai_web_server_loop(gen, gfpgan, codeformer, esrgan): print("\n* --web was specified, starting web server...") from invokeai.backend.web import InvokeAIWebServer @@ -1256,36 +1271,5 @@ def report_model_error(opt: Namespace, e: Exception): main() # would rather do a os.exec(), but doesn't exist? sys.exit(0) - -def check_internet() -> bool: - """ - Return true if the internet is reachable. - It does this by pinging huggingface.co. 
- """ - import urllib.request - - host = "http://huggingface.co" - try: - urllib.request.urlopen(host, timeout=1) - return True - except: - return False - -# This routine performs any patch-ups needed after installation -def run_patches(): - # install ckpt configuration files that may have been added to the - # distro after original root directory configuration - import invokeai.configs as conf - from shutil import copyfile - - root_configs = Path(global_config_dir(), 'stable-diffusion') - repo_configs = Path(conf.__path__[0], 'stable-diffusion') - if not root_configs.exists(): - os.makedirs(root_configs, exist_ok=True) - for src in repo_configs.iterdir(): - dest = root_configs / src.name - if not dest.exists(): - copyfile(src, dest) - if __name__ == "__main__": main() diff --git a/pyproject.toml b/pyproject.toml index fd671fee23e..07b5d54b6fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,9 +38,9 @@ dependencies = [ "albumentations", "click", "clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", - "compel~=1.1.5", + "compel==1.0.1", "datasets", - "diffusers[torch]~=0.16.1", + "diffusers[torch]~=0.14", "dnspython==2.2.1", "einops", "eventlet", @@ -78,6 +78,9 @@ dependencies = [ "transformers~=4.26", "uvicorn[standard]==0.21.1", "windows-curses; sys_platform=='win32'", + "onnxruntime-openvino", + "openvino==2022.3.0", + "diffusers==0.14.0" ] [project.optional-dependencies]
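
For reference, the ONNX/OpenVINO generation path added by this patch (README step 10, `invokeai/backend/onnx.py`, and the new `--modeltype` flag) reduces to roughly the sketch below. This is a minimal illustration only, assuming the dependencies pinned in `pyproject.toml` (`diffusers==0.14.0`, `onnxruntime-openvino`, `openvino==2022.3.0`) and a model repository that publishes an `onnx` revision; it is not an additional change to the patch.

```python
# Minimal sketch of the ONNX/OpenVINO path implemented in invokeai/backend/onnx.py.
# Assumes diffusers==0.14.0, onnxruntime-openvino and openvino==2022.3.0 are installed,
# and that the model repository ("runwayml/stable-diffusion-v1-5" is the patch's
# fallback) provides an "onnx" revision with exported ONNX weights.
import importlib
import time

from diffusers import OnnxStableDiffusionPipeline

# Make the OpenVINO runtime libraries visible to onnxruntime-openvino
# (the patch does the same at import time via importlib).
openvino_utils = importlib.import_module("openvino.utils")
openvino_utils.add_openvino_libs_to_path()

pipe = OnnxStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="onnx",
    provider="OpenVINOExecutionProvider",  # onnx.py uses "CPUExecutionProvider" when precision == "cpu"
)

image = pipe(
    "banana sushi",            # the README quick-start prompt
    height=512,
    width=512,
    num_images_per_prompt=1,
    num_inference_steps=50,
).images[0]

image.save(f"outputs/Inference_{int(time.time())}.png")
```

As in `onnx.py`, the output is written to the `outputs/` directory with a timestamped filename; only the execution provider changes between the CPU and OpenVINO cases.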