Each model in the Transformers library falls into one of the categories below:

- Autoregressive models: correspond to the decoder of the original transformer model

- Autoencoding models: correspond to the encoder of the original transformer model

The **same architecture** can be used as either an autoregressive or an autoencoding model; the two differ only in how the model is pretrained

- Sequence-to-sequence (seq-to-seq) models: use both the encoder and the decoder of the original transformer

- multimodal models

- retrieval-based models

In [None]:
# Catalogue of Transformers-library models, grouped by model family.
#
# Maps a family name ("autoregressive", "autoencoding", "seq-2-seq",
# "MultiModal", "Retrieval Based") to a list of per-model records. Every
# record carries the same five keys:
#   "model"          - display name of the model (str)
#   "dataset"        - dataset name(s) recorded for the model: a str or a list of str
#   "tasks"          - list of task names recorded for the model
#   "special_change" - list of notes on what the model changes, or None
#   "link"           - URL string pointing at the model's reference page
#
# NOTE: the name is bound to the dict itself (no trailing comma after the
# closing brace, which would silently wrap it in a one-element tuple).
dict_models_tasks = {
    "autoregressive": [
        {
            "model": "originalGPT",
            "dataset": "Book Corpus",
            "tasks": ["language modelling", "multitask LM", "multichoice classification"],
            "special_change": None,
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/gpt",
        },
        {
            "model": "GPT2",
            "dataset": "WebText",
            "tasks": ["language modelling", "multitask LM", "multichoice classification"],
            "special_change": None,
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/gpt2",
        },
        {
            "model": "CTRL",
            "dataset": "WebText",
            "tasks": ["language modelling"],
            "special_change": ["Introduces Control Code"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/ctrl",
        },
        {
            "model": "TransformerXL",
            "dataset": "BookCorpus",
            "tasks": ["language modelling"],
            "special_change": ["Recurrence Mechanism", "Positional Relative Embedding"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/transformerxl",
        },
        {
            "model": "Reformer",
            "dataset": "Unknown",
            "tasks": ["language modelling"],
            "special_change": [
                "Axial position embedding",
                "local sensitive hashing attention",
                "Feed forward by chunks, not batches",
                "Intermediate results not Stored",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/reformer",
        },
        {
            "model": "XLNet",
            "dataset": "Unknown",
            "tasks": [
                "language modelling",
                "token classification",
                "sentence classification",
                "multi-choice classification",
                "question answering",
            ],
            "special_change": ["not traditional AR model", "builds on training strategy"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/xlnet",
        },
    ],
    "autoencoding": [
        {
            "model": "BERT(bidirectional encoding representation from Transformers)",
            "dataset": ["brown corpus", "wikipedia"],
            "tasks": [
                "language modelling",
                "mask LM",
                "multichoice classification",
                "sentence classification",
                "token classification",
                "question answering",
                "next sentence prediction",
            ],
            "special_change": None,
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/bert",
        },
        {
            "model": "ALBERT",
            "dataset": ["brown corpus", "wikipedia"],
            "tasks": [
                "language modelling",
                "mask LM",
                "multichoice classification",
                "sentence classification",
                "token classification",
                "question answering",
                "next sentence prediction",
            ],
            "special_change": [
                "Embedding size is different",
                "next sentence pred is replaced with sentence ordering prediction",
                "Layers are split in groups",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/albert",
        },
        {
            "model": "RoBERTa",
            "dataset": ["brown corpus", "wikipedia"],
            "tasks": [
                "mask LM",
                "multichoice classification",
                "sentence classification",
                "token classification",
                "question answering",
            ],
            "special_change": ["Dynamic Masking", "no NSP", "larger train batches", "BPE"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/roberta",
        },
        {
            "model": "DistilBert",
            "dataset": ["brown corpus", "wikipedia"],
            "tasks": ["mask LM", "sentence classification", "token classification", "question answering"],
            "special_change": [
                "trained by distillation of BERT",
                "predicting masked token",
                "cos similarity between student and teacher model",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/distilbert",
        },
        {
            "model": "ConvBERT",
            "dataset": "Unknown",  # key was misspelled "datset"
            "tasks": ["multitask LM", "token classification", "sentence classification"],
            "special_change": [
                "attention heads replaced with span-based dynamic conv",
                "helps to get local dependencies",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/convbert",
        },
        {
            "model": "cross-ling language model XLM",
            "dataset": "Unknown",
            "tasks": ["language modelling", "multitask LM", "multichoice classification"],
            "special_change": [
                "Causal LM AutoRegressive training",
                "MLM with 256 tokens",
                "MLM and TLM with random masking",
                "bi-lingual",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/xlm",
        },
        {
            "model": "XLM-Roberta",
            "dataset": "Unknown",
            "tasks": [
                "multitask LM",
                "token classification",
                "sentence classification",
                "multi-choice QA",
                "QA",
            ],
            "special_change": ["Using RoBERTa trick on XLM", "no TLM objective", "100 langs trained"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/xlmroberta",
        },
        {
            "model": "FlauBERT",
            "dataset": ["book corpus", "wiki pedia"],
            "tasks": ["language modelling", "sentence classification"],
            "special_change": ["no sentence order prediction"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/flaubert",
        },
        {
            "model": "ELECTRA",
            "dataset": "inputs from another model",
            "tasks": ["masked language modelling", "sentence classification", "token classification"],
            "special_change": ["training is similar to GAN"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/electra",
        },
        {
            "model": "Funnel Transformer",
            "dataset": "unknown",
            "tasks": [
                "masked language modelling",
                "sentence classification",
                "token classification",
                "multi QA",
                "QA",
            ],
            "special_change": [
                "transformer with pooling",
                "layers in blocks",
                "final seq 1/4th of original",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/funnel",
        },
        {
            "model": "LongFormer",
            "dataset": ["book corpus", "wikipedia"],
            "tasks": [
                "mask LM",
                "multichoice classification",
                "sentence classification",
                "token classification",
                "question answering",
            ],
            "special_change": ["replace attn mat with sparse mat to go faster"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/longformer",
        },
    ],
    "seq-2-seq": [
        {
            "model": "BART",
            "dataset": "unknown",
            "tasks": ["conditional generation", "sequence classification"],
            "special_change": ["Encoder Decoder model", "multiple transformation involved"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/bart",
        },
        {
            "model": "Pegasus",
            "dataset": "unknown",
            "tasks": ["conditional generation", "summarisation"],
            "special_change": [
                "pretrained for Gap Seq Gen",
                "pretrained for masked language modelling",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/pegasus",
        },
        {
            "model": "MarianMT in C++",
            "dataset": "unknown",
            "tasks": ["conditional generation"],
            "special_change": ["Modeled in C++"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/marian",
        },
        {
            "model": "T5",
            "dataset": "unknown",
            "tasks": ["conditional generation"],
            "special_change": [
                "supervised & self supervised training",
                "Glue / SuperGLUE tasks are converted to text to text",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/t5",
        },
        {
            "model": "MT5",
            "dataset": "unknown",
            "tasks": ["conditional generation"],
            "special_change": ["self supervised training", "Trained on 101 langs"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/mt5",
        },
        {
            "model": "MBART",
            "dataset": "unknown",
            "tasks": ["conditional generation"],
            "special_change": ["same as BART", "Trained on 25 langs"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/mbart",
        },
        {
            "model": "prophetnet",
            "dataset": "unknown",
            "tasks": ["conditional generation", "summarization"],
            "special_change": [
                "novel seq2seq pretraining",
                "predicts next n tokens based on prev n tokens",
                "main and n-stream self attention",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/prophetnet",
        },
        {
            "model": "xlm-prophetnet",
            "dataset": "unknown",
            "tasks": ["multi-lingual conditional generation", "question generation", "headline generation"],
            "special_change": ["same as prophetnet"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/xlmprophetnet",
        },
    ],
    "MultiModal": [
        {
            "model": "MMBT",
            "dataset": "Unknown",
            "tasks": ["classification"],
            "special_change": [
                "tokenized text + final activation of resnet image",
                "segment embedding to tell text and image difference",
            ],
            "link": "https://arxiv.org/abs/1909.02950",
        },
    ],
    "Retrieval Based": [
        {
            "model": "Dense Passage Retrieval",
            "dataset": "unknown",
            "tasks": ["Not implemented"],
            "special_change": ["Question Encoder", "Context Encoder", "reader"],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/dpr",
        },
        {
            "model": "Retrieval Aug Gen for Knowledge Intensive Tasks",
            "dataset": "unknown",
            "tasks": ["generation", "summarization"],
            "special_change": [
                "RAG extracts the text",
                "seq2Seq model reads and generates output",
                "RAG-Token and RAG-Sequence",
            ],
            "link": "https://huggingface.co/docs/transformers/v4.15.0/en/model_doc/rag",
        },
    ],
}