## Using a more complex HF model: `DeepSeek-R1`

In [None]:
from importlib import metadata

import optimum
from transformers import pipeline, AutoModelForCausalLM, AutoConfig

# Report the installed Optimum release.  Printing the module object itself
# only shows its repr (module name and path), not a version number, so the
# version is looked up from the installed distribution's metadata instead.
print("Optimum version:", metadata.version("optimum"))

# Hugging Face Hub identifier of the checkpoint loaded in this cell.
DEEP_SEEK_MODEL_KEY = "deepseek-ai/DeepSeek-R1"

# trust_remote_code lets transformers load the custom configuration and model
# classes that ship with the repository.
config = AutoConfig.from_pretrained(
    DEEP_SEEK_MODEL_KEY,
    trust_remote_code=True,
)

# Override the checkpoint's quantization settings because its default
# quant_method of "fp8" is not recognized (presumably by the installed
# transformers version -- TODO confirm).
#
# quant_method names noted as recognized:
#   'awq', 'bitsandbytes_4bit', 'bitsandbytes_8bit', 'gptq', 'aqlm', 'quanto',
#   'eetq', 'higgs', 'hqq', 'compressed-tensors', 'fbgemm_fp8', 'torchao',
#   'bitnet', 'vptq'
# Alternatives tried earlier and abandoned: "default", "gptq" (bits=4,
# group_size=-1) and "aqlm".
#
# NOTE(review): "bits" and "group_size" look like GPTQ-style options; whether
# the "bitsandbytes_8bit" method honours them is unverified -- confirm.
config.quantization_config = {"quant_method": "bitsandbytes_8bit", "bits": 8, "group_size": -1}

# Load the weights with the tweaked config rather than the one stored in the
# repository.
model = AutoModelForCausalLM.from_pretrained(
    DEEP_SEEK_MODEL_KEY,
    config=config,
    ignore_mismatched_sizes=True,
    trust_remote_code=True,
)

# Wrap the already-loaded custom model in a text-generation pipeline.  The
# tokenizer is given as the repository id, so trust_remote_code stays on the
# pipeline constructor, where loading happens.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=DEEP_SEEK_MODEL_KEY,
    trust_remote_code=True,
)

# Generate a single short sample.
#
# trust_remote_code is deliberately NOT passed to this call: it is a load-time
# option, and extra keyword arguments given here are forwarded to
# model.generate(), where an unknown model kwarg can be rejected with a
# ValueError (depending on the transformers version).
#
# NOTE(review): temperature only influences sampling-based decoding; confirm
# that the model's generation config enables sampling, otherwise it is ignored.
output = pipe(
    "Generate a sample text using DeepSeek-R1.",
    truncation=True,
    max_length=50,
    num_return_sequences=1,
    temperature=0.6,
)

print(output)
