In [2]:
from mlx_lm import load, generate
import litellm
from litellm import CustomLLM, ModelResponse, completion

class MLXLiteLLM(CustomLLM):

    def __init__(self, mlx_model_name):
        super().__init__()
        self.mlx_model_name = mlx_model_name
        self.model, self.tokenizer = load(self.mlx_model_name)


    def completion(self, *args, **kwargs) -> ModelResponse:
        # LiteLLM passes messages parameter instead of prompt
        if "messages" in kwargs:
            messages = kwargs["messages"]
        elif "prompt" in kwargs:
            # Fallback for direct calls with prompt
            messages = [{"role": "user", "content": kwargs["prompt"]}]
        else:
            raise ValueError("Either 'messages' or 'prompt' must be provided")
            
        prompt = self.tokenizer.apply_chat_template(
            messages, add_generation_prompt=True
        )

        text = generate(self.model, self.tokenizer, prompt=prompt)

        return ModelResponse(
            model = self.mlx_model_name,
            completion = text
        )
    
mlx_litellm = MLXLiteLLM(mlx_model_name = "mlx-community/Mistral-7B-Instruct-v0.3-4bit")

litellm.custom_provider_map = [
    {"provider": "mlx-litellm", "custom_handler": mlx_litellm}
]

resp = completion(
    model = 'mlx-litellm/my-model',
    messages = [{"role": "user", "content": "What is the capital of Illinois?"}]
)

Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 201096.77it/s]


In [3]:
resp

ModelResponse(id='chatcmpl-46d8b5c7-1150-42e6-94cc-d95b77f599a1', created=1741726996, model='mlx-community/Mistral-7B-Instruct-v0.3-4bit', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content=None, role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None))], usage=Usage(completion_tokens=0, prompt_tokens=0, total_tokens=0, completion_tokens_details=None, prompt_tokens_details=None), completion='The capital of Illinois is Springfield. It has been the capital since Illinois became a state in 1818. Springfield is located in the central part of the state.')

In [1]:
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")

prompt = "Write a story about Einstein"

messages = [{"role": "user", "content": prompt}]
prompt = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True
)

text = generate(model, tokenizer, prompt=prompt, verbose=True)
text

  from .autonotebook import tqdm as notebook_tqdm
Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 180123.48it/s]


Title: A Brush with Genius: The Encounter of the Young Artist and Albert Einstein

In the quaint town of Bern, Switzerland, nestled amidst the picturesque Alps, lived a young, aspiring artist named Max. Max was a peculiar child, not because of his artistic talents, but because of his insatiable curiosity and an unusual friend.

Max's friend was none other than Albert Einstein, the renowned physicist who would one day change the world with his theory of relativity. At the time, however, Einstein was just a humble patent clerk, struggling to make ends meet.

Max and Einstein shared a unique bond. Every day, Max would visit Einstein at the patent office, where Einstein would explain his complex theories and equations to Max, who would then attempt to illustrate them in his art. Max found beauty in Einstein's work, and Einstein found clarity in Max's interpretation.

One day, as Max was sketching Einstein's latest equation, he found himself stuck. The equation was complex, and Max struggle

'Title: A Brush with Genius: The Encounter of the Young Artist and Albert Einstein\n\nIn the quaint town of Bern, Switzerland, nestled amidst the picturesque Alps, lived a young, aspiring artist named Max. Max was a peculiar child, not because of his artistic talents, but because of his insatiable curiosity and an unusual friend.\n\nMax\'s friend was none other than Albert Einstein, the renowned physicist who would one day change the world with his theory of relativity. At the time, however, Einstein was just a humble patent clerk, struggling to make ends meet.\n\nMax and Einstein shared a unique bond. Every day, Max would visit Einstein at the patent office, where Einstein would explain his complex theories and equations to Max, who would then attempt to illustrate them in his art. Max found beauty in Einstein\'s work, and Einstein found clarity in Max\'s interpretation.\n\nOne day, as Max was sketching Einstein\'s latest equation, he found himself stuck. The equation was complex, and

In [None]:
text