<a href="https://colab.research.google.com/github/developerY/MojoMax/blob/main/MojoMax.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install modular --index-url https://dl.modular.com/public/nightly/python/simple/ --extra-index-url https://download.pytorch.org/whl/cpu

In [None]:
# Shell out to `nvidia-smi` to show which NVIDIA GPU (if any) this runtime exposes.
# NOTE(review): on a CPU-only runtime this command is presumably unavailable and will
# just print a shell error — the rest of the notebook falls back to the CPU.
!nvidia-smi

In [None]:
import numpy as np
from max.driver import CPU, Accelerator, Tensor, accelerator_count
from max.dtype import DType
from max.engine import InferenceSession
from max.graph import DeviceRef, Graph, TensorType, ops

In [None]:
# Display how many accelerators the MAX driver reports; a value of 0 makes the
# next cell select the CPU instead.
accelerator_count()

In [None]:
# Use an accelerator whenever at least one is reported; otherwise run on the host CPU.
device = Accelerator() if accelerator_count() != 0 else CPU()

# Echo the chosen device as the cell output.
device

In [None]:
# Length and element type shared by both input vectors.
vector_width = 10
dtype = DType.float32

# Both operands have the same dtype, shape and device placement, so a single
# TensorType describes each of them.
device_ref = DeviceRef.from_device(device)
input_type = TensorType(dtype, shape=[vector_width], device=device_ref)

# Build the "vector_addition" graph: two inputs in, their sum out.
with Graph("vector_addition", input_types=[input_type, input_type]) as graph:
    lhs, rhs = graph.inputs
    output = lhs + rhs
    graph.output(output)

In [None]:
# Open an inference session on the selected device ...
session = InferenceSession(devices=[device])

# ... and load the graph into it, yielding the model executed in the cells below.
model = session.load(graph)

In [None]:
# Seeded generator so the inputs (and therefore every printed result below) are
# identical on each Restart & Run All.
rng = np.random.default_rng(0)

# Two random float32 vectors of length `vector_width`, matching the graph's input types.
lhs_values = rng.uniform(size=vector_width).astype(np.float32)
rhs_values = rng.uniform(size=vector_width).astype(np.float32)

# Wrap the NumPy arrays as driver tensors and move them to the device the graph runs on.
lhs_tensor = Tensor.from_numpy(lhs_values).to(device)
rhs_tensor = Tensor.from_numpy(rhs_values).to(device)

In [None]:
# Run the model on both operands, take its first output, and copy that tensor
# back to the host so it can be converted to NumPy later.
result = model.execute(lhs_tensor, rhs_tensor)[0].to(CPU())

In [None]:
# Convert the graph output to NumPy once, and compute the reference sum on the host.
result_values = result.to_numpy()
expected_values = lhs_values + rhs_values

print("Left-hand-side values:")
print(lhs_values)
print()

print("Right-hand-side values:")
print(rhs_values)
print()

print("Graph result:")
print(result_values)
print()

print("Expected result:")
print(expected_values)

# Fail loudly if the graph result disagrees with NumPy, instead of relying on
# a visual comparison of the two printed arrays.
np.testing.assert_allclose(result_values, expected_values, rtol=1e-6)

In [None]:
from max.entrypoints.llm import LLM
from max.pipelines import PipelineConfig
from max.serve.config import Settings

In [None]:
# Model identifier handed to the pipeline configuration.
model_path = "Qwen/Qwen2.5-0.5B-Instruct"
print(f"Loading model: {model_path}")

# Assemble the LLM entry point from default serving settings and the pipeline config.
pipeline_config = PipelineConfig(model_path=model_path)
settings = Settings()
llm = LLM(settings, pipeline_config)

# Prompts to complete, and the per-prompt cap on newly generated tokens.
prompts = ["The fastest way to learn python is"]
max_new_tokens = 50

print("Generating responses...")
responses = llm.generate(prompts, max_new_tokens=max_new_tokens)

# Show each prompt followed directly by its generated continuation.
for index, pair in enumerate(zip(prompts, responses)):
    prompt_text, completion = pair
    print(f"========== Response {index} ==========")
    print(prompt_text + completion)
    print()