Generate CAD models with Llama 3.1 and the CadQuery library using Retrieval-Augmented Generation (RAG). Recommended Colab runtime: T4 GPU.


Step One: Install Dependencies and Setup Ollama

In [1]:
# Install colab-xterm for terminal access inside the notebook
!pip install colab-xterm
%load_ext colabxterm

# Install the CadQuery library
!pip install cadquery

# Install LangChain and related packages (Ollama bindings and the chromadb vector store)
!pip install langchain langchain-community langchain-ollama chromadb

# Install Ollama via its install script
!curl https://ollama.ai/install.sh | sh

# Start the Ollama server in the background (the trailing "&" detaches it from this cell)
# NOTE(review): the pull below runs immediately after this call; if the server
# has not finished starting yet the pull may fail - confirm, or re-run the cell.
import os
os.system("ollama serve &")

# Pull the Llama 3.1 8B model
!ollama pull llama3.1:8B

# List the locally available models (presumably also confirms the server responds)
!ollama list

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13281    0 13281    0     0  55647      0 --:--:-- --:--:-- --:--:-- 55569
>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?202

Step Two: Prepare the CadQuery documentation for RAG. Follow the steps below:
1. Initialize the Llama LLM and the embedding model from Ollama.
2. Initialize the chromadb client as our vector database for RAG.
3. Scrape and process the CadQuery documentation.
4. If the scraping fails, create simplified documentation instead.
5. Create embeddings and store them in chromadb.


In [2]:
#pull dependencies
!ollama pull mxbai-embed-large
import chromadb
import requests
from bs4 import BeautifulSoup
from langchain_ollama import OllamaLLM, OllamaEmbeddings

# Initialize the Ollama LLM (text generation) and the embedding model
llm = OllamaLLM(model="llama3.1:8b")
embedder = OllamaEmbeddings(model="mxbai-embed-large")

# Initialize the chromadb client
client = chromadb.Client()

# Reuse the "cadquery_docs" collection when it already exists (e.g. when this
# cell is re-run) and create it otherwise. get_or_create_collection does this
# in one call, so no bare `except:` is needed and unrelated errors are no
# longer silently swallowed.
collection = client.get_or_create_collection(name="cadquery_docs")

#scraping cadquery documentation
def scrape_cadquery_docs(url, timeout=30):
    """Download a documentation page and return its text snippets.

    Args:
        url: Address of the HTML page to scrape.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the notebook indefinitely.

    Returns:
        A list of strings, one per paragraph, heading (h1-h3) or ``pre``
        block whose text is longer than 20 characters. An empty list is
        returned (and the error printed) when the page cannot be fetched.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error scraping {url}: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract text from paragraphs, headings, and code blocks
    docs = []
    for element in soup.find_all(['p', 'h1', 'h2', 'h3', 'pre']):
        # get_text(strip=True) strips every text node before joining them,
        # which glues words and code tokens together across inline tags
        # (e.g. syntax-highlighting spans). Take the raw text instead.
        raw_text = element.get_text()
        if element.name == 'pre':
            # Keep line breaks and indentation inside code blocks.
            text = raw_text.strip()
        else:
            # Collapse runs of whitespace in prose to single spaces.
            text = " ".join(raw_text.split())
        if len(text) > 20:  # Filter out short or empty texts
            docs.append(text)
    return docs

# Link to the CadQuery documentation, examples section.
cadquery_doc_url = "https://cadquery.readthedocs.io/en/latest/examples.html"

# Scrape and process the documentation
cadquery_docs = scrape_cadquery_docs(cadquery_doc_url)

# Generate embeddings and store them in ChromaDB.
# All snippets are embedded in one batched call, and upsert (rather than add)
# keeps the cell idempotent: re-running it overwrites ids "0".."n-1" instead
# of trying to insert duplicates.
if cadquery_docs:
    doc_embeddings = embedder.embed_documents(cadquery_docs)
    collection.upsert(
        ids=[str(i) for i in range(len(cadquery_docs))],
        embeddings=doc_embeddings,
        documents=cadquery_docs,
    )
else:
    # Nothing was scraped; later retrieval will have no documentation context.
    print("Warning: no documentation snippets were scraped; the vector store was not updated.")

print(f"Loaded {len(cadquery_docs)} CadQuery documentation snippets into vector store.")



[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h

Step Three: Prompting

Provide few-shot examples to guide the LLM toward generating correct CadQuery code.

In [32]:
# Worked prompt/code pairs shown to the LLM as few-shot guidance.
# Each entry pairs a natural-language request with the CadQuery script that
# fulfils it, including the STEP export.
few_shot_examples = [
    dict(
        prompt="Create a cube with side length 20 mm and export it as a STEP file.",
        code=(
            "import cadquery as cq\n"
            "result = cq.Workplane('XY').box(20, 20, 20)\n"
            "cq.exporters.export(result, 'cube.step', cq.exporters.ExportTypes.STEP)\n"
        ),
    ),
    dict(
        prompt="Create a cylinder with radius 10 mm and height 30 mm, then export it as a STEP file.",
        code=(
            "import cadquery as cq\n"
            "result = cq.Workplane('XY').circle(10).extrude(30)\n"
            "cq.exporters.export(result, 'cylinder.step', cq.exporters.ExportTypes.STEP)\n"
        ),
    ),
    dict(
        prompt="Create a sphere with radius 15 mm and export it as a STEP file.",
        code=(
            "import cadquery as cq\n"
            "result = cq.Workplane('XY').sphere(15)\n"
            "cq.exporters.export(result, 'sphere.step', cq.exporters.ExportTypes.STEP)\n"
        ),
    ),
]

# Render every example as "Prompt: ... / Code: ..." and separate the
# examples with a blank line so they can be dropped into the LLM prompt.
_EXAMPLE_LAYOUT = "Prompt: {prompt}\nCode:\n{code}"
few_shot_prompt = "\n\n".join(
    _EXAMPLE_LAYOUT.format(**example) for example in few_shot_examples
)

print("Few-shot examples prepared:")
print(few_shot_prompt)

Few-shot examples prepared:
Prompt: Create a cube with side length 20 mm and export it as a STEP file.
Code:
import cadquery as cq
result = cq.Workplane('XY').box(20, 20, 20)
cq.exporters.export(result, 'cube.step', cq.exporters.ExportTypes.STEP)


Prompt: Create a cylinder with radius 10 mm and height 30 mm, then export it as a STEP file.
Code:
import cadquery as cq
result = cq.Workplane('XY').circle(10).extrude(30)
cq.exporters.export(result, 'cylinder.step', cq.exporters.ExportTypes.STEP)


Prompt: Create a sphere with radius 15 mm and export it as a STEP file.
Code:
import cadquery as cq
result = cq.Workplane('XY').sphere(15)
cq.exporters.export(result, 'sphere.step', cq.exporters.ExportTypes.STEP)



Step Four: Create the RAG pipeline, which combines the retrieved documentation with the few-shot examples created above and generates code using Llama.

In [33]:
!pip install --upgrade langchain
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from google.colab import files

# Initialize the Ollama LLM used to generate the CadQuery code
llm = OllamaLLM(model="llama3.1:8b")

# Initialize the Ollama embedding model used to embed prompts for retrieval
embedder = OllamaEmbeddings(model="mxbai-embed-large")

# Define the prompt template.
# It has three placeholders - {context}, {few_shot} and {prompt} - which are
# filled from the dict passed to rag_chain.invoke(). The template ends with a
# fenced python block to show the model the expected answer format.
template = """You are an expert in CadQuery, a Python library for parametric CAD modeling. Using the following CadQuery documentation and examples, generate correct CadQuery code for the user's prompt. Ensure the code includes exporting the model as a STEP file. Always import cadquery as cq and use the STEP export function: cq.exporters.export(result, 'model.step', cq.exporters.ExportTypes.STEP).

CadQuery Documentation:
{context}

Few-Shot Examples:
{few_shot}

User Prompt: {prompt}

Generated Code:
```python
# Your code here
```
"""
prompt = ChatPromptTemplate.from_template(template)

# Create the RAG chain: fill the template, send it to the LLM, and parse the
# model output with StrOutputParser
rag_chain = prompt | llm | StrOutputParser()

#Create a function to generate the cad code: Retrieve relevant documents -> Generate the code using RAG -> Extract the code from output
def _extract_code(output):
    """Return the code contained in the first fenced block of *output*.

    A "```python" fence is preferred. If there is none, the first generic
    "```" fence is used (skipping the rest of its opening line). If the
    output has no fence at all, the whole output is returned. A missing
    closing fence means the code runs to the end of the output. The result
    is stripped of surrounding whitespace.
    """
    python_fence = "```python"
    start = output.find(python_fence)
    if start != -1:
        start += len(python_fence)
    else:
        start = output.find("```")
        if start == -1:
            # No fence at all: treat the entire answer as code.
            return output.strip()
        # Skip the remainder of the opening fence line (optional language tag).
        line_end = output.find("\n", start)
        start = line_end + 1 if line_end != -1 else start + 3

    end = output.find("```", start)
    if end == -1:
        # Unterminated block: keep everything up to the end of the output.
        end = len(output)
    return output[start:end].strip()


def generate_cad_code(user_prompt):
    """Generate CadQuery code for *user_prompt* using the RAG chain.

    The prompt is embedded, the three closest documentation snippets are
    fetched from the vector store, and the chain is invoked with those
    snippets, the few-shot examples and the prompt.

    Args:
        user_prompt: Natural-language description of the desired CAD model.

    Returns:
        The code extracted from the model output, as a string.
    """
    # Retrieve relevant documents
    embeddings = embedder.embed_query(user_prompt)
    results = collection.query(query_embeddings=[embeddings], n_results=3)
    context = "\n".join(results["documents"][0])

    # Generate the code using RAG
    output = rag_chain.invoke({
        "context": context,
        "few_shot": few_shot_prompt,
        "prompt": user_prompt
    })

    # Extract code from the output (tolerates missing or unterminated fences)
    return _extract_code(output)


# Smoke-test the RAG pipeline with a simple request and show the result
test_prompt = "Create a cube with side length 15 mm and export it as a STEP file."
generated_code = generate_cad_code(test_prompt)
print("Generated Code:", generated_code, sep="\n")

Generated Code:
import cadquery as cq

result = cq.Workplane('XY').box(15, 15, 15)
cq.exporters.export(result, 'cube.step', cq.exporters.ExportTypes.STEP)


In [39]:
import cadquery as cq
from google.colab import files
import os

# Create a prompt and generate the code
prompt = "Create a sphere with radius 5 mm and export it as a STEP file."
code = generate_cad_code(prompt)
print("\nGenerated Code for Prompt:")
print(code)

# Remember the modification time of every STEP file already in the working
# directory, so files written by the generated code can be told apart. The
# generated code chooses its own output filename, so a hard-coded name cannot
# be relied on for the download.
workdir = os.getcwd()
step_suffixes = (".step", ".stp")
mtimes_before = {
    name: os.path.getmtime(os.path.join(workdir, name))
    for name in os.listdir(workdir)
    if name.lower().endswith(step_suffixes)
}

# Execute the code
# SECURITY NOTE: exec() runs LLM-generated code with the full privileges of
# this notebook. Review the printed code before running this cell on anything
# other than a disposable runtime.
try:
    exec(code)
    print("CAD model generated successfully.")

    # Download every STEP file that is new or was rewritten by the generated code
    produced = sorted(
        name
        for name in os.listdir(workdir)
        if name.lower().endswith(step_suffixes)
        and os.path.getmtime(os.path.join(workdir, name)) != mtimes_before.get(name)
    )
    if not produced:
        print("No STEP file was produced by the generated code.")
    for name in produced:
        files.download(os.path.join(workdir, name))
except Exception as e:
    print(f"Error executing code: {e}")




Generated Code for Another Prompt:
import cadquery as cq
result = cq.Workplane('XY').sphere(5)
cq.exporters.export(result, 'sphere.step', cq.exporters.ExportTypes.STEP)
CAD model generated successfully.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>