In [1]:
# Import Cell - Patent Claim Generator
import os
import sys
import yaml
from pathlib import Path
from typing import List

# LangChain imports
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI


In [2]:

# Add project root to path for imports
project_root = Path.cwd().parent  # Assuming you're in the notebooks directory
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import your prompt module
from src.prompt_engineering.pr_patent_claims import PATENT_CLAIM_GENERATION_PROMPT

# For notebook UI
from IPython.display import display, HTML
import ipywidgets as widgets

In [3]:
# Load Model Configuration
def load_model_config():
    config_path = project_root / "config" / "model_config.yaml"
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    return config

model_config = load_model_config()

In [4]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

True

In [5]:
# Set up OpenRouter API key
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    print("⚠️ OPENAI_API_KEY not found in environment variables")
    openai_api_key = input("Enter your OpenRouter API key: ")

# Set up model options from config
model_options = model_config['openai']['default_models']
default_model = model_options[0]  
default_model

'gpt-4.1-nano'

In [6]:
# Connect to ChromaDB
# Setup Vector Store Connection
import chromadb
from chromadb.config import Settings
import os

chroma_host = "vector_db"
chroma_port = 8000

chroma_client = chromadb.HttpClient(
    host=chroma_host,
    port=chroma_port,
    settings=Settings(
        anonymized_telemetry=False
    )
)


In [7]:
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={"device": "cpu"},
    encode_kwargs={"batch_size": 32, "normalize_embeddings": True}
)

  embeddings = HuggingFaceEmbeddings(


In [8]:
# Create LangChain Chroma instance
collection_name = "patents"  # Your existing collection
vectorstore = Chroma(
    client=chroma_client,
    collection_name=collection_name,
    embedding_function=embeddings
)

# Create a retriever
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10}
)

  vectorstore = Chroma(


In [9]:
# Initialize LLM with OpenRouter
def initialize_llm(model_name=default_model):
    llm = ChatOpenAI(
        temperature=0.2,
        model_name="gpt-4.1-nano",
        api_key=openai_api_key,
        base_url="https://openrouter.ai/api/v1",
    )
    return llm


In [10]:
# Create an enhanced prompt template that requests source attribution
enhanced_template = """
You are a patent claim generation assistant. Given a technical description of an invention and some relevant patent examples, your task is to generate comprehensive patent claims.

RELEVANT PATENT EXAMPLES:
{context}

USER'S INVENTION DESCRIPTION:
{description}

Follow these steps strictly:

1. **Analyze the Description**:
   - Identify the **core invention or novel idea**.
   - Detect all **technical components**, **methods**, or **features** that contribute to functionality or novelty.

2. **Generate Patent Claims**:
   - Write the claims in clear, formal language.
   - Start with **at least one independent claim** (method or system).
   - Follow with **dependent claims** that add details, such as:
     - Specific materials used
     - Geometric shapes
     - Placement techniques
     - Functional enhancements
     - Environmental variations (e.g., under heat or torque)
     - Multi-mode tuning
     - Manufacturing methods

3. **Structure**:
   - Number each claim clearly (e.g., Claim 1, Claim 2, ...)
   - Avoid repeating elements already claimed in parent claims

4. **Source Analysis**:
   - After all claims, include a section titled "SOURCE INFLUENCE ANALYSIS"
   - For each patent example that influenced your claims, list:
     - The patent ID
     - Which specific claims were influenced by it
     - What specific concepts or terminology were borrowed
    - and the full path of the file

Based on the user's description and the relevant patents, generate comprehensive patent claims:
"""



In [26]:
# Interactive UI for Patent Claim Generation

# Model selection dropdown
model_selector = widgets.Dropdown(
    options=model_options,
    value=default_model,
    description='Model:',
    style={'description_width': 'initial'}
)

# Create text area for input
description_input = widgets.Textarea(
    value='',
    placeholder='Enter your invention description here...',
    description='Description:',
    disabled=False,
    rows=10,
    style={'description_width': 'initial'}
)

# Create output area
output = widgets.Output()

# Generate button
generate_button = widgets.Button(
    description='Generate Patent Claims',
    button_style='primary',
    tooltip='Click to generate patent claims'
)

# Progress indicator
progress = widgets.IntProgress(
    value=0,
    min=0,
    max=100,
    description='Processing:',
    bar_style='info',
    style={'bar_color': '#0066cc'}
)

# Function to handle generation
def on_generate_button_clicked(description_input):
    output.clear_output()
    
    # Get selected model
    selected_model = model_selector.value
    global llm
    llm = initialize_llm(selected_model)
    
    # Enhanced document formatting to include more metadata
    def format_docs_with_metadata(docs):
        formatted = []
        for i, doc in enumerate(docs):
            metadata = doc.metadata
            formatted.append(
                f"PATENT EXAMPLE {i+1}:\n"
                f"ID: {metadata.get('doc_id', 'Unknown')}\n"
                f"ID: {metadata.get('filePath', 'Unknown')}\n"
                f"Title: {metadata.get('title', 'Unknown')}\n"
                f"Section: {metadata.get('section', 'Unknown')}\n"
                f"Content: {doc.page_content}\n"
            )
        return "\n\n".join(formatted)
    
    # Update your RAG chain with the enhanced components
    ENHANCED_RAG_PROMPT = PromptTemplate.from_template(enhanced_template)
    
    enhanced_rag_chain = (
        {"context": retriever | format_docs_with_metadata, "description": RunnablePassthrough()}
        | ENHANCED_RAG_PROMPT
        | llm
        | StrOutputParser()
    )
    
    description = description_input
    if not description or len(description) < 10:
        print("Please enter a more detailed description")
        return
    
    progress.value = 10
    print(f"🤖 Using model: {selected_model}")
    print("📚 Retrieving relevant patents...")
    
    progress.value = 30
    print("🔍 Analyzing technical description...")
    
    try:
        progress.value = 50
        # Run the RAG chain
        result = enhanced_rag_chain.invoke(description)
        
        progress.value = 90
        print("\n🎯 Generated Patent Claims:\n")
        print(result)
        
        progress.value = 100
    except Exception as e:
        print(f"❌ An error occurred: {str(e)}")
        progress.value = 0

result = on_generate_button_clicked()

# Example description
example_description = """
The present invention generally relates to shaft assemblies for transmitting rotary power in a driveline and more particularly to a method for attenuating driveline vibrations transmitted through a shaft assembly
"""

result = on_generate_button_clicked(example_description)
result
# Uncomment to pre-fill the description
# description_input.value = example_description

TypeError: on_generate_button_clicked() missing 1 required positional argument: 'description_input'

In [18]:
model_orchstra =  """

You are an intelligent orchestration agent in a larger AI system.

Your task is to analyze the user's input and determine what kind of information they have provided. Based on this classification, your response will guide the downstream flow of processing.

### 🎯 Your Output Must Be a Valid JSON Object:

```json
{{
  "scenario": "<desc | claims | both | chat>",
  "use_retriever": <true | false>,
  "reasoning": "<a short explanation of your classification>"
}}
````

### 📘 Definitions:

* `"desc"`: The input contains a description of the idea or invention (how it works, what it is, what it does).
* `"claims"`: The input contains one or more claim-like structures (e.g. legal, patent-style, or technical claim statements).
* `"both"`: The input contains both a description and one or more claims.
* `"chat"`: The input is conversational or does not contain either a clear description or claim.

### 🧠 Logic:

* If the input includes **description**, set `"scenario": "desc"` and `"use_retriever": true`
* If the input includes **claims**, set `"scenario": "claims"` and `"use_retriever": true`
* If the input includes **both**, set `"scenario": "both"` and `"use_retriever": true`
* If the input includes **neither**, set `"scenario": "chat"` and `"use_retriever": false`

If you are unsure between `"desc"` and `"claims"`, choose `"both"`.

### 📥 Input Block:

Evaluate this user input:

```
{user_input}
```

### 📤 Example Response Format:

```json
{{
  "scenario": "desc",
  "use_retriever": true,
  "reasoning": "The input contains a general technical description of a new idea, without formal claims."
}}
```

🚫 Do NOT include anything outside the JSON response.

```
"""

model_orchstra = PromptTemplate.from_template(model_orchstra)

In [19]:
llm = initialize_llm("gpt-4.1-nano")

# Create the chain with the prompt template
orchestrating_content = (
    {"user_input": RunnablePassthrough()}
    | model_orchstra
    | llm
    | StrOutputParser()
)




In [20]:
# Invoke with the user input
result_both = orchestrating_content.invoke({"""
The present invention generally relates to shaft assemblies for transmitting rotary power in a driveline and more particularly to a method for attenuating driveline vibrations transmitted through a shaft assembly

The elevator system (100) of any preceding claim, wherein after receiving a signal in response to a change of state of one or more of the safety devices (126a, 126b, 127, 129, 131, 138a, 138b, 140, 141), the safety controller (121) causes an alarm (139) to be triggered
"""})

result_both

'{\n  "scenario": "both",\n  "use_retriever": true,\n  "reasoning": "The input includes a technical description of an invention as well as claim-like statements referencing specific features and claims."\n}'

In [21]:
# Invoke with the user input
result_descrp = orchestrating_content.invoke({"""
The present invention generally relates to shaft assemblies for transmitting rotary power in a driveline and more particularly to a method for attenuating driveline vibrations transmitted through a shaft assembly
"""})

result_descrp

'{\n  "scenario": "desc",\n  "use_retriever": true,\n  "reasoning": "The input contains a technical description of an invention related to shaft assemblies and vibration attenuation, without specific claims."\n}'

In [22]:
# Invoke with the user input
result_claim = orchestrating_content.invoke({"""
The elevator system (100) of any preceding claim, wherein after receiving a signal in response to a change of state of one or more of the safety devices (126a, 126b, 127, 129, 131, 138a, 138b, 140, 141), the safety controller (121) causes an alarm (139) to be triggered
"""})

result_claim

'{\n  "scenario": "claims",\n  "use_retriever": true,\n  "reasoning": "The input contains a claim-like statement referencing specific components and their interactions, characteristic of patent claims."\n}'

In [23]:
# Invoke with the user input
result_chat = orchestrating_content.invoke({"""
The elevator system (100) of any preceding claim, wherein after receiving a signal in response to a change of state of one or more of the safety devices (126a, 126b, 127, 129, 131, 138a, 138b, 140, 141), the safety controller (121) causes an alarm (139) to be triggered
"""})

result_chat

'{\n  "scenario": "claims",\n  "use_retriever": true,\n  "reasoning": "The input contains a claim-like statement referencing specific components and functionalities, characteristic of technical claims."\n}'