In [20]:
import xgboost as xgb
import pandas as pd
from datetime import datetime

# Load the two CSV inputs. `raw` is the event log that
# process_events_and_query later appends incoming events to.
# NOTE(review): `df` is not referenced by the other cells visible here
# (process_events_and_query builds its own local `df`) — confirm it is still needed.
df = pd.read_csv('./cleaned.csv')
raw = pd.read_csv('./mock.csv')

# Load a previously saved XGBRanker model from its JSON file.
model = xgb.XGBRanker()
model.load_model('./ranker.json')

In [21]:
# Sample request payload used to exercise the cells below.
# Each event carries a timestamp ("ts"), the concrete request ("endpoint"),
# a session identifier and the request parameters ("params").
data = {
    "user_id": "user_1",
    "events": [
        {
            "ts": "2025-03-05 13:20:17+00:00",
            "endpoint": "GET /invoices",
            # NOTE: the key is spelled "sesson_id" (sic); process_events_and_query
            # looks this spelling up first and falls back to "session_id".
            "sesson_id": "76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33",
            "params": {'board_id': '123'}
        },
        {
            "ts": "2025-03-05 13:22:01+00:00",
            "endpoint": "PUT /invoices/123/status",
            "sesson_id": "76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33",
            "params": {
                "status": "DRAFT"
            }
        }
    ],
    # NOTE(review): "prompt", "spec_url" and "k" are not read by any cell visible
    # here; presumably they feed the prompt-building / ranking step — confirm.
    "prompt": "Let's finish billing for Q2",
    "spec_url": "https://raw.githubusercontent.com/damoonsh/OS-Next-Action/refs/heads/main/specs/ops.yaml",
    "k": 5
}

In [22]:
def build_endpoint_map(doc_str):
    """Parse a bullet-list endpoint catalogue into ``{METHOD: [path, ...]}``.

    Every endpoint is expected on its own line in the form
    ``- METHOD /path - free-text description``. Blank lines, ``#`` headings,
    lines that are not bullets, and bullets with fewer than two tokens are
    ignored.

    Args:
        doc_str: Multi-line catalogue text.

    Returns:
        dict mapping each method token to the list of path templates listed
        for it, in order of appearance.
    """
    endpoint_map = {}

    for raw_line in doc_str.splitlines():
        line = raw_line.strip()
        if not line.startswith('-'):
            # Covers blank lines, '#' headings and any other non-bullet text.
            continue

        # Split on any run of whitespace after the dash so that "-GET /x",
        # "-  GET   /x" and tab-separated bullets all parse the same way
        # (slicing off a fixed two characters would corrupt the method name).
        tokens = line[1:].split(None, 2)
        if len(tokens) < 2:
            continue

        method, path = tokens[0], tokens[1]
        endpoint_map.setdefault(method, []).append(path)

    return endpoint_map

def match_endpoint(request_str, endpoint_map):
    """Find the path template that a concrete request corresponds to.

    Args:
        request_str: Request in the form ``"METHOD /concrete/path"``. A query
            string (``?a=b``) on the path is ignored for matching.
        endpoint_map: ``{METHOD: [template, ...]}`` as produced by
            ``build_endpoint_map``; templates mark path parameters as
            ``{name}`` segments.

    Returns:
        The first template registered for the method whose segments line up
        with the request path (``{...}`` segments match anything), or ``None``
        when the request is malformed or nothing matches.
    """
    # Split on any whitespace so repeated spaces between method and path
    # do not leak into the path.
    parts = request_str.split(None, 1)
    if len(parts) < 2:
        return None

    method, request_path = parts[0], parts[1].strip()
    # The query string is not part of the route; drop it before comparing.
    request_path = request_path.split('?', 1)[0]
    # Leading/trailing slashes are ignored on both sides, so "/invoices"
    # matches the template "/invoices/".
    request_segments = request_path.strip('/').split('/')

    for template in endpoint_map.get(method, []):
        template_segments = template.strip('/').split('/')
        if len(request_segments) != len(template_segments):
            continue

        segments_agree = all(
            (expected.startswith('{') and expected.endswith('}')) or actual == expected
            for actual, expected in zip(request_segments, template_segments)
        )
        if segments_agree:
            return template

    return None

# Endpoint catalogue that a later cell feeds to build_endpoint_map: one
# "- METHOD /path - description" bullet per endpoint. The "#" section headings
# and blank lines inside the string are skipped by the parser.
doc = """# Invoice Management  
- GET /invoices/ - Retrieve all invoices with optional filtering parameters  
- POST /invoices/ - Create a new invoice with customer and line item details  
- GET /invoices/{invoice_id} - Retrieve a specific invoice by ID  
- PUT /invoices/{invoice_id} - Update an existing invoice  
- PATCH /invoices/{invoice_id} - Partially update invoice fields  
- DELETE /invoices/{invoice_id} - Delete an invoice  
- GET /invoices/{invoice_id}/status - Get the current status of a specific invoice  
- PUT /invoices/{invoice_id}/status - Update the status of a specific invoice  

# Cost Management  
- GET /costs/{service_id} - Retrieve total costs for a specific service with date filtering  
- POST /costs/{service_id} - Add new cost entry for a service  
- DELETE /costs/{service_id} - Delete all cost entries for a service (requires confirmation)  
- PUT /costs/{service_id}/{cost_id} - Update an existing cost entry  
- PATCH /costs/{service_id}/{cost_id} - Partially update cost entry fields  
- DELETE /costs/{service_id}/{cost_id} - Remove a specific cost entry  

# Revenue Management  
- GET /revenue/{service_id} - Retrieve total revenue for a specific service with date filtering  
- POST /revenue/{service_id} - Record new revenue entry for a service  
- PUT /revenue/{service_id}/{revenue_id} - Update an existing revenue record  
- PATCH /revenue/{service_id}/{revenue_id} - Partially update revenue record  
- DELETE /revenue/{service_id}/{revenue_id} - Remove a specific revenue entry  

# Payment Processing  
- GET /payments/ - List all payments with status, method, and date-range filters  
- POST /payments/ - Process a new payment for an invoice  
- GET /payments/{payment_id} - Retrieve details of a specific payment  
- PATCH /payments/{payment_id} - Partially update payment details  
- DELETE /payments/{payment_id} - Cancel/delete a payment  

# Account Management  
- GET /accounts/ - Retrieve all financial accounts  
- GET /accounts/{account_id} - Get specific account details and current balance  
- PATCH /accounts/{account_id} - Update account information  
- DELETE /accounts/{account_id} - Close/delete an account  

# Budget Planning and Tracking  
- GET /budgets/ - List all budgets with category and period filters  
- POST /budgets/ - Create a new budget  
- GET /budgets/{budget_id} - Retrieve specific budget details  
- PUT /budgets/{budget_id} - Update existing budget allocations  
- PATCH /budgets/{budget_id} - Partially update budget  
- DELETE /budgets/{budget_id} - Delete a budget 
"""

In [None]:
# Annotate every event with its templated endpoint ("METHOD /path/{param}").
endpoint_map = build_endpoint_map(doc)
for event in data['events']:
    method = event['endpoint'].split(" ")[0]
    template = match_endpoint(event['endpoint'], endpoint_map)
    # match_endpoint returns None for requests missing from the catalogue;
    # keep the raw endpoint in that case instead of storing "METHOD None".
    event['endpoint_abstract'] = (
        f"{method} {template}" if template is not None else event['endpoint']
    )

In [24]:
# Display the payload to check the "endpoint_abstract" values added to each event.
data

{'user_id': 'user_1',
 'events': [{'ts': '2025-03-05 13:20:17+00:00',
   'endpoint': 'GET /invoices',
   'sesson_id': '76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33',
   'params': {'board_id': '123'},
   'endpoint_abstract': 'GET /invoices/'},
  {'ts': '2025-03-05 13:22:01+00:00',
   'endpoint': 'PUT /invoices/123/status',
   'sesson_id': '76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33',
   'params': {'status': 'DRAFT'},
   'endpoint_abstract': 'PUT /invoices/{invoice_id}/status'}],
 'prompt': "Let's finish billing for Q2",
 'spec_url': 'https://raw.githubusercontent.com/damoonsh/OS-Next-Action/refs/heads/main/specs/ops.yaml',
 'k': 5}

In [None]:
import pandas as pd
from datetime import datetime

def process_events_and_query(data, verbose=True):
    """Build the inference feature dict for a user's most recent event.

    The request's events are appended to a copy of the module-level ``raw``
    event log (``raw`` itself is only read, never modified), lag features are
    derived per user, and the features of the requesting user's latest row
    are returned.

    Args:
        data: Request payload with ``"user_id"`` and ``"events"``. Each event
            needs ``"ts"`` and may carry ``"sesson_id"``/``"session_id"``,
            ``"endpoint_abstract"``/``"endpoint"`` and ``"params"``.
        verbose: When true (default), print the appended rows and the tail of
            the combined log, as the function has always done.

    Returns:
        dict with ``seconds_passed``, ``pa1_ss``..``pa3_ss``, ``day``,
        ``month``, ``week``, ``year``, ``prev_action_1``..``prev_action_3``
        and ``para1``..``para3`` (parameter names of the latest row).

    Raises:
        ValueError: If the combined log holds no row for ``data["user_id"]``.
    """
    import ast  # local import: only needed to parse the stringified parameter list

    user_id = data["user_id"]
    events = data["events"]

    new_rows = [
        {
            # Payloads spell the key "sesson_id"; accept the correct spelling too.
            'session_id': event.get('sesson_id', event.get('session_id', '')),
            'user_id': user_id,
            'timestamp': event['ts'],
            'action': event.get('endpoint_abstract', event.get('endpoint', '')),
            # Only the parameter *names* are kept, stored as the repr of a list.
            # `or []` also covers an explicit `"params": None`.
            'parameters': str(list(event.get('params') or [])),
        }
        for event in events
    ]

    new_df = pd.DataFrame(new_rows)
    raw_updated = pd.concat([raw, new_df], ignore_index=True)

    if verbose:
        print(f"\nAdding {len(new_rows)} new rows from events:")
        for i, row in enumerate(new_rows):
            print(f"{i+1}. {row}")
        print(new_rows)
        print(new_df)
        print(raw_updated.iloc[-3:])

    # Parse timestamps *before* sorting so rows are ordered chronologically
    # rather than by the lexical order of their string form.
    features = raw_updated.copy()
    features['timestamp'] = pd.to_datetime(features['timestamp'], format='ISO8601')
    features = features.sort_values(['user_id', 'timestamp']).reset_index(drop=True)

    for lag in (1, 2, 3):
        per_user = features.groupby('user_id')
        features[f'prev_action_{lag}'] = per_user['action'].shift(lag)
        # True when the event `lag` steps back belongs to the same session.
        features[f'pa{lag}_ss'] = features['session_id'] == per_user['session_id'].shift(lag)

    stamps = features['timestamp'].dt
    features['day'] = stamps.day
    features['month'] = stamps.month
    features['week'] = stamps.isocalendar().week
    features['year'] = stamps.year
    features['seconds_passed'] = stamps.hour * 3600 + stamps.minute * 60 + stamps.second

    # The frame is sorted by user first, so its very last row belongs to the
    # alphabetically last user; pick the requesting user's latest row instead.
    user_rows = features[features['user_id'] == user_id]
    if user_rows.empty:
        raise ValueError(f"no events available for user_id {user_id!r}")
    row = user_rows.iloc[-1]

    # NOTE(review): prev_action_* are the actions *before* the user's latest
    # event (the latest event is the "current" row) — confirm this matches how
    # the ranker's training rows were built.
    X_inference = {
        'seconds_passed': int(row['seconds_passed']),
        'pa1_ss': float(row['pa1_ss']),
        'pa2_ss': float(row['pa2_ss']),
        'pa3_ss': float(row['pa3_ss']),
        'day': int(row['day']),
        'month': int(row['month']),
        'week': int(row['week']),
        'year': int(row['year']),
        'prev_action_1': row['prev_action_1'] if pd.notna(row['prev_action_1']) else '',
        'prev_action_2': row['prev_action_2'] if pd.notna(row['prev_action_2']) else '',
        'prev_action_3': row['prev_action_3'] if pd.notna(row['prev_action_3']) else '',
        'para1': '',
        'para2': '',
        'para3': '',
    }

    # 'parameters' holds the repr of a list of names; rows loaded from the CSV
    # may hold NaN or malformed text, in which case para1..3 stay empty.
    current_action_params = row['parameters']
    if isinstance(current_action_params, str) and current_action_params not in ("", "[]"):
        try:
            params_list = ast.literal_eval(current_action_params)
        except (ValueError, TypeError, SyntaxError):
            params_list = []
        if isinstance(params_list, (list, tuple)):
            for position, name in enumerate(params_list[:3], start=1):
                X_inference[f'para{position}'] = name

    return X_inference


In [26]:
# Build the inference feature dict for the sample payload.
X = process_events_and_query(data)


Adding 2 new rows from events:
1. {'session_id': '76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33', 'user_id': 'user_1', 'timestamp': '2025-03-05 13:20:17+00:00', 'action': 'GET /invoices/', 'parameters': "['board_id']"}
2. {'session_id': '76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33', 'user_id': 'user_1', 'timestamp': '2025-03-05 13:22:01+00:00', 'action': 'PUT /invoices/{invoice_id}/status', 'parameters': "['status']"}
[{'session_id': '76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33', 'user_id': 'user_1', 'timestamp': '2025-03-05 13:20:17+00:00', 'action': 'GET /invoices/', 'parameters': "['board_id']"}, {'session_id': '76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33', 'user_id': 'user_1', 'timestamp': '2025-03-05 13:22:01+00:00', 'action': 'PUT /invoices/{invoice_id}/status', 'parameters': "['status']"}]
                             session_id user_id                  timestamp  \
0  76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33  user_1  2025-03-05 13:20:17+00:00   
1  76dbd4b8-18f3-4e6b-bf1d-94b2412a4e33  user_1  2025-03-05 13:22

In [None]:
def generate_history(events):
    """Render events as a plain-text timeline for the prompt.

    Args:
        events: Iterable of event dicts in chronological order. The templated
            ``"endpoint_abstract"`` is used when present, otherwise the raw
            ``"endpoint"``; ``"params"`` (a dict) is optional.

    Returns:
        A string starting with a header line, followed by one entry per event.
        Each entry ends with its own newline and entries are joined with
        another newline, so consecutive events are separated by a blank line.
    """
    lines = ["Event Timeline (from earliest to latest):"]

    for event in events:
        endpoint = event.get('endpoint_abstract', event.get('endpoint', ''))
        params = event.get('params') or {}

        param_str = " ".join(f'{key}:{value}' for key, value in params.items())

        # An event without parameters yields an empty string; list the bare
        # endpoint then instead of a dangling "with params: ".
        if param_str:
            lines.append(f'{endpoint} with params: {param_str}\n')
        else:
            lines.append(f'{endpoint}\n')

    return "\n".join(lines)

In [41]:
# Render the timeline text for the sample events.
generate_history(data['events'])

['Event Timeline (from earliest to latest):', 'GET /invoices/ with params: board_id:123\n', 'PUT /invoices/{invoice_id}/status with params: status:DRAFT\n']


'Event Timeline (from earliest to latest):\nGET /invoices/ with params: board_id:123\n\nPUT /invoices/{invoice_id}/status with params: status:DRAFT\n'

- [ ] History based on events
- [ ] Spec
- [ ] User prompt
- [ ] Exclude methods or endpoints

In [None]:
# Prompt template for the endpoint-selection step (presumably sent to an LLM).
# Written for str.format with these placeholders:
#   {history}, {api_specs}, {user_prompt_addition}, {XGB_rankings}, {exclude_delete}
# Literal braces are doubled ({{ }}) so they survive formatting.
# The example under "Output Format" is wrapped in [ ] so that it is a valid
# JSON array, as the surrounding text requests.
# NOTE(review): section 4 asks for a single JSON object while "Output Format"
# asks for an array of actions, and the last line fixes the count at 3 —
# confirm which contract the consumer of the response expects.
BASE_PROMPT = """
You are an intelligent API endpoint selector for a SaaS application. Your role is to analyze user interaction history and a next possible action ranking based on the context, previous actions, ranking given by XGBRanker, and available API specifications.

## Instructions:

1. **Context Analysis**: Carefully examine the interaction history to understand:
   - What endpoints were previously called and with which parameters (shown in curly braces {{}})
   - The sequence of user actions and their outcomes
   - Any patterns or workflows the user is following
   - Current state of the application based on previous API calls

2. **Parameter Extraction**: Pay special attention to parameters passed in previous interactions:
   - Extract IDs, filters, and values from previous endpoint calls
   - Consider how these parameters influence the next logical step
   - Identify any missing parameters that might be needed

3. **Endpoint Selection Logic**: 
   - Choose endpoints that logically follow from the user's current workflow
   - Consider CRUD operation sequences (e.g., POST → GET → PATCH → DELETE)
   - Prioritize endpoints that complete user goals or provide necessary follow-up actions
   - Account for business logic constraints (e.g., can't delete invoices that aren't in 'draft' status)

4. **Response Format**: Provide your response as a JSON object with:
   - `selected_endpoint`: The HTTP method and path of the recommended endpoint
   - `reasoning`: Brief explanation of why this endpoint was chosen
   - `suggested_parameters`: Any parameters that should be included based on context
   - `confidence_level`: High/Medium/Low based on how certain you are about the selection

## Interaction History:

{history}

## Available API Specifications:

{api_specs}

## Additional Context:
{user_prompt_addition}
- Rerank if the result of XGBRanking does not make sense but explain in reasoning why a certain rank differs from XGBRanker.
- Consider the current user's workflow state and business logic requirements
- If multiple endpoints seem equally valid, prioritize those that:
  1. Complete the current user task
  2. Provide essential follow-up information
  3. Enable the next logical step in the business process
- Account for any error conditions or validation requirements mentioned in the API specs
- Consider data dependencies between endpoints (e.g., needing invoice_id from previous GET /invoices/ call)

## XGBRanker 
We have trained an XGBRanker on our data that has ranked the actions as such:
{XGB_rankings}

## Output Format:
Return a JSON array containing the most likely actions ordered by probability (most likely first), incorporate the values passed by XGBRanker. Each action should include the endpoint and reasoning:
{exclude_delete}
[
  {{"action": "GET /invoices/123/", "reasoning": "User just created invoice 123 and likely wants to view the complete details"}},
  {{"action": "PATCH /invoices/123/status", "reasoning": "Natural next step would be to update the invoice status from draft to pending"}},
  {{"action": "GET /invoices/123/line-items/", "reasoning": "User might want to review or modify the line items of the newly created invoice"}}
]

Analyze the interaction history and return your top 3 most likely next actions with their reasoning.
"""