In [None]:
import os
import sys
# Put the absolute path of the parent directory on sys.path (once), presumably
# so that project packages imported by later cells can be resolved — confirm.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Load the dotenv IPython extension and run its %dotenv line magic.
# NOTE(review): presumably this reads a local .env file into the process
# environment for RouteGenConfig.from_env() — confirm. Re-running this cell in
# a live kernel prints an "already loaded" notice, which is harmless.
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [43]:
from notebooks.engines.sailor_data_engineer import RouteGenConfig, SailorDataEngineer

_config = RouteGenConfig.from_env()
engineer = SailorDataEngineer(_config, cache_key='test_model', route_description="flight agency admin panel")

route_context = await engineer.generate_data(20, 200)

In [44]:
from sailor import RouteDocumentor
from sklearn.model_selection import train_test_split
from sailor.types.route_context import NavigationContext

# Fail fast when the generation cell has not produced a context.
if route_context is None:
    raise ValueError("route_context must be generated first")

# Hold out 20% of the sessions for evaluation; the fixed random_state makes
# the split repeatable.
train_sessions, test_sessions = train_test_split(
    route_context.sessions,
    test_size=0.2,
    random_state=14,
)

# The training context keeps all routes but only the training sessions.
train_context = NavigationContext(
    routes=route_context.routes,
    sessions=train_sessions,
)

# Fit the documentor on the training context; `labels` is what fit_transform()
# returns and is used as the target vector when fitting the pipeline.
documentor = RouteDocumentor(train_context)
labels = documentor.fit_transform()

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Text classifier: TF-IDF features (English stop words removed) feeding a
# linear SVM with balanced class weights.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    # random_state pins the data shuffling done by LinearSVC's dual solver so
    # that a fresh "Restart & Run All" reproduces the same model and metrics.
    # 14 matches the seed used for the train/test split.
    ('svc', LinearSVC(class_weight='balanced', max_iter=2000, random_state=14)),
])

# Train on the documents built by the documentor and their encoded labels.
pipeline.fit(documentor.documents, labels)

In [None]:
from sklearn.metrics import classification_report

# Model inputs: the `context` attribute of every held-out session.
test_queries = [session.context for session in test_sessions]
_predictions = pipeline.predict(test_queries)

# Ground truth: each held-out session's target, encoded by the fitted documentor.
_target_routes = [session.target for session in test_sessions]
_targets = documentor.transform(_target_routes)

# Per-class precision/recall/F1, with rows named by the documentor's labels.
_report = classification_report(
    _targets,
    _predictions,
    target_names=documentor.labels_,
)
print(_report)

                   precision    recall  f1-score   support

admin-dashboard-5       1.00      1.00      1.00         7
  airport-info-10       0.69      0.85      0.76        13
booking-details-2       0.89      0.94      0.91        17
   deals-offers-9       0.83      0.50      0.62        10
  flight-search-1       1.00      0.80      0.89        15
  flight-status-6       0.73      1.00      0.84         8
    help-center-8       0.83      1.00      0.91        10
loyalty-program-7       1.00      0.91      0.95        11
payment-process-3       1.00      0.93      0.96        14
   user-profile-4       0.80      0.80      0.80         5

         accuracy                           0.87       110
        macro avg       0.88      0.87      0.87       110
     weighted avg       0.89      0.87      0.87       110



In [None]:
from typing import List
import numpy as np

from sailor.types.route_context import RouteContextResult

def _scored_routes(scores) -> List[RouteContextResult]:
    """Convert a vector of per-class scores into routes, best score first.

    Each index of ``scores`` is mapped back through
    ``documentor.inverse_transform``; indices for which that returns ``None``
    are skipped. Every kept route is replaced by the result of
    ``copy_with_score`` called with the score as a plain ``float``.

    Args:
        scores: Indexable sequence of scores, one per class index.

    Returns:
        Routes ordered from highest to lowest score.
    """
    ranked: List[RouteContextResult] = []
    # argsort is ascending, so reverse it to walk from the highest score down.
    for idx in np.argsort(scores)[::-1]:
        match = documentor.inverse_transform(idx)
        if match is None:
            continue
        ranked.append(match.copy_with_score(float(scores[idx])))
    return ranked

def predict(query: str) -> List[RouteContextResult]:
    """Rank routes for a text query using the fitted pipeline.

    Args:
        query: Text to classify.

    Returns:
        Routes ordered from highest to lowest decision score, or an empty
        list when ``query`` is empty or whitespace-only.
    """
    has_text = bool(query.strip())
    if not has_text:
        return []

    # decision_function is called with a single-item batch; take its only row.
    batch_scores = pipeline.decision_function([query])
    return _scored_routes(batch_scores[0])

In [None]:
# Show at most the five highest-scoring routes for a sample booking query.
routes = predict("I want to book a flight from São Paulo to New York")[:5]
for route in routes:
    print(
        f"Path: {route.path}, Score: {route.score:.2f}",
        f"Context: {route.context}",
        sep="\n",
    )

Path: /flights/status/{flightNumber}, Score: -0.42
Context: flights status {flightNumber} flight status flight status track flight flight tracking check flight status User clicked link email check flight status User wants check status flight User entered flight number manually User wants check status flight User wants find flight time User wants status flight User clicked link track specific flight User wants find gate information flight User wants know reason delay flight User wants check status flight User trying locate flight map User clicked Track button flight path User wants flight affected air traffic control User wants find departure time flight User wants check status flight User wants check flight time connecting flight User clicked Check Status button entering flight number User wants check arrival time flight User tapped search icon entering flight number User voice command search flight status User wants check gate changes flight User wants confirm flight landed User wants