In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

%load_ext dotenv
%dotenv

In [None]:
from sailor import RouteGenConfig, SailorDataWarehouse

_config = RouteGenConfig.from_env()
warehouse = SailorDataWarehouse(_config, db_path="../db/sailor_db.sqlite")

routes = await warehouse.create_routes(context="flight agency admin panel", count=20)

# engineer = SailorDataEngineer(_config, cache_key='test_model', route_description="flight agency admin panel")

# route_context = await engineer.generate_data(20, 200)

In [None]:
from sailor import RouteDocumentor
from sklearn.model_selection import train_test_split
from sailor.types.route_context import NavigationContext

# `route_context` is only assigned by the commented-out SailorDataEngineer step in
# the previous cell, so on a fresh kernel the name may not exist at all. Look it up
# in the notebook namespace so the explanatory ValueError below is raised instead
# of a bare NameError.
if globals().get("route_context") is None:
    raise ValueError("route_context must be generated first")

# Hold out 20% of the sessions for evaluation; the fixed random_state keeps the
# split reproducible across runs.
train_sessions, test_sessions = train_test_split(
    route_context.sessions,
    test_size=0.2,
    random_state=14,
)

# The training context keeps every route but only the training sessions, so the
# held-out sessions never reach the documentor.
train_context = NavigationContext(routes=route_context.routes, sessions=train_sessions)

# Fit the documentor on the training context; `labels` is paired with
# `documentor.documents` when the classifier is fitted in a later cell.
documentor = RouteDocumentor(train_context)
labels = documentor.fit_transform()

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier

# Text classifier: TF-IDF features (English stop words removed) fed into a
# 2-nearest-neighbour classifier whose votes are weighted by inverse distance.
pipeline = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer(stop_words='english')),
        ('knn', KNeighborsClassifier(n_neighbors=2, weights='distance')),
    ]
)

# Train on the documents exposed by the documentor and their labels.
pipeline.fit(documentor.documents, labels)

In [None]:
from sklearn.metrics import classification_report

# Predict a label for the context of every held-out session.
test_queries = [session.context for session in test_sessions]
_predictions = pipeline.predict(test_queries)

# Encode the true targets with the same documentor used for training so they
# are comparable with the predictions.
_targets = documentor.transform([session.target for session in test_sessions])

# NOTE(review): `target_names` must match the number of distinct labels present in
# `_targets`/`_predictions`; if the held-out split misses some classes sklearn
# raises. Passing `labels=` explicitly would avoid that — confirm the encoding
# produced by `documentor.transform` first.
print(
    classification_report(
        _targets,
        _predictions,
        target_names=documentor.labels_,
    )
)

In [None]:
from typing import List
import numpy as np

from sailor.types.route_context import RouteContextResult

def _scored_routes(scores) -> List[RouteContextResult]:
    """Return routes ordered from highest to lowest score.

    Args:
        scores: 1-D sequence of numeric scores. Each position is handed to
            ``documentor.inverse_transform`` to look up the route.
            NOTE(review): this assumes the position in ``scores`` equals the
            label understood by the documentor — confirm against
            ``pipeline.classes_``.

    Returns:
        A list of route copies, each carrying its score as a Python float
        (via ``copy_with_score``). Positions for which the documentor returns
        ``None`` are skipped.
    """
    ranked: List[RouteContextResult] = []
    # argsort is ascending; reversing it walks the indices best-score first.
    for idx in np.argsort(scores)[::-1]:
        candidate = documentor.inverse_transform(idx)
        if candidate is None:
            continue
        ranked.append(candidate.copy_with_score(float(scores[idx])))
    return ranked

def predict(query: str) -> List[RouteContextResult]:
    """Rank routes for a free-text query.

    Args:
        query: The user's query string.

    Returns:
        Routes scored by the fitted pipeline, best first. An empty list is
        returned when the query is empty or contains only whitespace.
    """
    if query.strip():
        # predict_proba returns one row per input; take the row for our single query.
        probabilities = pipeline.predict_proba([query])[0]
        return _scored_routes(probabilities)
    return []