### Embedding Model 
- Notebook to test embedding model

In [14]:
# Import library
from sentence_transformers import SentenceTransformer
import numpy as np

In [15]:
# Initialize model
model = SentenceTransformer('all-MiniLM-L6-v2')

In [16]:
# Template with variations AND the SQL query
QUERY_TEMPLATES = {
    "exercise_progress": {
        "variations": [
            "How is my bench press progressing?",
            "Show progress for squats",
            "Am I getting stronger on deadlifts?",
            "Track my overhead press improvement",
            "Has my leg press improved?",
            "Squat gains over time",
            "Weight progression for bench press",
            "Is my deadlift going up?"
        ],
        "sql": """
            SELECT 
                exercise_name,
                workout_date,
                AVG(weight) as avg_weight,
                AVG(reps) as avg_reps,
                COUNT(*) as total_sets
            FROM sets
            JOIN workouts ON sets.workout_id = workouts.id
            JOIN exercises ON sets.exercise_id = exercises.id
            WHERE client_id = {client_id}
              AND exercise_name = {exercise_name}
              AND workout_date >= DATE('now', '-21 days')
            GROUP BY exercise_name, workout_date
            ORDER BY workout_date DESC
        """,
        "parameters": ["exercise_name"]
    }
}

### Client id is provided as a parameter via api call

In [17]:
# Pre-compute embeddings
template_embeddings = []
template_map = []

for template_id, template in QUERY_TEMPLATES.items():
    for variation in template['variations']:
        embedding = model.encode(variation)
        template_embeddings.append(embedding)
        template_map.append(template_id)

template_embeddings = np.array(template_embeddings)

In [18]:
# User asks a question
user_query = "How's my squat doing?"
query_embedding = model.encode(user_query)

# Find best match
similarities = np.dot(template_embeddings, query_embedding) / (
    np.linalg.norm(template_embeddings, axis=1) * np.linalg.norm(query_embedding)
)
best_idx = np.argmax(similarities)
best_template_id = template_map[best_idx]
confidence = similarities[best_idx]

print(f"Query: '{user_query}'")
print(f"Matched template: {best_template_id}")
print(f"Confidence: {confidence:.3f}")

# Get the SQL query
sql_query = QUERY_TEMPLATES[best_template_id]['sql']
print(f"\nSQL to execute:\n{sql_query}")

Query: 'How's my squat doing?'
Matched template: exercise_progress
Confidence: 0.653

SQL to execute:

            SELECT 
                exercise_name,
                workout_date,
                AVG(weight) as avg_weight,
                AVG(reps) as avg_reps,
                COUNT(*) as total_sets
            FROM sets
            JOIN workouts ON sets.workout_id = workouts.id
            JOIN exercises ON sets.exercise_id = exercises.id
            WHERE client_id = {client_id}
              AND exercise_name = {exercise_name}
              AND workout_date >= DATE('now', '-21 days')
            GROUP BY exercise_name, workout_date
            ORDER BY workout_date DESC
        
