In [2]:
import pandas as pd
import os
import pickle
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment.
load_dotenv()
# os.environ only accepts string values, so assigning os.getenv()'s result
# directly raises TypeError when HF_TOKEN is not defined anywhere.
# Re-export the token only when it is actually set.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token is not None:
    os.environ["HF_TOKEN"] = _hf_token

# === Configuration ===
# Workbook read by the entry point; the path is relative to the working directory.
EXCEL_PATH = "dataset/Workouts.xlsx"
# Directory passed to Chroma as persist_directory ("~" is expanded to the
# current user's home directory).
CHROMA_DB_PATH = os.path.expanduser("~/chroma_workout_db")
# File that create_and_save_vector_db() pickles the Document list into;
# the path is relative to the working directory.
METADATA_FILE = "workout_chroma_metadata.pkl"

# === Embedding Model ===
# Module-level embedding object handed to Chroma.from_documents() in
# create_and_save_vector_db(). It is constructed when this cell/module runs.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# === Load Excel ===
def load_excel_data(excel_path):
    """Read the workbook at ``excel_path`` and return it as a DataFrame.

    Args:
        excel_path: Location of the Excel file, forwarded unchanged to
            ``pandas.read_excel``.

    Returns:
        The DataFrame produced by ``pandas.read_excel`` with its default
        options.
    """
    return pd.read_excel(excel_path)

# === Prepare Documents for Chroma ===
def prepare_documents(df):
    """Convert the rows of ``df`` into a list of ``Document`` objects.

    For every usable row, the ``input`` cell becomes the document's
    ``page_content`` and the row label, ``input`` and ``output`` are stored
    in its metadata under ``row_index``, ``input`` and ``output``.

    Rows whose ``input`` or ``output`` cell is missing (NaN/None, e.g. an
    empty Excel cell) are skipped with a printed notice, because
    ``page_content`` has to be a string. Remaining cell values are coerced
    with ``str()`` so non-text cells (e.g. numbers) do not fail either.

    Args:
        df: DataFrame providing ``input`` and ``output`` columns.

    Returns:
        List of ``Document`` objects in row order, one per non-skipped row.

    Raises:
        KeyError: If a row lacks the ``input`` or ``output`` column.
    """
    documents = []
    for idx, row in df.iterrows():
        raw_input = row["input"]
        raw_output = row["output"]

        # Empty cells come back from pandas as NaN floats; they cannot be
        # used as document text, so drop the row instead of crashing.
        if pd.isna(raw_input) or pd.isna(raw_output):
            print(f"Skipping row {idx}: missing input or output")
            continue

        # str() is a no-op for text cells and makes other scalar cells usable.
        input_str = str(raw_input)
        output_str = str(raw_output)
        print(f"Processing row {idx} \n Input: {input_str} \n Output: {output_str}\n\n")
        metadata = {
            "row_index": idx,
            "input": input_str,
            "output": output_str,
        }
        documents.append(Document(page_content=input_str, metadata=metadata))
    return documents

# === Create and Save Vector Database ===
def create_and_save_vector_db(documents):
    """Embed ``documents`` into the persistent Chroma store and pickle them.

    The documents are embedded with the module-level ``embeddings`` object
    and written to the Chroma directory ``CHROMA_DB_PATH``. The same list is
    then pickled to ``METADATA_FILE``.

    Args:
        documents: List of ``Document`` objects to index.

    Returns:
        None. The results are the on-disk store, the pickle file and a
        confirmation message printed to stdout.
    """
    # Without explicit ids Chroma assigns fresh random ones on every call, so
    # re-running this script would append another copy of each document to
    # the existing store. Position-based ids make a re-run replace the
    # earlier entries instead of duplicating them.
    # NOTE: entries left over from a previously larger dataset are not removed.
    ids = [f"workout-{position}" for position in range(len(documents))]
    Chroma.from_documents(
        documents,
        embeddings,
        ids=ids,
        persist_directory=CHROMA_DB_PATH,
    )

    with open(METADATA_FILE, "wb") as f:
        pickle.dump(documents, f)
    print("Vector DB and metadata saved.")

# === Entry point ===
# Runs the whole pipeline only when this code is executed directly (not when
# it is imported): read the workbook, convert its rows to Documents, then
# embed and persist them. `df` and `documents` are bound at module scope.
if __name__ == "__main__":
    df = load_excel_data(EXCEL_PATH)
    documents = prepare_documents(df)
    create_and_save_vector_db(documents)


Processing row 0 
 Input: Fitness Level: Beginner; Workout Frequency: 2-3 days per week; Available Equipment: None; Primary Goal: Lose Weight; Workout Type: Strength Training; Specific Equipment Details: pull-up bar; Session Duration: 15-30 minutes; Focus Areas: arms, legs, chest; Health Limitation: None 
 Output: 
Overview & Strategy
As a beginner aiming to lose weight with no equipment except a pull-up bar, and training 2–3 days per week for 15–30 minutes per session, the goal is to focus on full-body strength training circuits using bodyweight exercises. These workouts are designed to be simple, effective, and progressively build strength while maximizing calorie burn.

We’ll split the week into two structured strength-focused days and an optional third day for light conditioning or mobility.

Weekly Workout Plan
Day 1 – Full Body Strength Focus (Arms + Legs + Chest)
Workout Type: Circuit Training

Duration: ~25 minutes

Warm-up (5 min)

Arm circles x 20

Bodyweight squats x 15

Hig