### Import dependencies

In [45]:
import os
import pickle

import faiss
import google.generativeai as genai
import numpy as np
import pandas as pd

## Set up the API key

In [46]:
# Read the key from the environment so the secret is never stored in (or
# committed with) the notebook. Set GOOGLE_API_KEY before starting Jupyter.
API_KEY = os.environ.get("GOOGLE_API_KEY", "").strip()
if not API_KEY:
    # Fail here with a clear message instead of at the first embedding request.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in your shell (or a .env loader) "
        "before running this notebook."
    )
genai.configure(api_key=API_KEY)

## Create KB

### 1. Loading the CSV data

In [49]:
print("1. Loading CSV data...")
path = "assets/Cosmetic_Product_Database.csv"
try:
    # Every later cell reads or extends `df`, so it must exist past this point.
    df = pd.read_csv(path)
except FileNotFoundError:
    print(f"❌ Error : {path} not Found. Create it first!")
    # Stop the run here: continuing would leave `df` undefined and surface as
    # an unrelated NameError in the next cell.
    raise

1. Loading CSV data...


### 2. Combine columns for rich search context

In [51]:
def _describe_product(row):
    """Flatten one product row into a single pipe-delimited text line.

    Reads the product_name, brand, category, skin_type, concern, description,
    key_ingredients, how_to_use and pro_tip fields of ``row``.
    """
    return (
        f"Product : {row['product_name']} | Brand: {row['brand']} | "
        f"Category: {row['category']} | Skin Type: {row['skin_type']} | "
        f"Concern : {row['concern']} | Description: {row['description']} | "
        f"Ingredients: {row['key_ingredients']} | How to use : {row['how_to_use']} | Pro Tips: {row['pro_tip']} "
    )


print("2. Preparing text....")
# One text line per product row, stored next to the original columns.
df["combined_text"] = df.apply(_describe_product, axis=1)

2. Preparing text....


### 3. Generating Embeddings

In [56]:
print("3. Generating Embeddings (taking to Google)...")
model = "gemini-embedding-001"

# One embed_content request per row, collected in row order
# (in production, we would batch this).
embedding = []
embedding.extend(
    genai.embed_content(
        model=model,
        content=product_text,
        task_type="retrieval_document",
    )["embedding"]
    for product_text in df["combined_text"]
)

3. Generating Embeddings (taking to Google)...


### 4. Creating the vector index using FAISS

In [53]:
# FAISS requires float32, so build the NumPy matrix in that dtype directly.
embedding_metrix = np.asarray(embedding, dtype=np.float32)

print("4. Building FAISS index...")
# The second axis of the matrix is the length of each embedding vector.
dimension = embedding_metrix.shape[1]
# IndexFlatL2 compares vectors by L2 (Euclidean) distance.
index = faiss.IndexFlatL2(dimension)
index.add(embedding_metrix)

4. Building FAISS index...


### 5. Save to disk

In [58]:
print("5. Saving Files...")

# The pickled DataFrame is what maps a FAISS result position back to a product
# row, so both artifacts must describe the same rows. Refuse to save when they
# disagree (e.g. cells were run out of order and the index is stale).
if index.ntotal != len(df):
    raise ValueError(
        f"FAISS index holds {index.ntotal} vectors but the DataFrame has "
        f"{len(df)} rows; re-run the embedding and index cells before saving."
    )

# Save the vector index (the math)
print("Saving the vectors...")
path_vector = "assets/Cosmetic_Product_Database.index"
faiss.write_index(index, path_vector)

# Save the actual data/text (the content).
# We need this because FAISS only stores numbers. We need to map Number -> Text.
print("Save the pkl file...")
path_pickle = "assets/Cosmetic_Product_backup pickle.pkl"
with open(path_pickle, "wb") as f:
    pickle.dump(df, f)
print("✅ Success! 'Cosmetic_Product_Database.index' and 'Cosmetic_Product_backup pickle.pkl' created.")

5. Saving Files...
Saving the vectors...
Save the pkl file...
✅ Success! 'Cosmetic_Product_Database.index' and 'Cosmetic_Product_backup pickle.pkl' created.
