# Indexing Process

This notebooks is to index the documents to the vector database

In [30]:
from dotenv import load_dotenv 

load_dotenv() 

True

In [31]:
import chromadb

chroma_client = chromadb.PersistentClient(path="chromadb")

In [32]:
import os
from supabase import create_client, Client

supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_KEY")
supabase_client = create_client(supabase_url, supabase_key)

## Article

In [17]:
import pandas as pd

articles = supabase_client.table("article").select("*").execute().model_dump()['data']
df_article = pd.DataFrame(articles)
df_article

Unnamed: 0,article_id,title,category,short_desc,created_date,author,body
0,1,Mastering Productivity: A Beginner's Guide,Productivity,A beginner's guide to enhancing productivity b...,2024-05-12,,In a world brimming with distractions and dema...
1,2,"Balancing Work, Hobbies, and Family: The Ultim...",Work-Life Balance,Exploring practical strategies for achieving a...,2024-04-29,,"In the modern whirlwind of responsibilities, f..."
2,4,"The Art of Harmonizing Career, Personal Life, ...",Work-Life Balance,"discusses techniques for balancing career, per...",2024-05-25,,"In the intricate tapestry of life, mastering t..."
3,5,Achieving Peak Productivity: Essential Strateg...,Productivity,"Strategies for boosting productivity, such as ...",2024-05-26,,In today's fast-paced and competitive landscap...
4,3,Current Market Outlook for China: Finance and ...,Finance,China's finance and banking sectors in 2024 pr...,2024-05-25,Patricia Kim,"As 2024 progresses, China's financial and bank..."
5,6,The Evolution of Mobile Banking,Finance,Explores the rapid advancements in mobile bank...,2024-05-27,Emily Johnson,"In recent years, mobile banking has transforme..."
6,7,The Impact of Interest Rate Changes on the Sto...,Finance,Analyzing how alterations in interest rates in...,2024-05-28,Michael Roberts,Interest rates are among the most significant ...
7,30,Optimizing Workspace for Maximum Productivity,Productivity,Designing workspaces that enhance focus and ef...,2024-06-20,David Chen,The physical environment of a workspace can si...
8,8,Financial Literacy and Its Importance in Econo...,Finance,Discussing the critical role of financial lite...,2024-05-29,Linda Harper,Financial literacy is an essential skill in th...
9,9,Trends in Global Wealth Management,Finance,Exploring current trends that are shaping the ...,2024-05-30,Jonathan Clarke,The wealth management industry is experiencing...


In [18]:
# Indexing to ChromaDB

try:
    article_collection = chroma_client.get_collection("article")
except:
    article_collection = chroma_client.create_collection("article")

In [19]:
def create_article_sentence(article):
    return f"{article['title']}. {article['short_desc']}"

In [20]:
article_collection.add(
    ids=[str(article['article_id']) for article in articles],
    documents=[create_article_sentence(article) for article in articles],
    metadatas=[
        {
            "category": article['category'],
            "created_date": article['created_date'],
        } for article in articles        
    ]
)

## Mentor

In [33]:
import pandas as pd

mentors = supabase_client.table("mentor").select("*").execute().model_dump()['data']
df_mentor = pd.DataFrame(mentors)
df_mentor

Unnamed: 0,mentor_id,name,department,position,short_desc,email,specialization
0,1,John Doe,Finance,Financial Analyst,Analyzes financial data and provides investmen...,john.doe@bank.com,Investment Analysis
1,2,Jane Smith,IT,Software Developer,Develops and maintains bank software systems.,jane.smith@bank.com,System Development
2,3,Michael Brown,Risk Management,Risk Manager,Manages risks to minimize losses through effec...,michael.brown@bank.com,Risk Assessment
3,4,Emily White,Human Resources,HR Manager,"Oversees recruitment, training, and employee r...",emily.white@bank.com,Employee Relations
4,5,Alex Johnson,Compliance,Compliance Officer,Ensures the bank adheres to legal standards an...,alex.johnson@bank.com,Regulatory Compliance
5,6,Linda Green,Marketing,Marketing Director,Directs the bank's marketing strategies and ca...,linda.green@bank.com,Strategic Marketing
6,7,Robert Garcia,IT,Network Administrator,Responsible for maintaining the bank's compute...,robert.garcia@bank.com,Network Security
7,8,Sarah Miller,Legal,Legal Advisor,Provides legal advice and supports litigation ...,sarah.miller@bank.com,Banking Law
8,9,James Wilson,Finance,Treasurer,"Manages the organization’s budget, investments...",james.wilson@bank.com,Financial Management
9,10,Patricia Taylor,Operations,Operations Manager,Manages daily operations and ensures efficient...,patricia.taylor@bank.com,Operational Efficiency


In [34]:
# Indexing to ChromaDB

try:
    mentor_collection = chroma_client.get_collection("mentor")
except:
    mentor_collection = chroma_client.create_collection("mentor")

In [35]:
def create_mentor_sentence(mentor):
    return f"A mentor from {mentor['department']} department, working as a {mentor['position']} " \
           f"specializing in {mentor['specialization']}. {mentor['short_desc']}"

In [36]:
create_mentor_sentence(mentors[0])

'A mentor from Finance department, working as a Financial Analyst specializing in Investment Analysis. Analyzes financial data and provides investment guidance.'

In [37]:
mentor_collection.add(
    ids=[str(mentor['mentor_id']) for mentor in mentors],
    documents=[create_mentor_sentence(mentor) for mentor in mentors],
    metadatas=[
        {
            "name": mentor['name'],
            "department": mentor['department'],
            "position": mentor['position'],
            "short_desc": mentor['short_desc'],
            "email": mentor['email'],
        } for mentor in mentors        
    ]
)

Insert of existing embedding ID: 1
Insert of existing embedding ID: 2
Insert of existing embedding ID: 3
Insert of existing embedding ID: 4
Insert of existing embedding ID: 5
Insert of existing embedding ID: 6
Insert of existing embedding ID: 7
Insert of existing embedding ID: 8
Insert of existing embedding ID: 9
Insert of existing embedding ID: 10
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9
Add of existing embedding ID: 10


In [38]:
result = mentor_collection.query(
            query_texts=create_mentor_sentence(mentors[0]),
            n_results=1,
        )
result

{'ids': [['1']],
 'distances': [[0.0]],
 'metadatas': [[{'department': 'Finance',
    'email': 'john.doe@bank.com',
    'name': 'John Doe',
    'position': 'Financial Analyst',
    'short_desc': 'Analyzes financial data and provides investment guidance.'}]],
 'embeddings': None,
 'documents': [['A mentor from Finance department, working as a Financial Analyst specializing in Investment Analysis. Analyzes financial data and provides investment guidance.']],
 'uris': None,
 'data': None}

In [39]:
result['ids'][0][0]

'1'

## Buddy

In [40]:
import pandas as pd

buddies = supabase_client.table("buddy").select("*").execute().model_dump()['data']
df_buddy = pd.DataFrame(buddies)
df_buddy

Unnamed: 0,buddy_id,name,short_desc,email,linkedin,instagram,years_of_experience,role,professional_interest,interaction_frequency,hobby,language,meeting_preference,buddy_program_goal
0,1,Alice Morgan,Handles customer inquiries and support.,alice.morgan@ubs.com,alice-morgan-bank,alice_morgan,12,Management,Leadership skills,Weekly,"[""Travel"", ""Cooking"", ""Sports""]","[""English""]",Combination of both,Networking
1,2,Bob Hughes,Manages software systems and IT support.,bob.hughes@ubs.com,bob-hughes-tech,bob_hughes_tech,3,Finance/Accounting,Regulatory compliance,Bi-weekly,"[""Reading"", ""Arts and Crafts""]","[""English"", ""French""]",Virtual only,Skill development
2,3,Carol Jennings,Manages employee relations and benefits.,carol.jennings@ubs.com,carol-jennings-hr,caroljennings_hr,1,Sales/Marketing,Technical expertise,Monthly,"[""Technology/Gadgets"", ""Travel""]","[""Mandarin"", ""English""]",In-person only,Receiving mentorship
3,4,Dave Franklin,Ensures compliance with financial laws.,dave.franklin@ubs.com,dave-franklin-finance,dave_franklin_finance,8,Operations,Project management,Daily,"[""Sports"", ""Cooking""]","[""English"", ""Cantonese""]",Combination of both,Providing mentorship
4,5,Emma Clark,Leads marketing campaigns and outreach.,emma.clark@ubs.com,emma-clark-marketing,emmaclarkmarketing,15,Technology,Customer service excellence,Weekly,"[""Reading"", ""Technology/Gadgets""]","[""French""]",Virtual only,Cultural integration
5,6,Frank Irwin,Oversees bank operations and logistics.,frank.irwin@ubs.com,frank-irwin-ops,frankirwin_ops,4,Compliance,Regulatory compliance,Bi-weekly,"[""Travel"", ""Arts and Crafts""]","[""English"", ""Mandarin""]",Combination of both,Networking
6,7,Grace Kim,Manages risk and assessment strategies.,grace.kim@ubs.com,grace-kim-risk,grace_kim_risk,2,Management,Leadership skills,Monthly,"[""Sports"", ""Reading""]","[""English""]",In-person only,Providing mentorship
7,8,Henry Stanton,Processes and evaluates loan applications.,henry.stanton@ubs.com,henry-stanton-loan,henrystanton_loan,20,Finance/Accounting,Technical expertise,Bi-weekly,"[""Cooking"", ""Arts and Crafts""]","[""French"", ""English""]",Virtual only,Skill development
8,9,Isabella Foster,Provides legal advice and representation.,isabella.foster@ubs.com,isabella-foster-legal,isabella_foster_legal,11,Technology,Project management,Weekly,"[""Sports"", ""Cooking""]","[""English""]",Combination of both,Providing mentorship
9,10,Jake Norton,Manages client investment portfolios.,jake.norton@ubs.com,jake-norton-invest,jake_norton_invest,5,Operations,Customer service excellence,Monthly,"[""Reading"", ""Technology/Gadgets""]","[""Mandarin""]",In-person only,Receiving mentorship


In [41]:
# Indexing to ChromaDB

try:
    buddy_collection = chroma_client.get_collection("buddy")
except:
    buddy_collection = chroma_client.create_collection("buddy")

In [42]:
def create_buddy_sentence(buddy):
    return f"A buddy from {buddy['role']}, with interest in {buddy['professional_interest']} " \
           f"likes {', '.join(eval(buddy['hobby']))}. proficient in { ', '.join(eval(buddy['language']))} " \
           f"with a goal in {buddy['buddy_program_goal']}"

In [43]:
create_buddy_sentence(buddies[0])

'A buddy from Management, with interest in Leadership skills likes Travel, Cooking, Sports. proficient in English with a goal in Networking'

In [44]:
buddy_collection.add(
    ids=[str(buddy['buddy_id']) for buddy in buddies],
    documents=[create_buddy_sentence(buddy) for buddy in buddies],
    metadatas=[
        {
            "name": buddy['name'],
            "short_desc": buddy['short_desc'],
            "email": buddy['email'],
            "linkedin": buddy['linkedin'],
            "instagram": buddy['instagram'],
            "years_of_experience": buddy['years_of_experience'],
            "role": buddy['role'],
            "professional_interest": buddy['professional_interest'],
            "interaction_frequency": buddy['interaction_frequency'],
            "hobby": buddy['hobby'],
            "language": buddy['language'],
            "meeting_preference": buddy['meeting_preference'],
            "buddy_program_goal": buddy['buddy_program_goal'] 
        } for buddy in buddies        
    ]
)

Insert of existing embedding ID: 1
Insert of existing embedding ID: 2
Insert of existing embedding ID: 3
Insert of existing embedding ID: 4
Insert of existing embedding ID: 5
Insert of existing embedding ID: 6
Insert of existing embedding ID: 7
Insert of existing embedding ID: 8
Insert of existing embedding ID: 9
Insert of existing embedding ID: 10
Insert of existing embedding ID: 11
Insert of existing embedding ID: 12
Insert of existing embedding ID: 13
Insert of existing embedding ID: 14
Insert of existing embedding ID: 15
Insert of existing embedding ID: 16
Insert of existing embedding ID: 17
Insert of existing embedding ID: 18
Insert of existing embedding ID: 19
Insert of existing embedding ID: 20
Insert of existing embedding ID: 21
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID:

In [45]:
result = buddy_collection.get('21')
result

{'ids': ['21'],
 'embeddings': None,
 'metadatas': [{'buddy_program_goal': 'Cultural Integration',
   'email': 'jonathan@ubs.com',
   'hobby': "['Sports & Working Out', 'Arts and Crafts']",
   'instagram': 'jonathan_park',
   'interaction_frequency': 'Weekly',
   'language': "['Chinese', 'Cantonese']",
   'linkedin': 'jonathan-park',
   'meeting_preference': 'Combination of both',
   'name': 'Jonathan',
   'professional_interest': 'Leadership Skills',
   'role': 'Operations',
   'short_desc': 'Manages frontend development and compliance',
   'years_of_experience': 2}],
 'documents': ['A buddy from Operations, with interest in Leadership Skills likes Sports & Working Out, Arts and Crafts. proficient in Chinese, Cantonese with a goal in Cultural Integration'],
 'uris': None,
 'data': None}

In [46]:
result['documents'][0]

'A buddy from Operations, with interest in Leadership Skills likes Sports & Working Out, Arts and Crafts. proficient in Chinese, Cantonese with a goal in Cultural Integration'

In [47]:
query_sentence = "A buddy from Operations, with interest in Leadership Skills likes Sports & Working Out, Arts and Crafts. proficient in Chinese, Cantonese with a goal in Cultural Integration"

result = buddy_collection.query(
        query_texts=query_sentence,
        n_results=1,
        where={
            "$and" : [
                {"years_of_experience" : {"$gte" : 0}},      
                {"years_of_experience" : {"$lte" : 4}},

                {"interaction_frequency" : "Weekly"},

                {"meeting_preference" :  "Combination of both"},

                {"email" :  {"$ne" : "jonathan@ubs.com"}},
            ]
        }
    )
print(result)

{'ids': [['13']], 'distances': [[0.3957486788025282]], 'metadatas': [[{'buddy_program_goal': 'Providing mentorship', 'email': 'nora.quinn@ubs.com', 'hobby': '["Sports", "Technology/Gadgets"]', 'instagram': 'noraquinn_bizdev', 'interaction_frequency': 'Weekly', 'language': '["English"]', 'linkedin': 'nora-quinn-bizdev', 'meeting_preference': 'Combination of both', 'name': 'Nora Quinn', 'professional_interest': 'Leadership skills', 'role': 'Human Resources', 'short_desc': 'Explores new business opportunities.', 'years_of_experience': 1}]], 'embeddings': None, 'documents': [['A buddy from Human Resources, with interest in Leadership skills likes Sports, Technology/Gadgets. proficient in English with a goal in Providing mentorship']], 'uris': None, 'data': None}
