In [1]:
import pandas as pd
import numpy as np
import openai
import faiss

In [2]:
f = open('apiKey.bin', 'r')
apiKey = f.read() 

In [3]:
openai.api_key =apiKey
f.close()

In [4]:
embeddings = np.load('embeddingData/investorEmbeddings.npy')

In [5]:
def build_faiss_index(embeddings):
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index

index = build_faiss_index(embeddings)

In [6]:
index

<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x2db6fa220> >

In [21]:
def search_faiss(query, index, k=17):
    query_embedding = get_batch_embeddings([query])
    distances, indices = index.search(query_embedding, k)
    return indices[0], distances[0]

In [22]:
def get_batch_embeddings(texts, model="text-embedding-ada-002"):
    response = openai.Embedding.create(input=texts, model=model)
    return np.array([data['embedding'] for data in response['data']])

In [23]:
def query_gpt_4o_mini(query, retrieved_indices, df):
    retrieved_data = "\n".join([str(df.iloc[idx].to_dict()) for idx in retrieved_indices])
    
    prompt = f"""
    You are an expert AI assistant specialized in analyzing investors data. Your task is to provide concise, relevant, and actionable answers to the user query based on the provided data.

    Data context:
    {retrieved_data}

    User Query: "{query}"

    Your answer should directly address the user's query based on the context and data provided. If the answer is not explicitly found, provide the most relevant information or indicate uncertainty.
    """
    
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a highly efficient AI assistant focused on delivering quick and accurate responses related to investors data."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.2,
        max_tokens=150, 
        top_p=1.0, 
        frequency_penalty=0.0, 
        presence_penalty=0.0 
    )
    
    return response['choices'][0]['message']['content']


In [24]:
def perform_query(query, index, df):
    indices, distances = search_faiss(query, index)
    answer = query_gpt_4o_mini(query, indices, df)
    return answer

In [25]:
df = pd.read_csv('dataStuff/synthDataInvestor.csv')
df

Unnamed: 0,Full Name,Email,Phone Number,How do you like to Invest as,Company Name,Designation,Primary Sectors of Interest,Stage of Startup Preferred,Preferred Ticket Size of Investment,No of Startups to invest in this quarter,Aware of compliance requirements,Experience Level as Investor,Want Startup Singam Support?,Additional Comments
0,Kavya Srinivas,kavya.srinivas@fundgroup.com,+91 9319083963,Individual,Wealth Advisors,VC Partner,"Health Care, Technology - STEM Based, Impact I...",Early Growth Stage,5 Lakhs to 10 Lakhs,10,I have some knowledge but would appreciate fur...,I have invested in a few startup ventures in t...,Yes,Would like to explore ESG-focused startups.
1,Gokul Talwar,gokul.talwar@visiongroup.com,+91 7655411811,Individual,Summit Advisors,Investor,"Infrastructure & Manufacturing, Health Care, I...",Growth,5 Lakhs to 10 Lakhs,5,I have some knowledge but would appreciate fur...,I regularly invest in startup ventures as part...,Yes,Looking for disruptive startups in the infrast...
2,Ayesha Bandi,ayesha.bandi@summitsyndicate.com,+91 9603448258,Family Office,Fund Partners,Investment Analyst,"Health Care, Information Technology, Impact In...",Growth,25 Lakhs to 50 Lakhs,5,"Yes, I am familiar with the compliance require...",I have invested in a few startup ventures in t...,Yes,Looking for impact-driven startups in AgriTech.
3,Renee Shetty,renee.shetty@nexuscapital.com,+91 9358440299,Firm,Growth Investments,Director,"Deep Tech, Information Technology",MVP/Prototype Developed,50 Lakhs to 1 Crore,3,I have some knowledge but would appreciate fur...,This is my first time considering Investment i...,Yes,Looking for impact-driven startups in AgriTech.
4,Drishya Badami,drishya.badami@growthsyndicate.com,+91 9930194644,Individual,Growth Syndicate,VC Partner,"Infrastructure & Manufacturing, ESG (Environme...",MVP/Prototype Developed,10 Lakhs to 25 Lakhs,9,I have some knowledge but would appreciate fur...,I regularly invest in startup ventures as part...,No,Interested in D2C brands with a strong custome...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Suhana Mall,suhana.mall@fundholdings.com,+91 6445640374,Firm,Nexus Syndicate,CEO,"Health Care, Infrastructure & Manufacturing, E...",Early Growth Stage,25 Lakhs to 50 Lakhs,1,I have some knowledge but would appreciate fur...,This is my first time considering Investment i...,Yes,Focused on deep-tech innovations with sustaina...
996,Bhamini Char,bhamini.char@summitadvisors.com,+91 7749990454,Family Office,Nexus Capital,Managing Partner,"Technology - STEM Based, ESG (Environmental, S...",Early Growth Stage,50 Lakhs to 1 Crore,1,I have some knowledge but would appreciate fur...,This is my first time considering Investment i...,No,Focused on deep-tech innovations with sustaina...
997,Arnav Rau,arnav.rau@capitalpartners.com,+91 8719003870,Family Office,Ventures Partners,Investment Analyst,"Technology - STEM Based, Infrastructure & Manu...",Growth,25 Lakhs to 50 Lakhs,2,"No, I am not familiar with the compliance requ...",I regularly invest in startup ventures as part...,No,Looking for disruptive startups in the infrast...
998,Divyansh Wali,divyansh.wali@growthcapital.com,+91 9029812904,Individual,Ventures Syndicate,Director,"Technology - STEM Based, ESG (Environmental, S...",Early Growth Stage,25 Lakhs to 50 Lakhs,7,I have some knowledge but would appreciate fur...,I have invested in a few startup ventures in t...,Yes,Prefer startups with proven traction before in...


In [26]:
df.columns

Index(['Full Name', 'Email', 'Phone Number', 'How do you like to Invest as',
       'Company Name', 'Designation', 'Primary Sectors of Interest',
       'Stage of Startup Preferred', 'Preferred Ticket Size of Investment',
       'No of Startups to invest in this quarter',
       'Aware of compliance requirements', 'Experience Level as Investor',
       'Want Startup Singam Support?', 'Additional Comments'],
      dtype='object')

In [27]:
query = "I need the List of Investors that prefer Technology & manufacturing sector"

In [28]:
outResult = perform_query(query, index, df)
outResult

'Here is the list of investors that prefer the Technology and Manufacturing sector:\n\n1. **Dharmajan Taneja**\n   - Email: dharmajan.taneja@nexusadvisors.com\n   - Phone Number: +91 6955924702\n   - Primary Sectors of Interest: AgriTech, Infrastructure & Manufacturing, Technology - STEM Based\n\n2. **Priyansh Gill**\n   - Email: priyansh.gill@capitalholdings.com\n   - Phone Number: +91 6615280201\n   - Primary Sectors of Interest: Infrastructure & Manufacturing, Technology - STEM Based, Health Care\n\n3. **Taran Bhalla**\n   - Email: taran.bhalla'