In [84]:
import pandas as pd
import numpy as np
import openai
import faiss

In [85]:
f = open('apiKey.bin', 'r')
apiKey = f.read() 

In [86]:
openai.api_key =apiKey
f.close()

In [87]:
embeddings = np.load('embeddingData/startupEmbeddings.npy')

In [88]:
def build_faiss_index(embeddings):
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index

index = build_faiss_index(embeddings)

In [89]:
index

<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x2dd121f20> >

In [None]:
def search_faiss(query, index, k=17):
    query_embedding = get_batch_embeddings([query])
    distances, indices = index.search(query_embedding, k)
    return indices[0], distances[0]

In [91]:
def get_batch_embeddings(texts, model="text-embedding-ada-002"):
    response = openai.Embedding.create(input=texts, model=model)
    return np.array([data['embedding'] for data in response['data']])

In [92]:
def query_gpt_4o_mini(query, retrieved_indices, df):
    retrieved_data = "\n".join([str(df.iloc[idx].to_dict()) for idx in retrieved_indices])
    
    prompt = f"""
    You are an expert AI assistant specialized in analyzing startup data. Your task is to provide concise, relevant, and actionable answers to the user query based on the provided data.

    Data context:
    {retrieved_data}

    User Query: "{query}"

    Your answer should directly address the user's query based on the context and data provided. If the answer is not explicitly found, provide the most relevant information or indicate uncertainty.
    """
    
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a highly efficient AI assistant focused on delivering quick and accurate responses related to startup and investor data."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.2,
        max_tokens=150, 
        top_p=1.0, 
        frequency_penalty=0.0, 
        presence_penalty=0.0 
    )
    
    return response['choices'][0]['message']['content']


In [93]:
def perform_query(query, index, df):
    indices, distances = search_faiss(query, index)
    answer = query_gpt_4o_mini(query, indices, df)
    return answer

In [94]:
df = pd.read_csv('dataStuff/synthDataStartup.csv')
df

Unnamed: 0,Company Name,Founder Name,Email,Designation,Mobile Number,Website,City,Brief about the Company,I am a,Legal Entity,Current Stage of Startup,Sector,I want to apply for funding
0,TechLabs596,Santosh1878 Venkat1878,santosh1878@techlabs596.com,Managing Director,9260756423,www.techlabs596.com,Kanyakumari,TechLabs596 is an innovative venture focusing ...,Startup,One Person Company,Pre-Revenue,Cybersecurity,No
1,TechInnovations442,Arun450 Rajan450,arun450@techinnovations442.com,CEO,9202075872,www.techinnovations442.com,Erode,TechInnovations442 is an innovative venture fo...,MSME,Public Limited,Ideation,Green Energy,Yes
2,CyberDynamics1100,Vishnu1092 Subramanian1092,vishnu1092@cyberdynamics1100.com,CTO,8613702655,www.cyberdynamics1100.com,Trichy,CyberDynamics1100 is an innovative venture foc...,Startup,Sole Proprietorship,Revenue & Growth,Gaming & Entertainment,No
3,AgroLabs1445,Karthik431 Krishnan431,karthik431@agrolabs1445.com,Founder,9592930839,www.agrolabs1445.com,Tirunelveli,AgroLabs1445 is an innovative venture focusing...,MSME,Private Limited,Expansion,Logistics,Yes
4,SmartSystems1094,Hari855 Murthy855,hari855@smartsystems1094.com,Managing Director,9711332659,www.smartsystems1094.com,Salem,SmartSystems1094 is an innovative venture focu...,Startup,LLP,Mature Business,Energy,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,QuantumInnovations1294,Suresh1483 Sridhar1483,suresh1483@quantuminnovations1294.com,Founder,8072330589,www.quantuminnovations1294.com,Ooty,QuantumInnovations1294 is an innovative ventur...,MSME,One Person Company,Ideation,Healthcare,Yes
1496,FutureSystems1449,Karthik371 Krishnan371,karthik371@futuresystems1449.com,Managing Director,8265279913,www.futuresystems1449.com,Chennai,FutureSystems1449 is an innovative venture foc...,Startup,Public Limited,Revenue & Growth,Cybersecurity,No
1497,AquaAI800,Santosh1248 Venkat1248,santosh1248@aquaai800.com,CEO,7618081494,www.aquaai800.com,Vellore,AquaAI800 is an innovative venture focusing on...,MSME,Sole Proprietorship,Expansion,Green Energy,Yes
1498,EcoDynamics709,Santosh638 Venkat638,santosh638@ecodynamics709.com,CTO,7049538257,www.ecodynamics709.com,Coimbatore,EcoDynamics709 is an innovative venture focusi...,Startup,Private Limited,Mature Business,Gaming & Entertainment,No


In [95]:
df.columns

Index(['Company Name', 'Founder Name', 'Email', 'Designation', 'Mobile Number',
       'Website', 'City', 'Brief about the Company', 'I am a', 'Legal Entity',
       'Current Stage of Startup', 'Sector', 'I want to apply for funding'],
      dtype='object')

In [96]:
query = "I need the List of Startups that are from coimbatore"

In [97]:
outResult = perform_query(query, index, df)
outResult

'Here is the list of startups from Coimbatore:\n\n1. **SmartInnovations1399**\n   - Founder: Suresh673 Sridhar673\n   - Email: suresh673@smartinnovations1399.com\n   - Website: [www.smartinnovations1399.com](http://www.smartinnovations1399.com)\n   - Sector: Automobile\n   - Current Stage: Pre-Revenue\n\n2. **SmartInnovations753**\n   - Founder: Gopal1237 Natarajan1237\n   - Email: gopal1237@smartinnovations753.com\n   - Website: [www.smartinnovations753.com](http://www.smartinnovations753.com)\n   - Sector: Energy\n'