In [1]:
import pandas as pd
import numpy as np
import openai
import faiss

In [2]:
# Read the OpenAI API key from a local file. The context manager closes the
# handle immediately instead of leaving it open across cells; `f` stays bound
# (already closed) afterwards, so any later `f.close()` is a harmless no-op.
with open('apiKey.bin', 'r') as f:
    # Strip surrounding whitespace so a trailing newline in the file does not
    # become part of the key.
    apiKey = f.read().strip()

In [3]:
# Release the key-file handle, then register the key on the openai module.
f.close()
openai.api_key = apiKey

In [36]:
def get_embedding(text, model="text-embedding-ada-002"):
    """Request an embedding for ``text`` and return it.

    Args:
        text: The value to embed; it is sent as a single-element input list.
        model: Name of the embedding model to request.

    Returns:
        The ``'embedding'`` entry of the first item in the response's
        ``'data'`` list.
    """
    payload = [text]
    reply = openai.Embedding.create(model=model, input=payload)
    first_item = reply['data'][0]
    return first_item['embedding']


In [37]:
def load_csv(file_path):
    """Read the CSV at ``file_path`` into a DataFrame with pandas' default options."""
    frame = pd.read_csv(file_path)
    return frame

In [38]:
def create_faiss_index(df, text_column):
    """Embed one column of ``df`` and load the vectors into a flat L2 FAISS index.

    Each value of ``df[text_column]`` is converted with ``str()`` and embedded
    through ``get_embedding`` -- one call per row, so the cost grows linearly
    with the number of rows.

    Args:
        df: DataFrame holding the rows to index.
        text_column: Label of the column whose values are embedded.

    Returns:
        A tuple ``(index, embeddings, df)``: the populated ``faiss.IndexFlatL2``,
        the ``float32`` embedding matrix that was added to it, and the input
        frame unchanged.

    Raises:
        ValueError: If the column has no rows, so there is nothing to index.
    """
    vectors = [get_embedding(str(value)) for value in df[text_column]]
    if not vectors:
        raise ValueError(
            f"Cannot build a FAISS index: column {text_column!r} has no rows."
        )

    # FAISS operates on float32; a plain np.array of Python floats would be
    # float64, which older FAISS wrappers reject.
    embeddings = np.asarray(vectors, dtype=np.float32)
    dimension = embeddings.shape[1]

    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index, embeddings, df

In [39]:
def search_faiss(index, query, top_k=10):
    """Return the positions of the vectors in ``index`` nearest to ``query``.

    Args:
        index: A FAISS index (anything exposing ``search`` and, normally,
            ``ntotal``).
        query: Text to embed with ``get_embedding`` and search for.
        top_k: Maximum number of neighbours to return.

    Returns:
        A 1-D NumPy array of row positions, nearest first. It holds at most
        ``top_k`` entries and fewer when the index stores fewer vectors.
    """
    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with -1, which positional lookups such as ``df.iloc[-1]`` would
    # silently resolve to the last row.
    available = getattr(index, "ntotal", top_k)
    k = min(top_k, available)
    if k <= 0:
        # Nothing to search for; skip the embedding request entirely.
        return np.empty(0, dtype=np.int64)

    # FAISS expects a 2-D float32 array of shape (n_queries, dimension).
    query_embedding = np.asarray([get_embedding(query)], dtype=np.float32)
    _distances, indices = index.search(query_embedding, k)

    hits = indices[0]
    # Drop any remaining -1 "no result" sentinels.
    return hits[hits >= 0]

In [40]:
def format_for_gpt(results, df):
    """Render the selected rows of ``df`` as text, one row per line.

    Args:
        results: Iterable of positional row indices, looked up via ``df.iloc``.
        df: DataFrame the positions refer to.

    Returns:
        A newline-joined string in which each line is the ``str()`` of that
        row's ``to_dict()`` mapping, in the order given by ``results``.
    """
    row_dicts = [df.iloc[position].to_dict() for position in results]
    rendered = map(str, row_dicts)
    return "\n".join(rendered)

In [41]:
def query_gpt(query, results, df):
    """Ask the chat model to answer ``query`` using the retrieved rows.

    Args:
        query: The user's question; it is embedded verbatim in the prompt.
        results: Positional row indices of ``df`` to include as context.
        df: DataFrame the indices refer to.

    Returns:
        The text content of the first choice in the chat completion response.
    """
    formatted_results = format_for_gpt(results, df)
    prompt = f"""
    You are an AI assistant analyzing investor & startup data.
    Based on the following retrieved data, answer the user query.

    Retrieved Data:
    {formatted_results}

    User Query: "{query}"
    """

    # `openai.chat_completions` does not exist. This notebook uses the legacy
    # (pre-1.0) SDK interface elsewhere (`openai.Embedding.create`), whose
    # chat endpoint is `openai.ChatCompletion.create`.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": "You analyze investor & startup data."},
                  {"role": "user", "content": prompt}],
        temperature=0.3
    )

    return response.choices[0].message.content


In [42]:
def perform_query(query, df, text_column="Company Name", top_k=10, index=None):
    """Answer ``query`` by retrieving the nearest rows of ``df`` and prompting the chat model.

    Args:
        query: The user's natural-language question.
        df: DataFrame to retrieve context rows from.
        text_column: Column whose values are embedded when an index has to be
            built. Only this column takes part in the similarity search.
        top_k: Number of nearest rows to pass to the model as context.
        index: Optional FAISS index already built over ``df`` (for example the
            first element returned by ``create_faiss_index``). When omitted,
            the index is rebuilt from scratch, which costs one embedding
            request per row of ``df``; pass a prebuilt index to avoid paying
            that on every query.

    Returns:
        The model's answer as returned by ``query_gpt``.
    """
    if index is None:
        index, _embeddings, df = create_faiss_index(df, text_column)
    indices = search_faiss(index, query, top_k)
    return query_gpt(query, indices, df)

In [43]:
# Load the startup CSV; the path is relative to the notebook's working directory.
df = load_csv("dataStuff/synthDataStartup.csv")

In [44]:
# Display the loaded frame (a bare last expression is rendered as the cell output).
df

Unnamed: 0,Company Name,Founder Name,Email,Designation,Mobile Number,Website,City,Brief about the Company,I am a,Legal Entity,Current Stage of Startup,Sector,I want to apply for funding
0,TechLabs596,Santosh1878 Venkat1878,santosh1878@techlabs596.com,Managing Director,9260756423,www.techlabs596.com,Kanyakumari,TechLabs596 is an innovative venture focusing ...,Startup,One Person Company,Pre-Revenue,Cybersecurity,No
1,TechInnovations442,Arun450 Rajan450,arun450@techinnovations442.com,CEO,9202075872,www.techinnovations442.com,Erode,TechInnovations442 is an innovative venture fo...,MSME,Public Limited,Ideation,Green Energy,Yes
2,CyberDynamics1100,Vishnu1092 Subramanian1092,vishnu1092@cyberdynamics1100.com,CTO,8613702655,www.cyberdynamics1100.com,Trichy,CyberDynamics1100 is an innovative venture foc...,Startup,Sole Proprietorship,Revenue & Growth,Gaming & Entertainment,No
3,AgroLabs1445,Karthik431 Krishnan431,karthik431@agrolabs1445.com,Founder,9592930839,www.agrolabs1445.com,Tirunelveli,AgroLabs1445 is an innovative venture focusing...,MSME,Private Limited,Expansion,Logistics,Yes
4,SmartSystems1094,Hari855 Murthy855,hari855@smartsystems1094.com,Managing Director,9711332659,www.smartsystems1094.com,Salem,SmartSystems1094 is an innovative venture focu...,Startup,LLP,Mature Business,Energy,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,QuantumInnovations1294,Suresh1483 Sridhar1483,suresh1483@quantuminnovations1294.com,Founder,8072330589,www.quantuminnovations1294.com,Ooty,QuantumInnovations1294 is an innovative ventur...,MSME,One Person Company,Ideation,Healthcare,Yes
1496,FutureSystems1449,Karthik371 Krishnan371,karthik371@futuresystems1449.com,Managing Director,8265279913,www.futuresystems1449.com,Chennai,FutureSystems1449 is an innovative venture foc...,Startup,Public Limited,Revenue & Growth,Cybersecurity,No
1497,AquaAI800,Santosh1248 Venkat1248,santosh1248@aquaai800.com,CEO,7618081494,www.aquaai800.com,Vellore,AquaAI800 is an innovative venture focusing on...,MSME,Sole Proprietorship,Expansion,Green Energy,Yes
1498,EcoDynamics709,Santosh638 Venkat638,santosh638@ecodynamics709.com,CTO,7049538257,www.ecodynamics709.com,Coimbatore,EcoDynamics709 is an innovative venture focusi...,Startup,Private Limited,Mature Business,Gaming & Entertainment,No


In [45]:
# List the column labels available for use as `text_column`.
df.columns

Index(['Company Name', 'Founder Name', 'Email', 'Designation', 'Mobile Number',
       'Website', 'City', 'Brief about the Company', 'I am a', 'Legal Entity',
       'Current Stage of Startup', 'Sector', 'I want to apply for funding'],
      dtype='object')

In [46]:
# Natural-language question that is embedded for retrieval and forwarded to the chat model.
query = "I need the List of Startups that are in IT Sector & in Pre Revenue Stage"

In [47]:
# Run the full pipeline with the default text column ("Company Name").
# NOTE: perform_query builds the FAISS index here, which issues one
# get_embedding request per row of df, so this cell can take a long time.
outResult = perform_query(query, df)
outResult

KeyboardInterrupt: 