In [1]:
 import os
 import json
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [6]:
def flatten_json(y):
    """Collapse a parsed JSON value into a single space-separated string.

    Dicts contribute their values (keys are ignored) and lists contribute
    their items, both in iteration order and recursively. Every other value
    is treated as a leaf and converted with ``str``.

    Args:
        y: Any value produced by ``json.load`` (dict, list, or scalar).

    Returns:
        The leaf values joined by single spaces; ``""`` when there are none.
    """
    def _leaves(node):
        # Depth-first walk that yields leaves in the order they are encountered.
        if isinstance(node, dict):
            for child in node.values():
                yield from _leaves(child)
        elif isinstance(node, list):
            for child in node:
                yield from _leaves(child)
        else:
            yield str(node)

    return " ".join(_leaves(y))

# Folder holding one JSON document per file.
data_folder = "data"
# os.listdir() returns names in arbitrary order; sorting makes the index of
# each document (and therefore tie-breaking in the ranking) reproducible
# across runs and platforms.
json_files = sorted(f for f in os.listdir(data_folder) if f.endswith('.json'))
documents = []   # flattened text of each document
file_names = []  # file name of each document, aligned index-for-index with `documents`

for file in json_files:
    with open(os.path.join(data_folder, file), 'r', encoding='utf-8') as f:
        data = json.load(f)
    # The file is closed at this point; flattening only needs the parsed data.
    text = flatten_json(data)
    documents.append(text)
    file_names.append(file)

In [7]:
# Load the 'all-MiniLM-L6-v2' sentence-embedding model. The cell output below
# shows it being downloaded over HTTP on this run.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Embed every flattened document once. search() indexes doc_embeddings,
# documents and file_names with the same idx, so the three must stay aligned.
doc_embeddings = model.encode(documents)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [9]:
def search(query, doc_embeddings, documents, file_names, top_k=3):
    """Rank the indexed documents by cosine similarity to ``query``.

    The query is embedded with the module-level ``model`` and compared against
    every row of ``doc_embeddings``.

    Args:
        query: Free-text search string.
        doc_embeddings: One embedding per document, aligned index-for-index
            with ``documents`` and ``file_names``.
        documents: Flattened document texts, used to build the snippets.
        file_names: File name of each document.
        top_k: Maximum number of results to return. ``None`` returns all.

    Returns:
        A list of dicts, best match first, with keys ``file``, ``similarity``
        (a plain Python ``float``) and ``snippet`` (the first 300 characters
        followed by ``"..."``). The list is empty when nothing is indexed or
        ``top_k`` is not positive.
    """
    # Callers test `if not results`, so an empty index must yield [] rather
    # than an error from the similarity computation. A non-positive top_k also
    # yields [] instead of the surprising slice a negative value would produce.
    if (top_k is not None and top_k <= 0) or len(doc_embeddings) == 0:
        return []

    query_emb = model.encode([query])
    similarities = cosine_similarity(query_emb, doc_embeddings)[0]
    top_indices = np.argsort(similarities)[::-1][:top_k]
    return [
        {
            "file": file_names[idx],
            # Convert the numpy scalar so the result is JSON-serializable.
            "similarity": float(similarities[idx]),
            "snippet": documents[idx][:300] + "...",
        }
        for idx in top_indices
    ]

# Example usage: run one query against the index and print the ranked hits.
query = "AI powered crop monitoring and sustainable farming"
results = search(query, doc_embeddings, documents, file_names)
for r in results:
    # Adjacent f-string literals are concatenated, so this prints one string.
    print(
        f"File: {r['file']}, Similarity: {r['similarity']:.3f}\n"
        f"Snippet: {r['snippet']}\n"
    )

File: agritech-solutions.json, Similarity: 0.583
Snippet: 6 AgriTech Solutions Smart Farming with IoT & AI 936000000 AgriTech Solutions is revolutionizing Indian agriculture through IoT sensors, AI-powered crop monitoring, and precision farming techniques. Our platform helps 5000+ farmers increase crop yields by 35% while reducing water usage by 40%. We're...

File: greenergy-innovations.json, Similarity: 0.471
Snippet: 2 GreenEnergy Innovations Smart Solar Solutions for Urban Infrastructure 832000000 GreenEnergy Innovations is revolutionizing urban solar infrastructure with next-generation solar panels that deliver 30% higher efficiency than traditional solutions. Our integrated IoT monitoring system provides real...

File: techflow-solutions.json, Similarity: 0.369
Snippet: 1 TechFlow Solutions AI-Powered Business Analytics Platform 2080000000 TechFlow Solutions is revolutionizing how Indian businesses analyze and act on their data. Our AI-powered platform provides real-time insights

In [None]:
import requests
import json

# Load the top result's JSON data
top_result_file = results[0]['file']
with open(os.path.join(data_folder, top_result_file), 'r', encoding='utf-8') as f:
    startup_data = json.load(f)

# Load the investor schema
with open('investor_data_schema.json', 'r', encoding='utf-8') as f:
    investor_data = json.load(f)

# Construct the prompt for Gemini
prompt = f"""
You are an expert AI investment advisor. 
Given the following startup data and investor profile, provide a detailed AI insight on why this startup is the best match for the investor. 
Highlight the alignment in business model, financials, market, and team, and any unique synergies.

STARTUP DATA:
{json.dumps(startup_data, indent=2)}

INVESTOR PROFILE:
{json.dumps(investor_data, indent=2)}

Return your insight in a clear, concise, and persuasive manner.
"""

# Call Gemini API with updated model name.
# The API key is read from the environment so it never lands in the notebook
# or its saved outputs. Any key that was previously committed in this file
# should be treated as compromised and rotated.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise RuntimeError("Set the GEMINI_API_KEY environment variable before running this cell.")
url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"

# Send the key in a header instead of the query string, so it does not show up
# in URLs that get logged or echoed in error messages.
headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
data = {
    "contents": [
        {"parts": [{"text": prompt}]}
    ]
}

# Without a timeout, requests can wait indefinitely on a stalled connection.
response = requests.post(url, headers=headers, data=json.dumps(data), timeout=60)
if response.ok:
    try:
        ai_insight = response.json()['candidates'][0]['content']['parts'][0]['text']
    except (ValueError, KeyError, IndexError, TypeError):
        # A successful status can still carry a body without the expected
        # candidates/parts structure; show it instead of raising.
        print("Error: unexpected response format:", response.text)
    else:
        print("AI Insight from Gemini:\n", ai_insight)
else:
    print("Error:", response.text)

AI Insight from Gemini:
 AgriTech Solutions is an excellent match for Sarah Chen of Chen Ventures, exhibiting strong alignment across multiple key criteria.  Here's why:

**1. Business Model & Financials:** AgriTech Solutions' B2B + B2G model aligns perfectly with Chen Ventures' preferences.  While the current EBITDA is negative,  the startup demonstrates impressive revenue growth (260% YoY) exceeding Chen Ventures' 100% expectation, and projects profitability within the next year.  The $18M annual revenue comfortably surpasses Chen Ventures' minimum annual revenue requirement of $1M.  The LTV/CAC ratio of 10 significantly exceeds the investor's minimum of 3, indicating strong unit economics.  The 22-month runway falls within Chen Ventures' comfortable range (18-36 months).  The $936M valuation falls within Chen Ventures' acceptable range, and the Series A target of ₹40 Cr (~$5M USD) is well within the investor's maximum investment capacity.


**2. Market & TAM/SAM:** The Indian agricu

In [6]:
# FastAPI implementation for AI insight endpoint with persistent document storage and dynamic investor data

from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import nest_asyncio
import json
import os
import requests
from pydantic import BaseModel
from typing import Any, Dict

# Ensure FastAPI runs in notebook
# NOTE(review): presumably this lets the server's event loop run inside the
# notebook's already-running loop — confirm against the nest_asyncio docs.
nest_asyncio.apply()

# Application instance that the @app.post route decorators in this cell register on.
app = FastAPI()

# Persistent storage for documents and embeddings
# (Assume documents, doc_embeddings, file_names, data_folder are already defined in previous cells)

# Gemini API setup.
# The key is read from the GEMINI_API_KEY environment variable so it is never
# stored in the notebook. Any key that was previously committed in this file
# should be treated as compromised and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    print("WARNING: GEMINI_API_KEY is not set; requests to Gemini will not be authenticated.")
# The endpoints in this cell post to GEMINI_URL as-is, so the key stays part of the URL.
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=" + GEMINI_API_KEY

class QueryRequest(BaseModel):
    """Request body for the ``/ai_insight`` endpoint."""
    # Free-text search string used to select the best-matching startup document.
    query: str
    # Investor profile as an arbitrary JSON object; it is serialized verbatim
    # into the Gemini prompt.
    investor_data: Dict[str, Any]

@app.post("/ai_insight")
async def ai_insight(request: QueryRequest):
    """Explain why the best-matching startup fits the given investor.

    Finds the indexed document closest to ``request.query``, loads its JSON
    file, and asks Gemini for an insight on the match with
    ``request.investor_data``.

    Returns:
        ``{"ai_insight": <text>, "matched_file": <file name>}`` on success.
        Otherwise a JSON ``{"error": ...}`` response with status 400 (missing
        input), 404 (no matching document), 500 (Gemini returned a non-OK
        status) or 502 (Gemini was unreachable or replied in an unexpected
        format).
    """
    # Ships with FastAPI; imported locally so this block needs no new
    # top-of-cell import.
    from fastapi.concurrency import run_in_threadpool

    query = request.query
    investor_data = request.investor_data
    if not query:
        return JSONResponse(content={'error': 'Missing query string'}, status_code=400)
    if not investor_data:
        return JSONResponse(content={'error': 'Missing investor_data'}, status_code=400)

    # Cosine search. Embedding the query is blocking work, so it runs in a
    # worker thread to keep the event loop free for other requests.
    results = await run_in_threadpool(
        search, query, doc_embeddings, documents, file_names, top_k=1
    )
    if not results:
        return JSONResponse(content={'error': 'No matching document found'}, status_code=404)
    top_result_file = results[0]['file']
    with open(os.path.join(data_folder, top_result_file), 'r', encoding='utf-8') as f:
        startup_data = json.load(f)

    # Construct prompt
    prompt = f"""
You are an expert AI investment advisor.\nGiven the following startup data and investor profile, provide a detailed AI insight on why this startup is the best match for the investor.\nHighlight the alignment in business model, financials, market, and team, and any unique synergies.\n\nSTARTUP DATA:\n{json.dumps(startup_data, indent=2)}\n\nINVESTOR PROFILE:\n{json.dumps(investor_data, indent=2)}\n\nReturn your insight in a clear, concise, and persuasive manner.\n"""

    headers = {"Content-Type": "application/json"}
    payload = {
        "contents": [
            {"parts": [{"text": prompt}]}
        ]
    }
    try:
        # requests is synchronous, so it also runs in a worker thread. The
        # timeout prevents a stalled connection from hanging the request.
        response = await run_in_threadpool(
            requests.post, GEMINI_URL, headers=headers, data=json.dumps(payload), timeout=60
        )
    except requests.RequestException as exc:
        # Only the exception type is reported: the message may contain the
        # request URL, which carries the API key.
        return JSONResponse(
            content={'error': f'Gemini request failed ({type(exc).__name__})'},
            status_code=502,
        )
    if not response.ok:
        return JSONResponse(content={'error': response.text}, status_code=500)

    try:
        insight_text = response.json()['candidates'][0]['content']['parts'][0]['text']
    except (ValueError, KeyError, IndexError, TypeError):
        # A successful status can still carry a body without the expected
        # candidates/parts structure.
        return JSONResponse(
            content={'error': 'Unexpected response format from Gemini'},
            status_code=502,
        )
    return {"ai_insight": insight_text, "matched_file": top_result_file}
from typing import List

class SuggestionsRequest(BaseModel):
    """Request body for the ``/improvement_suggestions`` endpoint."""
    # Review-feedback objects; the list is serialized verbatim into the Gemini prompt.
    reviews: List[Dict[str, Any]]

@app.post("/improvement_suggestions")
async def improvement_suggestions(request: SuggestionsRequest):
    """Turn structured review feedback into pitch-deck improvement suggestions.

    Sends ``request.reviews`` to Gemini with instructions to answer as a JSON
    array and returns the parsed array when the reply is valid JSON.

    Returns:
        ``{"improvement_suggestions": <parsed JSON>}`` on success, or the raw
        reply text under the same key when it cannot be parsed. Otherwise a
        JSON ``{"error": ...}`` response with status 400 (empty reviews), 500
        (Gemini returned a non-OK status) or 502 (Gemini was unreachable or
        replied in an unexpected format).
    """
    # Ships with FastAPI; imported locally so this block needs no new
    # top-of-cell import.
    from fastapi.concurrency import run_in_threadpool

    reviews = request.reviews
    if not reviews:
        return JSONResponse(content={'error': 'Missing reviews list'}, status_code=400)

    prompt = f"""
You are an expert AI product advisor for startups.
Given the following structured review feedback, generate a list of actionable improvement suggestions for the startup’s pitch deck. For each suggestion, provide:
- A clear, concise title (e.g., 'Market Validation Enhancement')
- A one-sentence actionable recommendation
- Priority (High/Medium/Low) and estimated Impact (as a percentage)
- (Optional) Use an emoji or icon for visual clarity

Here is the review feedback (in JSON):
{json.dumps(reviews, indent=2)}

Return the output as a JSON array, following this format:
[
  {{
    "title": "Market Validation Enhancement",
    "suggestion": "Add customer testimonials, usage metrics, and NPS scores to strengthen market validation narrative.",
    "priority": "High",
    "impact": "85%",
    "icon": "🔍"
  }},
  ...
]
"""
    headers = {"Content-Type": "application/json"}
    payload = {
        "contents": [
            {"parts": [{"text": prompt}]}
        ]
    }
    try:
        # requests is synchronous, so it runs in a worker thread to keep the
        # event loop free. The timeout prevents a stalled connection from
        # hanging the request.
        response = await run_in_threadpool(
            requests.post, GEMINI_URL, headers=headers, data=json.dumps(payload), timeout=60
        )
    except requests.RequestException as exc:
        # Only the exception type is reported: the message may contain the
        # request URL, which carries the API key.
        return JSONResponse(
            content={'error': f'Gemini request failed ({type(exc).__name__})'},
            status_code=502,
        )
    if not response.ok:
        return JSONResponse(content={'error': response.text}, status_code=500)

    try:
        suggestions = response.json()['candidates'][0]['content']['parts'][0]['text']
    except (ValueError, KeyError, IndexError, TypeError):
        # A successful status can still carry a body without the expected
        # candidates/parts structure.
        return JSONResponse(
            content={'error': 'Unexpected response format from Gemini'},
            status_code=502,
        )

    # Fall back to the raw text whenever the reply is not valid JSON.
    suggestions_json = suggestions
    if isinstance(suggestions, str):
        cleaned = suggestions.strip()
        if cleaned.startswith("```"):
            # The reply may be wrapped in a Markdown code fence (``` or
            # ```json), which json.loads cannot parse. Drop the opening fence
            # line and the closing fence.
            cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
            cleaned = cleaned.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
        try:
            suggestions_json = json.loads(cleaned)
        except ValueError:
            suggestions_json = suggestions
    return {"improvement_suggestions": suggestions_json}
# Run FastAPI in notebook.
# Host and port can be overridden with the API_HOST / API_PORT environment
# variables, e.g. to move off a port that is already in use. The defaults are
# the original values. 0.0.0.0 listens on every network interface; set
# API_HOST=127.0.0.1 to keep these unauthenticated endpoints local.
uvicorn.run(
    app,
    host=os.environ.get("API_HOST", "0.0.0.0"),
    port=int(os.environ.get("API_PORT", "5000")),
)

# Example POST request (for testing in notebook):
# import requests
# investor_data = { ... } # your investor profile as a dict
# response = requests.post('http://127.0.0.1:5000/ai_insight', json={'query': 'AI powered crop monitoring and sustainable farming', 'investor_data': investor_data})
# print(response.json())


INFO:     Started server process [23596]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 5000): only one usage of each socket address (protocol/network address/port) is normally permitted
INFO:     Waiting for application shutdown.


INFO:     Application shutdown complete.


SystemExit: 1

In [None]:
# FastAPI implementation for AI insight endpoint with persistent document storage and dynamic investor data

from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import nest_asyncio
import json
import os
import requests
from pydantic import BaseModel
from typing import Any, Dict

# Ensure FastAPI runs in notebook
# NOTE(review): presumably this lets the server's event loop run inside the
# notebook's already-running loop — confirm against the nest_asyncio docs.
nest_asyncio.apply()

# Application instance that the @app.post route decorators in this cell register on.
app = FastAPI()

# Persistent storage for documents and embeddings
# (Assume documents, doc_embeddings, file_names, data_folder are already defined in previous cells)

# Gemini API setup.
# The key is read from the GEMINI_API_KEY environment variable so it is never
# stored in the notebook. Any key that was previously committed in this file
# should be treated as compromised and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    print("WARNING: GEMINI_API_KEY is not set; requests to Gemini will not be authenticated.")
# The endpoints in this cell post to GEMINI_URL as-is, so the key stays part of the URL.
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=" + GEMINI_API_KEY

class QueryRequest(BaseModel):
    """Request body for the ``/ai_insight`` endpoint."""
    # Free-text search string used to select the best-matching startup document.
    query: str
    # Investor profile as an arbitrary JSON object; it is serialized verbatim
    # into the Gemini prompt.
    investor_data: Dict[str, Any]

@app.post("/ai_insight")
async def ai_insight(request: QueryRequest):
    """Explain why the best-matching startup fits the given investor.

    Finds the indexed document closest to ``request.query``, loads its JSON
    file, and asks Gemini for an insight on the match with
    ``request.investor_data``.

    Returns:
        ``{"ai_insight": <text>, "matched_file": <file name>}`` on success.
        Otherwise a JSON ``{"error": ...}`` response with status 400 (missing
        input), 404 (no matching document), 500 (Gemini returned a non-OK
        status) or 502 (Gemini was unreachable or replied in an unexpected
        format).
    """
    # Ships with FastAPI; imported locally so this block needs no new
    # top-of-cell import.
    from fastapi.concurrency import run_in_threadpool

    query = request.query
    investor_data = request.investor_data
    if not query:
        return JSONResponse(content={'error': 'Missing query string'}, status_code=400)
    if not investor_data:
        return JSONResponse(content={'error': 'Missing investor_data'}, status_code=400)

    # Cosine search. Embedding the query is blocking work, so it runs in a
    # worker thread to keep the event loop free for other requests.
    results = await run_in_threadpool(
        search, query, doc_embeddings, documents, file_names, top_k=1
    )
    if not results:
        return JSONResponse(content={'error': 'No matching document found'}, status_code=404)
    top_result_file = results[0]['file']
    with open(os.path.join(data_folder, top_result_file), 'r', encoding='utf-8') as f:
        startup_data = json.load(f)

    # Construct prompt
    prompt = f"""
You are an expert AI investment advisor.\nGiven the following startup data and investor profile, provide a detailed AI insight on why this startup is the best match for the investor.\nHighlight the alignment in business model, financials, market, and team, and any unique synergies.\n\nSTARTUP DATA:\n{json.dumps(startup_data, indent=2)}\n\nINVESTOR PROFILE:\n{json.dumps(investor_data, indent=2)}\n\nReturn your insight in a clear, concise, and persuasive manner.\n"""

    headers = {"Content-Type": "application/json"}
    payload = {
        "contents": [
            {"parts": [{"text": prompt}]}
        ]
    }
    try:
        # requests is synchronous, so it also runs in a worker thread. The
        # timeout prevents a stalled connection from hanging the request.
        response = await run_in_threadpool(
            requests.post, GEMINI_URL, headers=headers, data=json.dumps(payload), timeout=60
        )
    except requests.RequestException as exc:
        # Only the exception type is reported: the message may contain the
        # request URL, which carries the API key.
        return JSONResponse(
            content={'error': f'Gemini request failed ({type(exc).__name__})'},
            status_code=502,
        )
    if not response.ok:
        return JSONResponse(content={'error': response.text}, status_code=500)

    try:
        insight_text = response.json()['candidates'][0]['content']['parts'][0]['text']
    except (ValueError, KeyError, IndexError, TypeError):
        # A successful status can still carry a body without the expected
        # candidates/parts structure.
        return JSONResponse(
            content={'error': 'Unexpected response format from Gemini'},
            status_code=502,
        )
    return {"ai_insight": insight_text, "matched_file": top_result_file}
from typing import List

class SuggestionsRequest(BaseModel):
    """Request body for the ``/improvement_suggestions`` endpoint."""
    # Review-feedback objects; the list is serialized verbatim into the Gemini prompt.
    reviews: List[Dict[str, Any]]

@app.post("/improvement_suggestions")
async def improvement_suggestions(request: SuggestionsRequest):
    """Turn structured review feedback into pitch-deck improvement suggestions.

    Sends ``request.reviews`` to Gemini with instructions to answer as a JSON
    array and returns the parsed array when the reply is valid JSON.

    Returns:
        ``{"improvement_suggestions": <parsed JSON>}`` on success, or the raw
        reply text under the same key when it cannot be parsed. Otherwise a
        JSON ``{"error": ...}`` response with status 400 (empty reviews), 500
        (Gemini returned a non-OK status) or 502 (Gemini was unreachable or
        replied in an unexpected format).
    """
    # Ships with FastAPI; imported locally so this block needs no new
    # top-of-cell import.
    from fastapi.concurrency import run_in_threadpool

    reviews = request.reviews
    if not reviews:
        return JSONResponse(content={'error': 'Missing reviews list'}, status_code=400)

    prompt = f"""
You are an expert AI product advisor for startups.
Given the following structured review feedback, generate a list of actionable improvement suggestions for the startup’s pitch deck. For each suggestion, provide:
- A clear, concise title (e.g., 'Market Validation Enhancement')
- A one-sentence actionable recommendation
- Priority (High/Medium/Low) and estimated Impact (as a percentage)
- (Optional) Use an emoji or icon for visual clarity

Here is the review feedback (in JSON):
{json.dumps(reviews, indent=2)}

Return the output as a JSON array, following this format:
[
  {{
    "title": "Market Validation Enhancement",
    "suggestion": "Add customer testimonials, usage metrics, and NPS scores to strengthen market validation narrative.",
    "priority": "High",
    "impact": "85%",
    "icon": "🔍"
  }},
  ...
]
"""
    headers = {"Content-Type": "application/json"}
    payload = {
        "contents": [
            {"parts": [{"text": prompt}]}
        ]
    }
    try:
        # requests is synchronous, so it runs in a worker thread to keep the
        # event loop free. The timeout prevents a stalled connection from
        # hanging the request.
        response = await run_in_threadpool(
            requests.post, GEMINI_URL, headers=headers, data=json.dumps(payload), timeout=60
        )
    except requests.RequestException as exc:
        # Only the exception type is reported: the message may contain the
        # request URL, which carries the API key.
        return JSONResponse(
            content={'error': f'Gemini request failed ({type(exc).__name__})'},
            status_code=502,
        )
    if not response.ok:
        return JSONResponse(content={'error': response.text}, status_code=500)

    try:
        suggestions = response.json()['candidates'][0]['content']['parts'][0]['text']
    except (ValueError, KeyError, IndexError, TypeError):
        # A successful status can still carry a body without the expected
        # candidates/parts structure.
        return JSONResponse(
            content={'error': 'Unexpected response format from Gemini'},
            status_code=502,
        )

    # Fall back to the raw text whenever the reply is not valid JSON.
    suggestions_json = suggestions
    if isinstance(suggestions, str):
        cleaned = suggestions.strip()
        if cleaned.startswith("```"):
            # The reply may be wrapped in a Markdown code fence (``` or
            # ```json), which json.loads cannot parse. Drop the opening fence
            # line and the closing fence.
            cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
            cleaned = cleaned.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
        try:
            suggestions_json = json.loads(cleaned)
        except ValueError:
            suggestions_json = suggestions
    return {"improvement_suggestions": suggestions_json}
# Run FastAPI in notebook.
# Host and port can be overridden with the API_HOST / API_PORT environment
# variables, e.g. to move off a port that is already in use. The defaults are
# the original values. 0.0.0.0 listens on every network interface; set
# API_HOST=127.0.0.1 to keep these unauthenticated endpoints local.
uvicorn.run(
    app,
    host=os.environ.get("API_HOST", "0.0.0.0"),
    port=int(os.environ.get("API_PORT", "5000")),
)

# Example POST request (for testing in notebook):
# import requests
# investor_data = { ... } # your investor profile as a dict
# response = requests.post('http://127.0.0.1:5000/ai_insight', json={'query': 'AI powered crop monitoring and sustainable farming', 'investor_data': investor_data})
# print(response.json())


INFO:     Started server process [23596]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 5000): only one usage of each socket address (protocol/network address/port) is normally permitted
INFO:     Waiting for application shutdown.


INFO:     Application shutdown complete.


SystemExit: 1