In [16]:
import pandas as pd
import random

# Generate 100 sample founders data
industries = ["HealthTech", "FinTech", "CleanTech", "EdTech", "E-commerce", "AI", "SaaS", "Cybersecurity", "AgTech", "RetailTech"]
stages = ["Pre-seed", "Seed", "Series A", "Series B"]
business_models = ["B2B SaaS", "B2C Subscription", "Marketplace", "Hardware + Subscription", "Transaction Fee"]

founders_data = []
for i in range(1, 6):
    founders_data.append({
        "id": i,
        "name": f"Founder {i}",
        "industry": random.choice(industries),
        "startup_stage": random.choice(stages),
        "funding_required": f"${random.randint(100, 5000)}K",
        "traction": f"{random.randint(100, 50000)} active users",
        "business_model": random.choice(business_models),
        "location": f"City {random.randint(1, 50)}"
    })

# Generate 100 sample investors data
investment_ranges = ["$100K-500K", "$500K-3M", "$1M-10M", "$2M-15M"]
investor_focus = ["Digital Health", "Blockchain", "AI Applications", "Sustainability", "Cybersecurity", "Retail", "E-learning"]

investors_data = []
for i in range(1, 6):
    investors_data.append({
        "id": i,
        "name": f"Investor {i}",
        "preferred_industry": random.choice(industries),
        "investment_range": random.choice(investment_ranges),
        "preferred_stage": random.choice(stages),
        "key_focus_areas": random.choice(investor_focus),
        "previous_investments": random.randint(1, 20),
        "location": f"City {random.randint(1, 50)}"
    })

# Convert to DataFrames
founders_df = pd.DataFrame(founders_data)
investors_df = pd.DataFrame(investors_data)

# Save to CSV
founders_df.to_csv('founders.csv', index=False)
investors_df.to_csv('investors.csv', index=False)

print("Sample data files created successfully!")
print("- founders.csv: Contains information about 100 startup founders")
print("- investors.csv: Contains information about 100 investors")


Sample data files created successfully!
- founders.csv: Contains information about 100 startup founders
- investors.csv: Contains information about 100 investors


In [None]:
!pip install google-generativeai


In [4]:
import google.generativeai as genai


  from .autonotebook import tqdm as notebook_tqdm


In [17]:
import pandas as pd
import google.generativeai as genai
import os
import json
from typing import List, Dict, Any
import time

class FounderInvestorMatcher:
    def __init__(self, api_key: str):
        """
        Initialize the FounderInvestorMatcher with the Gemini API key.
        
        Args:
            api_key: Gemini API key for authentication
        """
        self.api_key = api_key
        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel('gemini-1.5-pro')
        self.founders_df = None
        self.investors_df = None
        
    def load_data(self, founders_path: str, investors_path: str) -> None:
        """
        Load founder and investor data from CSV files.
        
        Args:
            founders_path: Path to the founders CSV file
            investors_path: Path to the investors CSV file
        """
        self.founders_df = pd.read_csv(founders_path)
        self.investors_df = pd.read_csv(investors_path)
        
        print(f"Loaded {len(self.founders_df)} founders and {len(self.investors_df)} investors.")
        
    def _extract_founder_info(self, founder_id: int) -> Dict[str, Any]:
        """
        Extract founder information as a dictionary.
        
        Args:
            founder_id: ID of the founder to extract information for
            
        Returns:
            Dictionary containing founder information
        """
        founder_data = self.founders_df[self.founders_df['id'] == founder_id].iloc[0].to_dict()
        return founder_data
    
    def _get_all_investors_info(self) -> List[Dict[str, Any]]:
        """
        Get information for all investors.
        
        Returns:
            List of dictionaries containing investor information
        """
        return [row.to_dict() for _, row in self.investors_df.iterrows()]
    
    def calculate_match_score(self, founder_id: int) -> List[Dict[str, Any]]:
        """
        Calculate match scores between a founder and all investors using Gemini API.
        
        Args:
            founder_id: ID of the founder to match
            
        Returns:
            List of dictionaries containing investor information and match scores
        """
        founder_info = self._extract_founder_info(founder_id)
        investors_info = self._get_all_investors_info()
        
        matches = []
        
        for investor in investors_info:
            # Create a structured prompt for Gemini API
            prompt = f"""
            Task: Analyze the compatibility between a startup founder and an investor. Calculate a match score from 0-100.
            
            Founder Details:
            - Industry: {founder_info.get('industry', 'N/A')}
            - Startup Stage: {founder_info.get('startup_stage', 'N/A')}
            - Funding Required: {founder_info.get('funding_required', 'N/A')}
            - Traction: {founder_info.get('traction', 'N/A')}
            - Business Model: {founder_info.get('business_model', 'N/A')}
            
            Investor Preferences:
            - Name: {investor.get('name', 'N/A')}
            - Preferred Industry: {investor.get('preferred_industry', 'N/A')}
            - Investment Range: {investor.get('investment_range', 'N/A')}
            - Preferred Stage: {investor.get('preferred_stage', 'N/A')}
            - Key Focus Areas: {investor.get('key_focus_areas', 'N/A')}
            
            Calculate a match score from 0-100 based on alignment of industry, stage, funding requirements, and business model.
            Return your analysis and reasoning, followed by ONLY the numerical score in the format "Match Score: X".
            """
            
            try:
                # Make API call to Gemini
                response = self.model.generate_content(prompt)
                response_text = response.text
                
                # Extract the match score from the response
                score_line = [line for line in response_text.split('\n') if "Match Score:" in line]
                if score_line:
                    try:
                        score = int(score_line[0].split("Match Score:")[1].strip())
                    except:
                        # Fallback if parsing fails
                        score = 50  # Default middle score
                else:
                    score = 50  # Default middle score
                
                # Add investor info and match score to results
                investor_result = investor.copy()
                investor_result['match_score'] = score
                matches.append(investor_result)
                
                # Add small delay to avoid rate limiting
                time.sleep(0.5)
                
            except Exception as e:
                print(f"Error calculating match for investor {investor.get('name')}: {str(e)}")
                # Still include the investor but with a default score
                investor_result = investor.copy()
                investor_result['match_score'] = 0
                matches.append(investor_result)
        
        # Sort matches by score in descending order
        matches.sort(key=lambda x: x['match_score'], reverse=True)
        return matches
    
    def display_matches(self, matches: List[Dict[str, Any]]) -> None:
        """
        Display matches in a formatted way.
        
        Args:
            matches: List of dictionaries containing investor information and match scores
        """
        print("\n=== FOUNDER-INVESTOR MATCHES ===\n")
        print(f"{'Rank':<5} {'Investor':<20} {'Industry':<20} {'Stage':<15} {'Match Score':<10}")
        print("-" * 70)
        
        for i, match in enumerate(matches, 1):
            print(f"{i:<5} {match.get('name', 'N/A'):<20} {match.get('preferred_industry', 'N/A'):<20} {match.get('preferred_stage', 'N/A'):<15} {match.get('match_score', 0):<10}")

# Example usage
if __name__ == "__main__":
    # Replace with your actual API key
    API_KEY = ""  # You should replace this with your actual API key
    
    # Initialize the matcher
    matcher = FounderInvestorMatcher(API_KEY)
    
    # Load data
    matcher.load_data('founders.csv', 'investors.csv')
    
    # Calculate matches for founder with ID 1 (for example)
    founder_id = 1
    matches = matcher.calculate_match_score(founder_id)
    
    # Display matches
    matcher.display_matches(matches)

Loaded 5 founders and 5 investors.
Error calculating match for investor Investor 4: 429 Resource has been exhausted (e.g. check quota).
Error calculating match for investor Investor 5: 429 Resource has been exhausted (e.g. check quota).

=== FOUNDER-INVESTOR MATCHES ===

Rank  Investor             Industry             Stage           Match Score
----------------------------------------------------------------------
1     Investor 2           FinTech              Seed            65        
2     Investor 3           FinTech              Series B        35        
3     Investor 1           EdTech               Series B        30        
4     Investor 4           HealthTech           Seed            0         
5     Investor 5           CleanTech            Pre-seed        0         


# Flask app
