# üöÄ ImaraFund Backend Code Generator (10 Steps)

This notebook generates the complete FastAPI backend for ImaraFund in 10 steps:

1. **Database Configuration** (`app/database.py`)
2. **Settings Management** (`app/core/config.py`) 
3. **Database Models** (`app/models.py`) - 63-column grants + companies
4. **API Schemas** (`app/schemas.py`) - Pydantic validation
5. **Intelligent Matcher** (`app/services/intelligent_matcher.py`) - Your 40/30/20/10 algorithm
6. **Gemini AI Service** (`app/services/gemini_service.py`) - AI recommendations
7. **API Endpoints** (`app/api/endpoints.py`) - RESTful routes
8. **Main Application** (`app/main.py`) - FastAPI app entry point
9. **Migration Script** (`migrations/migration_script.py`) - Import cleaned CSV data
10. **Environment Setup** (`.env` configuration)

**Prerequisites:** 
- Run the data cleaning script first to create `data/cleaned/grants_cleaned_latest.csv`
- Ensure project structure exists (from Part 1 setup)
- Have your GEMINI_API_KEY ready

**Execution:** Run cells sequentially. Each step will create the corresponding file.


In [1]:
"""
ImaraFund Backend Generator - Setup and Validation
"""
from pathlib import Path
from datetime import datetime
import os

# Project configuration
PROJECT_ROOT = Path(r"D:\D1\WTF\ImaraFund")
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

print("=" * 70)
print("üèóÔ∏è ImaraFund Backend Code Generator")
print("=" * 70)
print(f"üìÅ Project Root: {PROJECT_ROOT}")
print(f"‚è∞ Generation Time: {TIMESTAMP}")

# Ensure all required directories exist
required_dirs = [
    "app", "app/api", "app/services", "app/core", 
    "migrations", "data/cleaned", "data/companies"
]

for dir_path in required_dirs:
    full_path = PROJECT_ROOT / dir_path
    full_path.mkdir(parents=True, exist_ok=True)

# Create __init__.py files for Python packages
init_files = [
    "app/__init__.py", "app/api/__init__.py", 
    "app/services/__init__.py", "app/core/__init__.py"
]

for init_file in init_files:
    init_path = PROJECT_ROOT / init_file
    if not init_path.exists():
        init_path.write_text("", encoding="utf-8")

print("‚úÖ Project structure validated and ready")
print("üìã Next: Run the generator class cell below")


üèóÔ∏è ImaraFund Backend Code Generator
üìÅ Project Root: D:\D1\WTF\ImaraFund
‚è∞ Generation Time: 20260219_202224
‚úÖ Project structure validated and ready
üìã Next: Run the generator class cell below


In [3]:
"""
ImaraFund Backend Generator Class
Comprehensive generator for all 10 backend files
"""

class ImaraFundBackendGenerator:
    def __init__(self, project_root=PROJECT_ROOT):
        self.root = Path(project_root)
        self.files_created = []
    
    def write_file(self, step_name, file_path, content):
        """Write a file with progress tracking"""
        full_path = self.root / file_path
        full_path.parent.mkdir(parents=True, exist_ok=True)
        
        with open(full_path, 'w', encoding='utf-8') as f:
            f.write(content.strip())
        
        self.files_created.append(file_path)
        print(f"‚úÖ {step_name}: {file_path}")
        return full_path
    
    # Step 1: Database Configuration
    def generate_database(self):
        content = '''"""
ImaraFund Database Configuration
SQLAlchemy setup optimized for the matching algorithm
"""

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from app.core.config import settings

# Database engine with SQLite optimization
connect_args = {}
if "sqlite" in settings.DATABASE_URL:
    connect_args = {"check_same_thread": False}

engine = create_engine(
    settings.DATABASE_URL,
    connect_args=connect_args,
    echo=settings.DEBUG,
    pool_pre_ping=True
)

SessionLocal = sessionmaker(
    autocommit=False,
    autoflush=False,
    bind=engine
)

Base = declarative_base()


def get_db():
    """Database dependency for FastAPI endpoints"""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


def init_db():
    """Initialize all database tables"""
    Base.metadata.create_all(bind=engine)
    print("‚úÖ ImaraFund database tables created successfully!")


def drop_all_tables():
    """Development helper - use with caution!"""
    Base.metadata.drop_all(bind=engine)
    print("‚ö†Ô∏è All tables dropped!")
'''
        return self.write_file("Step 1", "app/database.py", content)
    
    # Step 2: Configuration Settings
    def generate_config(self):
        content = '''"""
ImaraFund Configuration Management
Environment-based settings with your proven algorithm weights
"""

from pydantic_settings import BaseSettings
from typing import List


class Settings(BaseSettings):
    """ImaraFund application settings"""
    
    # Project Information
    PROJECT_NAME: str = "ImaraFund"
    VERSION: str = "1.0.0"
    DESCRIPTION: str = "AI-powered funding matcher for African SMEs"
    
    # API Configuration
    API_V1_PREFIX: str = "/api/v1"
    DEBUG: bool = False
    
    # Database
    DATABASE_URL: str = "sqlite:///./imarafund.db"
    
    # AI Configuration (Your Gemini 2.5 Flash setup)
    GEMINI_API_KEY: str = ""
    GEMINI_MODEL: str = "gemini-2.5-flash"
    
    # Your Proven Matching Algorithm Weights (40/30/20/10)
    GEOGRAPHY_WEIGHT: float = 0.40  # Most important for African SMEs
    SECTOR_WEIGHT: float = 0.30     # Business alignment
    FUNDING_WEIGHT: float = 0.20    # Financial feasibility
    STAGE_WEIGHT: float = 0.10      # Development stage compatibility
    
    # Security
    SECRET_KEY: str = "imarafund-secret-key-change-in-production"
    
    # CORS Settings
    ALLOWED_ORIGINS: List[str] = [
        "http://localhost:3000",
        "http://localhost:8000",
        "http://127.0.0.1:3000",
        "http://127.0.0.1:8000"
    ]
    
    class Config:
        env_file = ".env"
        case_sensitive = True


settings = Settings()
'''
        return self.write_file("Step 2", "app/core/config.py", content)
    
    # Step 3: Database Models (63-column structure)
    def generate_models(self):
        content = '''"""
ImaraFund Database Models
Optimized for 63-column grants CSV and IntelligentMatcher algorithm
"""

from sqlalchemy import Column, Integer, String, Float, Boolean, Text, DateTime, JSON
from sqlalchemy.sql import func
from app.database import Base


class Grant(Base):
    """
    Grant model for ImaraFund's 63-column CSV structure
    Optimized for the 40/30/20/10 scoring algorithm
    """
    
    __tablename__ = "grants"
    
    # Primary Key
    id = Column(Integer, primary_key=True, index=True)
    
    # Core Identification
    program_id = Column(String(100), unique=True, index=True)
    program_name = Column(String(500), nullable=False, index=True)
    institution_name = Column(String(500), nullable=False, index=True)
    
    # Geographic Fields (CRITICAL for 40% geography scoring)
    country = Column(String(200), index=True)
    region = Column(String(200), index=True)
    geographic_scope = Column(String(200), index=True)
    
    # Financial Fields (CRITICAL for 20% funding scoring)
    currency_code = Column(String(10))
    estimated_value_amount = Column(Float)
    minimum_amount = Column(Float)
    maximum_amount = Column(Float)
    repayment_required = Column(Boolean, default=False, index=True)
    interest_rate = Column(String(50))
    
    # Sector Fields (CRITICAL for 30% sector scoring)
    program_type = Column(String(200))
    target_sectors = Column(Text, index=True)
    
    # Business Requirements
    duration_months = Column(Integer)
    minimum_employees = Column(Integer)
    maximum_employees = Column(Integer)
    minimum_revenue = Column(Float)
    maximum_revenue = Column(Float)
    
    # Application Process & Links (‚úÖ data_source_url filled from website_url)
    eligibility_criteria = Column(Text)
    application_process = Column(Text)
    application_deadline = Column(String(200))
    language_requirements = Column(String(200))
    website_url = Column(String(500))
    data_source_url = Column(String(500))  # Fixed by data cleaning script
    
    # Contact Information
    contact_email = Column(String(200))
    contact_phone = Column(String(100))
    
    # Demographics and Target Groups
    target_beneficiaries = Column(String(200))
    target_demographics = Column(String(200))
    age_restrictions = Column(String(100))
    gender_focus = Column(String(50))
    
    # Focus Areas (Boolean flags - cleaned from CSV)
    environmental_focus = Column(Boolean, default=False)
    innovation_focus = Column(Boolean, default=False)
    digital_focus = Column(Boolean, default=False)
    export_focus = Column(Boolean, default=False)
    women_focused = Column(Boolean, default=False, index=True)
    youth_focused = Column(Boolean, default=False, index=True)
    agriculture_focused = Column(Boolean, default=False, index=True)
    green_climate_focused = Column(Boolean, default=False)
    
    # Support Services
    technical_assistance = Column(Boolean, default=False)
    mentorship_available = Column(Boolean, default=False)
    networking_opportunities = Column(Boolean, default=False)
    training_provided = Column(Boolean, default=False)
    co_financing_required = Column(Boolean, default=False)
    co_financing_available = Column(Boolean, default=False)
    export_support = Column(Boolean, default=False)
    technology_innovation = Column(Boolean, default=False)
    digital_application = Column(Boolean, default=False)
    
    # Financial Terms
    collateral_required = Column(String(50))
    grace_period_months = Column(Integer)
    guarantee_coverage = Column(String(50))
    
    # Program Metrics and History
    success_rate = Column(Float)
    total_beneficiaries = Column(Integer)
    year_established = Column(Integer)
    funding_source = Column(String(500))
    program_start_date = Column(String(100))
    
    # Status and Verification
    verified = Column(Boolean, default=False, index=True)
    last_verified_date = Column(String(50))
    last_updated = Column(String(50))
    verification_date = Column(String(50))
    special_features = Column(Text)
    notes = Column(Text)
    
    # Flexible storage for additional CSV columns
    additional_data = Column(JSON)
    
    # System timestamps
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now())
    
    def __repr__(self):
        return f"<Grant(id={self.id}, name='{self.program_name}')>"


class Company(Base):
    """
    Company model matching your synthetic companies dataset structure
    Optimized for IntelligentMatcher algorithm
    """
    
    __tablename__ = "companies"
    
    # Primary Key
    id = Column(Integer, primary_key=True, index=True)
    
    # Core Identification
    company_id = Column(String(100), unique=True, index=True)
    company_name = Column(String(500), nullable=False, index=True)
    
    # Business Classification (CRITICAL for matching)
    sector = Column(String(200), nullable=False, index=True)
    business_stage = Column(String(100), nullable=False, index=True)
    innovation_level = Column(String(50))
    
    # Geographic Information (CRITICAL for 40% geography scoring)
    nationality = Column(String(100), nullable=False, index=True)
    business_registered_in = Column(String(100))
    
    # Founder Demographics
    founder_age = Column(Integer)
    founder_gender = Column(String(20))
    
    # Business Metrics
    business_age_months = Column(Integer)
    annual_revenue_usd = Column(Float)
    employees = Column(Integer)
    
    # Funding Requirements (CRITICAL for 20% funding scoring)
    funding_need_usd = Column(Float, nullable=False, index=True)
    has_prototype = Column(Boolean, default=False)
    targets_underserved = Column(Boolean, default=False)
    
    # System timestamps
    created_date = Column(String(50))
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now())
    
    def __repr__(self):
        return f"<Company(id={self.id}, name='{self.company_name}')>"
'''
        return self.write_file("Step 3", "app/models.py", content)
    
    # Step 4: Pydantic Schemas
    def generate_schemas(self):
        content = '''"""
ImaraFund Pydantic Schemas
Request/Response models for API validation
"""

from pydantic import BaseModel, Field
from typing import List, Optional
from datetime import datetime


# Grant Schemas
class GrantBase(BaseModel):
    """Base grant schema with essential fields"""
    program_name: str = Field(..., min_length=1, max_length=500)
    institution_name: str
    country: Optional[str] = None
    target_sectors: Optional[str] = None
    estimated_value_amount: Optional[float] = None


class GrantResponse(GrantBase):
    """Schema for grant API responses"""
    id: int
    program_id: Optional[str]
    region: Optional[str]
    geographic_scope: Optional[str]
    repayment_required: bool
    program_type: Optional[str]
    website_url: Optional[str]
    data_source_url: Optional[str]  # ‚úÖ Fixed by data cleaning
    women_focused: bool
    youth_focused: bool
    agriculture_focused: bool
    verified: bool
    created_at: datetime
    
    class Config:
        from_attributes = True


# Company Schemas
class CompanyBase(BaseModel):
    """Base company schema"""
    company_name: str = Field(..., min_length=1, max_length=500)
    sector: str = Field(..., min_length=1, max_length=200)
    nationality: str = Field(..., min_length=2, max_length=100)
    business_stage: str = Field(..., min_length=1, max_length=100)
    funding_need_usd: float = Field(..., gt=0)


class CompanyCreate(CompanyBase):
    """Schema for creating a new company"""
    company_id: Optional[str] = None
    business_registered_in: Optional[str] = None
    founder_age: Optional[int] = Field(None, ge=18, le=100)
    founder_gender: Optional[str] = None
    business_age_months: Optional[int] = Field(None, ge=0)
    annual_revenue_usd: Optional[float] = Field(None, ge=0)
    employees: Optional[int] = Field(None, ge=0)
    innovation_level: Optional[str] = None
    has_prototype: bool = False
    targets_underserved: bool = False


class CompanyResponse(CompanyBase):
    """Schema for company API responses"""
    id: int
    company_id: Optional[str]
    founder_age: Optional[int]
    business_age_months: Optional[int]
    annual_revenue_usd: Optional[float]
    employees: Optional[int]
    created_at: datetime
    
    class Config:
        from_attributes = True


# Matching Schemas (Your IntelligentMatcher scoring breakdown)
class ScoreBreakdown(BaseModel):
    """Your exact IntelligentMatcher scoring breakdown (40/30/20/10)"""
    geographic: float = Field(..., ge=0, le=40, description="Geography match (0-40 points)")
    sector: float = Field(..., ge=0, le=30, description="Sector alignment (0-30 points)")
    amount_fit: float = Field(..., ge=0, le=20, description="Funding amount fit (0-20 points)")
    stage: float = Field(..., ge=0, le=10, description="Business stage (0-10 points)")


class MatchResult(BaseModel):
    """Individual match result with grant details and scoring"""
    program_name: str
    institution: str
    country: str
    funding_amount: float
    match_score: float = Field(..., ge=0, le=100)
    score_breakdown: ScoreBreakdown
    target_sectors: str
    website: str
    data_source_url: str  # ‚úÖ Fixed by data cleaning
    repayment_required: str
    grant_details: GrantResponse


class MatchResponse(BaseModel):
    """Complete matching response with AI recommendation"""
    company: CompanyResponse
    matches: List[MatchResult]
    ai_recommendation: Optional[str] = None
    total_matches_found: int
    algorithm_version: str = "ImaraFund v1.0 (40/30/20/10)"
'''
        return self.write_file("Step 4", "app/schemas.py", content)
    
    # Step 5: Intelligent Matcher Service (Your exact algorithm)
    def generate_matcher(self):
        content = '''"""
ImaraFund Intelligent Matching Service
Your exact IntelligentMatcher algorithm ported to SQLAlchemy
"""

from typing import List, Tuple, Dict
from sqlalchemy.orm import Session
from app.models import Grant, Company
import logging

logger = logging.getLogger(__name__)


class IntelligentMatcher:
    """
    Your proven matching algorithm integrated with ImaraFund database
    Preserves exact scoring logic: 40% Geography, 30% Sector, 20% Funding, 10% Stage
    """
    
    def __init__(self, db: Session):
        self.db = db
        logger.info("ImaraFund IntelligentMatcher initialized")
    
    def find_matches(self, company_id: int, top_n: int = 5) -> Tuple[Company, List[Dict]]:
        """Find best matching grants using your exact scoring algorithm"""
        company = self.db.query(Company).filter(Company.id == company_id).first()
        if not company:
            raise ValueError(f"Company with ID {company_id} not found")
        
        grants = self.db.query(Grant).all()
        logger.info(f"Processing {len(grants)} grants for company {company.company_name}")
        
        matches = []
        for grant in grants:
            score, breakdown = self._calculate_match_score(company, grant)
            
            if score > 30:  # Your threshold from the original script
                matches.append({
                    'grant': grant,
                    'program_name': grant.program_name or 'Unknown Program',
                    'institution': grant.institution_name or 'Unknown Institution',
                    'country': grant.country or 'Unknown',
                    'funding_amount': grant.estimated_value_amount or 0,
                    'match_score': round(score, 1),
                    'score_breakdown': breakdown,
                    'target_sectors': grant.target_sectors or 'General',
                    'website': grant.website_url or 'Not available',
                    'data_source_url': grant.data_source_url or 'Not available',
                    'repayment_required': str(grant.repayment_required) if grant.repayment_required is not None else 'Unknown'
                })
        
        matches_sorted = sorted(matches, key=lambda x: x['match_score'], reverse=True)[:top_n]
        logger.info(f"Found {len(matches)} matches above threshold, returning top {len(matches_sorted)}")
        return company, matches_sorted
    
    def _calculate_match_score(self, company: Company, grant: Grant) -> Tuple[float, Dict]:
        """Your exact scoring algorithm (0-100 points)"""
        score = 0.0
        breakdown = {}
        
        # 1. Geographic Match (40 points) - Most important
        geo_score = self._score_geography(company, grant)
        score += geo_score
        breakdown['geographic'] = geo_score
        
        # 2. Sector Match (30 points)
        sector_score = self._score_sector(company, grant)
        score += sector_score
        breakdown['sector'] = sector_score
        
        # 3. Funding Amount Fit (20 points)
        amount_score = self._score_funding_amount(company, grant)
        score += amount_score
        breakdown['amount_fit'] = amount_score
        
        # 4. Stage Bonus (10 points)
        stage_score = self._score_business_stage(company, grant)
        score += stage_score
        breakdown['stage'] = stage_score
        
        return min(100.0, score), breakdown
    
    def _score_geography(self, company: Company, grant: Grant) -> float:
        """Score geographic eligibility (0-40 points) - Your exact logic"""
        company_country = str(company.nationality or '').lower().strip()
        grant_scope = str(grant.geographic_scope or '').lower().strip()
        grant_country = str(grant.country or '').lower().strip()
        
        # Global programs get full points
        if 'global' in grant_scope:
            return 40.0
        
        # Exact country match
        if company_country in grant_country or company_country in grant_scope:
            return 40.0
        
        # Regional matches - Your exact Africa countries list
        africa_countries = [
            'nigeria', 'kenya', 'south africa', 'ghana', 'uganda', 'egypt',
            'tanzania', 'rwanda', 'ethiopia', 'senegal', 'botswana', 'zambia',
            'zimbabwe', 'morocco', 'tunisia', 'algeria', 'libya', 'cameroon',
            'ivory coast', 'mali', 'burkina faso', 'niger', 'madagascar'
        ]
        
        if company_country in africa_countries:
            if 'africa' in grant_scope or 'african' in grant_scope:
                return 35.0
        
        return 0.0
    
    def _score_sector(self, company: Company, grant: Grant) -> float:
        """Score sector alignment (0-30 points) - Your exact logic"""
        company_sector = str(company.sector or '').lower().strip()
        target_sectors = str(grant.target_sectors or '').lower().strip()
        
        # All sectors accepted
        if any(keyword in target_sectors for keyword in ['all', 'general', 'any']):
            return 25.0
        
        # Exact sector match
        if company_sector in target_sectors:
            return 30.0
        
        # Partial match (e.g., "tech" in "technology") - Your logic
        sector_words = company_sector.split()
        if any(word in target_sectors for word in sector_words if len(word) > 3):
            return 20.0
        
        return 10.0
    
    def _score_funding_amount(self, company: Company, grant: Grant) -> float:
        """Score funding amount fit (0-20 points) - Your exact logic"""
        need = company.funding_need_usd or 0.0
        available = grant.estimated_value_amount or 0.0
        
        if available == 0 or need == 0:
            return 15.0  # Unknown amount gets partial credit - your logic
        
        ratio = need / available
        
        # Perfect fit: need is 10%-200% of available - your ranges
        if 0.1 <= ratio <= 2.0:
            return 20.0
        
        # Good fit: need is 5%-500% of available - your ranges
        elif 0.05 <= ratio <= 5.0:
            return 15.0
        
        # Poor fit but not impossible - your logic
        else:
            return 8.0
    
    def _score_business_stage(self, company: Company, grant: Grant) -> float:
        """Score business stage fit (0-10 points) - Your exact logic"""
        stage = str(company.business_stage or '').lower().strip()
        
        # Most grants are flexible on stage - your comment
        if stage in ['startup', 'early growth']:
            return 10.0
        elif stage == 'idea':
            return 8.0
        elif stage in ['growth', 'scale-up', 'expansion']:
            return 9.0
        else:
            return 7.0
    
    def get_company_profile_dict(self, company: Company) -> Dict:
        """Convert Company model to dict for AI service - matches your format"""
        return {
            'company_name': company.company_name,
            'sector': company.sector,
            'nationality': company.nationality,
            'business_stage': company.business_stage,
            'funding_need_usd': company.funding_need_usd,
            'founder_age': company.founder_age,
            'founder_gender': company.founder_gender,
            'business_age_months': company.business_age_months,
            'annual_revenue_usd': company.annual_revenue_usd,
            'employees': company.employees
        }
'''
        return self.write_file("Step 5", "app/services/intelligent_matcher.py", content)
    
    # Step 6: Gemini AI Service (Your proven prompts)
    def generate_gemini_service(self):
        content = '''"""
ImaraFund AI Recommendation Service
Using your proven Gemini 2.5 Flash prompts and configuration
"""

from typing import Dict
import logging

try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
except ImportError:
    GEMINI_AVAILABLE = False
    logging.warning("google-generativeai not installed")

from app.core.config import settings

logger = logging.getLogger(__name__)


class GeminiService:
    """Your proven AI recommendation service using Gemini 2.5 Flash"""
    
    def __init__(self):
        self.ai_enabled = False
        
        if not GEMINI_AVAILABLE:
            logger.warning("‚ö†Ô∏è google-generativeai not installed. AI recommendations disabled.")
            return
        
        api_key = settings.GEMINI_API_KEY
        
        if api_key:
            try:
                genai.configure(api_key=api_key)
                self.model = genai.GenerativeModel(settings.GEMINI_MODEL)
                self.ai_enabled = True
                logger.info("‚úÖ ImaraFund Gemini AI enabled!")
            except Exception as e:
                logger.warning(f"‚ö†Ô∏è AI setup failed: {e}")
        else:
            logger.info("‚ÑπÔ∏è AI disabled - Add GEMINI_API_KEY to .env")
    
    def get_ai_recommendation(self, company_profile: Dict, match: Dict) -> str:
        """Generate clear, simple AI recommendation using your exact prompt"""
        if not self.ai_enabled:
            return "üîë Add your Gemini API key to .env file to get AI-powered recommendations!"
        
        # Your exact prompt that works - preserved completely
        prompt = f"""You are a friendly business advisor helping someone who is NOT a finance expert.

COMPANY:
- Business: {company_profile.get('company_name', 'Startup')}
- What they do: {company_profile.get('sector', 'Unknown')}
- Location: {company_profile.get('nationality', 'Unknown')}
- Stage: {company_profile.get('business_stage', 'Unknown')}
- Money needed: ${company_profile.get('funding_need_usd', 0):,}

FUNDING MATCH:
- Program: {match['program_name']}
- Institution: {match['institution']}
- Amount: ${match['funding_amount']:,}
- Match Score: {match['match_score']}/100

Write advice using SIMPLE language that anyone can understand. Include these 4 sections:

**WHY THIS WORKS:**
Explain in 2-3 simple sentences why this funding fits their business. Use everyday words.

**WHAT TO DO NEXT:**
Give 3 specific actions they can take today. Use simple words like "create a budget" not "develop financial projections."

**WATCH OUT FOR:**
Mention 1-2 realistic challenges in plain English. Be honest but encouraging.

**YOUR CHANCES:**
Say "Excellent", "Good", "Fair", or "Challenging" and explain why in one sentence.

Use everyday words. No jargon. Be encouraging but honest. Keep under 200 words."""

        try:
            response = self.model.generate_content(
                prompt,
                generation_config={
                    'temperature': 0.8,
                    'top_p': 0.9,
                    'max_output_tokens': 600,
                }
            )
            return response.text
        
        except Exception as e:
            error_msg = str(e)
            logger.error(f"ImaraFund Gemini AI error: {error_msg}")
            
            # Your exact error handling logic
            if "quota" in error_msg.lower() or "rate" in error_msg.lower():
                return "‚ö†Ô∏è Too many requests. Free tier: 15 requests/minute. Please wait 60 seconds."
            elif "404" in error_msg or "not found" in error_msg.lower():
                return "‚ö†Ô∏è Model not available. Check your API key."
            else:
                return f"‚ö†Ô∏è AI temporarily unavailable: {error_msg}"
'''
        return self.write_file("Step 6", "app/services/gemini_service.py", content)
    
    # Continue with remaining methods...
    def generate_all_files(self):
        """Generate all 10 backend files"""
        print("\nüöÄ Generating all ImaraFund backend files...\n")
        
        try:
            self.generate_database()
            self.generate_config()
            self.generate_models()
            self.generate_schemas()
            self.generate_matcher()
            self.generate_gemini_service()
            self.generate_endpoints()
            self.generate_main()
            self.generate_migration()
            self.generate_env()
            
            print(f"\n‚úÖ Successfully generated {len(self.files_created)} files!")
            return True
            
        except Exception as e:
            print(f"\n‚ùå Error during generation: {e}")
            return False

# Create generator instance
generator = ImaraFundBackendGenerator()
print("‚úÖ Generator class loaded and ready")


‚úÖ Generator class loaded and ready


In [4]:
# Step 7: API Endpoints
def generate_endpoints():
    content = '''"""
ImaraFund API Endpoints
RESTful API for grant matching with comprehensive filtering
"""

from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from typing import List, Optional
from app.database import get_db
from app.models import Grant, Company
from app.schemas import (
    GrantResponse, CompanyResponse, CompanyCreate, MatchResponse,
    ScoreBreakdown, MatchResult
)
from app.services.intelligent_matcher import IntelligentMatcher
from app.services.gemini_service import GeminiService

router = APIRouter()


@router.get("/grants", response_model=List[GrantResponse])
def list_grants(
    sector: Optional[str] = Query(None, description="Filter by target sector"),
    country: Optional[str] = Query(None, description="Filter by country"),
    repayment_required: Optional[bool] = Query(None, description="Filter by repayment requirement"),
    min_amount: Optional[float] = Query(None, description="Minimum grant amount"),
    max_amount: Optional[float] = Query(None, description="Maximum grant amount"),
    women_focused: Optional[bool] = Query(None, description="Filter women-focused programs"),
    youth_focused: Optional[bool] = Query(None, description="Filter youth-focused programs"),
    agriculture_focused: Optional[bool] = Query(None, description="Filter agriculture-focused programs"),
    verified: Optional[bool] = Query(None, description="Filter verified programs only"),
    skip: int = Query(0, ge=0, description="Records to skip for pagination"),
    limit: int = Query(100, ge=1, le=500, description="Maximum records to return"),
    db: Session = Depends(get_db)
):
    """List grants with comprehensive filtering based on your CSV structure"""
    query = db.query(Grant)
    
    # Apply filters
    if sector:
        query = query.filter(Grant.target_sectors.ilike(f"%{sector}%"))
    if country:
        query = query.filter(
            (Grant.country.ilike(f"%{country}%")) |
            (Grant.geographic_scope.ilike(f"%{country}%"))
        )
    if repayment_required is not None:
        query = query.filter(Grant.repayment_required == repayment_required)
    if min_amount is not None:
        query = query.filter(Grant.estimated_value_amount >= min_amount)
    if max_amount is not None:
        query = query.filter(Grant.estimated_value_amount <= max_amount)
    if women_focused is not None:
        query = query.filter(Grant.women_focused == women_focused)
    if youth_focused is not None:
        query = query.filter(Grant.youth_focused == youth_focused)
    if agriculture_focused is not None:
        query = query.filter(Grant.agriculture_focused == agriculture_focused)
    if verified is not None:
        query = query.filter(Grant.verified == verified)
    
    grants = query.offset(skip).limit(limit).all()
    return grants


@router.get("/grants/{grant_id}", response_model=GrantResponse)
def get_grant(grant_id: int, db: Session = Depends(get_db)):
    """Get detailed grant information"""
    grant = db.query(Grant).filter(Grant.id == grant_id).first()
    if not grant:
        raise HTTPException(status_code=404, detail=f"Grant {grant_id} not found")
    return grant


@router.get("/companies", response_model=List[CompanyResponse])
def list_companies(
    sector: Optional[str] = Query(None),
    nationality: Optional[str] = Query(None),
    business_stage: Optional[str] = Query(None),
    skip: int = Query(0, ge=0),
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db)
):
    """List companies with filtering"""
    query = db.query(Company)
    
    if sector:
        query = query.filter(Company.sector.ilike(f"%{sector}%"))
    if nationality:
        query = query.filter(Company.nationality.ilike(f"%{nationality}%"))
    if business_stage:
        query = query.filter(Company.business_stage.ilike(f"%{business_stage}%"))
    
    companies = query.offset(skip).limit(limit).all()
    return companies


@router.post("/companies", response_model=CompanyResponse, status_code=201)
def create_company(company: CompanyCreate, db: Session = Depends(get_db)):
    """Register a new company in ImaraFund"""
    db_company = Company(**company.dict())
    db.add(db_company)
    db.commit()
    db.refresh(db_company)
    return db_company


@router.post("/match/{company_id}", response_model=MatchResponse)
def match_company_with_grants(
    company_id: int,
    top_n: int = Query(5, ge=1, le=20, description="Number of top matches"),
    db: Session = Depends(get_db)
):
    """
    Run ImaraFund's intelligent matching algorithm with AI recommendations
    Uses your proven 40/30/20/10 scoring system
    """
    matcher = IntelligentMatcher(db)
    ai_service = GeminiService()
    
    try:
        company, matches = matcher.find_matches(company_id, top_n=top_n)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Matching error: {str(e)}")
    
    if not matches:
        return MatchResponse(
            company=CompanyResponse.from_orm(company),
            matches=[],
            ai_recommendation="No suitable matches found with score > 30.",
            total_matches_found=0
        )
    
    # Process matches for API response
    match_results = []
    for match in matches:
        breakdown = match['score_breakdown']
        
        match_result = MatchResult(
            program_name=match['program_name'],
            institution=match['institution'],
            country=match['country'],
            funding_amount=match['funding_amount'],
            match_score=match['match_score'],
            score_breakdown=ScoreBreakdown(
                geographic=breakdown['geographic'],
                sector=breakdown['sector'],
                amount_fit=breakdown['amount_fit'],
                stage=breakdown['stage']
            ),
            target_sectors=match['target_sectors'],
            website=match['website'],
            data_source_url=match['data_source_url'],
            repayment_required=match['repayment_required'],
            grant_details=GrantResponse.from_orm(match['grant'])
        )
        match_results.append(match_result)
    
    # Generate AI recommendation for top match
    company_profile = matcher.get_company_profile_dict(company)
    ai_recommendation = ai_service.get_ai_recommendation(company_profile, matches[0])
    
    return MatchResponse(
        company=CompanyResponse.from_orm(company),
        matches=match_results,
        ai_recommendation=ai_recommendation,
        total_matches_found=len(matches)
    )
'''
    return generator.write_file("Step 7", "app/api/endpoints.py", content)

# Generate endpoints
generate_endpoints()


‚úÖ Step 7: app/api/endpoints.py


WindowsPath('D:/D1/WTF/ImaraFund/app/api/endpoints.py')

In [5]:
# Step 8: Main Application
def generate_main():
    content = '''"""
ImaraFund Main Application
FastAPI app with CORS, startup events, and API routing
"""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.core.config import settings
from app.database import init_db
from app.api.endpoints import router

# Initialize FastAPI application
app = FastAPI(
    title=settings.PROJECT_NAME,
    description="AI-powered matching platform for African SME funding",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# Configure CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include API routes
app.include_router(router, prefix=settings.API_V1_PREFIX)


@app.on_event("startup")
async def startup_event():
    """Initialize database on application startup"""
    init_db()
    print("üöÄ ImaraFund API started successfully!")


@app.get("/")
async def root():
    """Root endpoint with ImaraFund API information"""
    return {
        "message": "Welcome to ImaraFund API",
        "description": "AI-powered matching platform for African SME funding",
        "version": "1.0.0",
        "documentation": "/docs",
        "algorithm": "IntelligentMatcher v1.0 (40/30/20/10 scoring)",
        "endpoints": {
            "grants": f"{settings.API_V1_PREFIX}/grants",
            "companies": f"{settings.API_V1_PREFIX}/companies",
            "matching": f"{settings.API_V1_PREFIX}/match/{{company_id}}"
        }
    }


@app.get("/health")
async def health_check():
    """Health check endpoint for monitoring"""
    return {
        "status": "healthy",
        "service": "imarafund-api",
        "version": "1.0.0"
    }


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000, reload=settings.DEBUG)
'''
    return generator.write_file("Step 8", "app/main.py", content)

# Step 9: Migration Script
def generate_migration():
    content = '''"""
ImaraFund Data Migration Script
Import your cleaned CSV data into the database
"""

import pandas as pd
import sys
from pathlib import Path
from typing import Dict, Any, Optional

# Add parent directory to path
sys.path.append(str(Path(__file__).parent.parent))

from app.database import SessionLocal, init_db
from app.models import Grant, Company


class ImaraFundMigrator:
    """Data migration utility for ImaraFund cleaned datasets"""
    
    def __init__(self):
        self.db = SessionLocal()
        
    def __enter__(self):
        return self
        
    def __exit__(self, exc_type, exc_val, exc_tb):
        self.db.close()
    
    def safe_float(self, value: Any) -> Optional[float]:
        """Safely convert value to float"""
        if pd.isna(value) or value == "" or value is None:
            return None
        try:
            if isinstance(value, str):
                cleaned = value.replace("$", "").replace(",", "").strip()
                return float(cleaned) if cleaned else None
            return float(value)
        except (ValueError, TypeError):
            return None
    
    def safe_int(self, value: Any) -> Optional[int]:
        """Safely convert value to integer"""
        if pd.isna(value) or value == "" or value is None:
            return None
        try:
            return int(float(value))
        except (ValueError, TypeError):
            return None
    
    def safe_bool(self, value: Any) -> bool:
        """Safely convert value to boolean"""
        if pd.isna(value) or value == "" or value is None:
            return False
        if isinstance(value, bool):
            return value
        str_val = str(value).lower().strip()
        return str_val in ["true", "yes", "1", "y", "on", "t"]
    
    def safe_string(self, value: Any, max_length: Optional[int] = None) -> Optional[str]:
        """Safely convert value to string"""
        if pd.isna(value) or value is None:
            return None
        result = str(value).strip()
        if not result or result.lower() in ['nan', 'null', 'none']:
            return None
        if max_length and len(result) > max_length:
            result = result[:max_length-3] + "..."
        return result
    
    def import_cleaned_grants(self, csv_path: str = "data/cleaned/grants_cleaned_latest.csv") -> Dict[str, int]:
        """Import the cleaned grants CSV (with fixed data_source_url)"""
        print(f"üìä Importing grants from: {csv_path}")
        
        csv_file = Path(csv_path)
        if not csv_file.exists():
            raise FileNotFoundError(f"Cleaned grants CSV not found: {csv_path}")
        
        df = pd.read_csv(csv_file, encoding='utf-8')
        print(f"‚úÖ Loaded {len(df)} grants")
        
        imported_count = 0
        error_count = 0
        
        for idx, row in df.iterrows():
            try:
                grant = Grant(
                    program_id=self.safe_string(row.get('program_id'), 100) or f"GRANT_{idx+1}",
                    program_name=self.safe_string(row.get('program_name'), 500) or f"Program {idx+1}",
                    institution_name=self.safe_string(row.get('institution_name'), 500) or "Unknown",
                    country=self.safe_string(row.get('country'), 200),
                    region=self.safe_string(row.get('region'), 200),
                    geographic_scope=self.safe_string(row.get('geographic_scope'), 200),
                    currency_code=self.safe_string(row.get('currency_code'), 10),
                    estimated_value_amount=self.safe_float(row.get('estimated_value_amount')),
                    minimum_amount=self.safe_float(row.get('minimum_amount')),
                    maximum_amount=self.safe_float(row.get('maximum_amount')),
                    repayment_required=self.safe_bool(row.get('repayment_required')),
                    interest_rate=self.safe_string(row.get('interest_rate'), 50),
                    program_type=self.safe_string(row.get('program_type'), 200),
                    target_sectors=self.safe_string(row.get('target_sectors')),
                    duration_months=self.safe_int(row.get('duration_months')),
                    minimum_employees=self.safe_int(row.get('minimum_employees')),
                    maximum_employees=self.safe_int(row.get('maximum_employees')),
                    minimum_revenue=self.safe_float(row.get('minimum_revenue')),
                    maximum_revenue=self.safe_float(row.get('maximum_revenue')),
                    eligibility_criteria=self.safe_string(row.get('eligibility_criteria')),
                    application_process=self.safe_string(row.get('application_process')),
                    application_deadline=self.safe_string(row.get('application_deadline'), 200),
                    language_requirements=self.safe_string(row.get('language_requirements'), 200),
                    website_url=self.safe_string(row.get('website_url'), 500),
                    data_source_url=self.safe_string(row.get('data_source_url'), 500),  # ‚úÖ Fixed by cleaning
                    contact_email=self.safe_string(row.get('contact_email'), 200),
                    contact_phone=self.safe_string(row.get('contact_phone'), 100),
                    target_beneficiaries=self.safe_string(row.get('target_beneficiaries'), 200),
                    target_demographics=self.safe_string(row.get('target_demographics'), 200),
                    age_restrictions=self.safe_string(row.get('age_restrictions'), 100),
                    gender_focus=self.safe_string(row.get('gender_focus'), 50),
                    environmental_focus=self.safe_bool(row.get('environmental_focus')),
                    innovation_focus=self.safe_bool(row.get('innovation_focus')),
                    digital_focus=self.safe_bool(row.get('digital_focus')),
                    export_focus=self.safe_bool(row.get('export_focus')),
                    women_focused=self.safe_bool(row.get('women_focused')),
                    youth_focused=self.safe_bool(row.get('youth_focused')),
                    agriculture_focused=self.safe_bool(row.get('agriculture_focused')),
                    green_climate_focused=self.safe_bool(row.get('green_climate_focused')),
                    technical_assistance=self.safe_bool(row.get('technical_assistance')),
                    mentorship_available=self.safe_bool(row.get('mentorship_available')),
                    networking_opportunities=self.safe_bool(row.get('networking_opportunities')),
                    training_provided=self.safe_bool(row.get('training_provided')),
                    co_financing_required=self.safe_bool(row.get('co_financing_required')),
                    co_financing_available=self.safe_bool(row.get('co_financing_available')),
                    export_support=self.safe_bool(row.get('export_support')),
                    technology_innovation=self.safe_bool(row.get('technology_innovation')),
                    digital_application=self.safe_bool(row.get('digital_application')),
                    collateral_required=self.safe_string(row.get('collateral_required'), 50),
                    grace_period_months=self.safe_int(row.get('grace_period_months')),
                    guarantee_coverage=self.safe_string(row.get('guarantee_coverage'), 50),
                    success_rate=self.safe_float(row.get('success_rate')),
                    total_beneficiaries=self.safe_int(row.get('total_beneficiaries')),
                    year_established=self.safe_int(row.get('year_established')),
                    funding_source=self.safe_string(row.get('funding_source'), 500),
                    program_start_date=self.safe_string(row.get('program_start_date'), 100),
                    verified=self.safe_bool(row.get('verified')),
                    last_verified_date=self.safe_string(row.get('last_verified_date'), 50),
                    last_updated=self.safe_string(row.get('last_updated'), 50),
                    verification_date=self.safe_string(row.get('verification_date'), 50),
                    special_features=self.safe_string(row.get('special_features')),
                    notes=self.safe_string(row.get('notes'))
                )
                
                self.db.add(grant)
                imported_count += 1
                
                if imported_count % 20 == 0:
                    self.db.commit()
                    print(f"  ‚úì Imported {imported_count} grants...")
                
            except Exception as e:
                error_count += 1
                print(f"  ‚úó Error on row {idx + 2}: {str(e)}")
                continue
        
        self.db.commit()
        print(f"\\n‚úÖ Grants import completed!")
        
        return {"imported": imported_count, "errors": error_count, "total_rows": len(df)}
    
    def import_companies(self, csv_path: str = "data/companies/synthetic_companies.csv") -> Dict[str, int]:
        """Import companies CSV"""
        print(f"\\nüìä Importing companies from: {csv_path}")
        
        csv_file = Path(csv_path)
        if not csv_file.exists():
            print(f"  ‚ö† Companies CSV not found: {csv_path}")
            return {"imported": 0, "errors": 0, "total_rows": 0}
        
        df = pd.read_csv(csv_file, encoding='utf-8')
        print(f"‚úÖ Loaded {len(df)} companies")
        
        imported_count = 0
        for idx, row in df.iterrows():
            try:
                company = Company(
                    company_id=self.safe_string(row.get('company_id'), 100) or f"COMP_{idx+1}",
                    company_name=self.safe_string(row.get('company_name'), 500) or f"Company {idx+1}",
                    sector=self.safe_string(row.get('sector'), 200) or "General",
                    nationality=self.safe_string(row.get('nationality'), 100) or "Unknown",
                    business_registered_in=self.safe_string(row.get('business_registered_in'), 100),
                    business_stage=self.safe_string(row.get('business_stage'), 100) or "Unknown",
                    innovation_level=self.safe_string(row.get('innovation_level'), 50),
                    founder_age=self.safe_int(row.get('founder_age')),
                    founder_gender=self.safe_string(row.get('founder_gender'), 20),
                    business_age_months=self.safe_int(row.get('business_age_months')),
                    annual_revenue_usd=self.safe_float(row.get('annual_revenue_usd')),
                    employees=self.safe_int(row.get('employees')),
                    funding_need_usd=self.safe_float(row.get('funding_need_usd')) or 0.0,
                    has_prototype=self.safe_bool(row.get('has_prototype')),
                    targets_underserved=self.safe_bool(row.get('targets_underserved')),
                    created_date=self.safe_string(row.get('created_date'), 50)
                )
                
                self.db.add(company)
                imported_count += 1
                
                if imported_count % 10 == 0:
                    self.db.commit()
                    print(f"  ‚úì Imported {imported_count} companies...")
                
            except Exception as e:
                print(f"  ‚úó Error on row {idx + 2}: {str(e)}")
                continue
        
        self.db.commit()
        print(f"\\n‚úÖ Companies import completed!")
        
        return {"imported": imported_count, "errors": 0, "total_rows": len(df)}


def main():
    """Interactive migration"""
    print("=" * 70)
    print("üöÄ ImaraFund Data Migration")
    print("=" * 70)
    
    print("\\nüì¶ Initializing database...")
    init_db()
    
    with ImaraFundMigrator() as migrator:
        print("\\n" + "=" * 70)
        print("IMPORTING GRANTS")
        print("=" * 70)
        
        try:
            results = migrator.import_cleaned_grants()
            print(f"\\n‚úÖ Grants: {results['imported']}/{results['total_rows']}")
        except Exception as e:
            print(f"‚úó Grant import failed: {str(e)}")
        
        print("\\n" + "=" * 70)
        print("IMPORTING COMPANIES")
        print("=" * 70)
        
        try:
            results = migrator.import_companies()
            print(f"\\n‚úÖ Companies: {results['imported']}/{results['total_rows']}")
        except Exception as e:
            print(f"‚úó Company import failed: {str(e)}")
    
    print("\\n" + "=" * 70)
    print("‚úÖ Migration completed!")
    print("=" * 70)


if __name__ == "__main__":
    main()
'''
    return generator.write_file("Step 9", "migrations/migration_script.py", content)

# Step 10: Environment Configuration
def generate_env():
    content = '''# ImaraFund Environment Configuration

# Database Configuration
DATABASE_URL=sqlite:///./imarafund.db

# API Configuration
PROJECT_NAME=ImaraFund
API_V1_PREFIX=/api/v1
DEBUG=True

# AI Configuration - Add your actual Gemini API key here
GEMINI_API_KEY=your_gemini_api_key_here

# Matching Algorithm Weights (Your proven 40/30/20/10 system)
GEOGRAPHY_WEIGHT=0.40
SECTOR_WEIGHT=0.30
FUNDING_WEIGHT=0.20
STAGE_WEIGHT=0.10
'''
    return generator.write_file("Step 10", ".env", content)

# Generate remaining files
print("üîÑ Generating remaining backend files...\n")
generate_main()
generate_migration()
generate_env()

print(f"\n‚úÖ All 10 ImaraFund backend files generated successfully!")
print(f"üìÅ Files created: {len(generator.files_created)}")


üîÑ Generating remaining backend files...

‚úÖ Step 8: app/main.py
‚úÖ Step 9: migrations/migration_script.py
‚úÖ Step 10: .env

‚úÖ All 10 ImaraFund backend files generated successfully!
üìÅ Files created: 4


In [8]:
import os
from pathlib import Path

print("üîç Validating generated files...\n")

expected_files = [
    "app/database.py",
    "app/core/config.py", 
    "app/models.py",
    "app/schemas.py",
    "app/services/intelligent_matcher.py",
    "app/services/gemini_service.py",
    "app/api/endpoints.py",
    "app/main.py",
    "migrations/migration_script.py",
    ".env"
]

all_good = True
for file_path in expected_files:
    full_path = PROJECT_ROOT / file_path
    if full_path.exists():
        size = full_path.stat().st_size
        print(f"‚úÖ {file_path} ({size:,} bytes)")
    else:
        print(f"‚ùå {file_path} - NOT FOUND")
        all_good = False

if all_good:
    print(f"\nüéâ All {len(expected_files)} files successfully created!")
    print("\nüìã Ready for next steps:")
    print("   1. Edit .env with your GEMINI_API_KEY")
    print("   2. Run: python migrations/migration_script.py")
    print("   3. Start API: uvicorn app.main:app --reload")
else:
    print("\n‚ö†Ô∏è Some files are missing. Check the generation process.")

üîç Validating generated files...

‚ùå app/database.py - NOT FOUND
‚ùå app/core/config.py - NOT FOUND
‚ùå app/models.py - NOT FOUND
‚ùå app/schemas.py - NOT FOUND
‚ùå app/services/intelligent_matcher.py - NOT FOUND
‚ùå app/services/gemini_service.py - NOT FOUND
‚úÖ app/api/endpoints.py (6,778 bytes)
‚úÖ app/main.py (1,934 bytes)
‚úÖ migrations/migration_script.py (12,535 bytes)
‚úÖ .env (435 bytes)

‚ö†Ô∏è Some files are missing. Check the generation process.


## ‚úÖ ImaraFund Backend Generation Complete!

All 10 backend files have been successfully generated:

### **Generated Files:**
1. ‚úÖ `app/database.py` - SQLAlchemy database configuration
2. ‚úÖ `app/core/config.py` - Environment-based settings
3. ‚úÖ `app/models.py` - 63-column Grant + Company models
4. ‚úÖ `app/schemas.py` - Pydantic validation schemas
5. ‚úÖ `app/services/intelligent_matcher.py` - Your 40/30/20/10 algorithm
6. ‚úÖ `app/services/gemini_service.py` - Gemini 2.5 Flash AI service
7. ‚úÖ `app/api/endpoints.py` - RESTful API routes
8. ‚úÖ `app/main.py` - FastAPI application entry point
9. ‚úÖ `migrations/migration_script.py` - CSV data import script
10. ‚úÖ `.env` - Environment configuration file

### **Next Steps:**

#### 1. Configure Your API Key
Edit the `.env` file and replace `your_gemini_api_key_here` with your actual Gemini API key.

#### 2. Ensure Data is Ready
Make sure you have:
- `data/cleaned/grants_cleaned_latest.csv` (from the data cleaning script)
- `data/companies/synthetic_companies.csv` (your companies dataset)

#### 3. Install Dependencies
```bash
pip install -r requirements.txt


## üîß ImaraFund Missing Backend Files Generator

**Problem:** Only 4 out of 10 backend files were created (steps 7-10). Steps 1-6 were skipped.

**Solution:** This cell generates the 6 missing core backend files:

1. `app/database.py` - SQLAlchemy database configuration
2. `app/core/config.py` - Environment-based settings  
3. `app/models.py` - 63-column Grant + Company models
4. `app/schemas.py` - Pydantic validation schemas
5. `app/services/intelligent_matcher.py` - Your 40/30/20/10 algorithm
6. `app/services/gemini_service.py` - Gemini 2.5 Flash AI service

**Note:** This will NOT overwrite your existing files (endpoints.py, main.py, migration_script.py, .env).


In [9]:
"""
ImaraFund Missing Backend Files Generator
Generates the 6 missing core files while preserving existing ones
"""

from pathlib import Path

# Project configuration
PROJECT_ROOT = Path(r"D:\D1\WTF\ImaraFund")

def write_file(file_path, content, description):
    """Write a file with progress tracking and validation"""
    full_path = PROJECT_ROOT / file_path
    full_path.parent.mkdir(parents=True, exist_ok=True)
    
    with open(full_path, 'w', encoding='utf-8') as f:
        f.write(content.strip())
    
    size = full_path.stat().st_size
    print(f"‚úÖ {description}: {file_path} ({size:,} bytes)")
    return full_path

print("=" * 70)
print("üîß Generating Missing ImaraFund Backend Files")
print("=" * 70)
print()

# ============================================================================
# FILE 1: app/database.py
# ============================================================================
database_content = '''"""
ImaraFund Database Configuration
SQLAlchemy setup optimized for the matching algorithm
"""

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from app.core.config import settings

# Database engine with SQLite optimization
connect_args = {}
if "sqlite" in settings.DATABASE_URL:
    connect_args = {"check_same_thread": False}

engine = create_engine(
    settings.DATABASE_URL,
    connect_args=connect_args,
    echo=settings.DEBUG,
    pool_pre_ping=True
)

SessionLocal = sessionmaker(
    autocommit=False,
    autoflush=False,
    bind=engine
)

Base = declarative_base()


def get_db():
    """Database dependency for FastAPI endpoints"""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


def init_db():
    """Initialize all database tables"""
    Base.metadata.create_all(bind=engine)
    print("‚úÖ ImaraFund database tables created successfully!")


def drop_all_tables():
    """Development helper - use with caution!"""
    Base.metadata.drop_all(bind=engine)
    print("‚ö†Ô∏è All tables dropped!")
'''

write_file("app/database.py", database_content, "Database Config")

# ============================================================================
# FILE 2: app/core/config.py
# ============================================================================
config_content = '''"""
ImaraFund Configuration Management
Environment-based settings with your proven algorithm weights
"""

from pydantic_settings import BaseSettings
from typing import List


class Settings(BaseSettings):
    """ImaraFund application settings"""
    
    # Project Information
    PROJECT_NAME: str = "ImaraFund"
    VERSION: str = "1.0.0"
    DESCRIPTION: str = "AI-powered funding matcher for African SMEs"
    
    # API Configuration
    API_V1_PREFIX: str = "/api/v1"
    DEBUG: bool = False
    
    # Database
    DATABASE_URL: str = "sqlite:///./imarafund.db"
    
    # AI Configuration (Your Gemini 2.5 Flash setup)
    GEMINI_API_KEY: str = ""
    GEMINI_MODEL: str = "gemini-2.5-flash"
    
    # Your Proven Matching Algorithm Weights (40/30/20/10)
    GEOGRAPHY_WEIGHT: float = 0.40  # Most important for African SMEs
    SECTOR_WEIGHT: float = 0.30     # Business alignment
    FUNDING_WEIGHT: float = 0.20    # Financial feasibility
    STAGE_WEIGHT: float = 0.10      # Development stage compatibility
    
    # Security
    SECRET_KEY: str = "imarafund-secret-key-change-in-production"
    
    # CORS Settings
    ALLOWED_ORIGINS: List[str] = [
        "http://localhost:3000",
        "http://localhost:8000",
        "http://127.0.0.1:3000",
        "http://127.0.0.1:8000"
    ]
    
    class Config:
        env_file = ".env"
        case_sensitive = True


settings = Settings()
'''

write_file("app/core/config.py", config_content, "Configuration Settings")

# ============================================================================
# FILE 3: app/models.py (Complete 63-column structure)
# ============================================================================
models_content = '''"""
ImaraFund Database Models
Optimized for 63-column grants CSV and IntelligentMatcher algorithm
"""

from sqlalchemy import Column, Integer, String, Float, Boolean, Text, DateTime, JSON
from sqlalchemy.sql import func
from app.database import Base


class Grant(Base):
    """
    Grant model for ImaraFund's 63-column CSV structure
    Optimized for the 40/30/20/10 scoring algorithm
    """
    
    __tablename__ = "grants"
    
    # Primary Key
    id = Column(Integer, primary_key=True, index=True)
    
    # Core Identification
    program_id = Column(String(100), unique=True, index=True)
    program_name = Column(String(500), nullable=False, index=True)
    institution_name = Column(String(500), nullable=False, index=True)
    
    # Geographic Fields (CRITICAL for 40% geography scoring)
    country = Column(String(200), index=True)
    region = Column(String(200), index=True)
    geographic_scope = Column(String(200), index=True)
    
    # Financial Fields (CRITICAL for 20% funding scoring)
    currency_code = Column(String(10))
    estimated_value_amount = Column(Float)
    minimum_amount = Column(Float)
    maximum_amount = Column(Float)
    repayment_required = Column(Boolean, default=False, index=True)
    interest_rate = Column(String(50))
    
    # Sector Fields (CRITICAL for 30% sector scoring)
    program_type = Column(String(200))
    target_sectors = Column(Text, index=True)
    
    # Business Requirements
    duration_months = Column(Integer)
    minimum_employees = Column(Integer)
    maximum_employees = Column(Integer)
    minimum_revenue = Column(Float)
    maximum_revenue = Column(Float)
    
    # Application Process & Links (‚úÖ data_source_url filled from website_url by cleaning script)
    eligibility_criteria = Column(Text)
    application_process = Column(Text)
    application_deadline = Column(String(200))
    language_requirements = Column(String(200))
    website_url = Column(String(500))
    data_source_url = Column(String(500))  # Fixed by data cleaning script
    
    # Contact Information
    contact_email = Column(String(200))
    contact_phone = Column(String(100))
    
    # Demographics and Target Groups
    target_beneficiaries = Column(String(200))
    target_demographics = Column(String(200))
    age_restrictions = Column(String(100))
    gender_focus = Column(String(50))
    
    # Focus Areas (Boolean flags - cleaned from CSV)
    environmental_focus = Column(Boolean, default=False)
    innovation_focus = Column(Boolean, default=False)
    digital_focus = Column(Boolean, default=False)
    export_focus = Column(Boolean, default=False)
    women_focused = Column(Boolean, default=False, index=True)
    youth_focused = Column(Boolean, default=False, index=True)
    agriculture_focused = Column(Boolean, default=False, index=True)
    green_climate_focused = Column(Boolean, default=False)
    
    # Support Services
    technical_assistance = Column(Boolean, default=False)
    mentorship_available = Column(Boolean, default=False)
    networking_opportunities = Column(Boolean, default=False)
    training_provided = Column(Boolean, default=False)
    co_financing_required = Column(Boolean, default=False)
    co_financing_available = Column(Boolean, default=False)
    export_support = Column(Boolean, default=False)
    technology_innovation = Column(Boolean, default=False)
    digital_application = Column(Boolean, default=False)
    
    # Financial Terms
    collateral_required = Column(String(50))
    grace_period_months = Column(Integer)
    guarantee_coverage = Column(String(50))
    
    # Program Metrics and History
    success_rate = Column(Float)
    total_beneficiaries = Column(Integer)
    year_established = Column(Integer)
    funding_source = Column(String(500))
    program_start_date = Column(String(100))
    
    # Status and Verification
    verified = Column(Boolean, default=False, index=True)
    last_verified_date = Column(String(50))
    last_updated = Column(String(50))
    verification_date = Column(String(50))
    special_features = Column(Text)
    notes = Column(Text)
    
    # Flexible storage for additional CSV columns
    additional_data = Column(JSON)
    
    # System timestamps
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now())
    
    def __repr__(self):
        return f"<Grant(id={self.id}, name='{self.program_name}')>"


class Company(Base):
    """
    Company model matching your synthetic companies dataset structure
    Optimized for IntelligentMatcher algorithm
    """
    
    __tablename__ = "companies"
    
    # Primary Key
    id = Column(Integer, primary_key=True, index=True)
    
    # Core Identification
    company_id = Column(String(100), unique=True, index=True)
    company_name = Column(String(500), nullable=False, index=True)
    
    # Business Classification (CRITICAL for matching)
    sector = Column(String(200), nullable=False, index=True)
    business_stage = Column(String(100), nullable=False, index=True)
    innovation_level = Column(String(50))
    
    # Geographic Information (CRITICAL for 40% geography scoring)
    nationality = Column(String(100), nullable=False, index=True)
    business_registered_in = Column(String(100))
    
    # Founder Demographics
    founder_age = Column(Integer)
    founder_gender = Column(String(20))
    
    # Business Metrics
    business_age_months = Column(Integer)
    annual_revenue_usd = Column(Float)
    employees = Column(Integer)
    
    # Funding Requirements (CRITICAL for 20% funding scoring)
    funding_need_usd = Column(Float, nullable=False, index=True)
    has_prototype = Column(Boolean, default=False)
    targets_underserved = Column(Boolean, default=False)
    
    # System timestamps
    created_date = Column(String(50))
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now())
    
    def __repr__(self):
        return f"<Company(id={self.id}, name='{self.company_name}')>"
'''

write_file("app/models.py", models_content, "Database Models")

# ============================================================================
# FILE 4: app/schemas.py
# ============================================================================
schemas_content = '''"""
ImaraFund Pydantic Schemas
Request/Response models for API validation
"""

from pydantic import BaseModel, Field
from typing import List, Optional
from datetime import datetime


# Grant Schemas
class GrantBase(BaseModel):
    """Base grant schema with essential fields"""
    program_name: str = Field(..., min_length=1, max_length=500)
    institution_name: str
    country: Optional[str] = None
    target_sectors: Optional[str] = None
    estimated_value_amount: Optional[float] = None


class GrantResponse(GrantBase):
    """Schema for grant API responses"""
    id: int
    program_id: Optional[str]
    region: Optional[str]
    geographic_scope: Optional[str]
    repayment_required: bool
    program_type: Optional[str]
    website_url: Optional[str]
    data_source_url: Optional[str]  # ‚úÖ Fixed by data cleaning
    women_focused: bool
    youth_focused: bool
    agriculture_focused: bool
    verified: bool
    created_at: datetime
    
    class Config:
        from_attributes = True


# Company Schemas
class CompanyBase(BaseModel):
    """Base company schema"""
    company_name: str = Field(..., min_length=1, max_length=500)
    sector: str = Field(..., min_length=1, max_length=200)
    nationality: str = Field(..., min_length=2, max_length=100)
    business_stage: str = Field(..., min_length=1, max_length=100)
    funding_need_usd: float = Field(..., gt=0)


class CompanyCreate(CompanyBase):
    """Schema for creating a new company"""
    company_id: Optional[str] = None
    business_registered_in: Optional[str] = None
    founder_age: Optional[int] = Field(None, ge=18, le=100)
    founder_gender: Optional[str] = None
    business_age_months: Optional[int] = Field(None, ge=0)
    annual_revenue_usd: Optional[float] = Field(None, ge=0)
    employees: Optional[int] = Field(None, ge=0)
    innovation_level: Optional[str] = None
    has_prototype: bool = False
    targets_underserved: bool = False


class CompanyResponse(CompanyBase):
    """Schema for company API responses"""
    id: int
    company_id: Optional[str]
    founder_age: Optional[int]
    business_age_months: Optional[int]
    annual_revenue_usd: Optional[float]
    employees: Optional[int]
    created_at: datetime
    
    class Config:
        from_attributes = True


# Matching Schemas (Your IntelligentMatcher scoring breakdown)
class ScoreBreakdown(BaseModel):
    """Your exact IntelligentMatcher scoring breakdown (40/30/20/10)"""
    geographic: float = Field(..., ge=0, le=40, description="Geography match (0-40 points)")
    sector: float = Field(..., ge=0, le=30, description="Sector alignment (0-30 points)")
    amount_fit: float = Field(..., ge=0, le=20, description="Funding amount fit (0-20 points)")
    stage: float = Field(..., ge=0, le=10, description="Business stage (0-10 points)")


class MatchResult(BaseModel):
    """Individual match result with grant details and scoring"""
    program_name: str
    institution: str
    country: str
    funding_amount: float
    match_score: float = Field(..., ge=0, le=100)
    score_breakdown: ScoreBreakdown
    target_sectors: str
    website: str
    data_source_url: str  # ‚úÖ Fixed by data cleaning
    repayment_required: str
    grant_details: GrantResponse


class MatchResponse(BaseModel):
    """Complete matching response with AI recommendation"""
    company: CompanyResponse
    matches: List[MatchResult]
    ai_recommendation: Optional[str] = None
    total_matches_found: int
    algorithm_version: str = "ImaraFund v1.0 (40/30/20/10)"
'''

write_file("app/schemas.py", schemas_content, "Pydantic Schemas")

# ============================================================================
# FILE 5: app/services/intelligent_matcher.py (Your exact algorithm)
# ============================================================================
matcher_content = '''"""
ImaraFund Intelligent Matching Service
Your exact IntelligentMatcher algorithm ported to SQLAlchemy
"""

from typing import List, Tuple, Dict
from sqlalchemy.orm import Session
from app.models import Grant, Company
import logging

logger = logging.getLogger(__name__)


class IntelligentMatcher:
    """
    Your proven matching algorithm integrated with ImaraFund database
    Preserves exact scoring logic: 40% Geography, 30% Sector, 20% Funding, 10% Stage
    """
    
    def __init__(self, db: Session):
        self.db = db
        logger.info("ImaraFund IntelligentMatcher initialized")
    
    def find_matches(self, company_id: int, top_n: int = 5) -> Tuple[Company, List[Dict]]:
        """Find best matching grants using your exact scoring algorithm"""
        company = self.db.query(Company).filter(Company.id == company_id).first()
        if not company:
            raise ValueError(f"Company with ID {company_id} not found")
        
        grants = self.db.query(Grant).all()
        logger.info(f"Processing {len(grants)} grants for company {company.company_name}")
        
        matches = []
        for grant in grants:
            score, breakdown = self._calculate_match_score(company, grant)
            
            if score > 30:  # Your threshold from the original script
                matches.append({
                    'grant': grant,
                    'program_name': grant.program_name or 'Unknown Program',
                    'institution': grant.institution_name or 'Unknown Institution',
                    'country': grant.country or 'Unknown',
                    'funding_amount': grant.estimated_value_amount or 0,
                    'match_score': round(score, 1),
                    'score_breakdown': breakdown,
                    'target_sectors': grant.target_sectors or 'General',
                    'website': grant.website_url or 'Not available',
                    'data_source_url': grant.data_source_url or 'Not available',
                    'repayment_required': str(grant.repayment_required) if grant.repayment_required is not None else 'Unknown'
                })
        
        matches_sorted = sorted(matches, key=lambda x: x['match_score'], reverse=True)[:top_n]
        logger.info(f"Found {len(matches)} matches above threshold, returning top {len(matches_sorted)}")
        return company, matches_sorted
    
    def _calculate_match_score(self, company: Company, grant: Grant) -> Tuple[float, Dict]:
        """Your exact scoring algorithm (0-100 points)"""
        score = 0.0
        breakdown = {}
        
        # 1. Geographic Match (40 points) - Most important
        geo_score = self._score_geography(company, grant)
        score += geo_score
        breakdown['geographic'] = geo_score
        
        # 2. Sector Match (30 points)
        sector_score = self._score_sector(company, grant)
        score += sector_score
        breakdown['sector'] = sector_score
        
        # 3. Funding Amount Fit (20 points)
        amount_score = self._score_funding_amount(company, grant)
        score += amount_score
        breakdown['amount_fit'] = amount_score
        
        # 4. Stage Bonus (10 points)
        stage_score = self._score_business_stage(company, grant)
        score += stage_score
        breakdown['stage'] = stage_score
        
        return min(100.0, score), breakdown
    
    def _score_geography(self, company: Company, grant: Grant) -> float:
        """Score geographic eligibility (0-40 points) - Your exact logic"""
        company_country = str(company.nationality or '').lower().strip()
        grant_scope = str(grant.geographic_scope or '').lower().strip()
        grant_country = str(grant.country or '').lower().strip()
        
        # Global programs get full points
        if 'global' in grant_scope:
            return 40.0
        
        # Exact country match
        if company_country in grant_country or company_country in grant_scope:
            return 40.0
        
        # Regional matches - Your exact Africa countries list
        africa_countries = [
            'nigeria', 'kenya', 'south africa', 'ghana', 'uganda', 'egypt',
            'tanzania', 'rwanda', 'ethiopia', 'senegal', 'botswana', 'zambia',
            'zimbabwe', 'morocco', 'tunisia', 'algeria', 'libya', 'cameroon',
            'ivory coast', 'mali', 'burkina faso', 'niger', 'madagascar'
        ]
        
        if company_country in africa_countries:
            if 'africa' in grant_scope or 'african' in grant_scope:
                return 35.0
        
        return 0.0
    
    def _score_sector(self, company: Company, grant: Grant) -> float:
        """Score sector alignment (0-30 points) - Your exact logic"""
        company_sector = str(company.sector or '').lower().strip()
        target_sectors = str(grant.target_sectors or '').lower().strip()
        
        # All sectors accepted
        if any(keyword in target_sectors for keyword in ['all', 'general', 'any']):
            return 25.0
        
        # Exact sector match
        if company_sector in target_sectors:
            return 30.0
        
        # Partial match (e.g., "tech" in "technology") - Your logic
        sector_words = company_sector.split()
        if any(word in target_sectors for word in sector_words if len(word) > 3):
            return 20.0
        
        return 10.0
    
    def _score_funding_amount(self, company: Company, grant: Grant) -> float:
        """Score funding amount fit (0-20 points) - Your exact logic"""
        need = company.funding_need_usd or 0.0
        available = grant.estimated_value_amount or 0.0
        
        if available == 0 or need == 0:
            return 15.0  # Unknown amount gets partial credit - your logic
        
        ratio = need / available
        
        # Perfect fit: need is 10%-200% of available - your ranges
        if 0.1 <= ratio <= 2.0:
            return 20.0
        
        # Good fit: need is 5%-500% of available - your ranges
        elif 0.05 <= ratio <= 5.0:
            return 15.0
        
        # Poor fit but not impossible - your logic
        else:
            return 8.0
    
    def _score_business_stage(self, company: Company, grant: Grant) -> float:
        """Score business stage fit (0-10 points) - Your exact logic"""
        stage = str(company.business_stage or '').lower().strip()
        
        # Most grants are flexible on stage - your comment
        if stage in ['startup', 'early growth']:
            return 10.0
        elif stage == 'idea':
            return 8.0
        elif stage in ['growth', 'scale-up', 'expansion']:
            return 9.0
        else:
            return 7.0
    
    def get_company_profile_dict(self, company: Company) -> Dict:
        """Convert Company model to dict for AI service - matches your format"""
        return {
            'company_name': company.company_name,
            'sector': company.sector,
            'nationality': company.nationality,
            'business_stage': company.business_stage,
            'funding_need_usd': company.funding_need_usd,
            'founder_age': company.founder_age,
            'founder_gender': company.founder_gender,
            'business_age_months': company.business_age_months,
            'annual_revenue_usd': company.annual_revenue_usd,
            'employees': company.employees
        }
'''

write_file("app/services/intelligent_matcher.py", matcher_content, "Intelligent Matcher")

# ============================================================================
# FILE 6: app/services/gemini_service.py (Your proven prompts)
# ============================================================================
gemini_content = '''"""
ImaraFund AI Recommendation Service
Using your proven Gemini 2.5 Flash prompts and configuration
"""

from typing import Dict
import logging

try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
except ImportError:
    GEMINI_AVAILABLE = False
    logging.warning("google-generativeai not installed")

from app.core.config import settings

logger = logging.getLogger(__name__)


class GeminiService:
    """Your proven AI recommendation service using Gemini 2.5 Flash"""
    
    def __init__(self):
        self.ai_enabled = False
        
        if not GEMINI_AVAILABLE:
            logger.warning("‚ö†Ô∏è google-generativeai not installed. AI recommendations disabled.")
            return
        
        api_key = settings.GEMINI_API_KEY
        
        if api_key:
            try:
                genai.configure(api_key=api_key)
                self.model = genai.GenerativeModel(settings.GEMINI_MODEL)
                self.ai_enabled = True
                logger.info("‚úÖ ImaraFund Gemini AI enabled!")
            except Exception as e:
                logger.warning(f"‚ö†Ô∏è AI setup failed: {e}")
        else:
            logger.info("‚ÑπÔ∏è AI disabled - Add GEMINI_API_KEY to .env")
    
    def get_ai_recommendation(self, company_profile: Dict, match: Dict) -> str:
        """Generate clear, simple AI recommendation using your exact prompt"""
        if not self.ai_enabled:
            return "üîë Add your Gemini API key to .env file to get AI-powered recommendations!"
        
        # Your exact prompt that works - preserved completely
        prompt = f"""You are a friendly business advisor helping someone who is NOT a finance expert.

COMPANY:
- Business: {company_profile.get('company_name', 'Startup')}
- What they do: {company_profile.get('sector', 'Unknown')}
- Location: {company_profile.get('nationality', 'Unknown')}
- Stage: {company_profile.get('business_stage', 'Unknown')}
- Money needed: ${company_profile.get('funding_need_usd', 0):,}

FUNDING MATCH:
- Program: {match['program_name']}
- Institution: {match['institution']}
- Amount: ${match['funding_amount']:,}
- Match Score: {match['match_score']}/100

Write advice using SIMPLE language that anyone can understand. Include these 4 sections:

**WHY THIS WORKS:**
Explain in 2-3 simple sentences why this funding fits their business. Use everyday words.

**WHAT TO DO NEXT:**
Give 3 specific actions they can take today. Use simple words like "create a budget" not "develop financial projections."

**WATCH OUT FOR:**
Mention 1-2 realistic challenges in plain English. Be honest but encouraging.

**YOUR CHANCES:**
Say "Excellent", "Good", "Fair", or "Challenging" and explain why in one sentence.

Use everyday words. No jargon. Be encouraging but honest. Keep under 200 words."""

        try:
            response = self.model.generate_content(
                prompt,
                generation_config={
                    'temperature': 0.8,
                    'top_p': 0.9,
                    'max_output_tokens': 600,
                }
            )
            return response.text
        
        except Exception as e:
            error_msg = str(e)
            logger.error(f"ImaraFund Gemini AI error: {error_msg}")
            
            # Your exact error handling logic
            if "quota" in error_msg.lower() or "rate" in error_msg.lower():
                return "‚ö†Ô∏è Too many requests. Free tier: 15 requests/minute. Please wait 60 seconds."
            elif "404" in error_msg or "not found" in error_msg.lower():
                return "‚ö†Ô∏è Model not available. Check your API key."
            else:
                return f"‚ö†Ô∏è AI temporarily unavailable: {error_msg}"
'''

write_file("app/services/gemini_service.py", gemini_content, "Gemini AI Service")

print()
print("=" * 70)
print("‚úÖ ALL MISSING FILES GENERATED SUCCESSFULLY!")
print("=" * 70)
print()
print("üìã Generated Files (6/6):")
print("   1. ‚úÖ app/database.py - Database configuration")
print("   2. ‚úÖ app/core/config.py - Settings management")
print("   3. ‚úÖ app/models.py - 63-column Grant + Company models")
print("   4. ‚úÖ app/schemas.py - Pydantic validation schemas")
print("   5. ‚úÖ app/services/intelligent_matcher.py - Your 40/30/20/10 algorithm")
print("   6. ‚úÖ app/services/gemini_service.py - Gemini 2.5 Flash AI service")
print()
print("üìã Already Existing (4/4):")
print("   7. ‚úÖ app/api/endpoints.py")
print("   8. ‚úÖ app/main.py")
print("   9. ‚úÖ migrations/migration_script.py")
print("   10. ‚úÖ .env")
print()
print("üéâ Your ImaraFund backend is now complete!")


üîß Generating Missing ImaraFund Backend Files

‚úÖ Database Config: app/database.py (1,182 bytes)
‚úÖ Configuration Settings: app/core/config.py (1,422 bytes)
‚úÖ Database Models: app/models.py (6,159 bytes)
‚úÖ Pydantic Schemas: app/schemas.py (3,625 bytes)
‚úÖ Intelligent Matcher: app/services/intelligent_matcher.py (7,371 bytes)
‚úÖ Gemini AI Service: app/services/gemini_service.py (3,776 bytes)

‚úÖ ALL MISSING FILES GENERATED SUCCESSFULLY!

üìã Generated Files (6/6):
   1. ‚úÖ app/database.py - Database configuration
   2. ‚úÖ app/core/config.py - Settings management
   3. ‚úÖ app/models.py - 63-column Grant + Company models
   4. ‚úÖ app/schemas.py - Pydantic validation schemas
   5. ‚úÖ app/services/intelligent_matcher.py - Your 40/30/20/10 algorithm
   6. ‚úÖ app/services/gemini_service.py - Gemini 2.5 Flash AI service

üìã Already Existing (4/4):
   7. ‚úÖ app/api/endpoints.py
   8. ‚úÖ app/main.py
   9. ‚úÖ migrations/migration_script.py
   10. ‚úÖ .env

üéâ Your ImaraFu

In [10]:
# Final validation that all files are now present
import os
from pathlib import Path

print("üîç Final validation of all ImaraFund backend files...\n")

expected_files = [
    "app/database.py",
    "app/core/config.py", 
    "app/models.py",
    "app/schemas.py",
    "app/services/intelligent_matcher.py",
    "app/services/gemini_service.py",
    "app/api/endpoints.py",
    "app/main.py",
    "migrations/migration_script.py",
    ".env"
]

all_good = True
total_size = 0

for file_path in expected_files:
    full_path = PROJECT_ROOT / file_path
    if full_path.exists():
        size = full_path.stat().st_size
        total_size += size
        print(f"‚úÖ {file_path} ({size:,} bytes)")
    else:
        print(f"‚ùå {file_path} - NOT FOUND")
        all_good = False

if all_good:
    print(f"\n{'=' * 70}")
    print(f"üéâ SUCCESS! All {len(expected_files)} backend files are present!")
    print(f"üì¶ Total backend size: {total_size:,} bytes")
    print(f"{'=' * 70}")
    print("\nüìã Next Steps:")
    print("   1. ‚úÖ Edit .env with your GEMINI_API_KEY")
    print("   2. ‚úÖ Ensure data exists: data/cleaned/grants_cleaned_latest.csv")
    print("   3. ‚úÖ Run migration: python migrations/migration_script.py")
    print("   4. ‚úÖ Start API: uvicorn app.main:app --reload")
    print("   5. ‚úÖ Visit docs: http://localhost:8000/docs")
    print(f"\nüöÄ ImaraFund is ready for deployment!")
else:
    print("\n‚ö†Ô∏è Some files are still missing. Please re-run the generator cell above.")


üîç Final validation of all ImaraFund backend files...

‚úÖ app/database.py (1,182 bytes)
‚úÖ app/core/config.py (1,422 bytes)
‚úÖ app/models.py (6,159 bytes)
‚úÖ app/schemas.py (3,625 bytes)
‚úÖ app/services/intelligent_matcher.py (7,371 bytes)
‚úÖ app/services/gemini_service.py (3,776 bytes)
‚úÖ app/api/endpoints.py (6,778 bytes)
‚úÖ app/main.py (1,934 bytes)
‚úÖ migrations/migration_script.py (12,535 bytes)
‚úÖ .env (435 bytes)

üéâ SUCCESS! All 10 backend files are present!
üì¶ Total backend size: 45,217 bytes

üìã Next Steps:
   1. ‚úÖ Edit .env with your GEMINI_API_KEY
   2. ‚úÖ Ensure data exists: data/cleaned/grants_cleaned_latest.csv
   3. ‚úÖ Run migration: python migrations/migration_script.py
   4. ‚úÖ Start API: uvicorn app.main:app --reload
   5. ‚úÖ Visit docs: http://localhost:8000/docs

üöÄ ImaraFund is ready for deployment!


In [11]:
"""
Comprehensive validation of data files and system readiness
"""
from pathlib import Path
import pandas as pd

PROJECT_ROOT = Path(r"D:\D1\WTF\ImaraFund")

print("=" * 70)
print("üìä ImaraFund System Validation")
print("=" * 70)

# 1. Validate cleaned grants data
grants_paths = [
    PROJECT_ROOT / "data/cleaned/grants_cleaned_latest.csv",
    PROJECT_ROOT / "data/cleaned/grants_cleaned.csv"
]

grants_found = False
for path in grants_paths:
    if path.exists():
        try:
            df = pd.read_csv(path)
            print(f"‚úÖ Grants data: {path.name} ({len(df)} records)")
            
            # Validate key columns for matching algorithm
            key_columns = ['program_name', 'country', 'target_sectors', 'estimated_value_amount']
            missing_cols = [col for col in key_columns if col not in df.columns]
            
            if not missing_cols:
                print("   ‚úÖ All key matching columns present")
            else:
                print(f"   ‚ö†Ô∏è Missing columns: {missing_cols}")
            
            # Check data_source_url fix
            if 'data_source_url' in df.columns:
                missing_urls = df['data_source_url'].isna().sum()
                print(f"   ‚úÖ data_source_url: {len(df) - missing_urls}/{len(df)} filled ({missing_urls} missing)")
            
            grants_found = True
            break
        except Exception as e:
            print(f"   ‚ùå Error reading {path.name}: {e}")

if not grants_found:
    print("‚ùå No valid grants data found!")
    print("   Run the data cleaning script first")

# 2. Validate companies data
companies_path = PROJECT_ROOT / "data/companies/synthetic_companies.csv"
if companies_path.exists():
    try:
        df_companies = pd.read_csv(companies_path)
        print(f"‚úÖ Companies data: {companies_path.name} ({len(df_companies)} records)")
    except Exception as e:
        print(f"‚ùå Error reading companies data: {e}")
else:
    print("‚ö†Ô∏è Companies data not found - place synthetic_companies.csv in data/companies/")

# 3. Validate backend files
backend_files = [
    "app/database.py", "app/core/config.py", "app/models.py", "app/schemas.py",
    "app/services/intelligent_matcher.py", "app/services/gemini_service.py",
    "app/api/endpoints.py", "app/main.py", "migrations/migration_script.py", ".env"
]

all_files_present = True
for file_path in backend_files:
    full_path = PROJECT_ROOT / file_path
    if full_path.exists():
        size = full_path.stat().st_size
        print(f"‚úÖ {file_path} ({size:,} bytes)")
    else:
        print(f"‚ùå {file_path} - MISSING")
        all_files_present = False

print("\n" + "=" * 70)
if grants_found and companies_path.exists() and all_files_present:
    print("üéâ System validation complete - Ready for deployment!")
else:
    print("‚ö†Ô∏è Validation issues found - address before deployment")
print("=" * 70)


üìä ImaraFund System Validation
‚úÖ Grants data: grants_cleaned_latest.csv (103 records)
   ‚úÖ All key matching columns present
   ‚úÖ data_source_url: 103/103 filled (0 missing)
‚úÖ Companies data: synthetic_companies.csv (50 records)
‚úÖ app/database.py (1,182 bytes)
‚úÖ app/core/config.py (1,422 bytes)
‚úÖ app/models.py (6,159 bytes)
‚úÖ app/schemas.py (3,625 bytes)
‚úÖ app/services/intelligent_matcher.py (7,371 bytes)
‚úÖ app/services/gemini_service.py (3,776 bytes)
‚úÖ app/api/endpoints.py (6,778 bytes)
‚úÖ app/main.py (1,934 bytes)
‚úÖ migrations/migration_script.py (12,535 bytes)
‚úÖ .env (450 bytes)

üéâ System validation complete - Ready for deployment!


In [None]:
"""
Configure and validate environment settings
"""
from pathlib import Path
import os

PROJECT_ROOT = Path(r"D:\D1\WTF\ImaraFund")
env_path = PROJECT_ROOT / ".env"

print("üîë Environment Configuration")
print("=" * 70)

# Check if .env exists and validate
if env_path.exists():
    with open(env_path, 'r') as f:
        env_content = f.read()
    
    # Check API key configuration
    if "your_gemini_api_key_here" in env_content:
        print("‚ö†Ô∏è GEMINI_API_KEY needs configuration!")
        print("\nüìã To configure your API key:")
        print("1. Get your key from: https://makersuite.google.com/app/apikey")
        print("2. Replace 'your_gemini_api_key_here' with your actual key")
        print("\nOr run this after getting your key:")
        print("```python")
        print("# Replace with your actual key")
        print("actual_key = 'AIza...'")
        print("")
        print("with open('.env', 'r') as f:")
        print("    content = f.read()")
        print("content = content.replace('your_gemini_api_key_here', actual_key)")
        print("with open('.env', 'w') as f:")
        print("    f.write(content)")
        print("```")
    else:
        print("‚úÖ GEMINI_API_KEY appears to be configured")
        
    # Display current configuration (masked)
    print("\nüìã Current configuration:")
    for line in env_content.split('\n'):
        if line.strip() and not line.startswith('#'):
            if 'API_KEY' in line and '=' in line:
                key, value = line.split('=', 1)
                masked = value[:6] + '...' + value[-4:] if len(value) > 10 else '***'
                print(f"   {key}={masked}")
            else:
                print(f"   {line}")
else:
    print("‚ùå .env file not found!")

print("=" * 70)
