# ODF EU Funding Analysis - With Comprehensive Logging

This notebook processes EU funding opportunities and analyzes their relevance to ODF projects.
Enhanced with comprehensive logging for better execution tracking.

In [None]:
# Import logging configuration
import sys
import os

# Add current directory to path to import our logging module
sys.path.append('.')

from logging_config import setup_notebook_logging, ODFLogger
import pandas as pd
import numpy as np
from datetime import datetime

# Setup logging for this notebook.
# `setup_notebook_logging` and `ODFLogger` come from the project-local
# logging_config module; the "LLMODF_ANALYSIS" / "INFO" arguments are
# presumably the logger name and level — confirm in logging_config.
logger = setup_notebook_logging("LLMODF_ANALYSIS", "INFO")
odf_logger = ODFLogger("LLMODF_ANALYSIS")
# Replace the helper's logger with the one configured above (presumably so
# that section/file/DataFrame messages go through the same logger — confirm).
odf_logger.logger = logger

# Record basic run context (working directory, interpreter version) in the log.
logger.info("Notebook initialized with logging")
logger.info(f"Working directory: {os.getcwd()}")
logger.info(f"Python version: {sys.version}")

## 1. Data Loading and Initial Processing

In [None]:
# Mark the beginning of the "Data Loading" section in the log; the matching
# log_section_end("Data Loading") call closes it at the end of this cell.
odf_logger.log_section_start("Data Loading")

def _strip_field_label(line):
    """Return the value part of a ``label: value`` line, or the line itself."""
    # A bare URL carries no label; splitting on the colon of its scheme
    # would turn "https://host/x" into "//host/x".
    if line.lower().startswith(("http://", "https://")):
        return line
    _label, separator, value = line.partition(":")
    return value.strip() if separator else line


def parse_project_file(filepath):
    """Parse a flat text listing of projects into a DataFrame.

    The file is read as UTF-8, blank lines are ignored, and the remaining
    lines are consumed in consecutive groups of five, mapped positionally to
    ``Title``, ``Link``, ``Status``, ``Start_date`` and ``Deadline``. For each
    line, everything up to the first ``:`` is treated as a label and removed;
    lines without a colon, and lines that are a bare ``http(s)://`` URL, are
    kept verbatim.

    Args:
        filepath: Path of the text file to parse.

    Returns:
        A DataFrame with one row per complete five-line record and the five
        columns listed above. The frame is empty (but keeps those columns)
        when the file is missing, unreadable, or holds no complete record.
        Errors are logged rather than raised.
    """
    columns = ["Title", "Link", "Status", "Start_date", "Deadline"]
    record_size = len(columns)
    logger.info(f"Parsing project file: {filepath}")

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = [line.strip() for line in f if line.strip()]

        logger.info(f"Read {len(lines)} non-empty lines from file")

        # Lines that do not fill a whole record cannot be mapped to fields;
        # report them instead of dropping them silently.
        leftover = len(lines) % record_size
        if leftover:
            logger.warning(
                f"Ignoring {leftover} trailing line(s) that do not form a "
                f"complete {record_size}-line record"
            )

        complete = len(lines) - leftover
        data = [
            {
                column: _strip_field_label(line)
                for column, line in zip(columns, lines[start:start + record_size])
            }
            for start in range(0, complete, record_size)
        ]

        logger.info(f"Successfully parsed {len(data)} project records")
        # Passing the column list keeps the schema stable even with zero records.
        return pd.DataFrame(data, columns=columns)

    except FileNotFoundError:
        logger.error(f"File not found: {filepath}")
        return pd.DataFrame(columns=columns)
    except Exception as e:
        # Best-effort loader: any other failure is logged and reported to the
        # caller as an empty result.
        logger.error(f"Error parsing file {filepath}: {str(e)}")
        return pd.DataFrame(columns=columns)

# Try the candidate project files in order and keep the first one that
# actually yields records. A file that exists but parses to nothing no
# longer stops the search, so the remaining candidates are still tried.
project_files = ["projets_EU_All.txt", "AllProjects_ue.txt"]
df = pd.DataFrame()

for filename in project_files:
    if not os.path.exists(filename):
        logger.warning(f"Project file not found: {filename}")
        continue

    logger.info(f"Found project file: {filename}")
    df = parse_project_file(filename)
    if not df.empty:
        break
    logger.warning(f"No project records parsed from: {filename}")

if df.empty:
    logger.error("No project data could be loaded")
else:
    odf_logger.log_dataframe_info(df, "EU Projects DataFrame")

odf_logger.log_section_end("Data Loading")

## 2. ODF Dataset Creation

In [None]:
# Mark the beginning of the "ODF Dataset Creation" section in the log.
odf_logger.log_section_start("ODF Dataset Creation")

# ODF project data: the records are defined inline below, not read from a file.
logger.info("Creating ODF project dataset...")

# ODF reference projects as a list of dicts. Each tuple in the table below
# is one project; its values are listed in the same order as the field
# names zipped onto it.
odf_data = [
    dict(zip(
        (
            "Project Name",
            "Description",
            "Period",
            "Axes / Thematic Areas",
            "Region",
            "Partners / Funders",
            "Key Figures / Impact",
        ),
        project,
    ))
    for project in (
        (
            "FACTORIAT",
            "Support Deeptech & Hardware startups in prototyping and tech maturation with technical and financial help.",
            "2022–2023",
            "Deeptech, Hardware, Incubation, Prototyping, Acceleration",
            "Tunisia",
            "Industrial partners, ODF network",
            "7 prototypes, 4 pre-industrial units",
        ),
        (
            "National Entrepreneurship Program",
            "Design and implementation of Lesotho's national entrepreneurship ecosystem.",
            "2022–2025",
            "Capacity building, Strategy, Startup Ecosystem",
            "Lesotho",
            "Local government, ODF",
            "500 entrepreneurs, 15 ESOs supported",
        ),
        (
            "Arab Bank Strategy",
            "Develop funding & partnership strategy for African digital economy projects.",
            "N/A",
            "Digital Economy, Strategic Development, Financing",
            "Sub-Saharan Africa",
            "Arab Bank for Economic Development in Africa",
            "Strategy developed",
        ),
        (
            "World Bank Collaboration",
            "Develop startup ecosystem in 5 Southern African countries including Lesotho.",
            "2020–2022",
            "Startup Ecosystem, Digital Economy, Entrepreneurship",
            "Southern Africa",
            "World Bank",
            "SA, Lesotho, Namibia, Botswana, Eswatini",
        ),
        (
            "MDBAN – Business Angels Network",
            "Support early-stage Maghreb startups via diaspora angel investment.",
            "2021–Present",
            "Startup Support, Investment, Diaspora Engagement",
            "MENA, Diaspora",
            "MDBAN, ODF",
            "56 startups financed, 33 angels",
        ),
        (
            "BIATLABS",
            "Incubation program by BIAT Bank, run by ODF.",
            "2016–2018",
            "Startup Incubation, Early-Stage Innovation",
            "Tunisia",
            "BIAT (Private Bank)",
            "4 cohorts, 45 startups, 15 labeled, 6 funded",
        ),
        (
            "TECHNORIAT PPP Program",
            "Bridge research & entrepreneurship via incubation/acceleration of researchers.",
            "2021–2023",
            "Scientific Research, Deeptech, Acceleration, Entrepreneurship",
            "Tunisia",
            "TECHNORIAT, PPP",
            "800 sensitized, 136 preselected, 13 incubated, 8 accelerated",
        ),
        (
            "ABI – Applied Biotech & Innovation",
            "Turn biotech discoveries into marketable solutions with IP licensing model.",
            "N/A",
            "Biotech, One Health, IP Licensing, Innovation",
            "N/A",
            "ODF internal program",
            "IP Model: License IN → Maturation → License OUT",
        ),
    )
]

# Tabulate the ODF project records and report the resulting frame.
odf_df = pd.DataFrame(odf_data)
logger.info("Created ODF dataset with {} projects".format(len(odf_df)))
odf_logger.log_dataframe_info(odf_df, "ODF Projects DataFrame")

# Persist the dataset next to the notebook; the outcome (success or the
# error text) is reported through the file-operation log helper.
odf_csv_path = "ODF_project_dataset.csv"
try:
    odf_df.to_csv(odf_csv_path, index=False)
    odf_logger.log_file_operation("CSV export", odf_csv_path, True)
except Exception as export_error:
    odf_logger.log_file_operation("CSV export", odf_csv_path, False, str(export_error))

odf_logger.log_section_end("ODF Dataset Creation")

## 3. Keywords and Matching Setup

In [None]:
# Mark the beginning of the "Keywords Setup" section in the log.
odf_logger.log_section_start("Keywords Setup")

# Load keywords dictionary: the English -> French pairs are defined inline below.
logger.info("Setting up keywords dictionary for matching...")

# English keyword -> French equivalent, built from ordered (english, french)
# pairs so the insertion order matches the listing below.
all_keywords_text = dict([
    ("strategic consulting", "conseil stratégique"),
    ("technical assistance", "assistance technique"),
    ("institutional support", "appui institutionnel"),
    ("tailored support", "accompagnement personnalisé"),
    ("innovation support", "accompagnement à l'innovation"),
    ("fundraising support", "accompagnement à la levée de fonds"),
    ("organizational development", "développement organisationnel"),
    ("capacity building", "développement de capacités"),
    ("program structuring", "structuration de programme"),
    ("project design", "ingénierie de projet"),
    ("project management", "gestion de projet"),
    ("program steering", "pilotage de programme"),
    ("growth strategy", "stratégie de croissance"),
    ("sector expertise", "expertise sectorielle"),
    ("project evaluation", "évaluation de projets"),
    ("due diligence", "due diligence"),
    ("project implementation", "mise en œuvre de projet"),
    ("roadmap development", "élaboration de feuille de route"),
    ("financial engineering", "ingénierie financière"),
    ("strategic diagnosis", "diagnostic stratégique"),
    ("monitoring and evaluation", "suivi-évaluation"),
    ("operational action plan", "plan d'action opérationnel"),
    ("public-private partnership", "partenariat public-privé"),
    ("ecosystem animation", "animation d'écosystème"),
    ("impact analysis", "analyse d'impact"),
    ("partnership facilitation", "facilitation de partenariats"),
    ("stakeholder mapping", "cartographie des acteurs"),
    ("knowledge transfer", "transfert de compétences"),
    ("collaborative innovation", "innovation collaborative"),
    ("open innovation", "open innovation"),
    ("deeptech", "deeptech"),
    ("biotechnology", "biotechnologie"),
    ("artificial intelligence", "intelligence artificielle"),
    ("machine learning", "machine learning"),
    ("sustainable development", "développement durable"),
    ("digital transformation", "transformation numérique"),
    ("startup ecosystem", "écosystème startup"),
    ("entrepreneurship", "entrepreneuriat"),
    ("incubation", "incubation"),
    ("acceleration", "accélération"),
])

# Summarise the keyword table in the log: its size and the first five
# English keys.
keyword_count = len(all_keywords_text)
sample_keywords = list(all_keywords_text)[:5]
logger.info(f"Loaded {keyword_count} keyword pairs for matching")
logger.info(f"Sample keywords: {sample_keywords}")

# Write one "english : french" line per pair to a reference file; the
# outcome is reported through the file-operation log helper.
try:
    with open('mots_cles_ODF.txt', 'w', encoding='utf-8') as keywords_file:
        keywords_file.writelines(
            f"{english} : {french}\n"
            for english, french in all_keywords_text.items()
        )
    odf_logger.log_file_operation("Keywords export", "mots_cles_ODF.txt", True)
except Exception as export_error:
    odf_logger.log_file_operation("Keywords export", "mots_cles_ODF.txt", False, str(export_error))

odf_logger.log_section_end("Keywords Setup")

## 4. Web Scraping Analysis (Simulated)

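This section would normally contain the web scraping logic. For demonstration, we simulate it: no project pages are fetched, and the first 10 loaded projects are instead matched against the keyword list using the data already parsed above.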

In [None]:
odf_logger.log_section_start("Web Scraping Analysis")

# Nothing is fetched here: relevance is approximated by looking for the
# English keywords in the project data that was already loaded.
logger.info("Simulating web scraping analysis...")

if df.empty:
    logger.warning("No data available for analysis")
    df_analysis = pd.DataFrame()
else:
    sample_results = []

    # Demo run: only the first 10 projects are examined.
    for idx, project in df.head(10).iterrows():
        logger.info(f"Analyzing project {idx+1}: {project['Title'][:50]}...")

        # Searchable text is the title plus the description, when the row
        # has one (an empty string is used otherwise).
        searchable = f"{project['Title']} {project.get('Description', '')}".lower()
        hits = [term for term in all_keywords_text if term.lower() in searchable]

        # A project counts as relevant as soon as one keyword is found.
        relevant = bool(hits)
        joined_hits = ', '.join(hits)

        record = {
            'URL': project['Link'],
            'Title': project['Title'],
            'Status': project['Status'],
            'Start_date': project['Start_date'],
            'Deadline': project['Deadline'],
        }
        record['Pertinence'] = 'Yes' if relevant else 'No'
        record['Matching Word(s)'] = joined_hits if hits else 'None'
        sample_results.append(record)

        if not relevant:
            logger.info("  ✗ Not relevant")
        else:
            logger.info(f"  ✓ Relevant - Keywords: {joined_hits}")

    # One row per analysed project.
    df_analysis = pd.DataFrame(sample_results)

    # Summary statistics for the log.
    total_count = len(df_analysis)
    relevant_count = int((df_analysis['Pertinence'] == 'Yes').sum())
    relevance_rate = (relevant_count / total_count) * 100

    logger.info("Analysis completed:")
    logger.info(f"  - Total projects analyzed: {total_count}")
    logger.info(f"  - Relevant projects: {relevant_count}")
    logger.info(f"  - Relevance rate: {relevance_rate:.1f}%")

    odf_logger.log_dataframe_info(df_analysis, "Analysis Results DataFrame")

    # Persist the results; the outcome is reported through the
    # file-operation log helper.
    results_path = 'projects_with_pertinence.csv'
    try:
        df_analysis.to_csv(results_path, index=False)
        odf_logger.log_file_operation("Analysis results export", "projects_with_pertinence.csv", True)
    except Exception as export_error:
        odf_logger.log_file_operation("Analysis results export", "projects_with_pertinence.csv", False, str(export_error))

odf_logger.log_section_end("Web Scraping Analysis")