In [17]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

chrome_options = Options()
chrome_options.add_argument("--start-maximized")

service = Service(r"C:\Users\HADIL MARAI\Desktop\ODF\chromedriver-win64\chromedriver-win64\chromedriver.exe")
driver = webdriver.Chrome(service=service, options=chrome_options)

start_url = "https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/calls-for-proposals?pageNumber=1&pageSize=50&sortBy=startDate&status=31094501,31094502"
driver.get(start_url)

with open("projets_EU_All.txt", "w", encoding="utf-8") as f:
    for page_num in range(1, 9):  # Pages 1 to 50
        try:
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "a.cft-call-list-table-title-link, a[href*='topic-details'], a[href*='competitive-calls-cs']"))
            )
        except Exception:
            print(f"❌ Aucun projet trouvé sur la page {page_num}.")
            break

        time.sleep(2)
        
        project_links = driver.find_elements(
            By.CSS_SELECTOR,
            "a.cft-call-list-table-title-link, a[href*='topic-details'], a[href*='competitive-calls-cs']"
        )

        print(f"📄 Page {page_num} : {len(project_links)} projets trouvés.")

        for link in project_links:
            titre = link.text.strip()
            href = link.get_attribute("href")

            try:
                container = link.find_element(By.XPATH, "./ancestor::eui-card")
            except:
                container = None

            # --- Main Status ---
            try:
                status_element = container.find_element(
                    By.CSS_SELECTOR, "span.eui-u-text-nowrap.eui-label"
                )
                status = status_element.text.strip()
            except:
                status = "Statut inconnu"

            # --- Dates and Additional Status ---
            opening_date = "Non spécifiée"
            next_deadline = "Non spécifiée"
            additional_status = "Non spécifié"

            try:
                result_card = container.find_element(
                    By.CSS_SELECTOR, "sedia-result-card-type.eui-u-display-block.eui-u-mt-xs.ng-star-inserted"
                )

                # Get all <strong> elements with date values
                strongs = result_card.find_elements(By.CSS_SELECTOR, "strong.ng-star-inserted")
                if len(strongs) >= 1:
                    opening_date = strongs[0].text.strip()
                if len(strongs) >= 2:
                    next_deadline = strongs[1].text.strip()

                # Get the last <span> that is not a "|" separator
                spans = result_card.find_elements(By.CSS_SELECTOR, "span.ng-star-inserted")
                for span in reversed(spans):
                    span_text = span.text.strip()
                    if span_text and span_text != "|":
                        additional_status = span_text
                        break

            except:
                pass

            # --- Output Results ---
            if titre and href and href.startswith("http"):
                f.write(
                    f"Titre: {titre}\n"
                    f"Lien: {href}\n"
                    f"Statut: {status}\n"
                    f"Date d'ouverture: {opening_date}\n"
                    f"Date limite: {next_deadline}\n"
                    
                )
                print(
                    f"✔ {titre} | {href} | Statut: {status} | "
                    f"📅 Ouverture: {opening_date} → Deadline: {next_deadline} | Extra: {additional_status}"
                )








        # Try clicking the next page icon
        try:
            next_icon = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'eui-icon-svg[aria-label="Go to next page"]'))
            )

            # Check if it's disabled
            is_disabled = next_icon.get_attribute("aria-disabled")
            if is_disabled == "true":
                print("✅ Fin de la pagination (bouton désactivé).")
                break

            # Scroll into view and click using JavaScript
            driver.execute_script("arguments[0].scrollIntoView(true);", next_icon)
            time.sleep(1)
            driver.execute_script("arguments[0].click();", next_icon)

            print("➡️ Passage à la page suivante...")
            time.sleep(3)

            # Optional cache cleanup
            driver.delete_all_cookies()
            driver.execute_script("window.localStorage.clear();")
            driver.execute_script("window.sessionStorage.clear();")

        except Exception as e:
            print(f"⚠️ Erreur lors de la tentative de passage à la page suivante : {e}")
            break

driver.quit()


📄 Page 1 : 50 projets trouvés.
✔ MSCA Choose Europe for Science 2025 | https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-MSCA-2025-COFUND-02-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate | Statut: Forthcoming | 📅 Ouverture: 01 October 2025 → Deadline: 03 December 2025 | Extra: Single-stage
✔ New CO2 capture technologies | https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-24?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate | Statut: Forthcoming | 📅 Ouverture: 16 September 2025 → Deadline: 17 February 2026 | Extra: Single-stage
✔ Innovative pathways for low carbon and climate resilient building stock and built environment (Built4People Partnership) | https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-0

In [18]:
import pandas as pd

def parse_project_file(filepath):
    with open(filepath, 'r', encoding='utf-8') as f:
        lines = [line.strip() for line in f if line.strip()]

    data = []
    for i in range(0, len(lines), 5):
        data.append({
            "Title": lines[i].split(":", 1)[1].strip(),
            "Link": lines[i+1].split(":", 1)[1].strip(),
            "Status": lines[i+2].split(":", 1)[1].strip(),
            "Start_date": lines[i+3].split(":", 1)[1].strip(),
            "Deadline": lines[i+4].split(":", 1)[1].strip()
        })

    return pd.DataFrame(data)

df = parse_project_file("projets_EU_All.txt")
df.head()


Unnamed: 0,Title,Link,Status,Start_date,Deadline
0,MSCA Choose Europe for Science 2025,https://ec.europa.eu/info/funding-tenders/oppo...,Forthcoming,01 October 2025,03 December 2025
1,New CO2 capture technologies,https://ec.europa.eu/info/funding-tenders/oppo...,Forthcoming,16 September 2025,17 February 2026
2,Innovative pathways for low carbon and climate...,https://ec.europa.eu/info/funding-tenders/oppo...,Forthcoming,16 September 2025,17 February 2026
3,Optimal combination of low embodied carbon con...,https://ec.europa.eu/info/funding-tenders/oppo...,Forthcoming,16 September 2025,17 February 2026
4,Understand and minimise the environmental impa...,https://ec.europa.eu/info/funding-tenders/oppo...,Forthcoming,16 September 2025,17 February 2026


In [19]:
pd.set_option('display.max_colwidth', None)
print(df['Link'])

0      https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-MSCA-2025-COFUND-02-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
1           https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-24?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
2           https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
3           https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-05?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
4           https://ec.europa.eu/info/funding-tenders/opportunities/port

In [14]:
df.head(15)

Unnamed: 0,Title,Link,Status,Start_date,Deadline
0,MSCA Choose Europe for Science 2025,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-MSCA-2025-COFUND-02-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,01 October 2025,03 December 2025
1,De-risking wave energy technology development through transnational pre-commercial procurement of wave energy research and development,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-13?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026
2,On-site innovative robotic and automated solutions and techniques for more sustainable and less disruptive building renovation and construction,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026
3,Extending the lifetime of crystalline silicon PV modules (EUPI-PV Partnership),"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-12?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026
4,Innovative approaches for the deployment of Positive Energy Districts,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-04?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026
5,Towards commercialisation of Perovskite PV and development of dedicated manufacturing equipment (EUPI-PV Partnership),"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-10?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026
6,Next generation distribution substation for increasing the system resilience,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-18?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026
7,New CO2 capture technologies,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-24?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026
8,Innovative pathways for low carbon and climate resilient building stock and built environment (Built4People Partnership),"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026
9,Large-scale production of liquid advanced biofuels and renewable fuels of non-biological origin,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Forthcoming,16 September 2025,17 February 2026


In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Title       374 non-null    object
 1   Link        374 non-null    object
 2   Status      374 non-null    object
 3   Start_date  374 non-null    object
 4   Deadline    374 non-null    object
dtypes: object(5)
memory usage: 14.7+ KB


In [16]:
df.shape

(374, 5)

### ODF DATASET 

In [20]:
import pandas as pd

# Données extraites
data = [
    {
        "Project Name": "FACTORIAT",
        "Description": "Support Deeptech & Hardware startups in prototyping and tech maturation with technical and financial help.",
        "Period": "2022–2023",
        "Axes / Thematic Areas": "Deeptech, Hardware, Incubation, Prototyping, Acceleration",
        "Region": "Tunisia",
        "Partners / Funders": "Industrial partners, ODF network",
        "Key Figures / Impact": "7 prototypes, 4 pre-industrial units"
    },
    {
        "Project Name": "National Entrepreneurship Program",
        "Description": "Design and implementation of Lesotho's national entrepreneurship ecosystem.",
        "Period": "2022–2025",
        "Axes / Thematic Areas": "Capacity building, Strategy, Startup Ecosystem",
        "Region": "Lesotho",
        "Partners / Funders": "Local government, ODF",
        "Key Figures / Impact": "500 entrepreneurs, 15 ESOs supported"
    },
    {
        "Project Name": "Arab Bank Strategy",
        "Description": "Develop funding & partnership strategy for African digital economy projects.",
        "Period": "N/A",
        "Axes / Thematic Areas": "Digital Economy, Strategic Development, Financing",
        "Region": "Sub-Saharan Africa",
        "Partners / Funders": "Arab Bank for Economic Development in Africa",
        "Key Figures / Impact": "Strategy developed"
    },
    {
        "Project Name": "World Bank Collaboration",
        "Description": "Develop startup ecosystem in 5 Southern African countries including Lesotho.",
        "Period": "2020–2022",
        "Axes / Thematic Areas": "Startup Ecosystem, Digital Economy, Entrepreneurship",
        "Region": "Southern Africa",
        "Partners / Funders": "World Bank",
        "Key Figures / Impact": "SA, Lesotho, Namibia, Botswana, Eswatini"
    },
    {
        "Project Name": "MDBAN – Business Angels Network",
        "Description": "Support early-stage Maghreb startups via diaspora angel investment.",
        "Period": "2021–Present",
        "Axes / Thematic Areas": "Startup Support, Investment, Diaspora Engagement",
        "Region": "MENA, Diaspora",
        "Partners / Funders": "MDBAN, ODF",
        "Key Figures / Impact": "56 startups financed, 33 angels"
    },
    {
        "Project Name": "BIATLABS",
        "Description": "Incubation program by BIAT Bank, run by ODF.",
        "Period": "2016–2018",
        "Axes / Thematic Areas": "Startup Incubation, Early-Stage Innovation",
        "Region": "Tunisia",
        "Partners / Funders": "BIAT (Private Bank)",
        "Key Figures / Impact": "4 cohorts, 45 startups, 15 labeled, 6 funded"
    },
    {
        "Project Name": "TECHNORIAT PPP Program",
        "Description": "Bridge research & entrepreneurship via incubation/acceleration of researchers.",
        "Period": "2021–2023",
        "Axes / Thematic Areas": "Scientific Research, Deeptech, Acceleration, Entrepreneurship",
        "Region": "Tunisia",
        "Partners / Funders": "TECHNORIAT, PPP",
        "Key Figures / Impact": "800 sensitized, 136 preselected, 13 incubated, 8 accelerated"
    },
    {
        "Project Name": "ABI – Applied Biotech & Innovation",
        "Description": "Turn biotech discoveries into marketable solutions with IP licensing model.",
        "Period": "N/A",
        "Axes / Thematic Areas": "Biotech, One Health, IP Licensing, Innovation",
        "Region": "N/A",
        "Partners / Funders": "ODF internal program",
        "Key Figures / Impact": "IP Model: License IN → Maturation → License OUT"
    }
]

# Création du DataFrame
odf_df = pd.DataFrame(data)
# Export en CSV
odf_df.to_csv("ODF_project_dataset.csv", index=False)


In [21]:
# Dictionnaire de correspondance EN => FR
all_keywords_text = {
    "strategic consulting": "conseil stratégique",
    "technical assistance": "assistance technique",
    "institutional support": "appui institutionnel",
    "tailored support": "accompagnement personnalisé",
    "innovation support": "accompagnement à l’innovation",
    "fundraising support": "accompagnement à la levée de fonds",
    "organizational development": "développement organisationnel",
    "capacity building": "développement de capacités",
    "program structuring": "structuration de programme",
    "project design": "ingénierie de projet",
    "project management": "gestion de projet",
    "program steering": "pilotage de programme",
    "growth strategy": "stratégie de croissance",
    "sector expertise": "expertise sectorielle",
    "project evaluation": "évaluation de projets",
    "due diligence": "due diligence",
    "project implementation": "mise en œuvre de projet",
    "roadmap development": "élaboration de feuille de route",
    "financial engineering": "ingénierie financière",
    "strategic diagnosis": "diagnostic stratégique",
    "monitoring and evaluation": "suivi-évaluation",
    "operational action plan": "plan d’action opérationnel",
    "public-private partnership": "partenariat public-privé",
    "ecosystem animation": "animation d’écosystème",
    "impact analysis": "analyse d’impact",
    "partnership facilitation": "facilitation de partenariats",
    "stakeholder mapping": "cartographie des acteurs",
    "knowledge transfer": "transfert de compétences",
    "collaborative innovation": "innovation collaborative",
    "open innovation": "open innovation",
    "economic development": "développement économique",
    "ecosystem development": "développement d’écosystèmes",
    "expansion strategy": "stratégie d’expansion",
    "development strategy": "stratégie de développement",
    "research valorization": "valorisation de la recherche",
    "technology transfer": "transfert de technologie",
    "technology maturation": "maturation technologique",
    "prototype industrialization": "industrialisation de prototypes",
    "technological co-development": "co-développement technologique",
    "intellectual property": "propriété intellectuelle",
    "research results": "résultats de recherche",
    "technology readiness level": "niveau de maturité technologique",
    "university spin-off": "spin-off universitaire",
    "research-industry collaboration": "collaboration recherche-industrie",
    "technology startup": "start-up technologique",
    "technology portfolio": "portefeuille technologique",
    "innovation commercialization": "commercialisation d’innovations",
    "proof of concept": "preuve de concept",
    "prototyping": "prototypage",
    "technology incubator": "incubateur technologique",
    "R&D support": "accompagnement à la R&D",
    "patent exploitation": "exploitation de brevets",
    "call for projects": "appel à projets",
    "call for proposals": "appel à propositions",
    "call for expression of interest": "appel à manifestation d’intérêt",
    "call for applications": "appel à candidatures",
    "call for consultants": "appel à consultants",
    "innovation grant": "subvention à l’innovation",
    "R&D project funding": "financement de projet R&D",
    "support program": "programme d’accompagnement",
    "support fund": "fonds de soutien",
    "innovation competition": "concours d’innovation",
    "incubation program": "programme d’incubation",
    "acceleration program": "programme d’accélération",
    "consulting assignment": "mission de conseil",
    "public procurement": "marché public",
    "strategic partnership": "partenariat stratégique",
    "service delivery": "prestation de services",
    "expert mission": "mission d’expertise",
    "deeptech": "deeptech",
    "biotechnology": "biotechnologie",
    "medtech": "medtech",
    "greentech": "greentech",
    "agritech": "agritech",
    "climatetech": "climatetech",
    "healthtech": "healthtech",
    "nanotechnology": "nanotechnologie",
    "robotics": "robotique",
    "artificial intelligence": "intelligence artificielle",
    "machine learning": "machine learning",
    "industry 4.0": "industrie 4.0",
    "advanced materials": "matériaux avancés",
    "clean technologies": "technologies propres",
    "renewable energy": "énergies renouvelables",
    "cybersecurity": "cybersécurité",
    "data science": "data science",
    "internet of things": "internet des objets",
    "sustainable development": "développement durable",
    "climate resilience": "résilience climatique",
    "economic inclusion": "inclusion économique",
    "empowerment through innovation": "autonomisation par l’innovation",
    "social innovation": "innovation sociale",
    "inclusive growth": "croissance inclusive",
    "social impact": "impact sociétal",
    "youth empowerment": "autonomisation des jeunes",
    "services digitalization": "digitalisation des services",
    "technological sovereignty": "souveraineté technologique",
    "territorial development": "développement territorial",
    "digital transformation": "transformation numérique",
    "researcher empowerment": "autonomisation des chercheurs",
    "diaspora integration": "intégration de la diaspora",
    "cross-sector collaboration": "collaboration intersectorielle",
    "industrial partnerships": "partenariats industriels",
    "market-driven innovation": "innovation orientée marché",
    "innovative SMEs": "PME innovantes",
    "industrial entrepreneurship": "entrepreneuriat industriel",
    "market access": "accès au marché",
    "value chain strengthening": "renforcement des chaînes de valeur",
    "economic impact": "impact économique",
    "economic empowerment": "autonomisation économique",
    "industrial competitiveness": "compétitivité industrielle",
    "applied research": "recherche appliquée",
    "skills development": "montée en compétences",
    "know-how transfer": "transfert de savoir-faire",
    "co-development": "co-développement",
    "technology leadership": "leadership technologique"
}


In [22]:
all_keywords_text

{'strategic consulting': 'conseil stratégique',
 'technical assistance': 'assistance technique',
 'institutional support': 'appui institutionnel',
 'tailored support': 'accompagnement personnalisé',
 'innovation support': 'accompagnement à l’innovation',
 'fundraising support': 'accompagnement à la levée de fonds',
 'organizational development': 'développement organisationnel',
 'capacity building': 'développement de capacités',
 'program structuring': 'structuration de programme',
 'project design': 'ingénierie de projet',
 'project management': 'gestion de projet',
 'program steering': 'pilotage de programme',
 'growth strategy': 'stratégie de croissance',
 'sector expertise': 'expertise sectorielle',
 'project evaluation': 'évaluation de projets',
 'due diligence': 'due diligence',
 'project implementation': 'mise en œuvre de projet',
 'roadmap development': 'élaboration de feuille de route',
 'financial engineering': 'ingénierie financière',
 'strategic diagnosis': 'diagnostic stra

### All the links

#### Detection de la pertinence par les mots clés sur tout les sites 

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd

# === Configuration globale ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")


def clean_text(text):
    lines = text.split("\n")
    cleaned_lines = [line.strip() for line in lines if line.strip() and len(line.strip()) > 1]
    return "\n".join(cleaned_lines)

def analyze_url(url):
    local_driver = webdriver.Chrome(options=options)
    try:
        local_driver.get(url)

        # Attendre le contenu au lieu d'utiliser sleep
        try:
            WebDriverWait(local_driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div.showMore--three-lines"))
            )
        except:
            pass

        # Scroll pour forcer le chargement
        for _ in range(5):
            local_driver.execute_script("window.scrollBy(0, 500);")
        
        # Récupération du texte
        js_code = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : 'Pas de description de projet détectée.';
        """
        full_text = local_driver.execute_script(js_code)
        cleaned = clean_text(full_text)
        matched_keywords = [kw for kw in all_keywords_text if kw.lower() in cleaned.lower()]

        return {
            "URL": url,
            "Pertinence": "Yes" if matched_keywords else "No",
            "Matching Word(s)": ", ".join(matched_keywords) if matched_keywords else ""
        }

    except Exception as e:
        return {
            "URL": url,
            "Pertinence": "Error",
            "Matching Word(s)": str(e)
        }

    finally:
        local_driver.quit()

# === Lancer en parallèle ===
def run_parallel(df_links, max_workers=5):
    url_list = df_links['Link'].dropna().unique().tolist()
    results = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_url = {executor.submit(analyze_url, url): url for url in url_list}
        for future in as_completed(future_to_url):
            results.append(future.result())

    return pd.DataFrame(results)


results = []
for url in df['Link']:
    print(f"Traitement : {url}")
    result = analyze_url(url)
    results.append(result)

# === Conversion en DataFrame final ===
df_final = pd.DataFrame(results)
df_final


Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-MSCA-2025-COFUND-02-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-24?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-05?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
Traitement : https://ec.europa.eu/info/funding-tenders/opportun

KeyboardInterrupt: 

In [22]:
df_yes = df_final[df_final["Pertinence"] == "Yes"]

In [23]:
resultsYES_df_keywords = pd.DataFrame(df_yes)
resultsYES_df_keywords.to_excel("resultats_pertinenceYES_keywords.xlsx", index=False)
print("\n✅ Résultats sauvegardés dans ''.")


✅ Résultats sauvegardés dans ''.


In [8]:
import pandas as pd
df_yes=pd.read_excel("resultats_pertinenceYES_keywords.xlsx")

#### Ajouter les colonnes de status et les dates

In [16]:
df_yes[['Title', 'Status', 'Start_date', 'Deadline']] = df[['Title', 'Status', 'Start_date', 'Deadline']]


NameError: name 'df' is not defined

### Sortir seulement les projets qui sont Pertinents

In [None]:
df_yes_final = df_yes[df_yes['Pertinence'] == 'Yes'].copy()


In [None]:
df_yes_final

Unnamed: 0,URL,Pertinence,Matching Word(s),Title,Status,Start_date,Deadline
0,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-PATHFINDERCHALLENGES-01-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Yes,"biotechnology, artificial intelligence, climate resilience",Biotech for Climate Resilient Crops and Plant-Based Biomanufacturing,Forthcoming,24 July 2025,29 October 2025
1,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/LIFE-2025-CET-INDUSTRY?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Yes,"capacity building, renewable energy",Supporting the clean energy transition of European industry and businesses,Open For Submission,24 April 2025,23 September 2025
2,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/LIFE-2025-CET-PRIVAFIN?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"technical assistance, renewable energy",Crowding in private finance,Open For Submission,24 April 2025,23 September 2025
3,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-2025-BESTUSE-08-NETWORKSICs?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"internet of things, digital transformation",Network of Safer Internet Centres (SICs),Open For Submission,15 April 2025,02 September 2025
4,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-2025-AI-08-AGRIFOOD?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"capacity building, digital transformation",Multi-Country project in Agri-Food,Open For Submission,15 April 2025,02 September 2025
...,...,...,...,...,...,...,...
69,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-ACCELERATOR-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate",Yes,technology readiness level,EIC Accelerator 2025 - Short application,Open For Submission,29 October 2024,18 December 2025
70,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CREA-MEDIA-2025-FILMOVE?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate",Yes,call for proposals,Films on the Move,Open For Submission,01 October 2024,17 July 2025
71,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/JTM-2022-2025-PSLF-LOAN-SCHEMES?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=8&pageSize=50&sortBy=startDate",Yes,renewable energy,PSLF-LOAN SCHEMES,Open For Submission,19 July 2022,11 September 2025
72,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/JTM-2022-2025-PSLF-STANDALONE-PROJECTS?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=8&pageSize=50&sortBy=startDate",Yes,renewable energy,PSLF-PROJECTS,Open For Submission,19 July 2022,11 September 2025


### TEST AVEC LE MAX WORKER 

In [11]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd
from groq import Groq
import re
from tqdm import tqdm

# === CONFIGURATION GROQ ===
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

client = Groq(api_key=os.getenv('GROQ_API_KEY'))
MODEL_NAME = "llama3-70b-8192"

# === CONFIG SELENIUM HEADLESS ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === TEXT CLEANUP & LIMITING FUNCTION ===
def clean_and_limit_text(text, max_words=800):
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + " [...] (texte tronqué)"
    return text

# === Extraction texte depuis URL ===
def extract_text_from_url(url):
    try:
        driver.get(url)
        time.sleep(5)

        for _ in range(10):
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.3)
        time.sleep(5)

        js_code = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : 'Pas de description de projet détectée.';
        """
        text = driver.execute_script(js_code)

        # Try also extracting UKRI accordion content
        try:
            open_all_btn = driver.find_element(By.CLASS_NAME, "govuk-accordion__open-all")
            open_all_btn.click()
            time.sleep(1)
        except:
            pass

        js_accordion = """
        let acc = document.querySelector('div.govuk-accordion.ukri-accordion');
        return acc ? acc.innerText.trim() : '';
        """
        accordion_text = driver.execute_script(js_accordion)

        full_text = f"{text}\n\n---\n\n{accordion_text}".strip()
        return full_text if full_text else "Pas de contenu détecté."

    except Exception as e:
        return f"Erreur Selenium: {e}"

# === Prompt pour Groq ===
def build_prompt(projects, site_text):
    projects_text = "\n".join([
        f"{i+1}. {p['Project Name']} : {p['Description']}, axe {p['Axes / Thematic Areas']}" 
        for i, p in enumerate(projects)
    ])
    return f"""
Tu es un expert en analyse de projets d'entreprise.

Voici une liste de projets que cette entreprise a déjà réalisés avec leurs descriptions et axes principaux :

{projects_text}

Voici maintenant le contenu d'un appel à projets spécifique extrait d'une page du portail européen :

{site_text}

Peux-tu me dire si cette opportunité est pertinente par rapport aux projets que l'entreprise a déjà réalisés ?  
Merci de répondre de façon claire :  
- Pertinence : Oui / Non  
- Projets similaires détectés : [liste]  
- Résumé rapide expliquant ta réponse.
"""

# === Requête à Groq ===
def query_groq(prompt, model=MODEL_NAME, max_completion_tokens=3000, temperature=1.0, stream=False, delay_between_requests=10):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Tu es un expert en comparaison de projets R&D."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=stream
        )

        if stream:
            result = ""
            for chunk in response:
                delta = chunk.choices[0].delta.content
                if delta:
                    result += delta
            time.sleep(delay_between_requests)
            return result.strip()
        else:
            time.sleep(delay_between_requests)
            return response.choices[0].message.content.strip()

    except Exception as e:
        return f"Erreur Groq: {e}"

# === Analyse du résultat Groq ===
def parse_result(text):
    pertinence = "Inconnu"
    resume = ""
    try:
        match = re.search(r"(?i)pertinence\s*[:\-–]\s*(oui|non)", text)
        if match:
            pertinence = match.group(1).capitalize()
        resume_match = re.search(r"(?i)résumé.*?:\s*(.+)", text)
        if resume_match:
            resume = resume_match.group(1).strip()
        else:
            resume = "\n".join(text.splitlines()[1:4]).strip()
    except:
        pass
    return pertinence, resume

# === Analyse d'une seule URL ===
def analyze_url(url, projects_list):
    print(f"🟡 Traitement : {url}")
    try:
        site_text_raw = extract_text_from_url(url)
        site_text = clean_and_limit_text(site_text_raw, max_words=800)
        projects_sample = projects_list[:3]  # Limiter à 3 projets
        prompt = build_prompt(projects_sample, site_text)
        result_text = query_groq(prompt)
        pertinence, resume = parse_result(result_text)

        return {
            "URL": url,
            "Pertinence LLM": pertinence,
            "Résumé LLM": resume,
            "Réponse brute": result_text
        }
    except Exception as e:
        return {
            "URL": url,
            "Pertinence LLM": "Erreur",
            "Résumé LLM": f"Erreur pendant l'analyse : {e}",
            "Réponse brute": str(e)
        }





# === MAIN ===
if __name__ == "__main__":
    from concurrent.futures import ThreadPoolExecutor, as_completed

    try:
        # Préparation des données
        projects_list = odf_df[['Project Name', 'Description', 'Axes / Thematic Areas']] \
            .dropna().to_dict(orient='records')
        url_list = df_yes['URL'].dropna().unique().tolist()

        # Exécution parallèle
        results = []
        max_workers = min(6, len(url_list))  # Ajustable selon ta machine
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_url = {
                executor.submit(analyze_url, url, projects_list): url for url in url_list
            }
            for future in tqdm(as_completed(future_to_url), total=len(future_to_url), desc="🔍 Analyse des URLs"):
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    print(f"❌ Erreur avec l'URL {future_to_url[future]} : {e}")
                    results.append({'URL': future_to_url[future], 'Pertinence LLM': None, 'Résumé LLM': None})

        # Création du DataFrame des résultats
        results_df = pd.DataFrame(results)

        # Fusion avec les résultats existants
        df_final_llm = df_yes.merge(results_df, on="URL", how="left")

        # Sauvegarde
        df_final_llm.to_csv("df_yes_avec_pertinence_et_resume.csv", index=False)
        print("\n✅ Résultats enrichis sauvegardés dans 'df_yes_avec_pertinence_et_resume.csv'.")
        print(df_final_llm[['URL', 'Pertinence LLM', 'Résumé LLM']].head())

    except NameError:
        print("❌ Les DataFrames 'odf_df' et 'df_yes' doivent être définis avant l'exécution.")
    finally:
        driver.quit()


Error sending stats to Plausible: error sending request for url (https://plausible.io/api/event)


🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D4-02?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-02?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-20?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opp

🔍 Analyse des URLs:   1%|          | 1/114 [00:39<1:13:45, 39.16s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:   2%|▏         | 2/114 [00:39<30:21, 16.26s/it]  

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-02-D3-07?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:   4%|▎         | 4/114 [00:39<10:02,  5.48s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D2-04?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D6-07?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:   4%|▍         | 5/114 [00:40<06:42,  3.70s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D6-08?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:   5%|▌         | 6/114 [00:41<04:56,  2.74s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D6-14?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:   6%|▌         | 7/114 [01:04<16:43,  9.37s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D6-09?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:   9%|▉         | 10/114 [01:05<06:06,  3.53s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D6-06?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D6-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D6-10?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  10%|▉         | 11/114 [01:05<04:39,  2.72s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL5-2026-01-D2-09?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  11%|█         | 12/114 [01:06<03:45,  2.22s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIE-2026-01-CONNECT-02?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  11%|█▏        | 13/114 [01:29<13:22,  7.94s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIE-2026-01-CONNECT-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  12%|█▏        | 14/114 [01:30<09:50,  5.91s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-PATHFINDERCHALLENGES-01-04?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  13%|█▎        | 15/114 [01:30<07:17,  4.42s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-PATHFINDERCHALLENGES-01-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  15%|█▍        | 17/114 [01:31<03:45,  2.33s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-PATHFINDERCHALLENGES-01-02?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-PATHFINDERCHALLENGES-01-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-IRIS2-2025-QCI-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  18%|█▊        | 20/114 [02:18<14:17,  9.12s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CERV-2025-EQUAL-DIVERSITYMGT?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CERV-2025-EQUAL-ANTISEMITISM?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  18%|█▊        | 21/114 [02:19<10:35,  6.84s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CERV-2025-EQUAL-LGBTIQ?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  19%|█▉        | 22/114 [02:19<07:50,  5.12s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CERV-2025-EQUAL-RACI-DISC?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CERV-2025-EQUAL-RESTRICTED?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate



🔍 Analyse des URLs:  21%|██        | 24/114 [02:26<06:31,  4.35s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/EUBA-EFSA-2025-IDATA-01-01-Lot1?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  23%|██▎       | 26/114 [02:57<11:49,  8.06s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/EUBA-EFSA-2025-IDATA-01-02-Lot2?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-JU-CHIPS-2025-SG-SSOI?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  24%|██▎       | 27/114 [02:57<08:38,  5.95s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EUROHPC-JU-2025-IHPCSS-02?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  25%|██▍       | 28/114 [02:58<06:15,  4.37s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CEF-DIG-2025-PLATFORMS-WORKS?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CERV-2025-CITIZENS-REM-HISTMIGRATION?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  26%|██▋       | 30/114 [02:58<03:30,  2.51s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/RFCS-2025-02-PDP?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  28%|██▊       | 32/114 [03:42<12:39,  9.27s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/RFCS-2025-02-RPJ?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/RFCS-2025-01-RPJ?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  29%|██▉       | 33/114 [03:42<09:13,  6.84s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/RFCS-2025-01-PDP?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/RFCS-2025-01-AM?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/RFCS-2025-02-AM?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  32%|███▏      | 36/114 [03:42<04:13,  3.25s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-SNS-2025-02-STREAM-D-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  34%|███▍      | 39/114 [03:56<04:14,  3.40s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-IHI-2025-11-02-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-IHI-2025-11-04-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-IHI-2025-11-03-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  35%|███▌      | 40/114 [03:57<03:34,  2.90s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-IHI-2025-11-01-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  37%|███▋      | 42/114 [04:54<13:55, 11.60s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-IHI-2025-11-05-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/EUBA-EFSA-2025-BIOHAW-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  38%|███▊      | 43/114 [05:18<17:27, 14.75s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-ECCC-2025-DEPLOY-CYBER-08-NCC?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-ECCC-2025-DEPLOY-CYBER-08-CYBERHEALTH?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=3&pageSize=50&sortBy=startDate



🔍 Analyse des URLs:  40%|████      | 46/114 [05:19<07:31,  6.64s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL3-2025-01-BM-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-EUROHPC-2026-COE-LH-01-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  41%|████      | 47/114 [06:07<19:04, 17.09s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-HUMAN-18?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  42%|████▏     | 48/114 [06:08<14:04, 12.79s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-DIGITAL-EMERGING-09?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  43%|████▎     | 49/114 [06:22<14:11, 13.09s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-HUMAN-17?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  44%|████▍     | 50/114 [06:32<13:01, 12.20s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-DIGITAL-EMERGING-04?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-MATERIALS-46?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-DATA-12?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-DATA-13?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate



🔍 Analyse des URLs:  47%|████▋     | 54/114 [06:33<04:49,  4.82s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-DATA-10?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  48%|████▊     | 55/114 [07:35<14:48, 15.07s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-EUROHPC-2026-COE-LH-01-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  49%|████▉     | 56/114 [07:36<11:48, 12.22s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-EUROHPC-2026-COE-LH-01-02?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  50%|█████     | 57/114 [07:44<10:50, 11.42s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-HUMAN-16?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-HUMAN-16?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-DATA-08?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  53%|█████▎    | 60/114 [07:45<05:15,  5.84s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-DATA-13?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  54%|█████▎    | 61/114 [08:25<11:01, 12.47s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-03-HUMAN-15?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-04-DIGITAL-EMERGING-05?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-04-DIGITAL-EMERGING-07?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate



🔍 Analyse des URLs:  58%|█████▊    | 66/114 [08:26<03:54,  4.89s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-04-DATA-02?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-04-DIGITAL-EMERGING-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-04-DIGITAL-EMERGING-04?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  61%|██████    | 69/114 [09:48<09:15, 12.34s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-04-HUMAN-08?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-04-DATA-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-JU-CHIPS-2025-CSA-DET?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  61%|██████▏   | 70/114 [09:49<07:20, 10.01s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/EURATOM-2026-SOFT-PRIZE?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-MSCA-2025-DN-01-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  63%|██████▎   | 72/114 [09:49<04:33,  6.52s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CERV-2025-CHAR-LITI?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  64%|██████▍   | 73/114 [10:52<12:20, 18.05s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-TWIN-TRANSITION-05?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  65%|██████▍   | 74/114 [10:53<09:22, 14.06s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-TWIN-TRANSITION-32?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-HUMAN-60?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/I3-2025-INV2a?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  68%|██████▊   | 77/114 [10:56<04:43,  7.66s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/I3-2025-INV1?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  68%|██████▊   | 78/114 [10:56<03:46,  6.28s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-05-TWIN-TRANSITION-11-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  69%|██████▉   | 79/114 [11:17<05:29,  9.42s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-05-MATERIALS-42-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  72%|███████▏  | 82/114 [11:22<02:42,  5.07s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-05-MATERIALS-51-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-05-MATERIALS-43-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-2025-05-TWIN-TRANSITION-35-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  73%|███████▎  | 83/114 [11:23<02:04,  4.01s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-01-TOOL-05?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  74%|███████▎  | 84/114 [11:59<05:53, 11.80s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-01-TOOL-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  75%|███████▍  | 85/114 [12:03<04:45,  9.83s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-01-IND-02?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  75%|███████▌  | 86/114 [12:12<04:29,  9.62s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-01-IND-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  76%|███████▋  | 87/114 [12:12<03:09,  7.03s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-01-DISEASE-06?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  77%|███████▋  | 88/114 [12:13<02:18,  5.31s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-01-DISEASE-04?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-01-CARE-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  79%|███████▉  | 90/114 [12:49<04:19, 10.82s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-03-DISEASE-02-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  80%|███████▉  | 91/114 [13:02<04:24, 11.51s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-01-DISEASE-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  82%|████████▏ | 93/114 [13:08<02:32,  7.28s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-03-ENVHLTH-02-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-HLTH-2025-03-IND-03-two-stage?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  82%|████████▏ | 94/114 [13:08<01:46,  5.33s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-SNS-2025-01-STREAM-C-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  84%|████████▍ | 96/114 [13:08<00:51,  2.85s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-MATERIALS-64?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-MATERIALS-52?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  85%|████████▌ | 97/114 [14:19<06:22, 22.52s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-HUMAN-66?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  88%|████████▊ | 100/114 [14:19<02:02,  8.72s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-TWIN-TRANSITION-37?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-HUMAN-65?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-MATERIALS-44?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  89%|████████▊ | 101/114 [14:20<01:26,  6.66s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-HUMAN-64?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  89%|████████▉ | 102/114 [14:23<01:10,  5.85s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-DIGITAL-61?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate


🔍 Analyse des URLs:  90%|█████████ | 103/114 [15:36<04:25, 24.10s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-HUMAN-63?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-TWIN-TRANSITION-31?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-MATERIALS-62?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate
🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-TWIN-TRANSITION-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy

🔍 Analyse des URLs:  95%|█████████▍| 108/114 [15:37<00:49,  8.27s/it]

🟡 Traitement : https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-CL4-INDUSTRY-2025-01-HUMAN-62?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate


🔍 Analyse des URLs: 100%|██████████| 114/114 [16:59<00:00,  8.94s/it]



✅ Résultats enrichis sauvegardés dans 'df_yes_avec_pertinence_et_resume.csv'.
                                                 URL Pertinence LLM  \
0  https://ec.europa.eu/info/funding-tenders/oppo...            Non   
1  https://ec.europa.eu/info/funding-tenders/oppo...            Non   
2  https://ec.europa.eu/info/funding-tenders/oppo...            Non   
3  https://ec.europa.eu/info/funding-tenders/oppo...            Non   
4  https://ec.europa.eu/info/funding-tenders/oppo...            Non   

                                          Résumé LLM  
0  L'appel à projet ne fournit aucune description...  
1  L'appel à projets ne contient aucune informati...  
2  L'appel à projets ne fournit pas de descriptio...  
3  L'appel à projet ne fournit aucune description...  
4  L'appel à projet ne fournit aucune description...  


In [12]:
df_final_llm

Unnamed: 0,URL,Pertinence,Matching Word(s),Pertinence LLM,Résumé LLM,Réponse brute
0,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"climate resilience, social innovation",Non,L'appel à projet ne fournit aucune description...,Pertinence : Non\n\n Projets similaires détect...
1,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,renewable energy,Non,L'appel à projets ne contient aucune informati...,Pertinence : Non\n\nProjets similaires détecté...
2,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"sustainable development, industrial competitiv...",Non,L'appel à projets ne fournit pas de descriptio...,Pertinence : Non\n\nProjets similaires détecté...
3,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"economic development, renewable energy, intern...",Non,L'appel à projet ne fournit aucune description...,Pertinence : Non\n\nProjets similaires détecté...
4,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,renewable energy,Non,L'appel à projet ne fournit aucune description...,Pertinence : Non\n\nProjets similaires détecté...
...,...,...,...,...,...,...
109,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,advanced materials,Oui,L'appel à projets vise à utiliser l'intelligen...,Pertinence : Oui\n\nProjets similaires détecté...
110,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"strategic partnership, social impact",Oui,La pertinence de cet appel à projets est élevé...,Pertinence : Oui\n\nProjets similaires détecté...
111,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"robotics, social innovation",Oui,Cette opportunité de projet est pertinente car...,Pertinence : Oui\n\nProjets similaires détecté...
112,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"robotics, artificial intelligence",Oui,L'appel à projet vise à valoriser les résultat...,"Après analyse, voici ma réponse :\n\nPertinenc..."


In [13]:
df_final_llm.to_excel("df_LLM_ALL_EU.xlsx", index=False)
    

In [32]:

df_final_llm

Unnamed: 0,URL,Pertinence,Matching Word(s),Title,Status,Start_date,Deadline,Pertinence LLM,Résumé LLM,Réponse brute
0,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-PATHFINDERCHALLENGES-01-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Yes,"biotechnology, artificial intelligence, climate resilience",Biotech for Climate Resilient Crops and Plant-Based Biomanufacturing,Forthcoming,24 July 2025,29 October 2025,Non,"L'appel à projets ne fournit pas de description du projet, ce qui ne permet pas de déterminer sa pertinence avec les projets déjà réalisés par l'entreprise. Il est donc impossible de juger si cette opportunité est pertinente sans plus d'informations sur le projet.","Pertinence : Non\n\nProjets similaires détectés : Aucun\n\nRésumé rapide : L'appel à projets ne fournit pas de description du projet, ce qui ne permet pas de déterminer sa pertinence avec les projets déjà réalisés par l'entreprise. Il est donc impossible de juger si cette opportunité est pertinente sans plus d'informations sur le projet."
1,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/LIFE-2025-CET-INDUSTRY?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=1&pageSize=50&sortBy=startDate",Yes,"capacity building, renewable energy",Supporting the clean energy transition of European industry and businesses,Open For Submission,24 April 2025,23 September 2025,Non,"L'appel à projets est spécifiquement centré sur la transition énergétique et la réduction des émissions de gaz à effet de serre dans l'industrie, avec un focus sur la collaboration entre les secteurs industriels et les fournisseurs de technologies pour déployer des solutions d'énergie propre. Les projets précédents de l'entreprise ne montrent pas de liens directs avec ce domaine spécifique, mais plutôt une expertise dans les domaines de l'incubation, de l'accélération, de la stratégie et du développement économique numérique.","- Pertinence : Non \n- Projets similaires détectés : Aucun \n- Résumé rapide : L'appel à projets est spécifiquement centré sur la transition énergétique et la réduction des émissions de gaz à effet de serre dans l'industrie, avec un focus sur la collaboration entre les secteurs industriels et les fournisseurs de technologies pour déployer des solutions d'énergie propre. Les projets précédents de l'entreprise ne montrent pas de liens directs avec ce domaine spécifique, mais plutôt une expertise dans les domaines de l'incubation, de l'accélération, de la stratégie et du développement économique numérique."
2,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/LIFE-2025-CET-PRIVAFIN?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"technical assistance, renewable energy",Crowding in private finance,Open For Submission,24 April 2025,23 September 2025,Oui,"L'appel à projets concerne la mise en place de mécanismes de financement innovants pour les énergies renouvelables et l'efficacité énergétique, ce qui est très proche de l'axe ""Digital Economy, Strategic Development, Financing"" du projet Arab Bank Strategy. Bien que ce dernier projet ne soit pas directement lié à l'énergie, il concerne également la mise en place de stratégies de financement pour des projets économiques. L'expertise de l'entreprise dans ce domaine peut être très pertinente pour répondre à cet appel à projets.","Pertinence : Oui\n\nProjets similaires détectés : Arab Bank Strategy\n\n Résumé rapide : L'appel à projets concerne la mise en place de mécanismes de financement innovants pour les énergies renouvelables et l'efficacité énergétique, ce qui est très proche de l'axe ""Digital Economy, Strategic Development, Financing"" du projet Arab Bank Strategy. Bien que ce dernier projet ne soit pas directement lié à l'énergie, il concerne également la mise en place de stratégies de financement pour des projets économiques. L'expertise de l'entreprise dans ce domaine peut être très pertinente pour répondre à cet appel à projets."
3,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-2025-BESTUSE-08-NETWORKSICs?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"internet of things, digital transformation",Network of Safer Internet Centres (SICs),Open For Submission,15 April 2025,02 September 2025,Non,"L'appel à projets concerne la création d'un centre pour une internet plus sûre, notamment pour les enfants, avec des activités de sensibilisation, d'assistance et de lutte contre les contenus illicites. Les projets de l'entreprise précédemment réalisés ne montrent pas de lien direct avec ce type de sujet, les axes principaux étant plutôt liés à l'entrepreunariat, la stratégie, l'incubation, la prototypage, le financement, etc. Il n'y a donc pas de projet similaire détecté.","Après analyse de l'appel à projets et des projets de l'entreprise, je réponds :\n\n- Pertinence : Non\n- Projets similaires détectés : Aucun\n- Résumé rapide : L'appel à projets concerne la création d'un centre pour une internet plus sûre, notamment pour les enfants, avec des activités de sensibilisation, d'assistance et de lutte contre les contenus illicites. Les projets de l'entreprise précédemment réalisés ne montrent pas de lien direct avec ce type de sujet, les axes principaux étant plutôt liés à l'entrepreunariat, la stratégie, l'incubation, la prototypage, le financement, etc. Il n'y a donc pas de projet similaire détecté."
4,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-2025-AI-08-AGRIFOOD?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"capacity building, digital transformation",Multi-Country project in Agri-Food,Open For Submission,15 April 2025,02 September 2025,Non,"L'appel à projets concerne principalement le développement d'une infrastructure numérique pour le secteur agroalimentaire à l'échelle européenne, avec un focus sur la collaboration et l'échange de données. Les projets déjà réalisés par l'entreprise se concentrent plutôt sur l'incubation, le prototypage, la stratégie et le financement de startups et d'écosystèmes d'entrepreneuriat, sans lien direct avec le secteur agroalimentaire ou les infrastructures numériques.","Après analyse de l'appel à projets et des projets déjà réalisés par l'entreprise, voici ma réponse :\n\n- Pertinence : Non\n- Projets similaires détectés : Aucun\n- Résumé rapide : L'appel à projets concerne principalement le développement d'une infrastructure numérique pour le secteur agroalimentaire à l'échelle européenne, avec un focus sur la collaboration et l'échange de données. Les projets déjà réalisés par l'entreprise se concentrent plutôt sur l'incubation, le prototypage, la stratégie et le financement de startups et d'écosystèmes d'entrepreneuriat, sans lien direct avec le secteur agroalimentaire ou les infrastructures numériques."
...,...,...,...,...,...,...,...,...,...,...
69,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-ACCELERATOR-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate",Yes,technology readiness level,EIC Accelerator 2025 - Short application,Open For Submission,29 October 2024,18 December 2025,Oui,"La pertinence de cette opportunité est évidente car elle s'inscrit dans les axes de l'entreprise, en particulier avec le projet FACTORIAT qui supporte les startups Deeptech et Hardware en les aidant à prototyper et à matureurer technologiquement. Le scopes de l'EIC Accelerator, qui vise à soutenir les innovations à fort impact avec un potentiel de création de nouveaux marchés ou de disruption de marchés existants, est en phase avec les objectifs de FACTORIAT. De plus, l'EIC Accelerator met l'accent sur les innovations deeptech et le patient capital, ce qui correspond aux axes de l'entreprise.","Pertinence : Oui\n\nProjets similaires détectés : FACTORIAT\n\nRésumé rapide : La pertinence de cette opportunité est évidente car elle s'inscrit dans les axes de l'entreprise, en particulier avec le projet FACTORIAT qui supporte les startups Deeptech et Hardware en les aidant à prototyper et à matureurer technologiquement. Le scopes de l'EIC Accelerator, qui vise à soutenir les innovations à fort impact avec un potentiel de création de nouveaux marchés ou de disruption de marchés existants, est en phase avec les objectifs de FACTORIAT. De plus, l'EIC Accelerator met l'accent sur les innovations deeptech et le patient capital, ce qui correspond aux axes de l'entreprise."
70,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/CREA-MEDIA-2025-FILMOVE?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=7&pageSize=50&sortBy=startDate",Yes,call for proposals,Films on the Move,Open For Submission,01 October 2024,17 July 2025,Non,"L'appel à projets concerne la distribution de films européens, ce qui est très différent des domaines d'expertise de l'entreprise, qui ont trait à la Deeptech, l'incubation, la stratégie, la capacité et le financement d'entreprises et de start-ups. Les objectifs et les activités décrits dans l'appel à projets ne correspondent pas aux axes principaux des projets déjà réalisés par l'entreprise.","Pertinence : Non\n\nProjets similaires détectés : Aucun\n\nRésumé rapide : L'appel à projets concerne la distribution de films européens, ce qui est très différent des domaines d'expertise de l'entreprise, qui ont trait à la Deeptech, l'incubation, la stratégie, la capacité et le financement d'entreprises et de start-ups. Les objectifs et les activités décrits dans l'appel à projets ne correspondent pas aux axes principaux des projets déjà réalisés par l'entreprise."
71,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/JTM-2022-2025-PSLF-LOAN-SCHEMES?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=8&pageSize=50&sortBy=startDate",Yes,renewable energy,PSLF-LOAN SCHEMES,Open For Submission,19 July 2022,11 September 2025,Non,"Les projets déjà réalisés par l'entreprise se concentrent sur l'incubation, l'accélération et le développement de startups, ainsi que sur la mise en place de stratégies pour l'économie digitale et les écosystèmes d'entrepreneuriat. L'appel à projet européen ciblé Seems to focus on funding sustainable infrastructure projects and investments in the context of the EU's climate and energy transition goals. Les deux thématiques sont très différentes, et il n'y a pas de liens évidents entre les projets de l'entreprise et cet appel à projet.","Pertinence : Non\n\nProjets similaires détectés : Aucun\n\nRésumé rapide : Les projets déjà réalisés par l'entreprise se concentrent sur l'incubation, l'accélération et le développement de startups, ainsi que sur la mise en place de stratégies pour l'économie digitale et les écosystèmes d'entrepreneuriat. L'appel à projet européen ciblé Seems to focus on funding sustainable infrastructure projects and investments in the context of the EU's climate and energy transition goals. Les deux thématiques sont très différentes, et il n'y a pas de liens évidents entre les projets de l'entreprise et cet appel à projet."
72,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/JTM-2022-2025-PSLF-STANDALONE-PROJECTS?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=8&pageSize=50&sortBy=startDate",Yes,renewable energy,PSLF-PROJECTS,Open For Submission,19 July 2022,11 September 2025,Non,"Les projets déjà réalisés par l'entreprise sont principalement liés à l'incubation, l'accélération, la stratégie et le financement de startups et d'écosystèmes d'entreprises, tandis que l'appel à projet européen est centré sur des investissements durables et des projets de transition énergétique et environnementale. Il n'y a pas de similitude évidente entre les deux.","Pertinence : Non\n\nProjets similaires détectés : Aucun\n\nRésumé rapide : Les projets déjà réalisés par l'entreprise sont principalement liés à l'incubation, l'accélération, la stratégie et le financement de startups et d'écosystèmes d'entreprises, tandis que l'appel à projet européen est centré sur des investissements durables et des projets de transition énergétique et environnementale. Il n'y a pas de similitude évidente entre les deux.\n\nCependant, il est possible que l'entreprise puisse adapter ses compétences en stratégie, financement et capacité d'accélérer les startups pour répondre à cet appel à projet en collaborant avec d'autres acteurs ayant une expertise dans les domaines de la transition énergétique et environnementale."


In [3]:
df_final_llm.shape

NameError: name 'df_final_llm' is not defined

In [14]:
df_yes_final = df_final_llm[df_final_llm['Pertinence LLM'] == 'Oui'].copy()


In [38]:
df_yes_final.to_excel("df_LLM_YES_EU.xlsx", index=False)

In [15]:
df_yes_final

Unnamed: 0,URL,Pertinence,Matching Word(s),Pertinence LLM,Résumé LLM,Réponse brute
12,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"artificial intelligence, renewable energy",Oui,Cette opportunité est pertinente car elle vise...,- Pertinence : Oui\n- Projets similaires détec...
13,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,proof of concept,Oui,La pertinence est établie en raison de la prox...,- Pertinence : Oui\n- Projets similaires détec...
14,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"artificial intelligence, machine learning",Oui,**,**Pertinence : Oui**\n\n**Projets similaires d...
15,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"artificial intelligence, social impact",Oui,La pertinence est établie car l'appel à projet...,- Pertinence : Oui\n- Projets similaires détec...
16,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,monitoring and evaluation,Oui,L'appel à projets concerne le développement d'...,- Pertinence : Oui\n- Projets similaires détec...
17,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"cybersecurity, digital transformation",Oui,L'appel àprojets seem to align with the enterp...,Pertinence : Oui\n\nProjets similaires détecté...
19,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"technology readiness level, biotechnology, ren...",Oui,L'appel à projets concerne la maturation techn...,Pertinence : Oui\n\nProjets similaires détecté...
20,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,robotics,Oui,L'appel à projets européen concerne le dévelop...,Pertinence : Oui\n\nProjets similaires détecté...
22,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"biotechnology, artificial intelligence, climat...",Oui,Ce projet européen concerne le développement e...,Pertinence : Oui\n\nProjets similaires détecté...
23,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"artificial intelligence, cybersecurity",Oui,Ce projet européen de développement de composa...,"Bonjour !\n\nAnalyse faite, voici ma réponse :..."


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd
from groq import Groq
import re
from tqdm import tqdm

# === CONFIGURATION GROQ ===
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

client = Groq(api_key=os.getenv('GROQ_API_KEY'))
MODEL_NAME = "llama3-70b-8192"

# === CONFIGURATION SELENIUM HEADLESS ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === Nettoyage & Limitation texte ===
def clean_and_limit_text(text, max_words=800):
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + " [...] (texte tronqué)"
    return text

# === Extraction texte depuis une URL ===
def extract_text_from_URL(URL):
    try:
        driver.get(URL)
        time.sleep(5)

        for _ in range(10):
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.3)
        time.sleep(3)

        js_desc = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : '';
        """
        description_text = driver.execute_script(js_desc)

        try:
            open_all_btn = driver.find_element(By.CLASS_NAME, "govuk-accordion__open-all")
            open_all_btn.click()
            time.sleep(1)
        except:
            pass

        js_accordion = """
        let acc = document.querySelector('div.govuk-accordion.ukri-accordion');
        return acc ? acc.innerText.trim() : '';
        """
        accordion_text = driver.execute_script(js_accordion)

        final_text = f"{description_text}\n\n---\n\n{accordion_text}".strip()
        return final_text if final_text.strip() else "Pas de contenu détecté."
    
    except Exception as e:
        return f"Erreur Selenium: {e}"

# === Construction du prompt pour Groq ===
def build_prompt(projects, site_text):
    projects_text = "\n".join([
        f"{i+1}. {p['Project Name']} : {p['Description']} (axe {p['Axes / Thematic Areas']})"
        for i, p in enumerate(projects)
    ])
    return f"""
Tu es un expert en analyse de projets d'entreprise.

Voici une liste de projets que cette entreprise a déjà réalisés avec leurs descriptions et axes principaux :

{projects_text}

Voici maintenant le contenu d'un appel à projets extrait d'une page du portail européen :

{site_text}

Peux-tu me dire si cette opportunité est pertinente par rapport aux projets que l'entreprise a déjà réalisés ?
Merci de répondre clairement :
- Pertinence : Oui / Non
- Projets similaires détectés : [liste]
- Résumé rapide expliquant ta réponse.
"""

# === Requête Groq ===
def query_groq(prompt, model=MODEL_NAME, max_completion_tokens=3000, temperature=1.0, stream=False, delay_between_requests=10):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Tu es un expert en comparaison de projets R&D."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=stream
        )
        if stream:
            result = ""
            for chunk in response:
                delta = chunk.choices[0].delta.content
                if delta:
                    result += delta
            time.sleep(delay_between_requests)
            return result.strip()
        else:
            time.sleep(delay_between_requests)
            return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Erreur Groq: {e}"

# === Parsing du résultat Groq ===
def parse_result(text):
    pertinence = "Non"
    resume = ""

    try:
        match = re.search(r"(?i)pertinence\s*[:\-–]\s*(oui|non)", text)
        if match:
            pertinence = match.group(1).capitalize()

        resume_match = re.search(r"(?i)résumé.*?:\s*(.+)", text)
        if resume_match:
            resume = resume_match.group(1).strip()
        else:
            resume = "\n".join(text.splitlines()[1:4]).strip()
    except:
        pass

    return pertinence, resume

# === Analyse d'une URL ===
def analyze_URL(URL, projects_list):
    print(f"🟡 Analyse en cours : {URL}")
    try:
        site_text_raw = extract_text_from_URL(URL)
        site_text = clean_and_limit_text(site_text_raw, max_words=800)
        projects_list_sample = projects_list[:3]  # 3 projets max

        prompt = build_prompt(projects_list_sample, site_text)
        result_text = query_groq(prompt)
        pertinence, resume = parse_result(result_text)
        
        return {
            "URL": URL,
            "Pertinence LLM": pertinence,
            "Résumé LLM": resume,
            "Réponse brute": result_text
        }
    except Exception as e:
        return {
            "URL": URL,
            "Pertinence LLM": "Erreur",
            "Résumé LLM": f"Erreur pendant l'analyse : {e}",
            "Réponse brute": str(e)
        }

# === MAIN EXECUTION ===
if __name__ == "__main__":
    try:
        df_pertinence_uk = pd.read_excel("projets_pertinents_keywords_InnUK_from_all_description.xlsx")
        projects_list = odf_df[['Project Name', 'Description', 'Axes / Thematic Areas']].dropna().to_dict(orient='records')
        URL_list = df_pertinence_uk['URL'].dropna().unique()

        results = []
        for URL in tqdm(URL_list, desc="🔍 Analyse des URLs"):
            results.append(analyze_URL(URL, projects_list))

        results_df = pd.DataFrame(results)
        df_final_llm_uk = df_pertinence_uk.merge(results_df, on="URL", how="left")

        df_final_llm_uk.to_excel("df_yes_avec_pertinence_et_resume_uk.xlsx", index=False)
        print("\n✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.")
        
    finally:
        driver.quit()


🔍 Analyse des URLs:   0%|          | 0/90 [00:00<?, ?it/s]

🟡 Analyse en cours : https://www.ukri.org/opportunity/smart-data-research-uk-fellowships/


🔍 Analyse des URLs:   1%|          | 1/90 [00:31<47:08, 31.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/institutional-diversity-fund-invite-only/


🔍 Analyse des URLs:   2%|▏         | 2/90 [00:56<40:14, 27.44s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/funding-for-early-stage-development-of-new-healthcare-interventions/


🔍 Analyse des URLs:   3%|▎         | 3/90 [01:19<37:18, 25.73s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/digitise-uk-natural-science-collections/


🔍 Analyse des URLs:   4%|▍         | 4/90 [01:44<36:09, 25.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/aviations-non-co2-impacts-on-the-climate-programme-coordinator/


🔍 Analyse des URLs:   6%|▌         | 5/90 [02:08<35:06, 24.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-and-partnership-hubs-for-a-healthy-society/


🔍 Analyse des URLs:   7%|▋         | 6/90 [02:32<34:18, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/stfc-leadership-fellowships-in-public-engagement/


🔍 Analyse des URLs:   8%|▊         | 7/90 [02:55<33:27, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/computing-resources-on-the-stfc-dirac-hpc-facility-rac18/


🔍 Analyse des URLs:   9%|▉         | 8/90 [03:19<32:48, 24.00s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine-invited-stage-two-application/


🔍 Analyse des URLs:  10%|█         | 9/90 [03:43<32:16, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/us-uk-and-germany-uk-collaborative-research-seed-funding-in-semiconductor-security/


🔍 Analyse des URLs:  11%|█         | 10/90 [04:06<31:47, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/biological-influence-on-ocean-carbon-novel-modelling-approaches/


🔍 Analyse des URLs:  12%|█▏        | 11/90 [04:31<31:50, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-gambling-harms-research-coordination-centre-ghrcc/


🔍 Analyse des URLs:  13%|█▎        | 12/90 [04:55<31:20, 24.11s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/gambling-harms-research-and-innovation-partnerships/


🔍 Analyse des URLs:  14%|█▍        | 13/90 [05:20<31:14, 24.34s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/multiple-model-assessment-of-biological-influence-on-ocean-carbon/


🔍 Analyse des URLs:  16%|█▌        | 14/90 [05:46<31:18, 24.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/transdisciplinary-research-to-tackle-antimicrobial-resistance-full-application/


🔍 Analyse des URLs:  17%|█▋        | 15/90 [06:10<30:42, 24.57s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/canada-uk-networkplus-in-semiconductor-research/


🔍 Analyse des URLs:  18%|█▊        | 16/90 [06:36<30:42, 24.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/uk-international-ocean-drilling-programme-moratorium-awards/


🔍 Analyse des URLs:  19%|█▉        | 17/90 [07:01<30:38, 25.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/health-technologies-connectivity-awards-round-two/


🔍 Analyse des URLs:  20%|██        | 18/90 [07:25<29:38, 24.70s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/clinician-scientist-fellowship/


🔍 Analyse des URLs:  21%|██        | 19/90 [07:50<29:11, 24.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pre-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  22%|██▏       | 20/90 [08:15<28:52, 24.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/post-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  23%|██▎       | 21/90 [08:40<28:45, 25.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/engineer-next-generation-veterinary-vaccine-technology-platforms/


🔍 Analyse des URLs:  24%|██▍       | 22/90 [09:05<28:10, 24.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-for-engineering-and-physical-sciences-june-2025/


🔍 Analyse des URLs:  26%|██▌       | 23/90 [09:28<27:24, 24.54s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ernest-rutherford-fellowship-2025/


🔍 Analyse des URLs:  27%|██▋       | 24/90 [09:52<26:45, 24.32s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-to-advance-uk-recycling-capabilities-full-stage/


🔍 Analyse des URLs:  28%|██▊       | 25/90 [10:16<26:10, 24.16s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/sandpit-moving-respiratory-health-diagnosis-into-the-21st-century-invite-only/


🔍 Analyse des URLs:  29%|██▉       | 26/90 [10:39<25:31, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mrc-centre-of-research-excellence-round-three/


🔍 Analyse des URLs:  30%|███       | 27/90 [11:06<26:02, 24.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/responsive-mode-partnership/


🔍 Analyse des URLs:  31%|███       | 28/90 [11:30<25:19, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-programme/


🔍 Analyse des URLs:  32%|███▏      | 29/90 [11:54<24:37, 24.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-programme/


🔍 Analyse des URLs:  33%|███▎      | 30/90 [12:17<24:04, 24.08s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-programme/


🔍 Analyse des URLs:  34%|███▍      | 31/90 [12:41<23:36, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-programme/


🔍 Analyse des URLs:  36%|███▌      | 32/90 [13:05<23:02, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-new-investigator/


🔍 Analyse des URLs:  37%|███▋      | 33/90 [13:28<22:36, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-new-investigator/


🔍 Analyse des URLs:  38%|███▊      | 34/90 [13:52<22:11, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-new-investigator/


🔍 Analyse des URLs:  39%|███▉      | 35/90 [14:16<21:47, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-new-investigator/


🔍 Analyse des URLs:  40%|████      | 36/90 [14:39<21:19, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-research/


🔍 Analyse des URLs:  41%|████      | 37/90 [15:03<20:55, 23.68s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-research/


🔍 Analyse des URLs:  42%|████▏     | 38/90 [15:27<20:32, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-research/


🔍 Analyse des URLs:  43%|████▎     | 39/90 [15:50<20:05, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-research/


🔍 Analyse des URLs:  44%|████▍     | 40/90 [16:14<19:42, 23.66s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/dare-uk-real-world-research-exemplar-programme/


🔍 Analyse des URLs:  46%|████▌     | 41/90 [16:38<19:19, 23.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-new-investigator-award-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  47%|████▋     | 42/90 [17:01<18:54, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-standard-research-grant-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  48%|████▊     | 43/90 [17:25<18:31, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/critical-mass-programmes-to-drive-a-sustainable-future-invitation-only/


🔍 Analyse des URLs:  49%|████▉     | 44/90 [17:49<18:09, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine/


🔍 Analyse des URLs:  50%|█████     | 45/90 [18:12<17:46, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/career-development-award/


🔍 Analyse des URLs:  51%|█████     | 46/90 [18:36<17:24, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-policy-fellowships-2025/


🔍 Analyse des URLs:  52%|█████▏    | 47/90 [19:00<17:03, 23.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-research-invited-stage-two/


🔍 Analyse des URLs:  53%|█████▎    | 48/90 [19:24<16:42, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/2025-to-2026-strategic-longer-and-larger-slola-grants/


🔍 Analyse des URLs:  54%|█████▍    | 49/90 [19:48<16:18, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-partnership-invited-stage-two/


🔍 Analyse des URLs:  56%|█████▌    | 50/90 [20:12<15:58, 23.95s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-norway/


🔍 Analyse des URLs:  57%|█████▋    | 51/90 [20:36<15:34, 23.96s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborative-doctoral-landscape-award-in-the-arts-and-humanities/


🔍 Analyse des URLs:  58%|█████▊    | 52/90 [21:00<15:08, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/patt-travel-grants-for-competitively-awarded-observation-time-2025/


🔍 Analyse des URLs:  59%|█████▉    | 53/90 [21:24<14:44, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/developmental-pathway-funding-scheme/


🔍 Analyse des URLs:  60%|██████    | 54/90 [21:48<14:17, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/statements-of-need-in-research-infrastructure/


🔍 Analyse des URLs:  61%|██████    | 55/90 [22:12<13:55, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-full-proposal/


🔍 Analyse des URLs:  62%|██████▏   | 56/90 [22:35<13:31, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-postdoctoral-fellowship/


🔍 Analyse des URLs:  63%|██████▎   | 57/90 [23:00<13:15, 24.12s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/opening-up-the-environment-2026/


🔍 Analyse des URLs:  64%|██████▍   | 58/90 [23:24<12:45, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/small-molecule-high-throughput-screen-using-astrazeneca-facilities/


🔍 Analyse des URLs:  66%|██████▌   | 59/90 [23:48<12:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-curiosity-award/


🔍 Analyse des URLs:  67%|██████▋   | 60/90 [24:11<11:56, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-catalyst-awards/


🔍 Analyse des URLs:  68%|██████▊   | 61/90 [24:35<11:29, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pushing-the-frontiers-of-environmental-research-july-2025/


🔍 Analyse des URLs:  69%|██████▉   | 62/90 [24:58<11:03, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-open-and-open-plus-fellowship/


🔍 Analyse des URLs:  70%|███████   | 63/90 [25:24<10:52, 24.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-ukri-nsf-sbe-lead-agency/


🔍 Analyse des URLs:  71%|███████   | 64/90 [25:48<10:30, 24.25s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-working-with-brazilian-researchers/


🔍 Analyse des URLs:  72%|███████▏  | 65/90 [26:12<10:02, 24.10s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  73%|███████▎  | 66/90 [26:36<09:35, 23.97s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-standard-research-grant/


🔍 Analyse des URLs:  74%|███████▍  | 67/90 [26:59<09:08, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-outline-stage/


🔍 Analyse des URLs:  76%|███████▌  | 68/90 [27:23<08:44, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-international-travel-award-scheme/


🔍 Analyse des URLs:  77%|███████▋  | 69/90 [27:47<08:24, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/projects-peer-review-panel-pprp-2025/


🔍 Analyse des URLs:  78%|███████▊  | 70/90 [28:12<08:01, 24.07s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/nerc-urgency-funding-open/


🔍 Analyse des URLs:  79%|███████▉  | 71/90 [28:35<07:35, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-outlines/


🔍 Analyse des URLs:  80%|████████  | 72/90 [28:59<07:09, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-network-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  81%|████████  | 73/90 [29:23<06:46, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-working-with-overseas-scientists-nov-2023-responsive-mode/


🔍 Analyse des URLs:  82%|████████▏ | 74/90 [29:47<06:21, 23.83s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-new-investigator-award-nov-2023-responsive-mode/


🔍 Analyse des URLs:  83%|████████▎ | 75/90 [30:10<05:56, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-discipline-hopping-in-ict-nov-2023-responsive-mode/


🔍 Analyse des URLs:  84%|████████▍ | 76/90 [30:34<05:32, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-overseas-travel-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  86%|████████▌ | 77/90 [30:58<05:09, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-standard-research-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  87%|████████▋ | 78/90 [31:22<04:46, 23.89s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-research-grants-round-two/


🔍 Analyse des URLs:  88%|████████▊ | 79/90 [31:46<04:23, 23.94s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-new-investigator-grants-round-two/


🔍 Analyse des URLs:  89%|████████▉ | 80/90 [32:10<03:59, 23.91s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-luxembourg-researchers-round-two/


🔍 Analyse des URLs:  90%|█████████ | 81/90 [32:33<03:34, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-secondary-data-analysis-round-two/


🔍 Analyse des URLs:  91%|█████████ | 82/90 [32:57<03:10, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-ukri-sbe-lead-agency-opportunity-round-two/


🔍 Analyse des URLs:  92%|█████████▏| 83/90 [33:21<02:46, 23.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-brazilian-researchers-round-two/


🔍 Analyse des URLs:  93%|█████████▎| 84/90 [33:45<02:22, 23.76s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  94%|█████████▍| 85/90 [34:09<01:59, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/work-with-brazilian-researchers-nerc-fapesp-lead-agency/


🔍 Analyse des URLs:  96%|█████████▌| 86/90 [34:32<01:35, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-brazil/


🔍 Analyse des URLs:  97%|█████████▋| 87/90 [34:56<01:11, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-sbe-lead-agency-opportunity/


🔍 Analyse des URLs:  98%|█████████▊| 88/90 [35:20<00:47, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-england-development-fund/


🔍 Analyse des URLs:  99%|█████████▉| 89/90 [35:44<00:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/daphne-jackson-fellowship/


🔍 Analyse des URLs: 100%|██████████| 90/90 [36:08<00:00, 24.09s/it]



✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd
from groq import Groq
import re
from tqdm import tqdm

# === CONFIGURATION GROQ ===
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

client = Groq(api_key=os.getenv('GROQ_API_KEY'))
MODEL_NAME = "llama3-70b-8192"

# === CONFIGURATION SELENIUM HEADLESS ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === Nettoyage & Limitation texte ===
def clean_and_limit_text(text, max_words=800):
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + " [...] (texte tronqué)"
    return text

# === Extraction texte depuis une URL ===
def extract_text_from_URL(URL):
    try:
        driver.get(URL)
        time.sleep(5)

        for _ in range(10):
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.3)
        time.sleep(3)

        js_desc = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : '';
        """
        description_text = driver.execute_script(js_desc)

        try:
            open_all_btn = driver.find_element(By.CLASS_NAME, "govuk-accordion__open-all")
            open_all_btn.click()
            time.sleep(1)
        except:
            pass

        js_accordion = """
        let acc = document.querySelector('div.govuk-accordion.ukri-accordion');
        return acc ? acc.innerText.trim() : '';
        """
        accordion_text = driver.execute_script(js_accordion)

        final_text = f"{description_text}\n\n---\n\n{accordion_text}".strip()
        return final_text if final_text.strip() else "Pas de contenu détecté."
    
    except Exception as e:
        return f"Erreur Selenium: {e}"

# === Construction du prompt pour Groq ===
def build_prompt(projects, site_text):
    projects_text = "\n".join([
        f"{i+1}. {p['Project Name']} : {p['Description']} (axe {p['Axes / Thematic Areas']})"
        for i, p in enumerate(projects)
    ])
    return f"""
Tu es un expert en analyse de projets d'entreprise.

Voici une liste de projets que cette entreprise a déjà réalisés avec leurs descriptions et axes principaux :

{projects_text}

Voici maintenant le contenu d'un appel à projets extrait d'une page du portail européen :

{site_text}

Peux-tu me dire si cette opportunité est pertinente par rapport aux projets que l'entreprise a déjà réalisés ?
Merci de répondre clairement :
- Pertinence : Oui / Non
- Projets similaires détectés : [liste]
- Résumé rapide expliquant ta réponse.
"""

# === Requête Groq ===
def query_groq(prompt, model=MODEL_NAME, max_completion_tokens=3000, temperature=1.0, stream=False, delay_between_requests=10):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Tu es un expert en comparaison de projets R&D."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=stream
        )
        if stream:
            result = ""
            for chunk in response:
                delta = chunk.choices[0].delta.content
                if delta:
                    result += delta
            time.sleep(delay_between_requests)
            return result.strip()
        else:
            time.sleep(delay_between_requests)
            return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Erreur Groq: {e}"

# === Parsing du résultat Groq ===
def parse_result(text):
    pertinence = "Non"
    resume = ""

    try:
        match = re.search(r"(?i)pertinence\s*[:\-–]\s*(oui|non)", text)
        if match:
            pertinence = match.group(1).capitalize()

        resume_match = re.search(r"(?i)résumé.*?:\s*(.+)", text)
        if resume_match:
            resume = resume_match.group(1).strip()
        else:
            resume = "\n".join(text.splitlines()[1:4]).strip()
    except:
        pass

    return pertinence, resume

# === Analyse d'une URL ===
def analyze_URL(URL, projects_list):
    print(f"🟡 Analyse en cours : {URL}")
    try:
        site_text_raw = extract_text_from_URL(URL)
        site_text = clean_and_limit_text(site_text_raw, max_words=800)
        projects_list_sample = projects_list[:3]  # 3 projets max

        prompt = build_prompt(projects_list_sample, site_text)
        result_text = query_groq(prompt)
        pertinence, resume = parse_result(result_text)
        
        return {
            "URL": URL,
            "Pertinence LLM": pertinence,
            "Résumé LLM": resume,
            "Réponse brute": result_text
        }
    except Exception as e:
        return {
            "URL": URL,
            "Pertinence LLM": "Erreur",
            "Résumé LLM": f"Erreur pendant l'analyse : {e}",
            "Réponse brute": str(e)
        }

# === MAIN EXECUTION ===
if __name__ == "__main__":
    try:
        df_pertinence_uk = pd.read_excel("projets_pertinents_keywords_InnUK_from_all_description.xlsx")
        projects_list = odf_df[['Project Name', 'Description', 'Axes / Thematic Areas']].dropna().to_dict(orient='records')
        URL_list = df_pertinence_uk['URL'].dropna().unique()

        results = []
        for URL in tqdm(URL_list, desc="🔍 Analyse des URLs"):
            results.append(analyze_URL(URL, projects_list))

        results_df = pd.DataFrame(results)
        df_final_llm_uk = df_pertinence_uk.merge(results_df, on="URL", how="left")

        df_final_llm_uk.to_excel("df_yes_avec_pertinence_et_resume_uk.xlsx", index=False)
        print("\n✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.")
        
    finally:
        driver.quit()


🔍 Analyse des URLs:   0%|          | 0/90 [00:00<?, ?it/s]

🟡 Analyse en cours : https://www.ukri.org/opportunity/smart-data-research-uk-fellowships/


🔍 Analyse des URLs:   1%|          | 1/90 [00:31<47:08, 31.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/institutional-diversity-fund-invite-only/


🔍 Analyse des URLs:   2%|▏         | 2/90 [00:56<40:14, 27.44s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/funding-for-early-stage-development-of-new-healthcare-interventions/


🔍 Analyse des URLs:   3%|▎         | 3/90 [01:19<37:18, 25.73s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/digitise-uk-natural-science-collections/


🔍 Analyse des URLs:   4%|▍         | 4/90 [01:44<36:09, 25.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/aviations-non-co2-impacts-on-the-climate-programme-coordinator/


🔍 Analyse des URLs:   6%|▌         | 5/90 [02:08<35:06, 24.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-and-partnership-hubs-for-a-healthy-society/


🔍 Analyse des URLs:   7%|▋         | 6/90 [02:32<34:18, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/stfc-leadership-fellowships-in-public-engagement/


🔍 Analyse des URLs:   8%|▊         | 7/90 [02:55<33:27, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/computing-resources-on-the-stfc-dirac-hpc-facility-rac18/


🔍 Analyse des URLs:   9%|▉         | 8/90 [03:19<32:48, 24.00s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine-invited-stage-two-application/


🔍 Analyse des URLs:  10%|█         | 9/90 [03:43<32:16, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/us-uk-and-germany-uk-collaborative-research-seed-funding-in-semiconductor-security/


🔍 Analyse des URLs:  11%|█         | 10/90 [04:06<31:47, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/biological-influence-on-ocean-carbon-novel-modelling-approaches/


🔍 Analyse des URLs:  12%|█▏        | 11/90 [04:31<31:50, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-gambling-harms-research-coordination-centre-ghrcc/


🔍 Analyse des URLs:  13%|█▎        | 12/90 [04:55<31:20, 24.11s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/gambling-harms-research-and-innovation-partnerships/


🔍 Analyse des URLs:  14%|█▍        | 13/90 [05:20<31:14, 24.34s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/multiple-model-assessment-of-biological-influence-on-ocean-carbon/


🔍 Analyse des URLs:  16%|█▌        | 14/90 [05:46<31:18, 24.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/transdisciplinary-research-to-tackle-antimicrobial-resistance-full-application/


🔍 Analyse des URLs:  17%|█▋        | 15/90 [06:10<30:42, 24.57s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/canada-uk-networkplus-in-semiconductor-research/


🔍 Analyse des URLs:  18%|█▊        | 16/90 [06:36<30:42, 24.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/uk-international-ocean-drilling-programme-moratorium-awards/


🔍 Analyse des URLs:  19%|█▉        | 17/90 [07:01<30:38, 25.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/health-technologies-connectivity-awards-round-two/


🔍 Analyse des URLs:  20%|██        | 18/90 [07:25<29:38, 24.70s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/clinician-scientist-fellowship/


🔍 Analyse des URLs:  21%|██        | 19/90 [07:50<29:11, 24.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pre-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  22%|██▏       | 20/90 [08:15<28:52, 24.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/post-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  23%|██▎       | 21/90 [08:40<28:45, 25.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/engineer-next-generation-veterinary-vaccine-technology-platforms/


🔍 Analyse des URLs:  24%|██▍       | 22/90 [09:05<28:10, 24.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-for-engineering-and-physical-sciences-june-2025/


🔍 Analyse des URLs:  26%|██▌       | 23/90 [09:28<27:24, 24.54s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ernest-rutherford-fellowship-2025/


🔍 Analyse des URLs:  27%|██▋       | 24/90 [09:52<26:45, 24.32s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-to-advance-uk-recycling-capabilities-full-stage/


🔍 Analyse des URLs:  28%|██▊       | 25/90 [10:16<26:10, 24.16s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/sandpit-moving-respiratory-health-diagnosis-into-the-21st-century-invite-only/


🔍 Analyse des URLs:  29%|██▉       | 26/90 [10:39<25:31, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mrc-centre-of-research-excellence-round-three/


🔍 Analyse des URLs:  30%|███       | 27/90 [11:06<26:02, 24.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/responsive-mode-partnership/


🔍 Analyse des URLs:  31%|███       | 28/90 [11:30<25:19, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-programme/


🔍 Analyse des URLs:  32%|███▏      | 29/90 [11:54<24:37, 24.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-programme/


🔍 Analyse des URLs:  33%|███▎      | 30/90 [12:17<24:04, 24.08s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-programme/


🔍 Analyse des URLs:  34%|███▍      | 31/90 [12:41<23:36, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-programme/


🔍 Analyse des URLs:  36%|███▌      | 32/90 [13:05<23:02, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-new-investigator/


🔍 Analyse des URLs:  37%|███▋      | 33/90 [13:28<22:36, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-new-investigator/


🔍 Analyse des URLs:  38%|███▊      | 34/90 [13:52<22:11, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-new-investigator/


🔍 Analyse des URLs:  39%|███▉      | 35/90 [14:16<21:47, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-new-investigator/


🔍 Analyse des URLs:  40%|████      | 36/90 [14:39<21:19, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-research/


🔍 Analyse des URLs:  41%|████      | 37/90 [15:03<20:55, 23.68s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-research/


🔍 Analyse des URLs:  42%|████▏     | 38/90 [15:27<20:32, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-research/


🔍 Analyse des URLs:  43%|████▎     | 39/90 [15:50<20:05, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-research/


🔍 Analyse des URLs:  44%|████▍     | 40/90 [16:14<19:42, 23.66s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/dare-uk-real-world-research-exemplar-programme/


🔍 Analyse des URLs:  46%|████▌     | 41/90 [16:38<19:19, 23.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-new-investigator-award-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  47%|████▋     | 42/90 [17:01<18:54, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-standard-research-grant-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  48%|████▊     | 43/90 [17:25<18:31, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/critical-mass-programmes-to-drive-a-sustainable-future-invitation-only/


🔍 Analyse des URLs:  49%|████▉     | 44/90 [17:49<18:09, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine/


🔍 Analyse des URLs:  50%|█████     | 45/90 [18:12<17:46, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/career-development-award/


🔍 Analyse des URLs:  51%|█████     | 46/90 [18:36<17:24, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-policy-fellowships-2025/


🔍 Analyse des URLs:  52%|█████▏    | 47/90 [19:00<17:03, 23.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-research-invited-stage-two/


🔍 Analyse des URLs:  53%|█████▎    | 48/90 [19:24<16:42, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/2025-to-2026-strategic-longer-and-larger-slola-grants/


🔍 Analyse des URLs:  54%|█████▍    | 49/90 [19:48<16:18, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-partnership-invited-stage-two/


🔍 Analyse des URLs:  56%|█████▌    | 50/90 [20:12<15:58, 23.95s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-norway/


🔍 Analyse des URLs:  57%|█████▋    | 51/90 [20:36<15:34, 23.96s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborative-doctoral-landscape-award-in-the-arts-and-humanities/


🔍 Analyse des URLs:  58%|█████▊    | 52/90 [21:00<15:08, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/patt-travel-grants-for-competitively-awarded-observation-time-2025/


🔍 Analyse des URLs:  59%|█████▉    | 53/90 [21:24<14:44, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/developmental-pathway-funding-scheme/


🔍 Analyse des URLs:  60%|██████    | 54/90 [21:48<14:17, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/statements-of-need-in-research-infrastructure/


🔍 Analyse des URLs:  61%|██████    | 55/90 [22:12<13:55, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-full-proposal/


🔍 Analyse des URLs:  62%|██████▏   | 56/90 [22:35<13:31, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-postdoctoral-fellowship/


🔍 Analyse des URLs:  63%|██████▎   | 57/90 [23:00<13:15, 24.12s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/opening-up-the-environment-2026/


🔍 Analyse des URLs:  64%|██████▍   | 58/90 [23:24<12:45, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/small-molecule-high-throughput-screen-using-astrazeneca-facilities/


🔍 Analyse des URLs:  66%|██████▌   | 59/90 [23:48<12:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-curiosity-award/


🔍 Analyse des URLs:  67%|██████▋   | 60/90 [24:11<11:56, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-catalyst-awards/


🔍 Analyse des URLs:  68%|██████▊   | 61/90 [24:35<11:29, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pushing-the-frontiers-of-environmental-research-july-2025/


🔍 Analyse des URLs:  69%|██████▉   | 62/90 [24:58<11:03, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-open-and-open-plus-fellowship/


🔍 Analyse des URLs:  70%|███████   | 63/90 [25:24<10:52, 24.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-ukri-nsf-sbe-lead-agency/


🔍 Analyse des URLs:  71%|███████   | 64/90 [25:48<10:30, 24.25s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-working-with-brazilian-researchers/


🔍 Analyse des URLs:  72%|███████▏  | 65/90 [26:12<10:02, 24.10s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  73%|███████▎  | 66/90 [26:36<09:35, 23.97s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-standard-research-grant/


🔍 Analyse des URLs:  74%|███████▍  | 67/90 [26:59<09:08, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-outline-stage/


🔍 Analyse des URLs:  76%|███████▌  | 68/90 [27:23<08:44, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-international-travel-award-scheme/


🔍 Analyse des URLs:  77%|███████▋  | 69/90 [27:47<08:24, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/projects-peer-review-panel-pprp-2025/


🔍 Analyse des URLs:  78%|███████▊  | 70/90 [28:12<08:01, 24.07s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/nerc-urgency-funding-open/


🔍 Analyse des URLs:  79%|███████▉  | 71/90 [28:35<07:35, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-outlines/


🔍 Analyse des URLs:  80%|████████  | 72/90 [28:59<07:09, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-network-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  81%|████████  | 73/90 [29:23<06:46, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-working-with-overseas-scientists-nov-2023-responsive-mode/


🔍 Analyse des URLs:  82%|████████▏ | 74/90 [29:47<06:21, 23.83s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-new-investigator-award-nov-2023-responsive-mode/


🔍 Analyse des URLs:  83%|████████▎ | 75/90 [30:10<05:56, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-discipline-hopping-in-ict-nov-2023-responsive-mode/


🔍 Analyse des URLs:  84%|████████▍ | 76/90 [30:34<05:32, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-overseas-travel-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  86%|████████▌ | 77/90 [30:58<05:09, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-standard-research-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  87%|████████▋ | 78/90 [31:22<04:46, 23.89s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-research-grants-round-two/


🔍 Analyse des URLs:  88%|████████▊ | 79/90 [31:46<04:23, 23.94s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-new-investigator-grants-round-two/


🔍 Analyse des URLs:  89%|████████▉ | 80/90 [32:10<03:59, 23.91s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-luxembourg-researchers-round-two/


🔍 Analyse des URLs:  90%|█████████ | 81/90 [32:33<03:34, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-secondary-data-analysis-round-two/


🔍 Analyse des URLs:  91%|█████████ | 82/90 [32:57<03:10, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-ukri-sbe-lead-agency-opportunity-round-two/


🔍 Analyse des URLs:  92%|█████████▏| 83/90 [33:21<02:46, 23.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-brazilian-researchers-round-two/


🔍 Analyse des URLs:  93%|█████████▎| 84/90 [33:45<02:22, 23.76s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  94%|█████████▍| 85/90 [34:09<01:59, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/work-with-brazilian-researchers-nerc-fapesp-lead-agency/


🔍 Analyse des URLs:  96%|█████████▌| 86/90 [34:32<01:35, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-brazil/


🔍 Analyse des URLs:  97%|█████████▋| 87/90 [34:56<01:11, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-sbe-lead-agency-opportunity/


🔍 Analyse des URLs:  98%|█████████▊| 88/90 [35:20<00:47, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-england-development-fund/


🔍 Analyse des URLs:  99%|█████████▉| 89/90 [35:44<00:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/daphne-jackson-fellowship/


🔍 Analyse des URLs: 100%|██████████| 90/90 [36:08<00:00, 24.09s/it]



✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd
from groq import Groq
import re
from tqdm import tqdm

# === CONFIGURATION GROQ ===
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

client = Groq(api_key=os.getenv('GROQ_API_KEY'))
MODEL_NAME = "llama3-70b-8192"

# === CONFIGURATION SELENIUM HEADLESS ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === Nettoyage & Limitation texte ===
def clean_and_limit_text(text, max_words=800):
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + " [...] (texte tronqué)"
    return text

# === Extraction texte depuis une URL ===
def extract_text_from_URL(URL):
    try:
        driver.get(URL)
        time.sleep(5)

        for _ in range(10):
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.3)
        time.sleep(3)

        js_desc = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : '';
        """
        description_text = driver.execute_script(js_desc)

        try:
            open_all_btn = driver.find_element(By.CLASS_NAME, "govuk-accordion__open-all")
            open_all_btn.click()
            time.sleep(1)
        except:
            pass

        js_accordion = """
        let acc = document.querySelector('div.govuk-accordion.ukri-accordion');
        return acc ? acc.innerText.trim() : '';
        """
        accordion_text = driver.execute_script(js_accordion)

        final_text = f"{description_text}\n\n---\n\n{accordion_text}".strip()
        return final_text if final_text.strip() else "Pas de contenu détecté."
    
    except Exception as e:
        return f"Erreur Selenium: {e}"

# === Construction du prompt pour Groq ===
def build_prompt(projects, site_text):
    projects_text = "\n".join([
        f"{i+1}. {p['Project Name']} : {p['Description']} (axe {p['Axes / Thematic Areas']})"
        for i, p in enumerate(projects)
    ])
    return f"""
Tu es un expert en analyse de projets d'entreprise.

Voici une liste de projets que cette entreprise a déjà réalisés avec leurs descriptions et axes principaux :

{projects_text}

Voici maintenant le contenu d'un appel à projets extrait d'une page du portail européen :

{site_text}

Peux-tu me dire si cette opportunité est pertinente par rapport aux projets que l'entreprise a déjà réalisés ?
Merci de répondre clairement :
- Pertinence : Oui / Non
- Projets similaires détectés : [liste]
- Résumé rapide expliquant ta réponse.
"""

# === Requête Groq ===
def query_groq(prompt, model=MODEL_NAME, max_completion_tokens=3000, temperature=1.0, stream=False, delay_between_requests=10):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Tu es un expert en comparaison de projets R&D."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=stream
        )
        if stream:
            result = ""
            for chunk in response:
                delta = chunk.choices[0].delta.content
                if delta:
                    result += delta
            time.sleep(delay_between_requests)
            return result.strip()
        else:
            time.sleep(delay_between_requests)
            return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Erreur Groq: {e}"

# === Parsing du résultat Groq ===
def parse_result(text):
    pertinence = "Non"
    resume = ""

    try:
        match = re.search(r"(?i)pertinence\s*[:\-–]\s*(oui|non)", text)
        if match:
            pertinence = match.group(1).capitalize()

        resume_match = re.search(r"(?i)résumé.*?:\s*(.+)", text)
        if resume_match:
            resume = resume_match.group(1).strip()
        else:
            resume = "\n".join(text.splitlines()[1:4]).strip()
    except:
        pass

    return pertinence, resume

# === Analyse d'une URL ===
def analyze_URL(URL, projects_list):
    print(f"🟡 Analyse en cours : {URL}")
    try:
        site_text_raw = extract_text_from_URL(URL)
        site_text = clean_and_limit_text(site_text_raw, max_words=800)
        projects_list_sample = projects_list[:3]  # 3 projets max

        prompt = build_prompt(projects_list_sample, site_text)
        result_text = query_groq(prompt)
        pertinence, resume = parse_result(result_text)
        
        return {
            "URL": URL,
            "Pertinence LLM": pertinence,
            "Résumé LLM": resume,
            "Réponse brute": result_text
        }
    except Exception as e:
        return {
            "URL": URL,
            "Pertinence LLM": "Erreur",
            "Résumé LLM": f"Erreur pendant l'analyse : {e}",
            "Réponse brute": str(e)
        }

# === MAIN EXECUTION ===
if __name__ == "__main__":
    try:
        df_pertinence_uk = pd.read_excel("projets_pertinents_keywords_InnUK_from_all_description.xlsx")
        projects_list = odf_df[['Project Name', 'Description', 'Axes / Thematic Areas']].dropna().to_dict(orient='records')
        URL_list = df_pertinence_uk['URL'].dropna().unique()

        results = []
        for URL in tqdm(URL_list, desc="🔍 Analyse des URLs"):
            results.append(analyze_URL(URL, projects_list))

        results_df = pd.DataFrame(results)
        df_final_llm_uk = df_pertinence_uk.merge(results_df, on="URL", how="left")

        df_final_llm_uk.to_excel("df_yes_avec_pertinence_et_resume_uk.xlsx", index=False)
        print("\n✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.")
        
    finally:
        driver.quit()


🔍 Analyse des URLs:   0%|          | 0/90 [00:00<?, ?it/s]

🟡 Analyse en cours : https://www.ukri.org/opportunity/smart-data-research-uk-fellowships/


🔍 Analyse des URLs:   1%|          | 1/90 [00:31<47:08, 31.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/institutional-diversity-fund-invite-only/


🔍 Analyse des URLs:   2%|▏         | 2/90 [00:56<40:14, 27.44s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/funding-for-early-stage-development-of-new-healthcare-interventions/


🔍 Analyse des URLs:   3%|▎         | 3/90 [01:19<37:18, 25.73s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/digitise-uk-natural-science-collections/


🔍 Analyse des URLs:   4%|▍         | 4/90 [01:44<36:09, 25.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/aviations-non-co2-impacts-on-the-climate-programme-coordinator/


🔍 Analyse des URLs:   6%|▌         | 5/90 [02:08<35:06, 24.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-and-partnership-hubs-for-a-healthy-society/


🔍 Analyse des URLs:   7%|▋         | 6/90 [02:32<34:18, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/stfc-leadership-fellowships-in-public-engagement/


🔍 Analyse des URLs:   8%|▊         | 7/90 [02:55<33:27, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/computing-resources-on-the-stfc-dirac-hpc-facility-rac18/


🔍 Analyse des URLs:   9%|▉         | 8/90 [03:19<32:48, 24.00s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine-invited-stage-two-application/


🔍 Analyse des URLs:  10%|█         | 9/90 [03:43<32:16, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/us-uk-and-germany-uk-collaborative-research-seed-funding-in-semiconductor-security/


🔍 Analyse des URLs:  11%|█         | 10/90 [04:06<31:47, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/biological-influence-on-ocean-carbon-novel-modelling-approaches/


🔍 Analyse des URLs:  12%|█▏        | 11/90 [04:31<31:50, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-gambling-harms-research-coordination-centre-ghrcc/


🔍 Analyse des URLs:  13%|█▎        | 12/90 [04:55<31:20, 24.11s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/gambling-harms-research-and-innovation-partnerships/


🔍 Analyse des URLs:  14%|█▍        | 13/90 [05:20<31:14, 24.34s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/multiple-model-assessment-of-biological-influence-on-ocean-carbon/


🔍 Analyse des URLs:  16%|█▌        | 14/90 [05:46<31:18, 24.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/transdisciplinary-research-to-tackle-antimicrobial-resistance-full-application/


🔍 Analyse des URLs:  17%|█▋        | 15/90 [06:10<30:42, 24.57s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/canada-uk-networkplus-in-semiconductor-research/


🔍 Analyse des URLs:  18%|█▊        | 16/90 [06:36<30:42, 24.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/uk-international-ocean-drilling-programme-moratorium-awards/


🔍 Analyse des URLs:  19%|█▉        | 17/90 [07:01<30:38, 25.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/health-technologies-connectivity-awards-round-two/


🔍 Analyse des URLs:  20%|██        | 18/90 [07:25<29:38, 24.70s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/clinician-scientist-fellowship/


🔍 Analyse des URLs:  21%|██        | 19/90 [07:50<29:11, 24.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pre-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  22%|██▏       | 20/90 [08:15<28:52, 24.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/post-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  23%|██▎       | 21/90 [08:40<28:45, 25.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/engineer-next-generation-veterinary-vaccine-technology-platforms/


🔍 Analyse des URLs:  24%|██▍       | 22/90 [09:05<28:10, 24.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-for-engineering-and-physical-sciences-june-2025/


🔍 Analyse des URLs:  26%|██▌       | 23/90 [09:28<27:24, 24.54s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ernest-rutherford-fellowship-2025/


🔍 Analyse des URLs:  27%|██▋       | 24/90 [09:52<26:45, 24.32s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-to-advance-uk-recycling-capabilities-full-stage/


🔍 Analyse des URLs:  28%|██▊       | 25/90 [10:16<26:10, 24.16s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/sandpit-moving-respiratory-health-diagnosis-into-the-21st-century-invite-only/


🔍 Analyse des URLs:  29%|██▉       | 26/90 [10:39<25:31, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mrc-centre-of-research-excellence-round-three/


🔍 Analyse des URLs:  30%|███       | 27/90 [11:06<26:02, 24.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/responsive-mode-partnership/


🔍 Analyse des URLs:  31%|███       | 28/90 [11:30<25:19, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-programme/


🔍 Analyse des URLs:  32%|███▏      | 29/90 [11:54<24:37, 24.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-programme/


🔍 Analyse des URLs:  33%|███▎      | 30/90 [12:17<24:04, 24.08s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-programme/


🔍 Analyse des URLs:  34%|███▍      | 31/90 [12:41<23:36, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-programme/


🔍 Analyse des URLs:  36%|███▌      | 32/90 [13:05<23:02, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-new-investigator/


🔍 Analyse des URLs:  37%|███▋      | 33/90 [13:28<22:36, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-new-investigator/


🔍 Analyse des URLs:  38%|███▊      | 34/90 [13:52<22:11, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-new-investigator/


🔍 Analyse des URLs:  39%|███▉      | 35/90 [14:16<21:47, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-new-investigator/


🔍 Analyse des URLs:  40%|████      | 36/90 [14:39<21:19, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-research/


🔍 Analyse des URLs:  41%|████      | 37/90 [15:03<20:55, 23.68s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-research/


🔍 Analyse des URLs:  42%|████▏     | 38/90 [15:27<20:32, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-research/


🔍 Analyse des URLs:  43%|████▎     | 39/90 [15:50<20:05, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-research/


🔍 Analyse des URLs:  44%|████▍     | 40/90 [16:14<19:42, 23.66s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/dare-uk-real-world-research-exemplar-programme/


🔍 Analyse des URLs:  46%|████▌     | 41/90 [16:38<19:19, 23.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-new-investigator-award-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  47%|████▋     | 42/90 [17:01<18:54, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-standard-research-grant-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  48%|████▊     | 43/90 [17:25<18:31, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/critical-mass-programmes-to-drive-a-sustainable-future-invitation-only/


🔍 Analyse des URLs:  49%|████▉     | 44/90 [17:49<18:09, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine/


🔍 Analyse des URLs:  50%|█████     | 45/90 [18:12<17:46, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/career-development-award/


🔍 Analyse des URLs:  51%|█████     | 46/90 [18:36<17:24, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-policy-fellowships-2025/


🔍 Analyse des URLs:  52%|█████▏    | 47/90 [19:00<17:03, 23.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-research-invited-stage-two/


🔍 Analyse des URLs:  53%|█████▎    | 48/90 [19:24<16:42, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/2025-to-2026-strategic-longer-and-larger-slola-grants/


🔍 Analyse des URLs:  54%|█████▍    | 49/90 [19:48<16:18, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-partnership-invited-stage-two/


🔍 Analyse des URLs:  56%|█████▌    | 50/90 [20:12<15:58, 23.95s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-norway/


🔍 Analyse des URLs:  57%|█████▋    | 51/90 [20:36<15:34, 23.96s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborative-doctoral-landscape-award-in-the-arts-and-humanities/


🔍 Analyse des URLs:  58%|█████▊    | 52/90 [21:00<15:08, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/patt-travel-grants-for-competitively-awarded-observation-time-2025/


🔍 Analyse des URLs:  59%|█████▉    | 53/90 [21:24<14:44, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/developmental-pathway-funding-scheme/


🔍 Analyse des URLs:  60%|██████    | 54/90 [21:48<14:17, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/statements-of-need-in-research-infrastructure/


🔍 Analyse des URLs:  61%|██████    | 55/90 [22:12<13:55, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-full-proposal/


🔍 Analyse des URLs:  62%|██████▏   | 56/90 [22:35<13:31, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-postdoctoral-fellowship/


🔍 Analyse des URLs:  63%|██████▎   | 57/90 [23:00<13:15, 24.12s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/opening-up-the-environment-2026/


🔍 Analyse des URLs:  64%|██████▍   | 58/90 [23:24<12:45, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/small-molecule-high-throughput-screen-using-astrazeneca-facilities/


🔍 Analyse des URLs:  66%|██████▌   | 59/90 [23:48<12:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-curiosity-award/


🔍 Analyse des URLs:  67%|██████▋   | 60/90 [24:11<11:56, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-catalyst-awards/


🔍 Analyse des URLs:  68%|██████▊   | 61/90 [24:35<11:29, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pushing-the-frontiers-of-environmental-research-july-2025/


🔍 Analyse des URLs:  69%|██████▉   | 62/90 [24:58<11:03, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-open-and-open-plus-fellowship/


🔍 Analyse des URLs:  70%|███████   | 63/90 [25:24<10:52, 24.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-ukri-nsf-sbe-lead-agency/


🔍 Analyse des URLs:  71%|███████   | 64/90 [25:48<10:30, 24.25s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-working-with-brazilian-researchers/


🔍 Analyse des URLs:  72%|███████▏  | 65/90 [26:12<10:02, 24.10s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  73%|███████▎  | 66/90 [26:36<09:35, 23.97s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-standard-research-grant/


🔍 Analyse des URLs:  74%|███████▍  | 67/90 [26:59<09:08, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-outline-stage/


🔍 Analyse des URLs:  76%|███████▌  | 68/90 [27:23<08:44, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-international-travel-award-scheme/


🔍 Analyse des URLs:  77%|███████▋  | 69/90 [27:47<08:24, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/projects-peer-review-panel-pprp-2025/


🔍 Analyse des URLs:  78%|███████▊  | 70/90 [28:12<08:01, 24.07s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/nerc-urgency-funding-open/


🔍 Analyse des URLs:  79%|███████▉  | 71/90 [28:35<07:35, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-outlines/


🔍 Analyse des URLs:  80%|████████  | 72/90 [28:59<07:09, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-network-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  81%|████████  | 73/90 [29:23<06:46, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-working-with-overseas-scientists-nov-2023-responsive-mode/


🔍 Analyse des URLs:  82%|████████▏ | 74/90 [29:47<06:21, 23.83s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-new-investigator-award-nov-2023-responsive-mode/


🔍 Analyse des URLs:  83%|████████▎ | 75/90 [30:10<05:56, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-discipline-hopping-in-ict-nov-2023-responsive-mode/


🔍 Analyse des URLs:  84%|████████▍ | 76/90 [30:34<05:32, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-overseas-travel-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  86%|████████▌ | 77/90 [30:58<05:09, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-standard-research-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  87%|████████▋ | 78/90 [31:22<04:46, 23.89s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-research-grants-round-two/


🔍 Analyse des URLs:  88%|████████▊ | 79/90 [31:46<04:23, 23.94s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-new-investigator-grants-round-two/


🔍 Analyse des URLs:  89%|████████▉ | 80/90 [32:10<03:59, 23.91s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-luxembourg-researchers-round-two/


🔍 Analyse des URLs:  90%|█████████ | 81/90 [32:33<03:34, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-secondary-data-analysis-round-two/


🔍 Analyse des URLs:  91%|█████████ | 82/90 [32:57<03:10, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-ukri-sbe-lead-agency-opportunity-round-two/


🔍 Analyse des URLs:  92%|█████████▏| 83/90 [33:21<02:46, 23.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-brazilian-researchers-round-two/


🔍 Analyse des URLs:  93%|█████████▎| 84/90 [33:45<02:22, 23.76s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  94%|█████████▍| 85/90 [34:09<01:59, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/work-with-brazilian-researchers-nerc-fapesp-lead-agency/


🔍 Analyse des URLs:  96%|█████████▌| 86/90 [34:32<01:35, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-brazil/


🔍 Analyse des URLs:  97%|█████████▋| 87/90 [34:56<01:11, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-sbe-lead-agency-opportunity/


🔍 Analyse des URLs:  98%|█████████▊| 88/90 [35:20<00:47, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-england-development-fund/


🔍 Analyse des URLs:  99%|█████████▉| 89/90 [35:44<00:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/daphne-jackson-fellowship/


🔍 Analyse des URLs: 100%|██████████| 90/90 [36:08<00:00, 24.09s/it]



✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd
from groq import Groq
import re
from tqdm import tqdm

# === CONFIGURATION GROQ ===
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

client = Groq(api_key=os.getenv('GROQ_API_KEY'))
MODEL_NAME = "llama3-70b-8192"

# === CONFIGURATION SELENIUM HEADLESS ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === Nettoyage & Limitation texte ===
def clean_and_limit_text(text, max_words=800):
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + " [...] (texte tronqué)"
    return text

# === Extraction texte depuis une URL ===
def extract_text_from_URL(URL):
    try:
        driver.get(URL)
        time.sleep(5)

        for _ in range(10):
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.3)
        time.sleep(3)

        js_desc = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : '';
        """
        description_text = driver.execute_script(js_desc)

        try:
            open_all_btn = driver.find_element(By.CLASS_NAME, "govuk-accordion__open-all")
            open_all_btn.click()
            time.sleep(1)
        except:
            pass

        js_accordion = """
        let acc = document.querySelector('div.govuk-accordion.ukri-accordion');
        return acc ? acc.innerText.trim() : '';
        """
        accordion_text = driver.execute_script(js_accordion)

        final_text = f"{description_text}\n\n---\n\n{accordion_text}".strip()
        return final_text if final_text.strip() else "Pas de contenu détecté."
    
    except Exception as e:
        return f"Erreur Selenium: {e}"

# === Construction du prompt pour Groq ===
def build_prompt(projects, site_text):
    projects_text = "\n".join([
        f"{i+1}. {p['Project Name']} : {p['Description']} (axe {p['Axes / Thematic Areas']})"
        for i, p in enumerate(projects)
    ])
    return f"""
Tu es un expert en analyse de projets d'entreprise.

Voici une liste de projets que cette entreprise a déjà réalisés avec leurs descriptions et axes principaux :

{projects_text}

Voici maintenant le contenu d'un appel à projets extrait d'une page du portail européen :

{site_text}

Peux-tu me dire si cette opportunité est pertinente par rapport aux projets que l'entreprise a déjà réalisés ?
Merci de répondre clairement :
- Pertinence : Oui / Non
- Projets similaires détectés : [liste]
- Résumé rapide expliquant ta réponse.
"""

# === Requête Groq ===
def query_groq(prompt, model=MODEL_NAME, max_completion_tokens=3000, temperature=1.0, stream=False, delay_between_requests=10):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Tu es un expert en comparaison de projets R&D."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=stream
        )
        if stream:
            result = ""
            for chunk in response:
                delta = chunk.choices[0].delta.content
                if delta:
                    result += delta
            time.sleep(delay_between_requests)
            return result.strip()
        else:
            time.sleep(delay_between_requests)
            return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Erreur Groq: {e}"

# === Parsing du résultat Groq ===
def parse_result(text):
    pertinence = "Non"
    resume = ""

    try:
        match = re.search(r"(?i)pertinence\s*[:\-–]\s*(oui|non)", text)
        if match:
            pertinence = match.group(1).capitalize()

        resume_match = re.search(r"(?i)résumé.*?:\s*(.+)", text)
        if resume_match:
            resume = resume_match.group(1).strip()
        else:
            resume = "\n".join(text.splitlines()[1:4]).strip()
    except:
        pass

    return pertinence, resume

# === Analyse d'une URL ===
def analyze_URL(URL, projects_list):
    print(f"🟡 Analyse en cours : {URL}")
    try:
        site_text_raw = extract_text_from_URL(URL)
        site_text = clean_and_limit_text(site_text_raw, max_words=800)
        projects_list_sample = projects_list[:3]  # 3 projets max

        prompt = build_prompt(projects_list_sample, site_text)
        result_text = query_groq(prompt)
        pertinence, resume = parse_result(result_text)
        
        return {
            "URL": URL,
            "Pertinence LLM": pertinence,
            "Résumé LLM": resume,
            "Réponse brute": result_text
        }
    except Exception as e:
        return {
            "URL": URL,
            "Pertinence LLM": "Erreur",
            "Résumé LLM": f"Erreur pendant l'analyse : {e}",
            "Réponse brute": str(e)
        }

# === MAIN EXECUTION ===
if __name__ == "__main__":
    try:
        df_pertinence_uk = pd.read_excel("projets_pertinents_keywords_InnUK_from_all_description.xlsx")
        projects_list = odf_df[['Project Name', 'Description', 'Axes / Thematic Areas']].dropna().to_dict(orient='records')
        URL_list = df_pertinence_uk['URL'].dropna().unique()

        results = []
        for URL in tqdm(URL_list, desc="🔍 Analyse des URLs"):
            results.append(analyze_URL(URL, projects_list))

        results_df = pd.DataFrame(results)
        df_final_llm_uk = df_pertinence_uk.merge(results_df, on="URL", how="left")

        df_final_llm_uk.to_excel("df_yes_avec_pertinence_et_resume_uk.xlsx", index=False)
        print("\n✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.")
        
    finally:
        driver.quit()


🔍 Analyse des URLs:   0%|          | 0/90 [00:00<?, ?it/s]

🟡 Analyse en cours : https://www.ukri.org/opportunity/smart-data-research-uk-fellowships/


🔍 Analyse des URLs:   1%|          | 1/90 [00:31<47:08, 31.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/institutional-diversity-fund-invite-only/


🔍 Analyse des URLs:   2%|▏         | 2/90 [00:56<40:14, 27.44s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/funding-for-early-stage-development-of-new-healthcare-interventions/


🔍 Analyse des URLs:   3%|▎         | 3/90 [01:19<37:18, 25.73s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/digitise-uk-natural-science-collections/


🔍 Analyse des URLs:   4%|▍         | 4/90 [01:44<36:09, 25.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/aviations-non-co2-impacts-on-the-climate-programme-coordinator/


🔍 Analyse des URLs:   6%|▌         | 5/90 [02:08<35:06, 24.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-and-partnership-hubs-for-a-healthy-society/


🔍 Analyse des URLs:   7%|▋         | 6/90 [02:32<34:18, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/stfc-leadership-fellowships-in-public-engagement/


🔍 Analyse des URLs:   8%|▊         | 7/90 [02:55<33:27, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/computing-resources-on-the-stfc-dirac-hpc-facility-rac18/


🔍 Analyse des URLs:   9%|▉         | 8/90 [03:19<32:48, 24.00s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine-invited-stage-two-application/


🔍 Analyse des URLs:  10%|█         | 9/90 [03:43<32:16, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/us-uk-and-germany-uk-collaborative-research-seed-funding-in-semiconductor-security/


🔍 Analyse des URLs:  11%|█         | 10/90 [04:06<31:47, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/biological-influence-on-ocean-carbon-novel-modelling-approaches/


🔍 Analyse des URLs:  12%|█▏        | 11/90 [04:31<31:50, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-gambling-harms-research-coordination-centre-ghrcc/


🔍 Analyse des URLs:  13%|█▎        | 12/90 [04:55<31:20, 24.11s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/gambling-harms-research-and-innovation-partnerships/


🔍 Analyse des URLs:  14%|█▍        | 13/90 [05:20<31:14, 24.34s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/multiple-model-assessment-of-biological-influence-on-ocean-carbon/


🔍 Analyse des URLs:  16%|█▌        | 14/90 [05:46<31:18, 24.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/transdisciplinary-research-to-tackle-antimicrobial-resistance-full-application/


🔍 Analyse des URLs:  17%|█▋        | 15/90 [06:10<30:42, 24.57s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/canada-uk-networkplus-in-semiconductor-research/


🔍 Analyse des URLs:  18%|█▊        | 16/90 [06:36<30:42, 24.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/uk-international-ocean-drilling-programme-moratorium-awards/


🔍 Analyse des URLs:  19%|█▉        | 17/90 [07:01<30:38, 25.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/health-technologies-connectivity-awards-round-two/


🔍 Analyse des URLs:  20%|██        | 18/90 [07:25<29:38, 24.70s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/clinician-scientist-fellowship/


🔍 Analyse des URLs:  21%|██        | 19/90 [07:50<29:11, 24.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pre-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  22%|██▏       | 20/90 [08:15<28:52, 24.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/post-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  23%|██▎       | 21/90 [08:40<28:45, 25.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/engineer-next-generation-veterinary-vaccine-technology-platforms/


🔍 Analyse des URLs:  24%|██▍       | 22/90 [09:05<28:10, 24.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-for-engineering-and-physical-sciences-june-2025/


🔍 Analyse des URLs:  26%|██▌       | 23/90 [09:28<27:24, 24.54s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ernest-rutherford-fellowship-2025/


🔍 Analyse des URLs:  27%|██▋       | 24/90 [09:52<26:45, 24.32s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-to-advance-uk-recycling-capabilities-full-stage/


🔍 Analyse des URLs:  28%|██▊       | 25/90 [10:16<26:10, 24.16s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/sandpit-moving-respiratory-health-diagnosis-into-the-21st-century-invite-only/


🔍 Analyse des URLs:  29%|██▉       | 26/90 [10:39<25:31, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mrc-centre-of-research-excellence-round-three/


🔍 Analyse des URLs:  30%|███       | 27/90 [11:06<26:02, 24.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/responsive-mode-partnership/


🔍 Analyse des URLs:  31%|███       | 28/90 [11:30<25:19, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-programme/


🔍 Analyse des URLs:  32%|███▏      | 29/90 [11:54<24:37, 24.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-programme/


🔍 Analyse des URLs:  33%|███▎      | 30/90 [12:17<24:04, 24.08s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-programme/


🔍 Analyse des URLs:  34%|███▍      | 31/90 [12:41<23:36, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-programme/


🔍 Analyse des URLs:  36%|███▌      | 32/90 [13:05<23:02, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-new-investigator/


🔍 Analyse des URLs:  37%|███▋      | 33/90 [13:28<22:36, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-new-investigator/


🔍 Analyse des URLs:  38%|███▊      | 34/90 [13:52<22:11, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-new-investigator/


🔍 Analyse des URLs:  39%|███▉      | 35/90 [14:16<21:47, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-new-investigator/


🔍 Analyse des URLs:  40%|████      | 36/90 [14:39<21:19, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-research/


🔍 Analyse des URLs:  41%|████      | 37/90 [15:03<20:55, 23.68s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-research/


🔍 Analyse des URLs:  42%|████▏     | 38/90 [15:27<20:32, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-research/


🔍 Analyse des URLs:  43%|████▎     | 39/90 [15:50<20:05, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-research/


🔍 Analyse des URLs:  44%|████▍     | 40/90 [16:14<19:42, 23.66s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/dare-uk-real-world-research-exemplar-programme/


🔍 Analyse des URLs:  46%|████▌     | 41/90 [16:38<19:19, 23.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-new-investigator-award-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  47%|████▋     | 42/90 [17:01<18:54, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-standard-research-grant-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  48%|████▊     | 43/90 [17:25<18:31, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/critical-mass-programmes-to-drive-a-sustainable-future-invitation-only/


🔍 Analyse des URLs:  49%|████▉     | 44/90 [17:49<18:09, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine/


🔍 Analyse des URLs:  50%|█████     | 45/90 [18:12<17:46, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/career-development-award/


🔍 Analyse des URLs:  51%|█████     | 46/90 [18:36<17:24, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-policy-fellowships-2025/


🔍 Analyse des URLs:  52%|█████▏    | 47/90 [19:00<17:03, 23.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-research-invited-stage-two/


🔍 Analyse des URLs:  53%|█████▎    | 48/90 [19:24<16:42, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/2025-to-2026-strategic-longer-and-larger-slola-grants/


🔍 Analyse des URLs:  54%|█████▍    | 49/90 [19:48<16:18, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-partnership-invited-stage-two/


🔍 Analyse des URLs:  56%|█████▌    | 50/90 [20:12<15:58, 23.95s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-norway/


🔍 Analyse des URLs:  57%|█████▋    | 51/90 [20:36<15:34, 23.96s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborative-doctoral-landscape-award-in-the-arts-and-humanities/


🔍 Analyse des URLs:  58%|█████▊    | 52/90 [21:00<15:08, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/patt-travel-grants-for-competitively-awarded-observation-time-2025/


🔍 Analyse des URLs:  59%|█████▉    | 53/90 [21:24<14:44, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/developmental-pathway-funding-scheme/


🔍 Analyse des URLs:  60%|██████    | 54/90 [21:48<14:17, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/statements-of-need-in-research-infrastructure/


🔍 Analyse des URLs:  61%|██████    | 55/90 [22:12<13:55, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-full-proposal/


🔍 Analyse des URLs:  62%|██████▏   | 56/90 [22:35<13:31, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-postdoctoral-fellowship/


🔍 Analyse des URLs:  63%|██████▎   | 57/90 [23:00<13:15, 24.12s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/opening-up-the-environment-2026/


🔍 Analyse des URLs:  64%|██████▍   | 58/90 [23:24<12:45, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/small-molecule-high-throughput-screen-using-astrazeneca-facilities/


🔍 Analyse des URLs:  66%|██████▌   | 59/90 [23:48<12:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-curiosity-award/


🔍 Analyse des URLs:  67%|██████▋   | 60/90 [24:11<11:56, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-catalyst-awards/


🔍 Analyse des URLs:  68%|██████▊   | 61/90 [24:35<11:29, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pushing-the-frontiers-of-environmental-research-july-2025/


🔍 Analyse des URLs:  69%|██████▉   | 62/90 [24:58<11:03, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-open-and-open-plus-fellowship/


🔍 Analyse des URLs:  70%|███████   | 63/90 [25:24<10:52, 24.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-ukri-nsf-sbe-lead-agency/


🔍 Analyse des URLs:  71%|███████   | 64/90 [25:48<10:30, 24.25s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-working-with-brazilian-researchers/


🔍 Analyse des URLs:  72%|███████▏  | 65/90 [26:12<10:02, 24.10s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  73%|███████▎  | 66/90 [26:36<09:35, 23.97s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-standard-research-grant/


🔍 Analyse des URLs:  74%|███████▍  | 67/90 [26:59<09:08, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-outline-stage/


🔍 Analyse des URLs:  76%|███████▌  | 68/90 [27:23<08:44, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-international-travel-award-scheme/


🔍 Analyse des URLs:  77%|███████▋  | 69/90 [27:47<08:24, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/projects-peer-review-panel-pprp-2025/


🔍 Analyse des URLs:  78%|███████▊  | 70/90 [28:12<08:01, 24.07s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/nerc-urgency-funding-open/


🔍 Analyse des URLs:  79%|███████▉  | 71/90 [28:35<07:35, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-outlines/


🔍 Analyse des URLs:  80%|████████  | 72/90 [28:59<07:09, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-network-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  81%|████████  | 73/90 [29:23<06:46, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-working-with-overseas-scientists-nov-2023-responsive-mode/


🔍 Analyse des URLs:  82%|████████▏ | 74/90 [29:47<06:21, 23.83s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-new-investigator-award-nov-2023-responsive-mode/


🔍 Analyse des URLs:  83%|████████▎ | 75/90 [30:10<05:56, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-discipline-hopping-in-ict-nov-2023-responsive-mode/


🔍 Analyse des URLs:  84%|████████▍ | 76/90 [30:34<05:32, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-overseas-travel-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  86%|████████▌ | 77/90 [30:58<05:09, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-standard-research-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  87%|████████▋ | 78/90 [31:22<04:46, 23.89s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-research-grants-round-two/


🔍 Analyse des URLs:  88%|████████▊ | 79/90 [31:46<04:23, 23.94s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-new-investigator-grants-round-two/


🔍 Analyse des URLs:  89%|████████▉ | 80/90 [32:10<03:59, 23.91s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-luxembourg-researchers-round-two/


🔍 Analyse des URLs:  90%|█████████ | 81/90 [32:33<03:34, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-secondary-data-analysis-round-two/


🔍 Analyse des URLs:  91%|█████████ | 82/90 [32:57<03:10, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-ukri-sbe-lead-agency-opportunity-round-two/


🔍 Analyse des URLs:  92%|█████████▏| 83/90 [33:21<02:46, 23.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-brazilian-researchers-round-two/


🔍 Analyse des URLs:  93%|█████████▎| 84/90 [33:45<02:22, 23.76s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  94%|█████████▍| 85/90 [34:09<01:59, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/work-with-brazilian-researchers-nerc-fapesp-lead-agency/


🔍 Analyse des URLs:  96%|█████████▌| 86/90 [34:32<01:35, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-brazil/


🔍 Analyse des URLs:  97%|█████████▋| 87/90 [34:56<01:11, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-sbe-lead-agency-opportunity/


🔍 Analyse des URLs:  98%|█████████▊| 88/90 [35:20<00:47, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-england-development-fund/


🔍 Analyse des URLs:  99%|█████████▉| 89/90 [35:44<00:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/daphne-jackson-fellowship/


🔍 Analyse des URLs: 100%|██████████| 90/90 [36:08<00:00, 24.09s/it]



✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd
from groq import Groq
import re
from tqdm import tqdm

# === CONFIGURATION GROQ ===
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

client = Groq(api_key=os.getenv('GROQ_API_KEY'))
MODEL_NAME = "llama3-70b-8192"

# === CONFIGURATION SELENIUM HEADLESS ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === Nettoyage & Limitation texte ===
def clean_and_limit_text(text, max_words=800):
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + " [...] (texte tronqué)"
    return text

# === Extraction texte depuis une URL ===
def extract_text_from_URL(URL):
    try:
        driver.get(URL)
        time.sleep(5)

        for _ in range(10):
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.3)
        time.sleep(3)

        js_desc = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : '';
        """
        description_text = driver.execute_script(js_desc)

        try:
            open_all_btn = driver.find_element(By.CLASS_NAME, "govuk-accordion__open-all")
            open_all_btn.click()
            time.sleep(1)
        except:
            pass

        js_accordion = """
        let acc = document.querySelector('div.govuk-accordion.ukri-accordion');
        return acc ? acc.innerText.trim() : '';
        """
        accordion_text = driver.execute_script(js_accordion)

        final_text = f"{description_text}\n\n---\n\n{accordion_text}".strip()
        return final_text if final_text.strip() else "Pas de contenu détecté."
    
    except Exception as e:
        return f"Erreur Selenium: {e}"

# === Construction du prompt pour Groq ===
def build_prompt(projects, site_text):
    projects_text = "\n".join([
        f"{i+1}. {p['Project Name']} : {p['Description']} (axe {p['Axes / Thematic Areas']})"
        for i, p in enumerate(projects)
    ])
    return f"""
Tu es un expert en analyse de projets d'entreprise.

Voici une liste de projets que cette entreprise a déjà réalisés avec leurs descriptions et axes principaux :

{projects_text}

Voici maintenant le contenu d'un appel à projets extrait d'une page du portail européen :

{site_text}

Peux-tu me dire si cette opportunité est pertinente par rapport aux projets que l'entreprise a déjà réalisés ?
Merci de répondre clairement :
- Pertinence : Oui / Non
- Projets similaires détectés : [liste]
- Résumé rapide expliquant ta réponse.
"""

# === Requête Groq ===
def query_groq(prompt, model=MODEL_NAME, max_completion_tokens=3000, temperature=1.0, stream=False, delay_between_requests=10):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Tu es un expert en comparaison de projets R&D."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=stream
        )
        if stream:
            result = ""
            for chunk in response:
                delta = chunk.choices[0].delta.content
                if delta:
                    result += delta
            time.sleep(delay_between_requests)
            return result.strip()
        else:
            time.sleep(delay_between_requests)
            return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Erreur Groq: {e}"

# === Parsing du résultat Groq ===
def parse_result(text):
    pertinence = "Non"
    resume = ""

    try:
        match = re.search(r"(?i)pertinence\s*[:\-–]\s*(oui|non)", text)
        if match:
            pertinence = match.group(1).capitalize()

        resume_match = re.search(r"(?i)résumé.*?:\s*(.+)", text)
        if resume_match:
            resume = resume_match.group(1).strip()
        else:
            resume = "\n".join(text.splitlines()[1:4]).strip()
    except:
        pass

    return pertinence, resume

# === Analyse d'une URL ===
def analyze_URL(URL, projects_list):
    print(f"🟡 Analyse en cours : {URL}")
    try:
        site_text_raw = extract_text_from_URL(URL)
        site_text = clean_and_limit_text(site_text_raw, max_words=800)
        projects_list_sample = projects_list[:3]  # 3 projets max

        prompt = build_prompt(projects_list_sample, site_text)
        result_text = query_groq(prompt)
        pertinence, resume = parse_result(result_text)
        
        return {
            "URL": URL,
            "Pertinence LLM": pertinence,
            "Résumé LLM": resume,
            "Réponse brute": result_text
        }
    except Exception as e:
        return {
            "URL": URL,
            "Pertinence LLM": "Erreur",
            "Résumé LLM": f"Erreur pendant l'analyse : {e}",
            "Réponse brute": str(e)
        }

# === MAIN EXECUTION ===
if __name__ == "__main__":
    try:
        df_pertinence_uk = pd.read_excel("projets_pertinents_keywords_InnUK_from_all_description.xlsx")
        projects_list = odf_df[['Project Name', 'Description', 'Axes / Thematic Areas']].dropna().to_dict(orient='records')
        URL_list = df_pertinence_uk['URL'].dropna().unique()

        results = []
        for URL in tqdm(URL_list, desc="🔍 Analyse des URLs"):
            results.append(analyze_URL(URL, projects_list))

        results_df = pd.DataFrame(results)
        df_final_llm_uk = df_pertinence_uk.merge(results_df, on="URL", how="left")

        df_final_llm_uk.to_excel("df_yes_avec_pertinence_et_resume_uk.xlsx", index=False)
        print("\n✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.")
        
    finally:
        driver.quit()


🔍 Analyse des URLs:   0%|          | 0/90 [00:00<?, ?it/s]

🟡 Analyse en cours : https://www.ukri.org/opportunity/smart-data-research-uk-fellowships/


🔍 Analyse des URLs:   1%|          | 1/90 [00:31<47:08, 31.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/institutional-diversity-fund-invite-only/


🔍 Analyse des URLs:   2%|▏         | 2/90 [00:56<40:14, 27.44s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/funding-for-early-stage-development-of-new-healthcare-interventions/


🔍 Analyse des URLs:   3%|▎         | 3/90 [01:19<37:18, 25.73s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/digitise-uk-natural-science-collections/


🔍 Analyse des URLs:   4%|▍         | 4/90 [01:44<36:09, 25.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/aviations-non-co2-impacts-on-the-climate-programme-coordinator/


🔍 Analyse des URLs:   6%|▌         | 5/90 [02:08<35:06, 24.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-and-partnership-hubs-for-a-healthy-society/


🔍 Analyse des URLs:   7%|▋         | 6/90 [02:32<34:18, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/stfc-leadership-fellowships-in-public-engagement/


🔍 Analyse des URLs:   8%|▊         | 7/90 [02:55<33:27, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/computing-resources-on-the-stfc-dirac-hpc-facility-rac18/


🔍 Analyse des URLs:   9%|▉         | 8/90 [03:19<32:48, 24.00s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine-invited-stage-two-application/


🔍 Analyse des URLs:  10%|█         | 9/90 [03:43<32:16, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/us-uk-and-germany-uk-collaborative-research-seed-funding-in-semiconductor-security/


🔍 Analyse des URLs:  11%|█         | 10/90 [04:06<31:47, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/biological-influence-on-ocean-carbon-novel-modelling-approaches/


🔍 Analyse des URLs:  12%|█▏        | 11/90 [04:31<31:50, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-gambling-harms-research-coordination-centre-ghrcc/


🔍 Analyse des URLs:  13%|█▎        | 12/90 [04:55<31:20, 24.11s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/gambling-harms-research-and-innovation-partnerships/


🔍 Analyse des URLs:  14%|█▍        | 13/90 [05:20<31:14, 24.34s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/multiple-model-assessment-of-biological-influence-on-ocean-carbon/


🔍 Analyse des URLs:  16%|█▌        | 14/90 [05:46<31:18, 24.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/transdisciplinary-research-to-tackle-antimicrobial-resistance-full-application/


🔍 Analyse des URLs:  17%|█▋        | 15/90 [06:10<30:42, 24.57s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/canada-uk-networkplus-in-semiconductor-research/


🔍 Analyse des URLs:  18%|█▊        | 16/90 [06:36<30:42, 24.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/uk-international-ocean-drilling-programme-moratorium-awards/


🔍 Analyse des URLs:  19%|█▉        | 17/90 [07:01<30:38, 25.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/health-technologies-connectivity-awards-round-two/


🔍 Analyse des URLs:  20%|██        | 18/90 [07:25<29:38, 24.70s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/clinician-scientist-fellowship/


🔍 Analyse des URLs:  21%|██        | 19/90 [07:50<29:11, 24.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pre-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  22%|██▏       | 20/90 [08:15<28:52, 24.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/post-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  23%|██▎       | 21/90 [08:40<28:45, 25.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/engineer-next-generation-veterinary-vaccine-technology-platforms/


🔍 Analyse des URLs:  24%|██▍       | 22/90 [09:05<28:10, 24.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-for-engineering-and-physical-sciences-june-2025/


🔍 Analyse des URLs:  26%|██▌       | 23/90 [09:28<27:24, 24.54s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ernest-rutherford-fellowship-2025/


🔍 Analyse des URLs:  27%|██▋       | 24/90 [09:52<26:45, 24.32s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-to-advance-uk-recycling-capabilities-full-stage/


🔍 Analyse des URLs:  28%|██▊       | 25/90 [10:16<26:10, 24.16s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/sandpit-moving-respiratory-health-diagnosis-into-the-21st-century-invite-only/


🔍 Analyse des URLs:  29%|██▉       | 26/90 [10:39<25:31, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mrc-centre-of-research-excellence-round-three/


🔍 Analyse des URLs:  30%|███       | 27/90 [11:06<26:02, 24.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/responsive-mode-partnership/


🔍 Analyse des URLs:  31%|███       | 28/90 [11:30<25:19, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-programme/


🔍 Analyse des URLs:  32%|███▏      | 29/90 [11:54<24:37, 24.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-programme/


🔍 Analyse des URLs:  33%|███▎      | 30/90 [12:17<24:04, 24.08s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-programme/


🔍 Analyse des URLs:  34%|███▍      | 31/90 [12:41<23:36, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-programme/


🔍 Analyse des URLs:  36%|███▌      | 32/90 [13:05<23:02, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-new-investigator/


🔍 Analyse des URLs:  37%|███▋      | 33/90 [13:28<22:36, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-new-investigator/


🔍 Analyse des URLs:  38%|███▊      | 34/90 [13:52<22:11, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-new-investigator/


🔍 Analyse des URLs:  39%|███▉      | 35/90 [14:16<21:47, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-new-investigator/


🔍 Analyse des URLs:  40%|████      | 36/90 [14:39<21:19, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-research/


🔍 Analyse des URLs:  41%|████      | 37/90 [15:03<20:55, 23.68s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-research/


🔍 Analyse des URLs:  42%|████▏     | 38/90 [15:27<20:32, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-research/


🔍 Analyse des URLs:  43%|████▎     | 39/90 [15:50<20:05, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-research/


🔍 Analyse des URLs:  44%|████▍     | 40/90 [16:14<19:42, 23.66s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/dare-uk-real-world-research-exemplar-programme/


🔍 Analyse des URLs:  46%|████▌     | 41/90 [16:38<19:19, 23.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-new-investigator-award-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  47%|████▋     | 42/90 [17:01<18:54, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-standard-research-grant-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  48%|████▊     | 43/90 [17:25<18:31, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/critical-mass-programmes-to-drive-a-sustainable-future-invitation-only/


🔍 Analyse des URLs:  49%|████▉     | 44/90 [17:49<18:09, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine/


🔍 Analyse des URLs:  50%|█████     | 45/90 [18:12<17:46, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/career-development-award/


🔍 Analyse des URLs:  51%|█████     | 46/90 [18:36<17:24, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-policy-fellowships-2025/


🔍 Analyse des URLs:  52%|█████▏    | 47/90 [19:00<17:03, 23.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-research-invited-stage-two/


🔍 Analyse des URLs:  53%|█████▎    | 48/90 [19:24<16:42, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/2025-to-2026-strategic-longer-and-larger-slola-grants/


🔍 Analyse des URLs:  54%|█████▍    | 49/90 [19:48<16:18, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-partnership-invited-stage-two/


🔍 Analyse des URLs:  56%|█████▌    | 50/90 [20:12<15:58, 23.95s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-norway/


🔍 Analyse des URLs:  57%|█████▋    | 51/90 [20:36<15:34, 23.96s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborative-doctoral-landscape-award-in-the-arts-and-humanities/


🔍 Analyse des URLs:  58%|█████▊    | 52/90 [21:00<15:08, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/patt-travel-grants-for-competitively-awarded-observation-time-2025/


🔍 Analyse des URLs:  59%|█████▉    | 53/90 [21:24<14:44, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/developmental-pathway-funding-scheme/


🔍 Analyse des URLs:  60%|██████    | 54/90 [21:48<14:17, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/statements-of-need-in-research-infrastructure/


🔍 Analyse des URLs:  61%|██████    | 55/90 [22:12<13:55, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-full-proposal/


🔍 Analyse des URLs:  62%|██████▏   | 56/90 [22:35<13:31, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-postdoctoral-fellowship/


🔍 Analyse des URLs:  63%|██████▎   | 57/90 [23:00<13:15, 24.12s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/opening-up-the-environment-2026/


🔍 Analyse des URLs:  64%|██████▍   | 58/90 [23:24<12:45, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/small-molecule-high-throughput-screen-using-astrazeneca-facilities/


🔍 Analyse des URLs:  66%|██████▌   | 59/90 [23:48<12:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-curiosity-award/


🔍 Analyse des URLs:  67%|██████▋   | 60/90 [24:11<11:56, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-catalyst-awards/


🔍 Analyse des URLs:  68%|██████▊   | 61/90 [24:35<11:29, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pushing-the-frontiers-of-environmental-research-july-2025/


🔍 Analyse des URLs:  69%|██████▉   | 62/90 [24:58<11:03, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-open-and-open-plus-fellowship/


🔍 Analyse des URLs:  70%|███████   | 63/90 [25:24<10:52, 24.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-ukri-nsf-sbe-lead-agency/


🔍 Analyse des URLs:  71%|███████   | 64/90 [25:48<10:30, 24.25s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-working-with-brazilian-researchers/


🔍 Analyse des URLs:  72%|███████▏  | 65/90 [26:12<10:02, 24.10s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  73%|███████▎  | 66/90 [26:36<09:35, 23.97s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-standard-research-grant/


🔍 Analyse des URLs:  74%|███████▍  | 67/90 [26:59<09:08, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-outline-stage/


🔍 Analyse des URLs:  76%|███████▌  | 68/90 [27:23<08:44, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-international-travel-award-scheme/


🔍 Analyse des URLs:  77%|███████▋  | 69/90 [27:47<08:24, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/projects-peer-review-panel-pprp-2025/


🔍 Analyse des URLs:  78%|███████▊  | 70/90 [28:12<08:01, 24.07s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/nerc-urgency-funding-open/


🔍 Analyse des URLs:  79%|███████▉  | 71/90 [28:35<07:35, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-outlines/


🔍 Analyse des URLs:  80%|████████  | 72/90 [28:59<07:09, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-network-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  81%|████████  | 73/90 [29:23<06:46, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-working-with-overseas-scientists-nov-2023-responsive-mode/


🔍 Analyse des URLs:  82%|████████▏ | 74/90 [29:47<06:21, 23.83s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-new-investigator-award-nov-2023-responsive-mode/


🔍 Analyse des URLs:  83%|████████▎ | 75/90 [30:10<05:56, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-discipline-hopping-in-ict-nov-2023-responsive-mode/


🔍 Analyse des URLs:  84%|████████▍ | 76/90 [30:34<05:32, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-overseas-travel-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  86%|████████▌ | 77/90 [30:58<05:09, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-standard-research-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  87%|████████▋ | 78/90 [31:22<04:46, 23.89s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-research-grants-round-two/


🔍 Analyse des URLs:  88%|████████▊ | 79/90 [31:46<04:23, 23.94s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-new-investigator-grants-round-two/


🔍 Analyse des URLs:  89%|████████▉ | 80/90 [32:10<03:59, 23.91s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-luxembourg-researchers-round-two/


🔍 Analyse des URLs:  90%|█████████ | 81/90 [32:33<03:34, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-secondary-data-analysis-round-two/


🔍 Analyse des URLs:  91%|█████████ | 82/90 [32:57<03:10, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-ukri-sbe-lead-agency-opportunity-round-two/


🔍 Analyse des URLs:  92%|█████████▏| 83/90 [33:21<02:46, 23.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-brazilian-researchers-round-two/


🔍 Analyse des URLs:  93%|█████████▎| 84/90 [33:45<02:22, 23.76s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  94%|█████████▍| 85/90 [34:09<01:59, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/work-with-brazilian-researchers-nerc-fapesp-lead-agency/


🔍 Analyse des URLs:  96%|█████████▌| 86/90 [34:32<01:35, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-brazil/


🔍 Analyse des URLs:  97%|█████████▋| 87/90 [34:56<01:11, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-sbe-lead-agency-opportunity/


🔍 Analyse des URLs:  98%|█████████▊| 88/90 [35:20<00:47, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-england-development-fund/


🔍 Analyse des URLs:  99%|█████████▉| 89/90 [35:44<00:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/daphne-jackson-fellowship/


🔍 Analyse des URLs: 100%|██████████| 90/90 [36:08<00:00, 24.09s/it]



✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd
from groq import Groq
import re
from tqdm import tqdm

# === CONFIGURATION GROQ ===
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

client = Groq(api_key=os.getenv('GROQ_API_KEY'))
MODEL_NAME = "llama3-70b-8192"

# === CONFIGURATION SELENIUM HEADLESS ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === Nettoyage & Limitation texte ===
def clean_and_limit_text(text, max_words=800):
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + " [...] (texte tronqué)"
    return text

# === Extraction texte depuis une URL ===
def extract_text_from_URL(URL):
    try:
        driver.get(URL)
        time.sleep(5)

        for _ in range(10):
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.3)
        time.sleep(3)

        js_desc = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : '';
        """
        description_text = driver.execute_script(js_desc)

        try:
            open_all_btn = driver.find_element(By.CLASS_NAME, "govuk-accordion__open-all")
            open_all_btn.click()
            time.sleep(1)
        except:
            pass

        js_accordion = """
        let acc = document.querySelector('div.govuk-accordion.ukri-accordion');
        return acc ? acc.innerText.trim() : '';
        """
        accordion_text = driver.execute_script(js_accordion)

        final_text = f"{description_text}\n\n---\n\n{accordion_text}".strip()
        return final_text if final_text.strip() else "Pas de contenu détecté."
    
    except Exception as e:
        return f"Erreur Selenium: {e}"

# === Construction du prompt pour Groq ===
def build_prompt(projects, site_text):
    projects_text = "\n".join([
        f"{i+1}. {p['Project Name']} : {p['Description']} (axe {p['Axes / Thematic Areas']})"
        for i, p in enumerate(projects)
    ])
    return f"""
Tu es un expert en analyse de projets d'entreprise.

Voici une liste de projets que cette entreprise a déjà réalisés avec leurs descriptions et axes principaux :

{projects_text}

Voici maintenant le contenu d'un appel à projets extrait d'une page du portail européen :

{site_text}

Peux-tu me dire si cette opportunité est pertinente par rapport aux projets que l'entreprise a déjà réalisés ?
Merci de répondre clairement :
- Pertinence : Oui / Non
- Projets similaires détectés : [liste]
- Résumé rapide expliquant ta réponse.
"""

# === Requête Groq ===
def query_groq(prompt, model=MODEL_NAME, max_completion_tokens=3000, temperature=1.0, stream=False, delay_between_requests=10):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Tu es un expert en comparaison de projets R&D."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=stream
        )
        if stream:
            result = ""
            for chunk in response:
                delta = chunk.choices[0].delta.content
                if delta:
                    result += delta
            time.sleep(delay_between_requests)
            return result.strip()
        else:
            time.sleep(delay_between_requests)
            return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Erreur Groq: {e}"

# === Parsing du résultat Groq ===
def parse_result(text):
    pertinence = "Non"
    resume = ""

    try:
        match = re.search(r"(?i)pertinence\s*[:\-–]\s*(oui|non)", text)
        if match:
            pertinence = match.group(1).capitalize()

        resume_match = re.search(r"(?i)résumé.*?:\s*(.+)", text)
        if resume_match:
            resume = resume_match.group(1).strip()
        else:
            resume = "\n".join(text.splitlines()[1:4]).strip()
    except:
        pass

    return pertinence, resume

# === Analyse d'une URL ===
def analyze_URL(URL, projects_list):
    print(f"🟡 Analyse en cours : {URL}")
    try:
        site_text_raw = extract_text_from_URL(URL)
        site_text = clean_and_limit_text(site_text_raw, max_words=800)
        projects_list_sample = projects_list[:3]  # 3 projets max

        prompt = build_prompt(projects_list_sample, site_text)
        result_text = query_groq(prompt)
        pertinence, resume = parse_result(result_text)
        
        return {
            "URL": URL,
            "Pertinence LLM": pertinence,
            "Résumé LLM": resume,
            "Réponse brute": result_text
        }
    except Exception as e:
        return {
            "URL": URL,
            "Pertinence LLM": "Erreur",
            "Résumé LLM": f"Erreur pendant l'analyse : {e}",
            "Réponse brute": str(e)
        }

# === MAIN EXECUTION ===
if __name__ == "__main__":
    try:
        df_pertinence_uk = pd.read_excel("projets_pertinents_keywords_InnUK_from_all_description.xlsx")
        projects_list = odf_df[['Project Name', 'Description', 'Axes / Thematic Areas']].dropna().to_dict(orient='records')
        URL_list = df_pertinence_uk['URL'].dropna().unique()

        results = []
        for URL in tqdm(URL_list, desc="🔍 Analyse des URLs"):
            results.append(analyze_URL(URL, projects_list))

        results_df = pd.DataFrame(results)
        df_final_llm_uk = df_pertinence_uk.merge(results_df, on="URL", how="left")

        df_final_llm_uk.to_excel("df_yes_avec_pertinence_et_resume_uk.xlsx", index=False)
        print("\n✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.")
        
    finally:
        driver.quit()


🔍 Analyse des URLs:   0%|          | 0/90 [00:00<?, ?it/s]

🟡 Analyse en cours : https://www.ukri.org/opportunity/smart-data-research-uk-fellowships/


🔍 Analyse des URLs:   1%|          | 1/90 [00:31<47:08, 31.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/institutional-diversity-fund-invite-only/


🔍 Analyse des URLs:   2%|▏         | 2/90 [00:56<40:14, 27.44s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/funding-for-early-stage-development-of-new-healthcare-interventions/


🔍 Analyse des URLs:   3%|▎         | 3/90 [01:19<37:18, 25.73s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/digitise-uk-natural-science-collections/


🔍 Analyse des URLs:   4%|▍         | 4/90 [01:44<36:09, 25.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/aviations-non-co2-impacts-on-the-climate-programme-coordinator/


🔍 Analyse des URLs:   6%|▌         | 5/90 [02:08<35:06, 24.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-and-partnership-hubs-for-a-healthy-society/


🔍 Analyse des URLs:   7%|▋         | 6/90 [02:32<34:18, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/stfc-leadership-fellowships-in-public-engagement/


🔍 Analyse des URLs:   8%|▊         | 7/90 [02:55<33:27, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/computing-resources-on-the-stfc-dirac-hpc-facility-rac18/


🔍 Analyse des URLs:   9%|▉         | 8/90 [03:19<32:48, 24.00s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine-invited-stage-two-application/


🔍 Analyse des URLs:  10%|█         | 9/90 [03:43<32:16, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/us-uk-and-germany-uk-collaborative-research-seed-funding-in-semiconductor-security/


🔍 Analyse des URLs:  11%|█         | 10/90 [04:06<31:47, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/biological-influence-on-ocean-carbon-novel-modelling-approaches/


🔍 Analyse des URLs:  12%|█▏        | 11/90 [04:31<31:50, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-gambling-harms-research-coordination-centre-ghrcc/


🔍 Analyse des URLs:  13%|█▎        | 12/90 [04:55<31:20, 24.11s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/gambling-harms-research-and-innovation-partnerships/


🔍 Analyse des URLs:  14%|█▍        | 13/90 [05:20<31:14, 24.34s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/multiple-model-assessment-of-biological-influence-on-ocean-carbon/


🔍 Analyse des URLs:  16%|█▌        | 14/90 [05:46<31:18, 24.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/transdisciplinary-research-to-tackle-antimicrobial-resistance-full-application/


🔍 Analyse des URLs:  17%|█▋        | 15/90 [06:10<30:42, 24.57s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/canada-uk-networkplus-in-semiconductor-research/


🔍 Analyse des URLs:  18%|█▊        | 16/90 [06:36<30:42, 24.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/uk-international-ocean-drilling-programme-moratorium-awards/


🔍 Analyse des URLs:  19%|█▉        | 17/90 [07:01<30:38, 25.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/health-technologies-connectivity-awards-round-two/


🔍 Analyse des URLs:  20%|██        | 18/90 [07:25<29:38, 24.70s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/clinician-scientist-fellowship/


🔍 Analyse des URLs:  21%|██        | 19/90 [07:50<29:11, 24.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pre-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  22%|██▏       | 20/90 [08:15<28:52, 24.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/post-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  23%|██▎       | 21/90 [08:40<28:45, 25.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/engineer-next-generation-veterinary-vaccine-technology-platforms/


🔍 Analyse des URLs:  24%|██▍       | 22/90 [09:05<28:10, 24.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-for-engineering-and-physical-sciences-june-2025/


🔍 Analyse des URLs:  26%|██▌       | 23/90 [09:28<27:24, 24.54s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ernest-rutherford-fellowship-2025/


🔍 Analyse des URLs:  27%|██▋       | 24/90 [09:52<26:45, 24.32s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-to-advance-uk-recycling-capabilities-full-stage/


🔍 Analyse des URLs:  28%|██▊       | 25/90 [10:16<26:10, 24.16s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/sandpit-moving-respiratory-health-diagnosis-into-the-21st-century-invite-only/


🔍 Analyse des URLs:  29%|██▉       | 26/90 [10:39<25:31, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mrc-centre-of-research-excellence-round-three/


🔍 Analyse des URLs:  30%|███       | 27/90 [11:06<26:02, 24.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/responsive-mode-partnership/


🔍 Analyse des URLs:  31%|███       | 28/90 [11:30<25:19, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-programme/


🔍 Analyse des URLs:  32%|███▏      | 29/90 [11:54<24:37, 24.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-programme/


🔍 Analyse des URLs:  33%|███▎      | 30/90 [12:17<24:04, 24.08s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-programme/


🔍 Analyse des URLs:  34%|███▍      | 31/90 [12:41<23:36, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-programme/


🔍 Analyse des URLs:  36%|███▌      | 32/90 [13:05<23:02, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-new-investigator/


🔍 Analyse des URLs:  37%|███▋      | 33/90 [13:28<22:36, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-new-investigator/


🔍 Analyse des URLs:  38%|███▊      | 34/90 [13:52<22:11, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-new-investigator/


🔍 Analyse des URLs:  39%|███▉      | 35/90 [14:16<21:47, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-new-investigator/


🔍 Analyse des URLs:  40%|████      | 36/90 [14:39<21:19, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-research/


🔍 Analyse des URLs:  41%|████      | 37/90 [15:03<20:55, 23.68s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-research/


🔍 Analyse des URLs:  42%|████▏     | 38/90 [15:27<20:32, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-research/


🔍 Analyse des URLs:  43%|████▎     | 39/90 [15:50<20:05, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-research/


🔍 Analyse des URLs:  44%|████▍     | 40/90 [16:14<19:42, 23.66s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/dare-uk-real-world-research-exemplar-programme/


🔍 Analyse des URLs:  46%|████▌     | 41/90 [16:38<19:19, 23.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-new-investigator-award-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  47%|████▋     | 42/90 [17:01<18:54, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-standard-research-grant-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  48%|████▊     | 43/90 [17:25<18:31, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/critical-mass-programmes-to-drive-a-sustainable-future-invitation-only/


🔍 Analyse des URLs:  49%|████▉     | 44/90 [17:49<18:09, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine/


🔍 Analyse des URLs:  50%|█████     | 45/90 [18:12<17:46, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/career-development-award/


🔍 Analyse des URLs:  51%|█████     | 46/90 [18:36<17:24, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-policy-fellowships-2025/


🔍 Analyse des URLs:  52%|█████▏    | 47/90 [19:00<17:03, 23.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-research-invited-stage-two/


🔍 Analyse des URLs:  53%|█████▎    | 48/90 [19:24<16:42, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/2025-to-2026-strategic-longer-and-larger-slola-grants/


🔍 Analyse des URLs:  54%|█████▍    | 49/90 [19:48<16:18, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-partnership-invited-stage-two/


🔍 Analyse des URLs:  56%|█████▌    | 50/90 [20:12<15:58, 23.95s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-norway/


🔍 Analyse des URLs:  57%|█████▋    | 51/90 [20:36<15:34, 23.96s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborative-doctoral-landscape-award-in-the-arts-and-humanities/


🔍 Analyse des URLs:  58%|█████▊    | 52/90 [21:00<15:08, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/patt-travel-grants-for-competitively-awarded-observation-time-2025/


🔍 Analyse des URLs:  59%|█████▉    | 53/90 [21:24<14:44, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/developmental-pathway-funding-scheme/


🔍 Analyse des URLs:  60%|██████    | 54/90 [21:48<14:17, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/statements-of-need-in-research-infrastructure/


🔍 Analyse des URLs:  61%|██████    | 55/90 [22:12<13:55, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-full-proposal/


🔍 Analyse des URLs:  62%|██████▏   | 56/90 [22:35<13:31, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-postdoctoral-fellowship/


🔍 Analyse des URLs:  63%|██████▎   | 57/90 [23:00<13:15, 24.12s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/opening-up-the-environment-2026/


🔍 Analyse des URLs:  64%|██████▍   | 58/90 [23:24<12:45, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/small-molecule-high-throughput-screen-using-astrazeneca-facilities/


🔍 Analyse des URLs:  66%|██████▌   | 59/90 [23:48<12:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-curiosity-award/


🔍 Analyse des URLs:  67%|██████▋   | 60/90 [24:11<11:56, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-catalyst-awards/


🔍 Analyse des URLs:  68%|██████▊   | 61/90 [24:35<11:29, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pushing-the-frontiers-of-environmental-research-july-2025/


🔍 Analyse des URLs:  69%|██████▉   | 62/90 [24:58<11:03, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-open-and-open-plus-fellowship/


🔍 Analyse des URLs:  70%|███████   | 63/90 [25:24<10:52, 24.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-ukri-nsf-sbe-lead-agency/


🔍 Analyse des URLs:  71%|███████   | 64/90 [25:48<10:30, 24.25s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-working-with-brazilian-researchers/


🔍 Analyse des URLs:  72%|███████▏  | 65/90 [26:12<10:02, 24.10s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  73%|███████▎  | 66/90 [26:36<09:35, 23.97s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-standard-research-grant/


🔍 Analyse des URLs:  74%|███████▍  | 67/90 [26:59<09:08, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-outline-stage/


🔍 Analyse des URLs:  76%|███████▌  | 68/90 [27:23<08:44, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-international-travel-award-scheme/


🔍 Analyse des URLs:  77%|███████▋  | 69/90 [27:47<08:24, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/projects-peer-review-panel-pprp-2025/


🔍 Analyse des URLs:  78%|███████▊  | 70/90 [28:12<08:01, 24.07s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/nerc-urgency-funding-open/


🔍 Analyse des URLs:  79%|███████▉  | 71/90 [28:35<07:35, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-outlines/


🔍 Analyse des URLs:  80%|████████  | 72/90 [28:59<07:09, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-network-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  81%|████████  | 73/90 [29:23<06:46, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-working-with-overseas-scientists-nov-2023-responsive-mode/


🔍 Analyse des URLs:  82%|████████▏ | 74/90 [29:47<06:21, 23.83s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-new-investigator-award-nov-2023-responsive-mode/


🔍 Analyse des URLs:  83%|████████▎ | 75/90 [30:10<05:56, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-discipline-hopping-in-ict-nov-2023-responsive-mode/


🔍 Analyse des URLs:  84%|████████▍ | 76/90 [30:34<05:32, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-overseas-travel-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  86%|████████▌ | 77/90 [30:58<05:09, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-standard-research-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  87%|████████▋ | 78/90 [31:22<04:46, 23.89s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-research-grants-round-two/


🔍 Analyse des URLs:  88%|████████▊ | 79/90 [31:46<04:23, 23.94s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-new-investigator-grants-round-two/


🔍 Analyse des URLs:  89%|████████▉ | 80/90 [32:10<03:59, 23.91s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-luxembourg-researchers-round-two/


🔍 Analyse des URLs:  90%|█████████ | 81/90 [32:33<03:34, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-secondary-data-analysis-round-two/


🔍 Analyse des URLs:  91%|█████████ | 82/90 [32:57<03:10, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-ukri-sbe-lead-agency-opportunity-round-two/


🔍 Analyse des URLs:  92%|█████████▏| 83/90 [33:21<02:46, 23.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-brazilian-researchers-round-two/


🔍 Analyse des URLs:  93%|█████████▎| 84/90 [33:45<02:22, 23.76s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  94%|█████████▍| 85/90 [34:09<01:59, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/work-with-brazilian-researchers-nerc-fapesp-lead-agency/


🔍 Analyse des URLs:  96%|█████████▌| 86/90 [34:32<01:35, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-brazil/


🔍 Analyse des URLs:  97%|█████████▋| 87/90 [34:56<01:11, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-sbe-lead-agency-opportunity/


🔍 Analyse des URLs:  98%|█████████▊| 88/90 [35:20<00:47, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-england-development-fund/


🔍 Analyse des URLs:  99%|█████████▉| 89/90 [35:44<00:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/daphne-jackson-fellowship/


🔍 Analyse des URLs: 100%|██████████| 90/90 [36:08<00:00, 24.09s/it]



✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd
from groq import Groq
import re
from tqdm import tqdm

# === CONFIGURATION GROQ ===
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

client = Groq(api_key=os.getenv('GROQ_API_KEY'))
MODEL_NAME = "llama3-70b-8192"

# === CONFIGURATION SELENIUM HEADLESS ===
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === Nettoyage & Limitation texte ===
def clean_and_limit_text(text, max_words=800):
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + " [...] (texte tronqué)"
    return text

# === Extraction texte depuis une URL ===
def extract_text_from_URL(URL):
    try:
        driver.get(URL)
        time.sleep(5)

        for _ in range(10):
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(0.3)
        time.sleep(3)

        js_desc = """
        let desc = document.querySelector('div.showMore--three-lines');
        return desc ? desc.innerText.trim() : '';
        """
        description_text = driver.execute_script(js_desc)

        try:
            open_all_btn = driver.find_element(By.CLASS_NAME, "govuk-accordion__open-all")
            open_all_btn.click()
            time.sleep(1)
        except:
            pass

        js_accordion = """
        let acc = document.querySelector('div.govuk-accordion.ukri-accordion');
        return acc ? acc.innerText.trim() : '';
        """
        accordion_text = driver.execute_script(js_accordion)

        final_text = f"{description_text}\n\n---\n\n{accordion_text}".strip()
        return final_text if final_text.strip() else "Pas de contenu détecté."
    
    except Exception as e:
        return f"Erreur Selenium: {e}"

# === Construction du prompt pour Groq ===
def build_prompt(projects, site_text):
    projects_text = "\n".join([
        f"{i+1}. {p['Project Name']} : {p['Description']} (axe {p['Axes / Thematic Areas']})"
        for i, p in enumerate(projects)
    ])
    return f"""
Tu es un expert en analyse de projets d'entreprise.

Voici une liste de projets que cette entreprise a déjà réalisés avec leurs descriptions et axes principaux :

{projects_text}

Voici maintenant le contenu d'un appel à projets extrait d'une page du portail européen :

{site_text}

Peux-tu me dire si cette opportunité est pertinente par rapport aux projets que l'entreprise a déjà réalisés ?
Merci de répondre clairement :
- Pertinence : Oui / Non
- Projets similaires détectés : [liste]
- Résumé rapide expliquant ta réponse.
"""

# === Requête Groq ===
def query_groq(prompt, model=MODEL_NAME, max_completion_tokens=3000, temperature=1.0, stream=False, delay_between_requests=10):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Tu es un expert en comparaison de projets R&D."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=stream
        )
        if stream:
            result = ""
            for chunk in response:
                delta = chunk.choices[0].delta.content
                if delta:
                    result += delta
            time.sleep(delay_between_requests)
            return result.strip()
        else:
            time.sleep(delay_between_requests)
            return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Erreur Groq: {e}"

# === Parsing du résultat Groq ===
def parse_result(text):
    pertinence = "Non"
    resume = ""

    try:
        match = re.search(r"(?i)pertinence\s*[:\-–]\s*(oui|non)", text)
        if match:
            pertinence = match.group(1).capitalize()

        resume_match = re.search(r"(?i)résumé.*?:\s*(.+)", text)
        if resume_match:
            resume = resume_match.group(1).strip()
        else:
            resume = "\n".join(text.splitlines()[1:4]).strip()
    except:
        pass

    return pertinence, resume

# === Analyse d'une URL ===
def analyze_URL(URL, projects_list):
    print(f"🟡 Analyse en cours : {URL}")
    try:
        site_text_raw = extract_text_from_URL(URL)
        site_text = clean_and_limit_text(site_text_raw, max_words=800)
        projects_list_sample = projects_list[:3]  # 3 projets max

        prompt = build_prompt(projects_list_sample, site_text)
        result_text = query_groq(prompt)
        pertinence, resume = parse_result(result_text)
        
        return {
            "URL": URL,
            "Pertinence LLM": pertinence,
            "Résumé LLM": resume,
            "Réponse brute": result_text
        }
    except Exception as e:
        return {
            "URL": URL,
            "Pertinence LLM": "Erreur",
            "Résumé LLM": f"Erreur pendant l'analyse : {e}",
            "Réponse brute": str(e)
        }

# === MAIN EXECUTION ===
if __name__ == "__main__":
    try:
        df_pertinence_uk = pd.read_excel("projets_pertinents_keywords_InnUK_from_all_description.xlsx")
        projects_list = odf_df[['Project Name', 'Description', 'Axes / Thematic Areas']].dropna().to_dict(orient='records')
        URL_list = df_pertinence_uk['URL'].dropna().unique()

        results = []
        for URL in tqdm(URL_list, desc="🔍 Analyse des URLs"):
            results.append(analyze_URL(URL, projects_list))

        results_df = pd.DataFrame(results)
        df_final_llm_uk = df_pertinence_uk.merge(results_df, on="URL", how="left")

        df_final_llm_uk.to_excel("df_yes_avec_pertinence_et_resume_uk.xlsx", index=False)
        print("\n✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.")
        
    finally:
        driver.quit()


🔍 Analyse des URLs:   0%|          | 0/90 [00:00<?, ?it/s]

🟡 Analyse en cours : https://www.ukri.org/opportunity/smart-data-research-uk-fellowships/


🔍 Analyse des URLs:   1%|          | 1/90 [00:31<47:08, 31.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/institutional-diversity-fund-invite-only/


🔍 Analyse des URLs:   2%|▏         | 2/90 [00:56<40:14, 27.44s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/funding-for-early-stage-development-of-new-healthcare-interventions/


🔍 Analyse des URLs:   3%|▎         | 3/90 [01:19<37:18, 25.73s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/digitise-uk-natural-science-collections/


🔍 Analyse des URLs:   4%|▍         | 4/90 [01:44<36:09, 25.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/aviations-non-co2-impacts-on-the-climate-programme-coordinator/


🔍 Analyse des URLs:   6%|▌         | 5/90 [02:08<35:06, 24.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-and-partnership-hubs-for-a-healthy-society/


🔍 Analyse des URLs:   7%|▋         | 6/90 [02:32<34:18, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/stfc-leadership-fellowships-in-public-engagement/


🔍 Analyse des URLs:   8%|▊         | 7/90 [02:55<33:27, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/computing-resources-on-the-stfc-dirac-hpc-facility-rac18/


🔍 Analyse des URLs:   9%|▉         | 8/90 [03:19<32:48, 24.00s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine-invited-stage-two-application/


🔍 Analyse des URLs:  10%|█         | 9/90 [03:43<32:16, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/us-uk-and-germany-uk-collaborative-research-seed-funding-in-semiconductor-security/


🔍 Analyse des URLs:  11%|█         | 10/90 [04:06<31:47, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/biological-influence-on-ocean-carbon-novel-modelling-approaches/


🔍 Analyse des URLs:  12%|█▏        | 11/90 [04:31<31:50, 24.19s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-gambling-harms-research-coordination-centre-ghrcc/


🔍 Analyse des URLs:  13%|█▎        | 12/90 [04:55<31:20, 24.11s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/gambling-harms-research-and-innovation-partnerships/


🔍 Analyse des URLs:  14%|█▍        | 13/90 [05:20<31:14, 24.34s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/multiple-model-assessment-of-biological-influence-on-ocean-carbon/


🔍 Analyse des URLs:  16%|█▌        | 14/90 [05:46<31:18, 24.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/transdisciplinary-research-to-tackle-antimicrobial-resistance-full-application/


🔍 Analyse des URLs:  17%|█▋        | 15/90 [06:10<30:42, 24.57s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/canada-uk-networkplus-in-semiconductor-research/


🔍 Analyse des URLs:  18%|█▊        | 16/90 [06:36<30:42, 24.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/uk-international-ocean-drilling-programme-moratorium-awards/


🔍 Analyse des URLs:  19%|█▉        | 17/90 [07:01<30:38, 25.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/health-technologies-connectivity-awards-round-two/


🔍 Analyse des URLs:  20%|██        | 18/90 [07:25<29:38, 24.70s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/clinician-scientist-fellowship/


🔍 Analyse des URLs:  21%|██        | 19/90 [07:50<29:11, 24.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pre-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  22%|██▏       | 20/90 [08:15<28:52, 24.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/post-doctoral-clinical-research-training-fellowship/


🔍 Analyse des URLs:  23%|██▎       | 21/90 [08:40<28:45, 25.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/engineer-next-generation-veterinary-vaccine-technology-platforms/


🔍 Analyse des URLs:  24%|██▍       | 22/90 [09:05<28:10, 24.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-for-engineering-and-physical-sciences-june-2025/


🔍 Analyse des URLs:  26%|██▌       | 23/90 [09:28<27:24, 24.54s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ernest-rutherford-fellowship-2025/


🔍 Analyse des URLs:  27%|██▋       | 24/90 [09:52<26:45, 24.32s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-to-advance-uk-recycling-capabilities-full-stage/


🔍 Analyse des URLs:  28%|██▊       | 25/90 [10:16<26:10, 24.16s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/sandpit-moving-respiratory-health-diagnosis-into-the-21st-century-invite-only/


🔍 Analyse des URLs:  29%|██▉       | 26/90 [10:39<25:31, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mrc-centre-of-research-excellence-round-three/


🔍 Analyse des URLs:  30%|███       | 27/90 [11:06<26:02, 24.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/responsive-mode-partnership/


🔍 Analyse des URLs:  31%|███       | 28/90 [11:30<25:19, 24.51s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-programme/


🔍 Analyse des URLs:  32%|███▏      | 29/90 [11:54<24:37, 24.22s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-programme/


🔍 Analyse des URLs:  33%|███▎      | 30/90 [12:17<24:04, 24.08s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-programme/


🔍 Analyse des URLs:  34%|███▍      | 31/90 [12:41<23:36, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-programme/


🔍 Analyse des URLs:  36%|███▌      | 32/90 [13:05<23:02, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-new-investigator/


🔍 Analyse des URLs:  37%|███▋      | 33/90 [13:28<22:36, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-new-investigator/


🔍 Analyse des URLs:  38%|███▊      | 34/90 [13:52<22:11, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-new-investigator/


🔍 Analyse des URLs:  39%|███▉      | 35/90 [14:16<21:47, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-new-investigator/


🔍 Analyse des URLs:  40%|████      | 36/90 [14:39<21:19, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/neurosciences-and-mental-health-research/


🔍 Analyse des URLs:  41%|████      | 37/90 [15:03<20:55, 23.68s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/population-and-systems-medicine-research/


🔍 Analyse des URLs:  42%|████▏     | 38/90 [15:27<20:32, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/molecular-and-cellular-medicine-research/


🔍 Analyse des URLs:  43%|████▎     | 39/90 [15:50<20:05, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/infections-and-immunity-research/


🔍 Analyse des URLs:  44%|████▍     | 40/90 [16:14<19:42, 23.66s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/dare-uk-real-world-research-exemplar-programme/


🔍 Analyse des URLs:  46%|████▌     | 41/90 [16:38<19:19, 23.67s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-new-investigator-award-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  47%|████▋     | 42/90 [17:01<18:54, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-standard-research-grant-2025-round-3-applicant-led-mode/


🔍 Analyse des URLs:  48%|████▊     | 43/90 [17:25<18:31, 23.64s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/critical-mass-programmes-to-drive-a-sustainable-future-invitation-only/


🔍 Analyse des URLs:  49%|████▉     | 44/90 [17:49<18:09, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/experimental-medicine/


🔍 Analyse des URLs:  50%|█████     | 45/90 [18:12<17:46, 23.69s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/career-development-award/


🔍 Analyse des URLs:  51%|█████     | 46/90 [18:36<17:24, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-policy-fellowships-2025/


🔍 Analyse des URLs:  52%|█████▏    | 47/90 [19:00<17:03, 23.81s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-research-invited-stage-two/


🔍 Analyse des URLs:  53%|█████▎    | 48/90 [19:24<16:42, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/2025-to-2026-strategic-longer-and-larger-slola-grants/


🔍 Analyse des URLs:  54%|█████▍    | 49/90 [19:48<16:18, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/applied-global-health-partnership-invited-stage-two/


🔍 Analyse des URLs:  56%|█████▌    | 50/90 [20:12<15:58, 23.95s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-norway/


🔍 Analyse des URLs:  57%|█████▋    | 51/90 [20:36<15:34, 23.96s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborative-doctoral-landscape-award-in-the-arts-and-humanities/


🔍 Analyse des URLs:  58%|█████▊    | 52/90 [21:00<15:08, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/patt-travel-grants-for-competitively-awarded-observation-time-2025/


🔍 Analyse des URLs:  59%|█████▉    | 53/90 [21:24<14:44, 23.90s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/developmental-pathway-funding-scheme/


🔍 Analyse des URLs:  60%|██████    | 54/90 [21:48<14:17, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/statements-of-need-in-research-infrastructure/


🔍 Analyse des URLs:  61%|██████    | 55/90 [22:12<13:55, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-full-proposal/


🔍 Analyse des URLs:  62%|██████▏   | 56/90 [22:35<13:31, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-postdoctoral-fellowship/


🔍 Analyse des URLs:  63%|██████▎   | 57/90 [23:00<13:15, 24.12s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/opening-up-the-environment-2026/


🔍 Analyse des URLs:  64%|██████▍   | 58/90 [23:24<12:45, 23.93s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/small-molecule-high-throughput-screen-using-astrazeneca-facilities/


🔍 Analyse des URLs:  66%|██████▌   | 59/90 [23:48<12:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-curiosity-award/


🔍 Analyse des URLs:  67%|██████▋   | 60/90 [24:11<11:56, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-catalyst-awards/


🔍 Analyse des URLs:  68%|██████▊   | 61/90 [24:35<11:29, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/pushing-the-frontiers-of-environmental-research-july-2025/


🔍 Analyse des URLs:  69%|██████▉   | 62/90 [24:58<11:03, 23.71s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/mathematical-sciences-open-and-open-plus-fellowship/


🔍 Analyse des URLs:  70%|███████   | 63/90 [25:24<10:52, 24.18s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-ukri-nsf-sbe-lead-agency/


🔍 Analyse des URLs:  71%|███████   | 64/90 [25:48<10:30, 24.25s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-working-with-brazilian-researchers/


🔍 Analyse des URLs:  72%|███████▏  | 65/90 [26:12<10:02, 24.10s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  73%|███████▎  | 66/90 [26:36<09:35, 23.97s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ahrc-responsive-mode-standard-research-grant/


🔍 Analyse des URLs:  74%|███████▍  | 67/90 [26:59<09:08, 23.87s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-programme-grant-outline-stage/


🔍 Analyse des URLs:  76%|███████▌  | 68/90 [27:23<08:44, 23.86s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/bbsrc-international-travel-award-scheme/


🔍 Analyse des URLs:  77%|███████▋  | 69/90 [27:47<08:24, 24.01s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/projects-peer-review-panel-pprp-2025/


🔍 Analyse des URLs:  78%|███████▊  | 70/90 [28:12<08:01, 24.07s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/nerc-urgency-funding-open/


🔍 Analyse des URLs:  79%|███████▉  | 71/90 [28:35<07:35, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/strategic-infrastructure-outlines/


🔍 Analyse des URLs:  80%|████████  | 72/90 [28:59<07:09, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-network-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  81%|████████  | 73/90 [29:23<06:46, 23.92s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-working-with-overseas-scientists-nov-2023-responsive-mode/


🔍 Analyse des URLs:  82%|████████▏ | 74/90 [29:47<06:21, 23.83s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-new-investigator-award-nov-2023-responsive-mode/


🔍 Analyse des URLs:  83%|████████▎ | 75/90 [30:10<05:56, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-discipline-hopping-in-ict-nov-2023-responsive-mode/


🔍 Analyse des URLs:  84%|████████▍ | 76/90 [30:34<05:32, 23.75s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-overseas-travel-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  86%|████████▌ | 77/90 [30:58<05:09, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/epsrc-standard-research-grant-nov-2023-responsive-mode/


🔍 Analyse des URLs:  87%|████████▋ | 78/90 [31:22<04:46, 23.89s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-research-grants-round-two/


🔍 Analyse des URLs:  88%|████████▊ | 79/90 [31:46<04:23, 23.94s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-new-investigator-grants-round-two/


🔍 Analyse des URLs:  89%|████████▉ | 80/90 [32:10<03:59, 23.91s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-luxembourg-researchers-round-two/


🔍 Analyse des URLs:  90%|█████████ | 81/90 [32:33<03:34, 23.79s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-secondary-data-analysis-round-two/


🔍 Analyse des URLs:  91%|█████████ | 82/90 [32:57<03:10, 23.78s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-ukri-sbe-lead-agency-opportunity-round-two/


🔍 Analyse des URLs:  92%|█████████▏| 83/90 [33:21<02:46, 23.72s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/esrc-responsive-mode-working-with-brazilian-researchers-round-two/


🔍 Analyse des URLs:  93%|█████████▎| 84/90 [33:45<02:22, 23.76s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-luxembourg/


🔍 Analyse des URLs:  94%|█████████▍| 85/90 [34:09<01:59, 23.84s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/work-with-brazilian-researchers-nerc-fapesp-lead-agency/


🔍 Analyse des URLs:  96%|█████████▌| 86/90 [34:32<01:35, 23.82s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/collaborate-with-researchers-in-brazil/


🔍 Analyse des URLs:  97%|█████████▋| 87/90 [34:56<01:11, 23.80s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/ukri-sbe-lead-agency-opportunity/


🔍 Analyse des URLs:  98%|█████████▊| 88/90 [35:20<00:47, 23.88s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/research-england-development-fund/


🔍 Analyse des URLs:  99%|█████████▉| 89/90 [35:44<00:23, 23.99s/it]

🟡 Analyse en cours : https://www.ukri.org/opportunity/daphne-jackson-fellowship/


🔍 Analyse des URLs: 100%|██████████| 90/90 [36:08<00:00, 24.09s/it]



✅ Résultats sauvegardés dans 'df_yes_avec_pertinence_et_resume_uk.csv'.


In [None]:
df_yes_final

Unnamed: 0,URL,Pertinence,Matching Word(s),Title,Status,Start_date,Deadline,Pertinence LLM,Résumé LLM,Réponse brute
2,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"technical assistance, renewable energy",Crowding in private finance,Open For Submission,24 April 2025,23 September 2025,Oui,L'appel à projets concerne l'établissement de ...,Pertinence : Oui\n\nProjets similaires détecté...
4,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"capacity building, digital transformation",Multi-Country project in Agri-Food,Open For Submission,15 April 2025,02 September 2025,Oui,L'appel à projets spécifique est pertinent car...,Pertinence : Oui\n\nProjets similaires détecté...
5,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"capacity building, cybersecurity, digital tran...",Completion of the initial Network of European ...,Open For Submission,15 April 2025,02 September 2025,Oui,Cette opportunité de projet EDIH (European Dig...,Pertinence : Oui\n\nProjets similaires détecté...
16,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,machine learning,Fast-track Extending U-space eco-system,Open For Submission,01 April 2025,16 September 2025,Oui,L'appel à projet concerne la recherche et le d...,Pertinence : Oui\n\nProjets similaires détecté...
19,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,growth strategy,Women Tech.EU initiative,Open For Submission,01 April 2025,02 September 2025,Oui,L'appel à projets Women TechEU initiative prop...,Pertinence : Oui\n\nProjets similaires détecté...
20,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,knowledge transfer,MSCA Staff Exchanges 2025,Open For Submission,27 March 2025,08 October 2025,Oui,Cette opportunité de projetMSCA Staff Exchange...,Pertinence : Oui\n\nProjets similaires détecté...
21,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,technology maturation,Ground Test Demonstration up to TRL5 of On-Boa...,Open For Submission,27 March 2025,15 May 2025,Oui,L'appel à projets concerne le développement d'...,- Pertinence : Oui\n- Projets similaires détec...
28,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,call for proposals,WATER,Open For Submission,20 February 2025,17 June 2025,Oui,L'appel à projets spécifique concerne la créat...,Pertinence : Oui\n\nProjets similaires détecté...
29,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"intellectual property, technology readiness level",EIC Pathfinder Open,Open For Submission,20 February 2025,21 May 2025,Oui,Cette opportunité est pertinente car elle conc...,Pertinence : Oui\n\nProjets similaires détecté...
51,https://ec.europa.eu/info/funding-tenders/oppo...,Yes,"prototyping, innovative SMEs",Non-thematic research actions by SMEs and rese...,Open For Submission,18 February 2025,16 October 2025,Oui,Cette opportunité Seems to align with the comp...,Pertinence : Oui\n\nProjets similaires détecté...


In [39]:
df_yes_final.shape

(10, 10)

In [None]:
df_yes_final.to_csv("df_yes_final.csv", index=True)
print("\n✅ Résultats enrichis sauvegardés dans 'df_yes_final.csv'.")


✅ Résultats enrichis sauvegardés dans 'df_yes_final.csv'.


In [None]:
pd.set_option('display.max_colwidth', None)
print(df_yes_final['URL'])

2                       https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/LIFE-2025-CET-PRIVAFIN?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate
4                  https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-2025-AI-08-AGRIFOOD?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate
5      https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-2025-EDIH-AC-08-COMPLETION-STEP?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate
16          https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-SESAR-2025-DES-IR-02-WA6-2?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate
19                  https://ec.europ

In [None]:
df_yes_final

Unnamed: 0,URL,Pertinence,Matching Word(s),Title,Status,Start_date,Deadline,Pertinence LLM,Résumé LLM,Réponse brute
2,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/LIFE-2025-CET-PRIVAFIN?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"technical assistance, renewable energy",Crowding in private finance,Open For Submission,24 April 2025,23 September 2025,Oui,"L'appel à projets concerne l'établissement de mécanismes de financement pour les énergies renouvelables et l'efficacité énergétique, ce qui est en phase avec les objectifs de l'entreprise qui a déjà travaillé sur des projets de développement de l'économie numérique et de soutien aux startups dans les pays en développement. Les expérience acquises dans les projets Arab Bank Strategy et World Bank Collaboration pourraient être mobilisées pour répondre à cet appel à projet. De plus, l'expérience de MDBAN dans le soutien aux startups early-stage pourrait également être pertinente pour cet appel à projet.","Pertinence : Oui\n\nProjets similaires détectés : Arab Bank Strategy, World Bank Collaboration, MDBAN\n\nRésumé rapide : L'appel à projets concerne l'établissement de mécanismes de financement pour les énergies renouvelables et l'efficacité énergétique, ce qui est en phase avec les objectifs de l'entreprise qui a déjà travaillé sur des projets de développement de l'économie numérique et de soutien aux startups dans les pays en développement. Les expérience acquises dans les projets Arab Bank Strategy et World Bank Collaboration pourraient être mobilisées pour répondre à cet appel à projet. De plus, l'expérience de MDBAN dans le soutien aux startups early-stage pourrait également être pertinente pour cet appel à projet."
4,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-2025-AI-08-AGRIFOOD?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"capacity building, digital transformation",Multi-Country project in Agri-Food,Open For Submission,15 April 2025,02 September 2025,Oui,"L'appel à projets spécifique est pertinent car il concerne la mise en place d'une infrastructure numérique pour le secteur agroalimentaire, ce qui correspond aux axes de développement de l'entreprise, tels que la Deeptech, les startups, l'incubation, l'accélération, l'entrepreneuriat et l'économie numérique. De plus, l'appel à projets spécifique met en avant la création d'un écosystème qui favorise la collaboration et l'échange d'informations entre les acteurs du secteur, ce qui correspond également aux expériences de l'entreprise dans le cadre de la mise en place d'écosystèmes d'innovation. Les projets similaires détectés montrent que l'entreprise a déjà une expérience dans la mise en place de stratégies de financement et de partenariats pour des projets de développement économique, ainsi que dans la création d'écosystèmes d'innovation et d'accélération pour les entreprises en démarrage.","Pertinence : Oui\n\nProjets similaires détectés : \n- Arab Bank Strategy \n- World Bank Collaboration \n- MDBAN – Business Angels Network \n- TECHNORIAT \n- ABI – Applied Biotech & Innovation \n\nRésumé rapide expliquant ma réponse : \nL'appel à projets spécifique est pertinent car il concerne la mise en place d'une infrastructure numérique pour le secteur agroalimentaire, ce qui correspond aux axes de développement de l'entreprise, tels que la Deeptech, les startups, l'incubation, l'accélération, l'entrepreneuriat et l'économie numérique. De plus, l'appel à projets spécifique met en avant la création d'un écosystème qui favorise la collaboration et l'échange d'informations entre les acteurs du secteur, ce qui correspond également aux expériences de l'entreprise dans le cadre de la mise en place d'écosystèmes d'innovation. Les projets similaires détectés montrent que l'entreprise a déjà une expérience dans la mise en place de stratégies de financement et de partenariats pour des projets de développement économique, ainsi que dans la création d'écosystèmes d'innovation et d'accélération pour les entreprises en démarrage."
5,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/DIGITAL-2025-EDIH-AC-08-COMPLETION-STEP?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=2&pageSize=50&sortBy=startDate",Yes,"capacity building, cybersecurity, digital transformation",Completion of the initial Network of European Digital Innovation Hubs (EDIHs),Open For Submission,15 April 2025,02 September 2025,Oui,"Cette opportunité de projet EDIH (European Digital Innovation Hubs) concerne la création d'un réseau d'innovation digital pour soutenir la transformation numérique des PME, des mid-caps et des organisations publiques. Les objectifs et les délivrables de ce projet sont en phase avec les expériences antérieures de l'entreprise, notamment dans les domaines de l'innovation, de l'entrepreneuriat, de la formation et du financement. Les projets TECHNORIAT, BIATLABS et Arab Bank Strategy montrent que l'entreprise a déjà travaillé sur des projets de soutien à l'innovation et à l'entrepreneuriat, ce qui la rend particulièrement apte à répondre à cet appel à projet EDIH.","Pertinence : Oui\n\nProjets similaires détectés : TECHNORIAT PPP Program, BIATLABS, World Bank Collaboration, Arab Bank Strategy\n\nRésumé rapide : Cette opportunité de projet EDIH (European Digital Innovation Hubs) concerne la création d'un réseau d'innovation digital pour soutenir la transformation numérique des PME, des mid-caps et des organisations publiques. Les objectifs et les délivrables de ce projet sont en phase avec les expériences antérieures de l'entreprise, notamment dans les domaines de l'innovation, de l'entrepreneuriat, de la formation et du financement. Les projets TECHNORIAT, BIATLABS et Arab Bank Strategy montrent que l'entreprise a déjà travaillé sur des projets de soutien à l'innovation et à l'entrepreneuriat, ce qui la rend particulièrement apte à répondre à cet appel à projet EDIH."
16,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-SESAR-2025-DES-IR-02-WA6-2?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate",Yes,machine learning,Fast-track Extending U-space eco-system,Open For Submission,01 April 2025,16 September 2025,Oui,"L'appel à projet concerne la recherche et le développement de systèmes de navigation et de surveillance pour les drones et les aéronefs dans l'espace aérien. Il explore l'utilisation de réseaux cellulaires publics, de radars mmWave, de technologies 5G et de systèmes de navigation pour améliorer la surveillance et la sécurité dans l'espace aérien.","Pertinence : Oui\n\nProjets similaires détectés : FACTORIAT, World Bank Collaboration, TECHNORIAT PPP Program\n\nRésumé rapide : \n\nL'appel à projet concerne la recherche et le développement de systèmes de navigation et de surveillance pour les drones et les aéronefs dans l'espace aérien. Il explore l'utilisation de réseaux cellulaires publics, de radars mmWave, de technologies 5G et de systèmes de navigation pour améliorer la surveillance et la sécurité dans l'espace aérien. \n\nL'entreprise a déjà réalisé des projets similaires, tels que FACTORIAT, qui soutient les startups Deeptech et Hardware, notamment dans les domaines de la navigation et de la surveillance. Le projet World Bank Collaboration a également développé des écosystèmes d'entreprises pour les startups dans le domaine de la navigation et de la surveillance. Enfin, le projet TECHNORIAT PPP Program a mis en relation la recherche et l'entrepreneuriat pour développer des startups dans le domaine de la navigation et de la surveillance. Ces expériences pourraient être utiles pour répondre à cet appel à projet."
19,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-WOMENTECH?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate",Yes,growth strategy,Women Tech.EU initiative,Open For Submission,01 April 2025,02 September 2025,Oui,"L'appel à projets Women TechEU initiative propose un programme d'incubation et d'accompagnement pour les startups féminines dans le domaine des technologies émergentes, ce qui correspond aux expertises de l'entreprise dans les domaines de l'incubation, du Deeptech et de l'accompagnement des startups. Les projets similaires détectés montrent que l'entreprise a déjà une expérience dans l'appui aux startups, notamment dans les domaines du Deeptech, de l'incubation et de l'accompagnement des startups féminines.","Pertinence : Oui\n\nProjets similaires détectés : FACTORIAT, Arab Bank Strategy, World Bank Collaboration, TECHNORIAT PPP Program, MDBAN – Business Angels Network\n\nRésumé rapide : L'appel à projets Women TechEU initiative propose un programme d'incubation et d'accompagnement pour les startups féminines dans le domaine des technologies émergentes, ce qui correspond aux expertises de l'entreprise dans les domaines de l'incubation, du Deeptech et de l'accompagnement des startups. Les projets similaires détectés montrent que l'entreprise a déjà une expérience dans l'appui aux startups, notamment dans les domaines du Deeptech, de l'incubation et de l'accompagnement des startups féminines."
20,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-MSCA-2025-SE-01-01?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate",Yes,knowledge transfer,MSCA Staff Exchanges 2025,Open For Submission,27 March 2025,08 October 2025,Oui,"Cette opportunité de projetMSCA Staff Exchanges est pertinente pour l'entreprise car elle implique des éléments clés qui sont déjà présents dans les projets précédents, tels que :","Pertinence : Oui\n\nProjets similaires détectés : FACTORIAT, TECHNORIAT PPP Program, World Bank Collaboration\n\nRésumé rapide : Cette opportunité de projetMSCA Staff Exchanges est pertinente pour l'entreprise car elle implique des éléments clés qui sont déjà présents dans les projets précédents, tels que :\n\n* La collaboration entre organisations académiques et non-académiques (FACTORIAT, TECHNORIAT PPP Program)\n* L'accent mis sur la mobilité internationale et interdisciplinaire (World Bank Collaboration)\n* L'objectif de renforcer les compétences et les connaissances dans le domaine de la recherche et de l'innovation (TECHNORIAT PPP Program)\n* La mise en réseau et la création de partenariats pour favoriser l'innovation (World Bank Collaboration, FACTORIAT)\n\nDe plus, l'accent mis sur le développement de compétences et les perspectives de carrière pour les membres du personnel aligne avec les objectifs de certains projets précédents, tels que FACTORIAT et TECHNORIAT PPP Program."
21,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-JU-CLEAN-AVIATION-2025-03-SMR-03?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=4&pageSize=50&sortBy=startDate",Yes,technology maturation,Ground Test Demonstration up to TRL5 of On-Board NPE Systems Architecture for SMR Aircraft,Open For Submission,27 March 2025,15 May 2025,Oui,"L'appel à projets concerne le développement d'un système d'énergie non propulsive pour des avions, avec des éléments de démonstration et de validation sur le terrain. Les aspects clés sont la maturation technologique, la démonstration et la validation de composants critiques. Ces éléments sont similaires aux projets FACTORIAT (support à la maturation technologique et prototypage pour les startups Deeptech et hardware) et TECHNORIAT PPP Program (pont entre la recherche et l'entrepreneuriat via incubation et accélération de chercheurs). L'entreprise a déjà démontré son expertise dans la maturation technologique et le soutien à l'innovation, ce qui rend cette opportunité pertinente.","- Pertinence : Oui\n- Projets similaires détectés : FACTORIAT, TECHNORIAT PPP Program\n- Résumé rapide : L'appel à projets concerne le développement d'un système d'énergie non propulsive pour des avions, avec des éléments de démonstration et de validation sur le terrain. Les aspects clés sont la maturation technologique, la démonstration et la validation de composants critiques. Ces éléments sont similaires aux projets FACTORIAT (support à la maturation technologique et prototypage pour les startups Deeptech et hardware) et TECHNORIAT PPP Program (pont entre la recherche et l'entrepreneuriat via incubation et accélération de chercheurs). L'entreprise a déjà démontré son expertise dans la maturation technologique et le soutien à l'innovation, ce qui rend cette opportunité pertinente."
28,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIT-2025?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate",Yes,call for proposals,WATER,Open For Submission,20 February 2025,17 June 2025,Oui,"L'appel à projets spécifique concerne la création d'un Knowledge and Innovation Community (KIC) dans le domaine de l'eau, des secteurs marins et des écosystèmes maritimes, avec un focus sur l'innovation, l'entrepreneuriat et l'éducation. Les projets déjà réalisés par l'entreprise, tels que FACTORIAT, World Bank Collaboration, TECHNORIAT PPP Program, BIATLABS, MDBAN – Business Angels Network et ABI – Applied Biotech & Innovation, montrent une expertise dans les domaines de l'innovation, de l'entrepreneuriat, de l'incubation, de l'accélération, de la formation et de la mise en réseau. Les similarités avec l'appel à projets sont évidentes, notamment en ce qui concerne la création d'un écosystème d'innovation, la promotion de l'entrepreneuriat et la formation en innovation.","Pertinence : Oui\n\nProjets similaires détectés : FACTORIAT, World Bank Collaboration, TECHNORIAT - Business Angels Network, BIATLABS, MDBAN – Business Angels Network\n\nRésumé rapide : L'appel à projets spécifique concerne la création d'un Knowledge and Innovation Community (KIC) dans le domaine de l'eau, des secteurs marins et des écosystèmes maritimes, avec un focus sur l'innovation, l'entrepreneuriat et l'éducation. Les projets déjà réalisés par l'entreprise, tels que FACTORIAT, World Bank Collaboration, TECHNORIAT PPP Program, BIATLABS, MDBAN – Business Angels Network et ABI – Applied Biotech & Innovation, montrent une expertise dans les domaines de l'innovation, de l'entrepreneuriat, de l'incubation, de l'accélération, de la formation et de la mise en réseau. Les similarités avec l'appel à projets sont évidentes, notamment en ce qui concerne la création d'un écosystème d'innovation, la promotion de l'entrepreneuriat et la formation en innovation."
29,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/HORIZON-EIC-2025-PATHFINDEROPEN?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=5&pageSize=50&sortBy=startDate",Yes,"intellectual property, technology readiness level",EIC Pathfinder Open,Open For Submission,20 February 2025,21 May 2025,Oui,"Cette opportunité est pertinente car elle concerne le soutien à des technologies innovantes et radicalement nouvelles, à travers des recherches à haut risque et à fort potentiel d'impact, ce qui est aligné avec les projets déjà réalisés par l'entreprise, tels que FACTORIAT, TECHNORIAT PPP Program et ABI - Applied Biotech & Innovation. Ces projets partagent des éléments clés tels que le soutien à la recherche et au développement de technologies innovantes, l'incubation et l'accélération de projets, ainsi que la gestion de la propriété intellectuelle. De plus, l'accent mis sur l'interdisciplinarité, la collaboration et l'égalité des genres sont également des thématiques présents dans les initiatives précédemment citées.","Pertinence : Oui\n\nProjets similaires détectés : FACTORIAT, TECHNORIAT PPP Program, ABI – Applied Biotech & Innovation \n\nRésumé rapide : Cette opportunité est pertinente car elle concerne le soutien à des technologies innovantes et radicalement nouvelles, à travers des recherches à haut risque et à fort potentiel d'impact, ce qui est aligné avec les projets déjà réalisés par l'entreprise, tels que FACTORIAT, TECHNORIAT PPP Program et ABI - Applied Biotech & Innovation. Ces projets partagent des éléments clés tels que le soutien à la recherche et au développement de technologies innovantes, l'incubation et l'accélération de projets, ainsi que la gestion de la propriété intellectuelle. De plus, l'accent mis sur l'interdisciplinarité, la collaboration et l'égalité des genres sont également des thématiques présents dans les initiatives précédemment citées."
51,"https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/EDF-2025-LS-RA-SMERO-NT?isExactMatch=true&status=31094501,31094502&order=DESC&pageNumber=6&pageSize=50&sortBy=startDate",Yes,"prototyping, innovative SMEs",Non-thematic research actions by SMEs and research organisations,Open For Submission,18 February 2025,16 October 2025,Oui,"Cette opportunité Seems to align with the company's expertise in supporting innovative startups, researchers, and SMEs in developing cutting-edge technologies, particularly in the defence sector. The call topic's focus on driving innovation, adaptation of civil technologies, and development of European research and technology ecosystems resonates with the company's experience in Deeptech, Hardware, Incubation, Prototyping, and Acceleration (FACTORIAT) and bridging research and entrepreneurship via incubation/acceleration of researchers (TECHNORIAT PPP Program).","Pertinence : Oui\n\nProjets similaires détectés : TECHNORIAT PPP Program, FACTORIAT\n\nRésumé rapide : Cette opportunité Seems to align with the company's expertise in supporting innovative startups, researchers, and SMEs in developing cutting-edge technologies, particularly in the defence sector. The call topic's focus on driving innovation, adaptation of civil technologies, and development of European research and technology ecosystems resonates with the company's experience in Deeptech, Hardware, Incubation, Prototyping, and Acceleration (FACTORIAT) and bridging research and entrepreneurship via incubation/acceleration of researchers (TECHNORIAT PPP Program)."


In [None]:
df_yes_final.shape

(15, 10)

In [None]:
print(df_yes_final.columns.tolist())


['URL', 'Pertinence', 'Matching Word(s)', 'Title', 'Status', 'Start_date', 'Deadline', 'Pertinence LLM', 'Résumé LLM', 'Réponse brute']


In [None]:
df_yes_final.to_excel('df_final_yes.xlsx', index=False, engine='openpyxl')
