In [17]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import dotenv
from pydantic import BaseModel, Field
import string
import pandas as pd
#import pprint3x
from sklearn.metrics import classification_report

dotenv.load_dotenv("/home/chougar/Documents/GitHub/.env")


True

#### Open / close questions classification

In [18]:
model_qa_name="gpt-4o-mini"
llm = ChatOpenAI(model_name=model_qa_name, temperature=0,)

sys_prompt="""
    You are an intelligent assistant tasked with classifying a given question into one of two categories: "open" or "close". Your decision must be based on the nature of the question:

    **Close Questions** are those that ask for specific details or information. They are typically structured to elicit direct, precise answers. These questions include:
    Requests for specific data (e.g., project title, dates, budget figures).
    Questions that list multiple bullet points with clear, targeted queries.
    Inquiries that require factual, objective responses with minimal explanation.

    **Open Questions** require answers that involve a broader context, reasoning, or synthesis. They are designed to gather insights, multiple concepts, or overall narratives. These questions include:
    Project summaries and descriptions
    Overviews of project objectives, context, or background.
    Inquiries that ask for analysis of outcomes, challenges, or the broader impact.
    Questions that invite descriptive or exploratory responses and may need a logical deduction.
    Instructions:

    Read the question carefully.
    Analyze if the question demands detailed, specific facts (classify as "close") or if it requires explanation, synthesis, and reasoning (classify as "open").
    Output a single label: either "open" or "close".
    Examples:

    Project Name question:
    "What is the title of your project? Does the title reflect the project's field, include a geographical area, and is it engaging?"
    → close

    Project Overview question:
    "What is the project about in one concise paragraph? What are the key problems, objectives, and expected results?"
    → open

    Submitting Organization question (detailing name, address, contact, mission, history, and goals):
    → close

    Context and Background question (exploring the current situation, history, previous interventions, and policy impacts):
    → open
                                            
    Your output should strictly be one of these two labels without additional commentary. Follow these instructions precisely to ensure accurate classification.
"""

       # Data model

class ClassifyQuestion(BaseModel):
    """Classfication for open/close questions"""

    type: str = Field(
        description="The question is open or close, output 'close' 'open'"
    )


# LLM with function call
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_llm_classifier = llm.with_structured_output(ClassifyQuestion)

# Prompt
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", sys_prompt),
        ("human", "Classify the question as close or open \n\n User question: {question}"),
    ]
)

question_classifier = grade_prompt | structured_llm_classifier

questions=[
    "describe the project",
    "What is the specific country and city where the project is located ?",
    "What are the start and end dates of the project ?",
    "Who are the target beneficiaries of the project ?",
    "What is the context, environment, rationale, and the challenges associated with the project ?",
    "How many people are impacted by the project ?",
    "What is the total project budget in Euros, including the amount requested from the Foundation, the detailed provisional budget, and the budget for the current year ?",
]

questions=[
    {"question": """Project Name
        • What is the title of your project?
            ◦ Does the title reflect the project's field (e.g., health, education, environment)?
            ◦ Have you included the geographical area in the project name, if relevant?
            ◦ Is the name concise (6-10 words) and consistent with the project's objectives?
            ◦ Does the title highlight the project’s unique approach or innovation?
            ◦ Is the title engaging and likely to attract donor interest?
            ◦ Would a subtitle or tagline help clarify the project’s focus ? """,
    "type": "close"},

    {"question": """Date of Submission
        • When will the project be submitted to the donor?
            ◦ Is the submission date aligned with the donor’s deadlines?
            ◦ Are there any internal deadlines for reviews before submission?
            ◦ Would a timeline graphic of submission milestones be helpful ? """,
    "type": "close"},

    {"question": """Submitting Organization
        • What is the name of the organization submitting the proposal?
            ◦ What is the organization's address, phone number, and website?
            ◦ Who is the primary contact person for this proposal (name, title, contact details)?
            ◦ What is the mission and vision of your organization?
            ◦ Has your organization previously worked with this donor?
            ◦ Would an organizational chart help visualize the structure of your team?
            ◦ Who are the members of your board, and how are they involved in the organization?
            ◦ Can you describe diversity and inclusion practices within your organization? (if relevant)
            ◦ What are your short-term (1-5 years) and long-term (5-25 years) organizational goals?
            ◦ How do you plan to achieve these goals ? """,
    "type": "close"},

    {"question": """Donor Information
        • Who is the donor or funding agency?
            ◦ What are the donor's contact details and submission guidelines?
            ◦ What are the donor’s strategic priorities and focus areas?
            ◦ Has the donor funded similar projects in the past?
            ◦ How does your project align with the funding criteria and policies ? """,
    "type": "close"},

    {"question": """Project Duration
        • What is the proposed start and end date of the project?
            ◦ Are the timelines realistic and achievable?
            ◦ Are there any critical milestones within the project duration?
            ◦ Would a Gantt chart help illustrate the project timeline ? """,
    "type": "close"},

    {"question": """Total Budget Requested
        • What is the total amount of funding requested from the donor?
            ◦ How does the budget align with donor funding limits?
            ◦ Are there any co-funding or in-kind contributions included?
            ◦ Would a budget summary table enhance clarity ? """,
    "type": "close"},

    {"question": """Project Location
        • Where will the project be implemented (specific regions, countries, or communities)?
            ◦ Why is this location significant for the project?
            ◦ Are there logistical or security considerations in this area?
            ◦ Would a map or visual aid help illustrate the project’s geographic focus ? """,
    "type": "close"},

    {"question": """Signatures and Authorization
        • Who has authorized this proposal (signature of the director or responsible officer)?
            ◦ Are all required organizational approvals in place?
            ◦ Is the proposal compliant with internal governance policies?
            ◦ Would a checklist of required signatures streamline the submission process ? """,
    "type": "close"},


    {"question": """Project Overview
        • What is the project about in one concise paragraph?
            ◦ What are the key problems, objectives, and expected results?
            ◦ What makes this project innovative or unique?
            ◦ How does this project align with current global or local trends?
            ◦ Could a summary box or infographic provide a quick overview for readers ? """,
    "type": "open"},

    {"question": """Target Beneficiaries
        • Who are the primary beneficiaries of the project?
            ◦ How many people will directly and indirectly benefit?
            ◦ What are the demographics of the beneficiaries (age, gender, socio-economic status)?
            ◦ How were the beneficiaries selected?
            ◦ Would a demographic breakdown table clarify the scope of impact?
            ◦ How do you consult with and involve the community in your programs?
            ◦ What methods do you use to recruit and retain participants?
            ◦ How is participant feedback incorporated into your program development and continuation ? """,
    "type": "close"},

    {"question": """Implementation Methodology
        • How will the project be implemented (briefly describe the approach)?
            ◦ What methodologies or frameworks will guide the implementation?
            ◦ Are there any innovative techniques or technologies being used?
            ◦ Would a flowchart or diagram help explain the implementation process?
            ◦ How does your team's experience and expertise support the project's goals?
            ◦ How will additional funding affect the project's scope or outcomes ? """,
    "type": "close"},

    {"question": """Budget Summary
        • What is the overall budget, and how will the funds be allocated across major categories?
            ◦ Are the budget allocations aligned with project priorities?
            ◦ How does the budget demonstrate cost-effectiveness?
            ◦ Would a pie chart or budget allocation table enhance understanding ? """,
    "type": "close"},

    {"question": """Project Outcomes
        • What are the main outcomes expected from this project?
            ◦ How will these outcomes contribute to long-term change?
            ◦ Are the outcomes sustainable beyond the project timeline?
            ◦ Would a logic model or outcomes framework provide clarity?
            ◦ What is your exit strategy when the donor funding ends ? """,
    "type": "open"},


    {"question": """Context and Background
        • What is the current situation in the target area related to the project focus?
            ◦ Are there any historical, social, or economic factors influencing the problem?
            ◦ What previous interventions have been attempted in this area?
            ◦ How do current policies affect the situation?
            ◦ Would a timeline of past interventions help contextualize the problem?
            ◦ How does your project fit within the context of other organizations, government strategies, and existing programs ? """,
    "type": "open"},

    {"question": """Existing Challenges
        • What challenges are faced by the community or region regarding this issue?
            ◦ How have these challenges evolved over time?
            ◦ Are there any emerging threats or opportunities?
            ◦ What barriers exist to solving this problem ? """,
    "type": "open"},

    {"question": """Relevant Stakeholders
        • Who are the key stakeholders involved in the issue?
            ◦ What are their roles and influence in the current situation?
            ◦ How have stakeholders been engaged in the project design?
            ◦ Are there any potential conflicts of interest among stakeholders?
            ◦ Would a stakeholder map or matrix enhance understanding of their roles ? """,
    "type": "close"},


    {"question": """Selecting the Target Area
        • Why was this specific area selected for the project?
            ◦ What criteria were used to determine the area's eligibility?
            ◦ Are there any geographical, social, or economic factors that make this area a priority?
            ◦ Has this area previously received similar interventions? If so, what were the outcomes?
            ◦ Would a geographic information system (GIS) map illustrate area selection criteria ? """,
    "type": "close"},

    {"question": """Identifying the Main Problem
        • What is the primary problem the project seeks to address?
            ◦ What are the root causes of this problem?
            ◦ How does this problem affect different groups within the community (e.g., women, children, marginalized groups)?
            ◦ What are the short-term and long-term consequences if the problem is not addressed?
            ◦ Would a problem tree diagram help visualize the causes and effects ? """,
    "type": "open"},

    {"question": """Data Collection
        • Were any primary data collection methods used to identify the problem? (If applicable)
            ◦ Were surveys, interviews, or focus groups conducted with beneficiaries or stakeholders?
            ◦ How were these methods designed to ensure accuracy and reliability?
            ◦ Who conducted the data collection, and what was their expertise?
            ◦ How was community participation ensured in the data collection process?
            ◦ Were ethical considerations, such as informed consent, addressed?
            ◦ Could data be presented in charts or tables for clearer analysis?
        • What secondary data sources were reviewed to support the needs assessment?
            ◦ Were government reports, academic studies, or statistical databases consulted?
            ◦ How recent and reliable are the secondary data sources?
            ◦ Were previous project evaluations or reports used to inform this analysis?
            ◦ How do the secondary data findings align with the primary data collected?
            ◦ Are there gaps in the secondary data that the project seeks to address?
        • How were primary and secondary data combined to strengthen the needs assessment? (If both were used)
            ◦ Were there any discrepancies between the data sources, and how were they resolved?
            ◦ How does the combined data support the justification for the project ? """,
    "type": "close"},

    {"question": """Analyzing the Problem
        • How was the problem analyzed?
            ◦ What are the effects and consequences of this problem on the community?
            ◦ Are there external factors (political, environmental, economic) contributing to the problem?
            ◦ What are the existing coping mechanisms within the community?
            ◦ Was a Problem Tree Diagram or similar visual tool used to map out the causes and consequences of the issue? """,
    "type": "open"},

    {"question": """Formulating a Needs Assessment Paragraph
        • How will the needs assessment be summarized in the proposal?
            ◦ What evidence will be highlighted to justify the project's need?
            ◦ How does the needs assessment align with donor priorities?
            ◦ Are there visual aids (charts, graphs) that can support the needs assessment ? """,
    "type": "open"},

    {"question": """Why Support is Needed
        • Why is external funding required for this project?
            ◦ What makes this project a priority for your organization?
            ◦ Are there gaps in existing services that this project will address?
            ◦ How will the project add value compared to existing initiatives ? """,
    "type": "open"},

    {"question": """Alignment with Donor Priorities
        • How does the project align with the donor's priorities and goals?
            ◦ Are there specific donor strategies or initiatives this project supports?
            ◦ Has the project been tailored to meet donor expectations?
            ◦ Are there examples of similar projects funded by this donor ? """,
    "type": "open"},

    {"question": """Urgency and Relevance
        • Why is this project urgent?
            ◦ What will happen if the project is not funded?
            ◦ How does the timing of the project align with current community needs or global issues ? """,
    "type": "open"},

    {"question": """Project Area Description
        • What are the geographical, social, and economic characteristics of the project area?
            ◦ Are there any logistical challenges in the project area?
            ◦ What are the key resources available in the area to support the project ? """,
    "type": "close"},

    {"question": """Organizational Capacity
        • What is your organization's capacity to implement the project?
            ◦ What past projects or experiences demonstrate your capability?
            ◦ Does your organization have the technical expertise required for this project?
            ◦ Are there existing partnerships or networks that will support project implementation?
            ◦ Could a timeline chart of past successful projects strengthen this section?
            ◦ What internal systems and processes ensure effective project management ? """,
    "type": "open"},        

    {"question": """Partnerships and Collaborations
        • Are there any partners involved in project implementation?
            ◦ What are their roles and responsibilities?
            ◦ How will coordination among partners be managed?
            ◦ Are there formal agreements (MOUs) in place with partners?
            ◦ Would an organizational chart of partners clarify roles and responsibilities ? """,
    "type": "close"},

    {"question": """Project Objectives
    1 General Objective
        • What is the overarching goal of the project?
            ◦ How does this goal address the identified problem?
            ◦ Is the goal aligned with the strategic priorities of your organization?
            ◦ Could a flowchart illustrating the link between goals and outcomes be beneficial?
    2 Specific Objectives
        • What are the specific, measurable, achievable, relevant, and time-bound (SMART) objectives?
            ◦ How will each objective contribute to achieving the overall goal?
            ◦ Are the objectives realistic given the project's timeframe and resources?
            ◦ Would a Gantt chart help visualize the timeline for achieving these objectives?""",
    "type": "open"},

    {"question": """Outcomes
        • What are the intended outcomes of the project?
            ◦ How will these outcomes create sustainable change?
            ◦ Are the outcomes aligned with the needs of the target beneficiaries?
            ◦ Would a results chain diagram help visualize the link between activities, outputs, and outcomes ? """,
    "type": "open"},

    {"question": """Outputs
        • What tangible products, services, or results will the project deliver?
            ◦ How will the outputs be measured?
            ◦ Are the outputs achievable within the project's timeframe?
            ◦ Could a table summarizing outputs and corresponding indicators provide clarity ? """,
    "type": "close"},

    {"question": """Indicators
        • What indicators will be used to measure progress and success?
            ◦ Are the indicators quantitative or qualitative?
            ◦ How will baseline data be established for these indicators?
            ◦ Would a matrix of indicators, data sources, and frequency of collection be useful ? """,
    "type": "close"},

    {"question": """Verification Methods
        • How will data be collected and verified for each indicator?
            ◦ What tools or methodologies will be used for data verification?
            ◦ Who will be responsible for monitoring and verifying data?
            ◦ Could a flowchart or checklist improve understanding of the verification process ? """,
    "type":"close"},

    {"question": """Risks and Mitigation
        • What are the potential risks that could impact the project?
            ◦ What strategies will be used to mitigate these risks?
            ◦ Are there contingency plans in place for high-risk scenarios?
            ◦ Would a risk matrix help categorize risks by likelihood and impact ? """,
    "type": "open"},

    {"question": """Activities
        • What specific activities will be carried out to achieve the objectives?
            ◦ What is the timeline and sequence for these activities?
            ◦ Who will be responsible for each activity?
            ◦ Are there dependencies between activities that need to be managed?
            ◦ Would a Gantt chart help visualize the schedule of activities ? """,
    "type": "close"},

    {"question": """Work Plan
    Activity Schedule
        • What is the detailed schedule for each activity?
            ◦ Are there any seasonal or external factors that might affect the schedule?
            ◦ How will delays be managed if they occur?
            ◦ Could a table summarizing activities, timelines, and responsible parties improve clarity ? """,
    "type": "close"},

    {"question": """Responsible Parties
        • Who is responsible for each activity?
            ◦ What are the qualifications of the team members involved?
            ◦ How will roles and responsibilities be communicated within the team?
            ◦ Would an organizational chart or team structure diagram be helpful ? """,
    "type": "close"},

    {"question": """Needs Assessment
    Identifying the Main Problem
        • What is the primary problem the project seeks to address?
            ◦ What are the root causes of this problem?
            ◦ How does this problem affect different groups within the community?
            ◦ Would a problem tree diagram help visualize the causes and effects of the issue ? """,
    "type": "open"},

    {"question": """Analyzing the Problem
        • How was the problem analyzed?
            ◦ What are the effects and consequences of this problem on the community?
            ◦ Are there external factors contributing to the problem?
            ◦ Could a table summarizing the identified problems, causes, and effects be beneficial ? """,
    "type": "open"},

    {"question": """Monitoring, Evaluation, and Learning (MEL)
    Measurement and Evaluation Practices
        • What measurement and evaluation practices do you have in place?
            ◦ How do you ensure that your programs are effectively achieving their intended outcomes?
            ◦ What processes do you have in place for adapting programs based on evaluation findings or changing needs?
            ◦ How does your program define success?
            ◦ Would an MEL framework diagram enhance understanding ? """,
    "type": "close"},

    {"question": """Sustainability Plan
    Sustainability Beyond Funding
        • How will the project continue after the donor funding ends?
            ◦ Are there plans for local ownership, capacity building, or institutional integration?
            ◦ What mechanisms will ensure long-term benefits for beneficiaries?
            ◦ Could a sustainability plan matrix outline strategies for maintaining outcomes ? """,
    "type": "open"},

    {"question": """Financial Sustainability
        • How will the project achieve financial sustainability post-funding?
            ◦ Are there diversified funding streams planned for the future?
            ◦ How will the organization manage potential funding gaps ? """,
    "type": "close"},

    {"question": """Cross-Cutting Themes
    Gender Equality
        • How does the project promote gender equality?
            ◦ Are gender-specific barriers and opportunities identified in the project design?
            ◦ How is gender-disaggregated data collected and used ? """,
    "type": "open"},

    {"question": """Environmental Sustainability
        • How does the project address environmental sustainability?
            ◦ Are there measures to minimize environmental impact?
            ◦ Does the project promote sustainable practices in the community ? """,
    "type": "open"},

    {"question": """Human Rights and Inclusion
        • How does the project incorporate human rights principles?
            ◦ Are marginalized groups actively involved in the project design and implementation?
            ◦ What strategies ensure inclusivity and non-discrimination ? """,
    "type": "open"},

    {"question": """Budget
    Budget Summary
        • What is the total project budget?
            ◦ How is it broken down across major categories (personnel, materials, etc.)?
            ◦ Are there any high-cost items, and how are they justified?
            ◦ Would a budget table clearly outline costs across categories ? """,
    "type": "close"},

    {"question": """Co-Funding and Contributions
        • Are there other sources of funding or in-kind contributions?
            ◦ How will these additional funds be managed and reported?
            ◦ Are there commitments from local partners or governments?
            ◦ Could a co-funding matrix provide clarity on contributions from different sources ? """,
    "type": "close"},

    {"question": """Budget Justifications
        • What is the rationale for each budget line item?
            ◦ How does the budget reflect cost-efficiency and value for money?
            ◦ Are contingency funds included for unforeseen expenses?
            ◦ Would a justification table help link budget items to project activities and objectives?""",
    "type": "close"},

    {"question": """Financial Controls and Processes
        • What financial controls and processes are in place to ensure proper fund management?
            ◦ How does your organization define and ensure financial solvency?
            ◦ What processes do you have in place for financial audits and reporting?
            ◦ How do you track and manage budget line items ? """,
    "type": "close"},

    {"question": """Cross-Cutting Themes
        • Does the project address cross-cutting themes such as gender equality, environmental sustainability, or human rights?
            ◦ How are these themes integrated into the project design and implementation?
            ◦ Could a matrix showing the integration of cross-cutting themes improve clarity ? """,
    "type": "open"},    
]

replies=[]


In [None]:
i=1
for el in questions:
    resp= question_classifier.invoke({"question": el["question"]})
    print(f"Question {i}/{len(questions)}:\n",el["question"])
    print("\n--------\n")
    print("Predicted class:", resp.type)
    print("\n--------\n")
    print("True class:", el["type"])    
    print("\n--------\n")
    replies.append({"question": el["question"], "pred_class": resp.type, "true_class": el["type"]})
    i+=1
    print("\n=============================\n")

In [23]:
import pandas as pd
import pprint3x
from sklearn.metrics import classification_report

df_replies=pd.DataFrame(replies)
df_replies.to_csv("./classification_open_close_questions.csv", index=False)
class_report=classification_report(df_replies["true_class"], df_replies["pred_class"])
pprint3x.pprint(class_report)

('              precision    recall  f1-score   support\n'
 '\n'
 '       close       0.76      0.93      0.84        28\n'
 '        open       0.87      0.62      0.72        21\n'
 '\n'
 '    accuracy                           0.80        49\n'
 '   macro avg       0.82      0.77      0.78        49\n'
 'weighted avg       0.81      0.80      0.79        49\n')


#### Association / project classification

In [2]:
model_qa_name="gpt-4o-mini"
llm = ChatOpenAI(model_name=model_qa_name, temperature=0,)
"v1"
sys_prompt_v1="""
    You are an expert in text classification. Your task is to determine whether a given question belongs to the category of "Association Identification" in funding application forms.  

    **Definition of "Association Identification"**  
    A question belongs to this category if it is about **identifying the applying organization**. This includes:  
    - Official name, registration details, and legal status  
    - Address, contact details, and website  
    - Financials (annual budget, income, funding sources)  
    - Staff, volunteers, and organizational structure  
    - Membership in networks, federations, or partnerships  
    - Experience, expertise, and primary focus of activities  
    - Capacity to implement projects (general, not project-specific)  

    **It does NOT belong to this category if it asks about:**  
    - The specific project, its objectives, or expected impact  
    - Project implementation details (timelines, risks, locations)  
    - Target beneficiaries or stakeholder engagement  
    - Donor relations or project-specific past experiences  

    For each input question, respond with either:  
    - `"yes"` if the question belongs to "Association Identification"  
    - `"no"` if it does not  

    **Examples:**  
    * Positive Examples (Association Identification)

    "Input": "Provide the legal name of your organization.",
    "Output": "yes"


    "Input": "What is your organization’s full name, as registered?",
    "Output": "yes"


    "Input": "Does your organization belong to a professional network or federation?",
    "Output": "yes"


    "Input": "What type of legal entity is your organization?",
    "Output": "yes"


    "Input": "How many staff members are employed full-time by your organization?",
    "Output": "yes"


    "Input": "What are the primary areas of expertise of your organization?",
    "Output": "yes"


    "Input": "Has your organization previously managed projects of a similar scale?",
    "Output": "yes"


    "Input": "What is the official email address of your organization?",
    "Output": "yes"


    *  Negative Examples (Not Association Identification)

    "Input": "Describe the main objectives of your project.",
    "Output": "no"


    "Input": "What are the key activities planned for this project?",
    "Output": "no"


    "Input": "Explain how your project aligns with donor priorities.",
    "Output": "no"


    "Input": "What is the expected number of beneficiaries?",
    "Output": "no"


    "Input": "What are the risks associated with this project?",
    "Output": "no"


    "Input": "Provide details of your project’s expected impact.",
    "Output": "no"


    "Input": "Describe previous successful projects your organization has implemented.",
    "Output": "no"


    "Input": "Does your organization have prior experience working in this specific region?",
    "Output": "no"


    "Input": "Has your organization received funding from this donor before?",
    "Output": "no"


    Your output should strictly be one of these two labels without additional commentary. Follow these instructions precisely to ensure accurate classification.

"""


"v2"
sys_prompt_v2="""
    Tu es un classificateur de questions pour un formulaire de demande de financement.  
    Ta tâche est de déterminer si une question concerne **l’identification de l’association** ou non.  

    #### **Instructions :**  
    - Réponds **"yes"** si la question concerne l’organisation elle-même (nom, statut, contact, structure, budget, mission, historique, effectifs, partenaires).  
    - Réponds **"no"** si la question concerne le projet proposé (objectifs, bénéficiaires, financement, méthodologie, impact, contexte).  
    - Si la question est **ambiguë ou incomplète**, réponds **"uncertain"** au lieu de "yes" ou "no".  

    #### **Exemples positifs ("yes") :**  
    - "Quel est le nom de votre organisation ?" → "yes"  
    - "Adresse du siège social ?" → "yes"  
    - "Quel est le statut légal de votre organisation ?" → "yes"  
    - "Quel est le budget annuel de l’association ?" → "yes"  
    - "Quelle est la mission de votre organisation ?" → "yes"  
    - "Nombre d’employés et de bénévoles ?" → "yes"  
    - "Votre organisation a-t-elle de l’expérience sur ce type de projets ?" → "yes"  

    #### **Exemples négatifs ("no") :**  
    - "Quel est le titre de votre projet ?" → "no"  
    - "Quels sont les objectifs du projet ?" → "no"  
    - "Quel est le montant total du financement demandé ?" → "no"  
    - "Qui sont les bénéficiaires du projet ?" → "no"  
    - "Décrivez la méthodologie de mise en œuvre." → "no"  
    - "Quels sont les principaux résultats attendus ?" → "no"  

    #### **Exemples incertains ("uncertain") :**  
    - "Présentez votre organisation et son implication dans ce projet." → "uncertain"  
    - "Quels sont les principaux partenaires de votre projet ?" → "uncertain"  
    - "Quel est le contexte global de votre organisation ?" → "uncertain"  

    Réponds uniquement par **"yes"**, **"no"** ou **"uncertain"**, sans autre texte.

"""


"v3"
sys_prompt_v3="""
    Tu es un expert en classification de questions dans le cadre de formulaires de demande de financement. Ta mission est de déterminer si une question concerne l’identification de l’association ou non.

    DEFINITION :
    - **Association Identification ("yes")** : la question porte sur l’organisation elle-même et ses informations de base, telles que :
    - Informations générales : nom, adresse, téléphone, site web, année de création, zones d’intervention.
    - Statut légal et forme juridique (ex. : NGO, association, fondation).
    - Données de contact et composition du leadership (personne de contact, membres du conseil, organigramme).
    - Informations sur la mission, la vision, les valeurs et les objectifs organisationnels.
    - Expérience et historique (ex. : expérience sur des projets similaires, références, résultats).
    - Ressources humaines (nombre d’employés, bénévoles).
    - Capacités administratives et financières de l’organisation (budget annuel, partenaires financiers).

    - **Non-Association Identification ("no")** : la question porte spécifiquement sur le projet ou la proposition de financement, par exemple :
    - Le titre, les objectifs, les bénéficiaires ou le financement demandé pour le projet.
    - Les méthodes de mise en œuvre, la durée, les résultats attendus, l’évaluation ou l’impact du projet.
    - Toute autre information ne concernant pas directement l’identification ou la caractérisation de l’organisation.

    - **Ambigu ("uncertain")** : la question mélange des éléments d’identification de l’organisation avec des aspects projet ou présente une formulation ambiguë, rendant la classification incertaine.  
    Par exemple :  
    - "Présentez votre organisation et son implication dans ce projet."  
    - "Quels sont les principaux partenaires de votre projet ?"  
    Dans ces cas, réponds "uncertain".

    CONSIGNES :
    - Pour chaque question, réponds uniquement par "yes", "no" ou "uncertain", sans explications supplémentaires.
    - Tiens compte des exemples ci-dessous pour guider ta décision.

    EXEMPLES EXTRAITS DU DOCUMENT ET D’AUTRES SOURCES :

    Exemples positifs ("yes") :
    - "What is the name of the organization submitting the proposal?" → "yes"
    - "What is the organization's address?" → "yes"
    - "What is the organization's phone number?" → "yes"
    - "What is the organization's website?" → "yes"
    - "What year was your organization established?" → "yes"
    - "In which countries or regions does your organization operate?" → "yes"
    - "What is the legal status of your organization (e.g. NGO, association, foundation)?" → "yes"
    - "Who is the primary contact person for this proposal (name, title, contact details)?" → "yes"
    - "What is the mission and vision of your organization?" → "yes"
    - "How many full-time and part-time staff does your organization have?" → "yes"
    - "What is your organization’s annual budget?" → "yes"

    Exemples négatifs ("no") :
    - "What is the title of your project?" → "no"
    - "What are the objectives of the project?" → "no"
    - "What is the total amount of funding requested from the donor?" → "no"
    - "Who are the beneficiaries of the project?" → "no"
    - "Describe the methodology for project implementation." → "no"
    - "What are the expected outcomes of this project?" → "no"

    Exemples ambigus ("uncertain") :
    - "Present your organization and its involvement in this project." → "uncertain"
    - "What are the key partners of your project?" → "uncertain"
    - "Describe your organization’s history and recent projects." → "uncertain" 
    (mélange d’information sur l’organisation et le projet)

    
    Pour la question suivante, indique ta classification ("yes", "no" ou "uncertain") :
    

"""

"v4"
sys_prompt_v4="""
    You are a classification model designed to analyze whether a question concerns the identification of an association (also called 'lead organisation') in a funding application form. 
    A question belongs in the “yes” category if it seeks general information about the organization, such as its name, history, mission, partners or human and financial resources. 
    A question falls into the “no” category if it concerns other aspects of the project or funding application that are not directly related to the organization's identity.
    If the question is ambiguous and could reasonably fall into both categories, it should be classified as “uncertain”.

    Instructions
    If the question asks for information about the organization (name, history, mission, capabilities, members, partners, etc.), it should be classified as “uncertain”.

    If the question does not concern the organization itself, but rather the project, the project context or detailed financial aspects, it is classified as “no”.

    If the question is ambiguous and may concern both the identity of the association and another area, it is classified as “uncertain”.

    Commented examples
    Category "yes" (question relating to the association's identity)
    "Lead organisation’s primary focus ?" → yes

    "Lead organisation’s experience and expertise" → yes

    "Project leader’s full name and email address" → yes

    "The organization and its ecosystem (member of networks, affiliations, etc.)" → yes

    "Description of the organisation’s mission and vision" → yes

    "Does your organisation have a track record of managing projects of equivalent scale?" → yes

    "Does your organisation have a track record of engaging in the area of work proposed?" → yes

    "Does your organisation have the capacity to implement the proposed intervention?" → yes

    "Historical background of the applicant" → yes

    "Organization of the applicant" → yes

    "Context and Background" → yes

    "Relevant Stakeholders" → yes

    Category "uncertain" (question ambiguous beyween the identity of the association and other aspects)
    "Geographical location (regional? national?)" → uncertain

    "Annual budget in euros (last financial year)?" → uncertain

    "Number of volunteers?" → uncertain

    "Number of members?" → uncertain

    "Main partners (institutional, operational, financial)?" → uncertain

    "Description of the organization and its staff" → uncertain

    "Partners" → uncertain

    Category "no" (question not relating to the association's identity)
    "What are the expected outcomes of the project?" → no

    "What is the total budget requested for this project?" → no

    "What are the risks associated with the project?" → no

    "What monitoring and evaluation mechanisms will be used?" → no

    Expected output format
    Answer only yes, no or uncertain, without further explanation.
"""

class ClassifyQuestion(BaseModel):
    """Classfication for yes/no questions"""

    type: str = Field(
        description="The question is related to association identification or not, output 'yes', 'no' or 'uncertain'"
    )


# LLM with function call
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.5)
structured_llm_classifier = llm.with_structured_output(ClassifyQuestion)

# Prompt
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", sys_prompt_v4),
        ("human", "Classify the question as yes or no \n\n User question: {question}"),
    ]
)

question_classifier = grade_prompt | structured_llm_classifier


In [3]:

questions = [
    {"Input": "Lead organisation’s full name and website ?", "Output": "yes"},
    {"Input": "Lead organisation’s registered address ?", "Output": "yes"},
    {"Input": "Lead organisation’s annual income  ?", "Output": "yes"},
    {"Input": "Lead organisation’s legal status ?", "Output": "yes"},
    {"Input": "Lead organisation’s primary focus ?", "Output": "yes"},
    {"Input": "Lead organisation’s experience and expertise", "Output": "yes"},
    {"Input": "Lead organisation’s number of staff as full-time equivalent (FTE)", "Output": "yes"},
    {"Input": "Lead organisation’s number and roles of volunteers ", "Output": "yes"},
    {"Input": "Lead applicant’s full name and email address", "Output": "yes"},
    {"Input": "Project leader’s full name and email address", "Output": "yes"},
    {"Input": "Name of organization?", "Output": "yes"},
    {"Input": "Legal status (association, charitable foundation, other foundation, endowment fund, local authority, NGO)", "Output": "yes"},
    {"Input": "How long has the organization existed? Date founded?", "Output": "yes"},
    {"Input": "Head office address?", "Output": "yes"},
    {"Input": "Geographical location (regional? national?) ", "Output": "uncertain"},
    {"Input": "Annual budget in euros (last financial year)?", "Output": "uncertain"},
    {"Input": "Number of full-time equivalent employees?", "Output": "yes"},
    {"Input": "Number of volunteers?", "Output": "uncertain"},
    {"Input": "Number of members?", "Output": "uncertain"},
    {"Input": "The organization and its ecosystem (member of a network, federation, group)", "Output": "yes"},
    {"Input": "Main partners (institutional, operational, financial)", "Output": "uncertain"},
    {"Input": "Website ?", "Output": "yes"},
    {"Input": "Description of the organization and its staff", "Output": "yes"},
    {"Input": "Organization Type (public/private)", "Output": "yes"},
    {"Input": "Focal Person Name / Position", "Output": "yes"},
    {"Input": "Organisation name ", "Output": "yes"},
    {"Input": "Nature of organisation (e.g. INGO, Federation) ", "Output": "yes"},
    {"Input": "Year of creation", "Output": "yes"},
    {"Input": "Registration number ", "Output": "yes"},
    {"Input": "Place of registration ", "Output": "yes"},
    {"Input": "Registered Address, including country ", "Output": "yes"},
    {"Input": "Organisation telephone number", "Output": "yes"},
    {"Input": "Organisation website ", "Output": "yes"},
    {"Input": "Number of employees (full-time equivalent in most recent year) ", "Output": "yes"},
    {"Input": "Description of the organisation’s mission and activities ", "Output": "yes"},
    {"Input": "Does your organisation have a track record of managing projects of equivalent scale?", "Output": "yes"},
    {"Input": "Does your organisation have a track record of engaging in the area of work proposed?", "Output": "yes"},
    {"Input": "Does your organisation have the capacity to implement the proposed intervention?", "Output": "yes"},
    {"Input": "Provide a brief history of your organization", "Output": "yes"},
    {"Input": "Describe past projects undertaken by your organization", "Output": "yes"},
    {"Input": "What is the title of your project?", "Output": "no"},
    {"Input": "Date of Submission", "Output": "no"},
    {"Input": "Project Duration, What is the proposed start and end date of the project?", "Output": "no"},
    {"Input": "What is the total amount of funding requested from the donor?", "Output": "no"},
    {"Input": "Where will the project be implemented (specific regions, countries, or communities)?", "Output": "no"},
    {"Input": "Who are the primary beneficiaries of the project?", "Output": "no"},
    {"Input": "Implementation Methodology", "Output": "no"},
    {"Input": "What are the main outcomes expected from this project?", "Output": "no"},
    {"Input": "Context and Background", "Output": "uncertain"},
    {"Input": "Existing Challenges", "Output": "no"},
    {"Input": "Relevant Stakeholders", "Output": "no"},
    {"Input": "Project objective (Explain which GBF goal project the applies to)", "Output": "no"},
    {"Input": "Project implementation plan", "Output": "no"},
    {"Input": "Expected concrete activity results", "Output": "no"},
    {"Input": "Activity schedule", "Output": "no"},
    {"Input": "Income and Expenditure Budget plan", "Output": "uncertain"},
    {"Input": "The name and contact information of the experts outside of applicant’s who will provive advice and guidance for the project implementation .", "Output": "uncertain"},
    {"Input": "Local approvals", "Output": "no"},    
    {"Input": "Partners", "Output": "uncertain"},
    {"Input": "Historical background of the applicant", "Output": "yes"},    
    {"Input": "Organization of the applicant", "Output": "yes"},    

]


replies=[]
i=1
for el in questions:
    print(f"Question {i}/{len(questions)}:\n",el["Input"])
    resp= question_classifier.invoke({"question": el["Input"]})
    
    print("\n--------\n")
    print("Predicted class:", resp.type)
    print("\n--------\n")
    print("True class:", el["Output"])    
    print("\n--------\n")
    replies.append({"question": el["Input"], "pred_class": resp.type, "true_class": el["Output"]})
    i+=1
    print("\n=============================\n")

Question 1/61:
 Lead organisation’s full name and website ?

--------

Predicted class: yes

--------

True class: yes

--------



Question 2/61:
 Lead organisation’s registered address ?

--------

Predicted class: yes

--------

True class: yes

--------



Question 3/61:
 Lead organisation’s annual income  ?

--------

Predicted class: uncertain

--------

True class: yes

--------



Question 4/61:
 Lead organisation’s legal status ?

--------

Predicted class: yes

--------

True class: yes

--------



Question 5/61:
 Lead organisation’s primary focus ?

--------

Predicted class: yes

--------

True class: yes

--------



Question 6/61:
 Lead organisation’s experience and expertise

--------

Predicted class: yes

--------

True class: yes

--------



Question 7/61:
 Lead organisation’s number of staff as full-time equivalent (FTE)

--------

Predicted class: uncertain

--------

True class: yes

--------



Question 8/61:
 Lead organisation’s number and roles of volunteers 


In [None]:
import pandas as pd
import pprint3x
from sklearn.metrics import classification_report

df_replies=pd.DataFrame(replies)
df_replies.to_csv("./classification_asso_pp_questions.csv", index=False)
class_report=classification_report(df_replies["true_class"], df_replies["pred_class"])
print(class_report)

              precision    recall  f1-score   support

          no       0.61      1.00      0.76        19
         yes       1.00      0.71      0.83        42

    accuracy                           0.80        61
   macro avg       0.81      0.86      0.80        61
weighted avg       0.88      0.80      0.81        61



In [None]:
"v2"
df_replies=pd.DataFrame(replies)
df_replies.to_csv("./classification_asso_pp_questions.csv", index=False)
class_report=classification_report(df_replies["true_class"], df_replies["pred_class"])
print(class_report)

              precision    recall  f1-score   support

          no       0.88      0.74      0.80        19
   uncertain       0.00      0.00      0.00         0
         yes       0.97      0.83      0.90        42

    accuracy                           0.80        61
   macro avg       0.62      0.52      0.57        61
weighted avg       0.94      0.80      0.87        61



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
"v3"
df_replies=pd.DataFrame(replies)
df_replies.to_csv("./classification_asso_pp_questions.csv", index=False)
class_report=classification_report(y_true= df_replies["true_class"], y_pred= df_replies["pred_class"])
print(class_report)

              precision    recall  f1-score   support

          no       0.88      0.93      0.90        15
   uncertain       0.33      0.67      0.44         9
         yes       0.93      0.68      0.78        37

    accuracy                           0.74        61
   macro avg       0.71      0.76      0.71        61
weighted avg       0.83      0.74      0.76        61



In [None]:
"v4"
df_replies=pd.DataFrame(replies)
df_replies.to_csv("./classification_asso_pp_questions.csv", index=False)
class_report=classification_report(y_true= df_replies["true_class"], y_pred= df_replies["pred_class"])
print(class_report)

              precision    recall  f1-score   support

          no       0.88      0.93      0.90        15
   uncertain       0.32      0.67      0.43         9
         yes       0.92      0.65      0.76        37

    accuracy                           0.72        61
   macro avg       0.70      0.75      0.70        61
weighted avg       0.82      0.72      0.75        61



In [27]:
import pandas as pd

df=pd.read_csv("./questions_traintest-modernbert.csv")

testset=df[df["set"]=="test"]
print(len(testset))

for el in testset.head(10).iterrows():
    print("text:", el[1]["text"])
    print("label:", el[1]["label"])
    print("\n==================\n")

64
text: Phone number:
label: 0


text: I have provided actual start and end dates for the project.
label: 0


text: Specific context for the project presented to A Tree for You: 10.1. Target groups and final beneficiaries: 10.1.2. Female/male distribution
label: 0


text: 13.4. How were the list of species and the planting plans established? (Experts or the planter's own experiences, local/international experts, local nurserymen, traditional practices in the region, bibliographic sources, other structures practicing the same types of planting: NGOs, States, international institutions, etc.)
label: 1


text: 8. PROJECT SCHEDULE AND TIMELINE
label: 0


text: Q2.7 Project secondary country/ies
Which eligible secondary country/ies will your project work in? By secondary country/ies we refer to the country/ies which have some activities but are not the main project focus. NOTE: if you are working in a secondary country(ies), it is highly recommended that you have a partner organisation in 

In [21]:
from gradio_client import Client
import time

client = Client("gylrt/question_classification")

def question_classifier_open_close(question: str) -> str:

    result = client.predict(
        question=question,
        api_name="/predict"
    )

    return result

err=1
while err:
    try:
        print(question_classifier_open_close("PROJECT SCHEDULE AND TIMELINE"))
        err=0
    except Exception as e:
        print(e)
        time.sleep(1)
        err=1

Loaded as API: https://gylrt-question-classification.hf.space ✔
close


In [28]:
test_evals=[]

i=1
for el in testset.iterrows():
    print("Step ", i)
    # llm call
    t=time.time()
    resp_llm= question_classifier.invoke({"question": el[1]["text"]})
    tf1=time.time()-t
    
    
    # bert
    t=time.time()
    resp_bert= question_classifier_open_close(el[1]["text"])
    tf2=time.time()-t

    test_evals.append(
        {
            "question": el[1]["text"],
            "true_class": el[1]["label"],
            "pred_llm": resp_llm.type,
            "pred_bert": resp_bert,
            "exec_time_llm": tf1,
            "exec_time_bert": tf2,
        }
    )    

    i+=1

Step  1
Step  2
Step  3
Step  4
Step  5
Step  6
Step  7
Step  8
Step  9
Step  10
Step  11
Step  12
Step  13
Step  14
Step  15
Step  16
Step  17
Step  18
Step  19
Step  20
Step  21
Step  22
Step  23
Step  24
Step  25
Step  26
Step  27
Step  28
Step  29
Step  30
Step  31
Step  32
Step  33
Step  34
Step  35
Step  36
Step  37
Step  38
Step  39
Step  40
Step  41
Step  42
Step  43
Step  44
Step  45
Step  46
Step  47
Step  48
Step  49
Step  50
Step  51
Step  52
Step  53
Step  54
Step  55
Step  56
Step  57
Step  58
Step  59
Step  60
Step  61
Step  62
Step  63
Step  64


In [52]:
df_test_evals=pd.DataFrame(test_evals)
df_test_evals.loc[:, "true_class"]=df_test_evals["true_class"].replace({0: 'close', 1: 'open'})


display("CM LLM:", pd.crosstab(df_test_evals["true_class"], df_test_evals["pred_llm"]))

display("CM Bert:", pd.crosstab(df_test_evals["true_class"], df_test_evals["pred_bert"]), "================")


print("CR LLM;\n", classification_report(df_test_evals["true_class"], df_test_evals["pred_llm"]), "\n================\n")

print("CR Bert;\n", classification_report(df_test_evals["true_class"], df_test_evals["pred_bert"]))

 'open' 'open' 'open' 'open' 'close' 'open' 'close' 'close' 'close'
 'close' 'open' 'open' 'open' 'close' 'close' 'open' 'close' 'open' 'open'
 'close' 'close' 'open' 'close' 'close' 'open' 'open' 'open' 'open' 'open'
 'open' 'open' 'close' 'open' 'close' 'close' 'open' 'open' 'close'
 'close' 'open' 'open' 'close' 'close' 'close' 'close' 'close' 'open'
 'open' 'open' 'close' 'open' 'open' 'close' 'open' 'close']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_test_evals.loc[:, "true_class"]=df_test_evals["true_class"].replace({0: 'close', 1: 'open'})


'CM LLM:'

pred_llm,close,open
true_class,Unnamed: 1_level_1,Unnamed: 2_level_1
close,30,1
open,20,13


'CM Bert:'

pred_bert,close,open
true_class,Unnamed: 1_level_1,Unnamed: 2_level_1
close,28,3
open,2,31




CR LLM;
               precision    recall  f1-score   support

       close       0.60      0.97      0.74        31
        open       0.93      0.39      0.55        33

    accuracy                           0.67        64
   macro avg       0.76      0.68      0.65        64
weighted avg       0.77      0.67      0.64        64
 

CR Bert;
               precision    recall  f1-score   support

       close       0.93      0.90      0.92        31
        open       0.91      0.94      0.93        33

    accuracy                           0.92        64
   macro avg       0.92      0.92      0.92        64
weighted avg       0.92      0.92      0.92        64

