# Import

In [1]:
from decouple import Config, RepositoryEnv
from ollama_interaction import generate_ollama_request

import pandas as pd
import json
import time
import numpy as np

## Import des données sur les modèles
Nous importons les modèles servant de base pour VApp. Par défaut, les modèles sont quantifiés en int4.

In [2]:
# Context options are based on https://github.com/NVIDIA/RULER
# If a model is not listed there, we take the nearest one.
#
# "best context" is based on the effective length,
# "max context" is based on the claimed length.

# Read the assistants definition explicitly as UTF-8 so that decoding does not
# depend on the platform's default locale encoding.
with open('../data/ai-assistants.json', 'r', encoding='utf-8') as file:
    ai_assistants = json.load(file)

In [3]:
# Settings are read from the local `.env` file through python-decouple.
env_repository = RepositoryEnv('.env')
config = Config(env_repository)

# Ollama endpoint, its bearer token, and the GPU label stored with the results.
ollama_api_url, ollama_bearer_token, gpu_model = (
    config(setting_name)
    for setting_name in ('OLLAMA_API_URL', 'OLLAMA_BEARER_TOKEN', 'OLLAMA_GPU')
)

# Scoring generation

## Import data base de travail
Import de la base de données générée dans download-data-base-at.ipynb.

In [4]:
# Load the aid table (indexed by `id`) and keep only the rows whose description
# token count is strictly between 500 and 5000.
data = (
    pd.read_csv("hard-database/data_at_select_ai.csv", index_col='id')
    .loc[
        lambda frame: (frame['token_numb_description'] < 5000)
        & (frame['token_numb_description'] > 500)
    ]
)

In [5]:
# Load a sample of project descriptions.
# The file is read explicitly as UTF-8 so that accented characters in the
# descriptions are decoded the same way on every platform.
with open("project-description-sample.json", 'r', encoding='utf-8') as file:
    project_descrpition_list = json.load(file)

## Usage de la fonction gen_prompt_aide_scoring

### Exemple d'usage
Pour générer le scoring, un petit LLM comme llama3.2:1b semble être suffisant. Des benchmarks devront être réalisés avec des modèles plus gros mais moins rapides.

In [6]:
# Fonction disponible sous ./prompt_script/gen_prompt_aide_scoring.py
from prompt_script.gen_prompt_aide_scoring import gen_prompt_aide_scoring

In [None]:
# Number of successfully parsed scores wanted for each (project, aid) pair.
# Every request is sent with a different seed (1, 2, 3, ...).
seed_number = 2
# Maximum number of consecutive failed attempts (empty or unparsable answer)
# tolerated for a pair before giving up on it.
max_retry = 5
row_list = []

# Model and request settings come from the 'score-assistant' entry of the
# assistants definition. Alternatives tried previously:
# "llama3.1:latest", "mistral-nemo:latest".
model = ai_assistants['score-assistant']['model']
model_options = ai_assistants['score-assistant']['model_parameters']
request_options = ai_assistants['score-assistant']['request_parameters']

# Answers that could not be parsed as an integer score, kept for inspection.
error_list = []

# Bounds passed to the prompt generator and used to clamp the parsed scores.
max_score = 100
min_score = 0

for project_descrpition_key in project_descrpition_list:
    project_description = project_descrpition_list[project_descrpition_key]
    print('-------------------------')
    print('starting : ',project_description[:80])
    for aide_id, row in data.iterrows():
        prompt_system, prompt_user = gen_prompt_aide_scoring(
            row['description_md'],
            project_description,
            max_score=max_score,
            min_score=min_score,
        )

        score_sub = 0      # sum of the clamped scores obtained so far
        seed = 0           # number of requests sent; also used as the request seed
        scoring_made = 0   # number of answers successfully parsed
        retry = 0          # consecutive failures since the last success

        start_requesting_score = time.time()
        while scoring_made < seed_number and retry < max_retry:
            seed += 1
            response = generate_ollama_request(
                model=model,
                prompt_system=prompt_system,
                response_format=None,
                prompt_user=prompt_user,
                ollama_api_url=ollama_api_url,
                bearer_token=ollama_bearer_token,
                model_options=model_options,
                request_options=request_options,
                seed=seed,
            )
            if not response:
                # An empty answer must count as a failed attempt; otherwise
                # neither counter moves and the loop never terminates.
                retry += 1
                continue

            response_filtred = None
            try:
                # The model is expected to answer with a bare integer; strip
                # spaces and newlines before parsing.
                response_filtred = response['response'].replace(' ', '').replace('\n', '')
                score_seed = int(response_filtred)
            except (KeyError, TypeError, AttributeError, ValueError):
                retry += 1
                # Record the cleaned text when available, else the raw payload,
                # so the handler never touches an unbound or stale variable.
                error_list.append(
                    response_filtred if response_filtred is not None else repr(response)
                )
                continue

            # Clamp the score into [min_score, max_score] before summing.
            score_sub += max(min_score, min(score_seed, max_score))
            retry = 0
            scoring_made += 1
        end_requesting_score = time.time()

        request_time_total = end_requesting_score - start_requesting_score

        # Work on a copy: the Series yielded by iterrows() should not be
        # modified while iterating over the frame.
        scored_row = row.copy()
        scored_row['project_description'] = project_description
        scored_row['project_score'] = score_sub
        scored_row['scoring_made'] = scoring_made
        scored_row['scoring_error'] = seed - scoring_made
        scored_row['request_time_total'] = request_time_total
        # `seed` is 0 only when no request was sent (seed_number or max_retry <= 0).
        scored_row['request_time_single'] = request_time_total / seed if seed else 0.0
        scored_row['gpu'] = gpu_model
        row_list.append(scored_row)


-------------------------
starting :  Revitalisation d'une zone humide
-------------------------
starting :  Entretient d'un vieux moulin
-------------------------
starting :  R√©habilitation d'une ancienne √©cole en lieu d√©di√© √† la sant√©.
-------------------------
starting :  Voir fiche action PVD /ORT n¬∞21 Apr√®s une premi√®re phase d‚Äôam√©nagement, la commu
-------------------------
starting :  La commune dispose d'outils num√©riques qu'il est n√©cessaire d'optimiser et coord
-------------------------
starting :  Cr√©ation d‚Äôun sentier th√©matique sur la for√™t √† Rieutord et sur le patrimoine √† 
-------------------------
starting :  Je souhaite refaire la voirie communal
-------------------------
starting :  Le projet consiste en l‚Äôam√©nagement d‚Äôun terrain communal en c≈ìur de commune, jo


In [8]:
def normalize_score(project_score:int,scoring_made:int,max_score:int=5,min_score:int=-5)->"tuple[float, float]":
    """Average a summed score and rescale it using the given bounds.

    The function only relies on arithmetic operators, so it also accepts
    pandas Series (element-wise computation), which is how the notebook
    calls it.

    Args:
        project_score: Sum of the individual scores obtained.
        scoring_made: Number of individual scores contained in the sum.
        max_score: Upper bound of an individual score.
        min_score: Lower bound of an individual score.

    Returns:
        A ``(normalized, average)`` pair where ``average`` is
        ``project_score / scoring_made`` and ``normalized`` is
        ``(average - min_score) / (max_score - min_score)``.

    Raises:
        ValueError: If ``max_score`` equals ``min_score`` (the scale has no width).
        ZeroDivisionError: If ``scoring_made`` is the plain number 0.
    """
    if max_score == min_score:
        # Fail with an explicit message instead of an opaque division error
        # (scalars) or silent inf/NaN values (Series).
        raise ValueError("max_score and min_score must be different to normalize a score")

    corrected_project_score = project_score/scoring_made

    corrected_normalize_score = (corrected_project_score-min_score)/(max_score-min_score)

    return corrected_normalize_score, corrected_project_score

# One record per (project description, aid) pair collected by the scoring loop.
data_project_score = pd.DataFrame(row_list)

# Average the summed scores and rescale them with the scoring bounds.
corrected_normalize_score, corrected_project_score = normalize_score(
    data_project_score['project_score'],
    data_project_score['scoring_made'],
    max_score=max_score,
    min_score=min_score,
)

data_project_score = data_project_score.assign(
    corrected_normalize_score=corrected_normalize_score,
    corrected_project_score=corrected_project_score,
)

data_project_score.head(5)

Unnamed: 0,slug,url,name,name_initial,short_title,financers,financers_full,instructors,instructors_full,programs,...,token_numb_eligibility,project_description,project_score,scoring_made,scoring_error,request_time_total,request_time_single,gpu,corrected_normalize_score,corrected_project_score
162921,actions-en-faveur-de-la-cohesion-sociale-polit...,/aides/actions-en-faveur-de-la-cohesion-social...,Mener des actions en faveur de la coh√©sion soc...,Actions en faveur de la coh√©sion sociale (poli...,,['Conseil d√©partemental de la Manche'],"[{'id': 164, 'name': 'Conseil d√©partemental de...",[],[],[],...,614,Revitalisation d'une zone humide,0,2,0,3.001832,1.500916,L4,0.0,0.0
162925,aide-a-la-realisation-de-plan-doccupation-past...,/aides/aide-a-la-realisation-de-plan-doccupati...,Aider √† la r√©alisation de Plan d'Occupation Pa...,Aide √† la r√©alisation de Plan d'Occupation Pas...,,"[""Conseil r√©gional de Provence-Alpes-C√¥te d'Az...","[{'id': 93, 'name': ""Conseil r√©gional de Prove...",[],[],[],...,172,Revitalisation d'une zone humide,10,2,0,2.407943,1.203972,L4,0.05,5.0
162938,accompagner-le-developpement-dune-production-e...,/aides/accompagner-le-developpement-dune-produ...,Accompagner la transition √©cologique,Programme LEADER 2023-2027,,['GAL des Co√´vrons'],"[{'id': 2541, 'name': 'GAL des Co√´vrons', 'log...",[],[],['üá™üá∫ LEADER - Liaison entre Actions de D√©velop...,...,862,Revitalisation d'une zone humide,160,2,0,2.848187,1.424093,L4,0.8,80.0
162939,accompagner-le-developpement-dune-production-e...,/aides/accompagner-le-developpement-dune-produ...,"Valoriser les atouts culturels, touristiques e...",Programme LEADER 2023-2027,,['GAL des Co√´vrons'],"[{'id': 2541, 'name': 'GAL des Co√´vrons', 'log...",[],[],['üá™üá∫ LEADER - Liaison entre Actions de D√©velop...,...,862,Revitalisation d'une zone humide,20,2,0,2.738413,1.369206,L4,0.1,10.0
162955,appel-a-projets-pedagogiques-culture-cheval-an...,/aides/appel-a-projets-pedagogiques-culture-ch...,D√©velopper des projets p√©dagogiques en lien av...,Appel √† projets p√©dagogiques Culture Cheval ‚Äì ...,,['Conseil d√©partemental de la Manche'],"[{'id': 164, 'name': 'Conseil d√©partemental de...",[],[],[],...,441,Revitalisation d'une zone humide,0,2,0,2.162419,1.08121,L4,0.0,0.0


In [9]:
# Keep only the aids whose normalized score reaches 0.6, grouped by project.
grouped = data_project_score[data_project_score["corrected_normalize_score"]>=0.6].groupby("project_description")

# Generate the Markdown report
with open("report.md", "w", encoding="utf-8") as file:
    for description, group in grouped:
        file.write("___\n\n")
        file.write(f"## {description}\n\n")
        for _, row in group.iterrows():
            aide_url = f"https://aides-territoires.beta.gouv.fr{row['url']}"
            # Round instead of truncating: 0.69 * 100 evaluates to
            # 68.99999999999999 in floating point, which int() alone turns into 68.
            score_percent = int(round(row['corrected_normalize_score'] * 100))
            file.write(f"- **Nom de l'aide** : {row['name']}\n")
            file.write(f"  - **URL**: [{aide_url}]({aide_url})\n")
            file.write(f"  - **Score**: {score_percent} / 100\n\n")
            file.write(f"  - **Orginal score**: {int(row['project_score'])}\n\n")
            file.write(f"  - **Error made**: {int(row['scoring_error'])}\n\n")
            file.write(f"  - **Scoring made**: {int(row['scoring_made'])} / {seed_number}\n\n")

In [None]:
# Save the scored table twice: once under a GPU-specific name and once under
# the generic name.
for output_path in (
    f"hard-database/data_project_scoring_gpu_{gpu_model}.csv",
    "hard-database/data_project_scoring.csv",
):
    data_project_score.to_csv(output_path)