Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions components/Datasets/DatasetLongDescriptionSuggestFromFiles.vue
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
<template>
<div>
<!-- Action row: the suggestion button (disabled or active) followed by an optional feedback link. -->
<div class="flex flex-wrap items-center gap-4 mt-2 mb-3">
<!--
  When `canSuggest` is false, a permanently disabled button is rendered inside a tooltip
  whose text (`tooltipWhenDisabled`) names the missing prerequisite. Otherwise the active
  button below is rendered; it calls `handleSuggest` and shows a loading state while
  `isGenerating` is true.
-->
<Tooltip v-if="!canSuggest">
<BrandedButton
type="button"
color="primary"
:disabled="true"
>
<div class="flex items-center space-x-2">
<RiSparklingLine
class="size-4"
aria-hidden="true"
/>
<span>{{ $t('Suggérer une description à partir des fichiers') }}</span>
</div>
</BrandedButton>
<template #tooltip>
{{ tooltipWhenDisabled }}
</template>
</Tooltip>
<BrandedButton
v-else
type="button"
color="primary"
:icon="RiSparklingLine"
:loading="isGenerating"
@click="handleSuggest"
>
<template v-if="isGenerating">
{{ $t('Suggestion en cours…') }}
</template>
<template v-else>
{{ $t('Suggérer une description à partir des fichiers') }}
</template>
</BrandedButton>
<!-- Feedback link: shown only when a feedback URL is configured and a suggestion has been received. -->
<CdataLink
v-if="config.public.generateDescriptionFeedbackUrl && hasReceivedSuggestion"
:to="config.public.generateDescriptionFeedbackUrl"
target="_blank"
class="text-sm text-gray-medium"
>
{{ $t('Comment avez-vous trouvé cette suggestion ?') }}
</CdataLink>
</div>
<!-- Error banner: displays the message stored in `errorMessage` by the last suggestion attempt. -->
<SimpleBanner
v-if="errorMessage"
type="danger"
class="mb-3"
>
{{ errorMessage }}
</SimpleBanner>
</div>
</template>

<script setup lang="ts">
import { BrandedButton, Tooltip, SimpleBanner } from '@datagouv/components-next'
import { RiSparklingLine } from '@remixicon/vue'
import CdataLink from '~/components/CdataLink.vue'
import type { ResourceForm } from '~/types/types'
import { buildCombinedExcerptFromResourceForms } from '~/utils/read-dataset-file-excerpt'

// Two-way bound long description (v-model); it is overwritten with the generated text when a suggestion succeeds.
const description = defineModel<string>({ required: true })

const props = defineProps<{
// Resource forms of the dataset; scanned for local files and passed to the excerpt builder.
resources: Array<ResourceForm>
// Dataset title; must be non-blank before a suggestion can be requested, and is sent (trimmed) with the request.
title: string
// Optional producer organization name; sent with the request only when it is non-blank.
organization?: string
}>()

const { t } = useTranslation()
// Runtime configuration; the template reads `config.public.generateDescriptionFeedbackUrl` from it.
const config = useRuntimeConfig()

// True while a suggestion request is running; drives the button's loading state.
const isGenerating = ref(false)
// Set to true once a suggestion has been written to `description`; gates the feedback link.
const hasReceivedSuggestion = ref(false)
// Message rendered in the error banner, or null when there is nothing to report.
const errorMessage = ref<string | null>(null)

// Whether the title prop holds anything other than whitespace.
const hasTitle = computed(() => Boolean(props.title?.trim()))

// Whether at least one resource is of filetype 'file' and carries a truthy `file.raw`.
const hasLocalFile = computed(() => {
  for (const resource of props.resources) {
    if (resource.filetype === 'file' && resource.file?.raw) {
      return true
    }
  }
  return false
})

// A suggestion can be requested only when both prerequisites are met.
const canSuggest = computed(() => (hasTitle.value ? hasLocalFile.value : false))

// Explanation shown on the disabled button: names the first missing prerequisite
// (title first, then local file), or an empty string when nothing is missing.
const tooltipWhenDisabled = computed(() => {
  if (hasTitle.value && hasLocalFile.value) {
    return ''
  }
  return hasTitle.value
    ? t('Ajoutez au moins un fichier téléversé au format texte (CSV, JSON, etc.) pour utiliser cette fonctionnalité.')
    : t('Renseignez d’abord le titre du jeu de données (étape précédente).')
})

// Fallback message (source-language translation key) used when no more specific error is available.
// NOTE(review): it is resolved through `t()` at display time; if translation keys are extracted
// statically from literal `t('…')` calls, this key may need to be registered manually.
const GENERIC_ERROR = 'Une erreur s’est produite lors de la suggestion. Vérifiez qu’au moins un fichier contient du texte lisible, puis réessayez.'

/**
 * Extracts a displayable message from a value thrown during the suggestion request.
 *
 * Preference order:
 * 1. `error.data.statusMessage` when that key is present. Only a string value is used;
 *    any other value yields an empty string so that the caller falls back to the generic
 *    message instead of rendering "undefined" or "[object Object]".
 * 2. `error.message` for any other `Error` instance.
 * 3. An empty string.
 *
 * @param error - The value caught from the failed request.
 * @returns The message to display, or an empty string when none could be determined.
 */
function extractSuggestionErrorMessage(error: unknown): string {
  if (error && typeof error === 'object' && 'data' in error) {
    const data = error.data
    if (data && typeof data === 'object' && 'statusMessage' in data) {
      return typeof data.statusMessage === 'string' ? data.statusMessage : ''
    }
  }
  return error instanceof Error ? error.message : ''
}

/**
 * Requests a long description generated from the uploaded files and writes it to the
 * `description` model.
 *
 * Steps: build a combined text excerpt from the resource forms, POST it together with the
 * trimmed title (and the organization when non-blank) to the generation endpoint, then
 * store the trimmed result. Any failure — no readable excerpt, an empty answer, or a
 * rejected request — is reported through `errorMessage`; the function never throws.
 */
async function handleSuggest() {
  // Ignore re-entrant calls so a second click cannot start a parallel request.
  if (isGenerating.value) {
    return
  }

  errorMessage.value = null
  if (!canSuggest.value) {
    return
  }

  isGenerating.value = true
  try {
    const excerpt = await buildCombinedExcerptFromResourceForms(props.resources)
    if (!excerpt) {
      errorMessage.value = t('Aucun extrait lisible n’a pu être lu depuis vos fichiers. Utilisez un fichier texte ou CSV avec des en-têtes, ou un fichier JSON/XML.')
      return
    }

    const organization = props.organization?.trim()
    const response = await $fetch<{ description?: string }>('/nuxt-api/albert/generate-dataset-long-description', {
      method: 'POST',
      body: {
        title: props.title.trim(),
        fileExcerpt: excerpt,
        ...(organization ? { organization } : {}),
      },
    })

    // Tolerate an empty body or a missing field: tell the user instead of silently doing nothing.
    const suggestion = response?.description?.trim()
    if (!suggestion) {
      errorMessage.value = t(GENERIC_ERROR)
      return
    }

    description.value = suggestion
    hasReceivedSuggestion.value = true
  }
  catch (error) {
    console.error('Failed to generate long dataset description:', error)
    errorMessage.value = extractSuggestionErrorMessage(error) || t(GENERIC_ERROR)
  }
  finally {
    isGenerating.value = false
  }
}
</script>
13 changes: 11 additions & 2 deletions components/Datasets/New/Step3AddResources.vue
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,13 @@
</template>
</LinkedToAccordion>
</fieldset>
<DatasetLongDescriptionSuggestFromFiles
v-if="form.resources.length > 0"
v-model="datasetForm.description"
:resources="form.resources"
:title="datasetForm.title"
:organization="datasetForm.owned?.organization?.name"
/>
<Alert
v-if="errors.length"
type="error"
Expand Down Expand Up @@ -179,13 +186,15 @@

<script setup lang="ts">
import { BrandedButton, PaddedContainer, SimpleBanner } from '@datagouv/components-next'
import DatasetLongDescriptionSuggestFromFiles from '../DatasetLongDescriptionSuggestFromFiles.vue'
import UploadResourceModal from '../UploadResourceModal.vue'
import type { DatasetForm, ResourceForm } from '~/types/types'

const datasetForm = defineModel<DatasetForm>('datasetForm', { required: true })

const props = defineProps<{
loading: boolean
resources: Array<ResourceForm>
datasetForm: DatasetForm
}>()

const emit = defineEmits<{
Expand All @@ -200,7 +209,7 @@ const { t } = useTranslation()
const publishFileAccordionId = useId()
const addDescriptionAccordionId = useId()

const isDatasetOpen = computed(() => props.datasetForm.access_type === 'open')
const isDatasetOpen = computed(() => datasetForm.value.access_type === 'open')

const { form, getFirstError, getFirstWarning, touch, validate, errorsAsList: errors } = useForm({
resources: props.resources,
Expand Down
16 changes: 15 additions & 1 deletion components/Datasets/Structured/Step3DescribeDataset.vue
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@
:placeholder="$t('Décrivez le contenu, la source et l\'utilité de ces données...')"
:rows="10"
/>
<DatasetLongDescriptionSuggestFromFiles
v-if="props.resources.length > 0"
v-model="form.description"
:resources="props.resources"
:title="form.title"
:organization="datasetForm.owned?.organization?.name"
/>
</LinkedToAccordion>

<LinkedToAccordion
Expand Down Expand Up @@ -171,6 +178,7 @@
import { BrandedButton, PaddedContainer, SimpleBanner, SearchableSelect } from '@datagouv/components-next'
import type { Frequency } from '@datagouv/components-next'
import { ref } from 'vue'
import DatasetLongDescriptionSuggestFromFiles from '~/components/Datasets/DatasetLongDescriptionSuggestFromFiles.vue'
import Alert from '~/components/Alert/Alert.vue'
import InputGroup from '~/components/InputGroup/InputGroup.vue'
import Sidemenu from '~/components/Sidemenu/Sidemenu.global.vue'
Expand All @@ -179,7 +187,13 @@ import AccordionGroup from '~/components/Accordion/AccordionGroup.global.vue'
import LinkedToAccordion from '~/components/LinkedToAccordion/LinkedToAccordion.vue'
import RequiredExplanation from '~/components/RequiredExplanation/RequiredExplanation.vue'
import { useForm, required, minLength } from '~/composables/useForm'
import type { DatasetForm } from '~/types/types'
import type { DatasetForm, ResourceForm } from '~/types/types'

const props = withDefaults(defineProps<{
resources?: Array<ResourceForm>
}>(), {
resources: () => [],
})

const emit = defineEmits<{
(e: 'previous' | 'next'): void
Expand Down
2 changes: 1 addition & 1 deletion pages/admin/datasets/new.vue
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
/>
<Step3AddResources
v-if="currentStep === 3"
v-model:dataset-form="datasetForm"
:resources
:dataset-form
:loading
@previous="moveToStep(2)"
@next="filesNext"
Expand Down
1 change: 1 addition & 0 deletions pages/admin/datasets/structured.vue
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
<Step3DescribeDataset
v-if="currentStep === 3 && associateSchemaForm.selectedSchema && associateSchemaForm.owned?.organization"
v-model="datasetForm"
:resources="resources"
@previous="goBackFromStep3"
@next="describeNext"
/>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import { DESCRIPTION_MIN_LENGTH } from '~/datagouv-components/src/functions/description'
import { callAlbertAPI } from './utils/albert-helpers'
import {
MAX_COMBINED_EXCERPT_CHARS,
MIN_COMBINED_EXCERPT_CHARS,
} from '~/utils/read-dataset-file-excerpt'

// Upper bound, in characters, on the combined system + user prompt.
const MAXIMUM_PROMPT_LENGTH = 120_000

/**
 * Normalizes an untrusted request field.
 *
 * @param value - Any value read from the request body.
 * @returns The trimmed string when `value` is a string, otherwise an empty string.
 */
function toTrimmedString(value: unknown): string {
  return typeof value === 'string' ? value.trim() : ''
}

/**
 * POST handler that asks the Albert API for a long dataset description.
 *
 * Expected JSON body: `title` (required string), `fileExcerpt` (required string whose
 * trimmed length lies between MIN_COMBINED_EXCERPT_CHARS and MAX_COMBINED_EXCERPT_CHARS)
 * and `organization` (optional string).
 *
 * Errors raised through `createError`:
 * - 400 when the title is missing/blank/not a string, or the excerpt is missing, too
 *   short or too long;
 * - 422 when the assembled prompt exceeds MAXIMUM_PROMPT_LENGTH, or the generated text is
 *   shorter than DESCRIPTION_MIN_LENGTH.
 *
 * @returns `{ description }` containing the trimmed generated text.
 */
export default defineEventHandler(async (event) => {
  // The body is untrusted: it may be absent, a primitive, or carry fields of any type.
  // Normalizing up front makes malformed input yield the 400 responses below instead of a
  // TypeError (HTTP 500) from destructuring or calling `.trim()` on a non-string.
  const body: unknown = await readBody(event)
  const payload = body && typeof body === 'object' ? body as Record<string, unknown> : {}

  const title = toTrimmedString(payload.title)
  // A non-string or blank organization is treated as "not provided" so the prompt never
  // contains "[object Object]" or an empty "Producer organization:" line.
  const organization = toTrimmedString(payload.organization)
  const excerpt = toTrimmedString(payload.fileExcerpt)

  if (!title) {
    throw createError({
      statusCode: 400,
      statusMessage: 'Title is required',
    })
  }

  if (!excerpt || excerpt.length < MIN_COMBINED_EXCERPT_CHARS) {
    throw createError({
      statusCode: 400,
      statusMessage: 'File excerpt is required and must contain enough text',
    })
  }

  if (excerpt.length > MAX_COMBINED_EXCERPT_CHARS) {
    throw createError({
      statusCode: 400,
      statusMessage: 'File excerpt exceeds maximum length',
    })
  }

  const systemContent = `You are an assistant integrated into data.gouv.fr, the French open data platform.
Your purpose is to help data producers write clear, comprehensive, and factual long descriptions of datasets.

Guidelines:
- Always respond in French.
- Your tone is factual, neutral, and accessible to non-experts.
- Use plain language and clear sentences; use Markdown when it helps structure (headings ##, bullet lists).
- Do not make assumptions or add information that is not clearly supported by the excerpt or title.
- Cover content, structure, and limits of the data when the excerpt allows it (variables, scope, updates, methodology if present).
- Always start the main text with a capital letter.
- IMPORTANT: Return ONLY the description text (Markdown allowed), without a preamble or labels such as "Description:".`

  const userContent = `You are asked to generate a long description for a dataset on data.gouv.fr.

Goal:
→ Write a detailed, reusable description that helps people understand what the dataset contains and how to use it.
→ Reflect only what can be inferred from the excerpt below (column names, codes, dates, geography, etc. when visible).
→ If the excerpt is only a data sample, describe the likely subject matter and structure without inventing methodology or sources not shown.

Dataset title: ${title}
${organization ? `Producer organization: ${organization}\n` : ''}
Excerpt from uploaded file(s):

${excerpt}

Output:
→ A description in French with Markdown allowed (e.g. ## sections, lists).
→ Minimum length: at least ${DESCRIPTION_MIN_LENGTH} characters.
→ No generic filler about "open data" unless the excerpt supports it.`

  // Title and organization are not individually length-limited, so the overall prompt
  // size is checked once everything has been assembled.
  const totalLength = systemContent.length + userContent.length
  if (totalLength > MAXIMUM_PROMPT_LENGTH) {
    throw createError({
      statusCode: 422,
      statusMessage: `The excerpt is too long to process (${totalLength} characters, maximum ${MAXIMUM_PROMPT_LENGTH}).`,
    })
  }

  const messages = [
    { role: 'system', content: systemContent },
    { role: 'user', content: userContent },
  ]

  const generatedDescription = (await callAlbertAPI(messages, 'openweight-small')).trim()

  // Reject answers too short to be accepted as a dataset description.
  if (generatedDescription.length < DESCRIPTION_MIN_LENGTH) {
    throw createError({
      statusCode: 422,
      statusMessage: 'The model could not generate a sufficient description. Try a more descriptive text or CSV with headers.',
    })
  }

  return { description: generatedDescription }
})
Loading
Loading