# Add new metadata into Contentful with corresponding translations in French, Portuguese, and Spanish
**NOTE:** Run with Python 3.9 or higher

## Setup

In [1]:
# Import libraries
import openai
import contentful
import contentful_management
import requests
import uuid
import pandas as pd


In [2]:
# Get the API keys from a local .env file containing one KEY=value pair per line
env_path = ".env"
env = {}
with open(env_path) as f:
    for line_number, raw_line in enumerate(f, start=1):
        line = raw_line.strip()
        # Blank lines and "#" comments carry no key, so they are skipped
        if not line or line.startswith("#"):
            continue
        # Split on the FIRST "=" only: tokens frequently contain "=" themselves
        env_key, separator, env_value = line.partition("=")
        if not separator:
            raise ValueError(f"{env_path}:{line_number}: expected KEY=value, got {line!r}")
        # Strip stray spaces / carriage returns so the keys and tokens are clean
        env[env_key.strip()] = env_value.strip()

openai.api_key = env['OPENAI_API_KEY']
cnt_space = env['contentful_space'] # Space in contentful
cnt_token = env['contentful_token'] # This token is only for read-only purposes, it doesn't allow management
cnt_management = env['contentful_personal_token'] # This is the token needed for management purposes

In [3]:
# Query the OpenAI Completion API
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send `prompt` as a single user message and return the reply text.

    Parameters
    ----------
    prompt : str
        Text used as the content of the one "user" message.
    model : str
        Model name forwarded to `openai.ChatCompletion.create`.
    temperature : number
        Sampling temperature forwarded to the same call.

    Returns
    -------
    The "content" entry of the first choice's message.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

In [4]:
# Connect to Contentful
# `client` is built from the personal (management) token; the cells below use it to create and publish entries.
client = contentful_management.Client(cnt_management) # This allows managing
# `client2` is built from the space id and the read-only token; the cells below use it only to query entries.
client2 = contentful.Client(cnt_space, cnt_token) # this only allows queries

## Add new metadata (in English) into Contentful

In [5]:
# Add new entry to contentful (the description must be kept in a single line to be correctly published in contentful)
# Plain English value of every field of the new entry, in the order they are sent.
english_values = {
    'layerSlug': 'carbon-layer-marine',
    'description': 'Marine sediment carbon stocks represent the amount of carbon stored in the top 1 meter of sediment. It has been estimated that marine sediment can hold up to two times more carbon than terrestrial soils. Data are displayed at a 1 km resolution.',
    'source': '[Atwood et al., 2020](https://www.frontiersin.org/articles/10.3389/fmars.2020.00165/full)',
    'molLogo': 'FALSE',
    'hasAdditionalContent': 'FALSE',
    'title': 'Marine sediment carbon',
    'language': 'en',
}

# Wrap each value under the 'en-US' key, which is the shape handed to `create` below.
entry_attributes = {
    'content_type_id': 'metadataProd',
    'fields': {
        field_name: {'en-US': field_value}
        for field_name, field_value in english_values.items()
    },
}

# A fresh UUID is used as the entry id; later cells derive the translated ids from it.
random_id = str(uuid.uuid4())
master_entries = client.entries(cnt_space, 'master')
new_entry = master_entries.create(random_id, entry_attributes)
new_entry.publish()

<Entry[metadataProd] id='f2337cd9-aca9-4a2d-bf4c-38aace38bc89'>

## Translate description and title to FR, PT, ES

In [6]:
# Get the information from the entry that needs to be translated
english_fields = entry_attributes['fields']
description_en, title_en = (
    english_fields[field]['en-US'] for field in ('description', 'title')
)
# One value per line: description first, then title
print(description_en, title_en, sep="\n")

Marine sediment carbon stocks represent the amount of carbon stored in the top 1 meter of sediment. It has been estimated that marine sediment can hold up to two times more carbon than terrestrial soils. Data are displayed at a 1 km resolution.
Marine sediment carbon


In [7]:
# Ask the AI to translate the text
prompt = "Translate the following English text to French, Brazilian Portuguese and Spanish making it sound natural: {text}"
# Each English text is paired with the field it belongs to. The field name (not the
# text value) decides where a translation is stored, so a title that happens to be
# identical to the description is still handled.
texts_by_field = {'description': description_en, 'title': title_en}
texts = list(texts_by_field.values())
# Same order as the languages named in the prompt: the reply lines are matched by position
languages = ['fr', 'pt', 'es']
responses = {}

for field, text in texts_by_field.items():
    response = get_completion(prompt.format(text=text))
    # Divide the response into the three languages (one non-empty line per language)
    translations = [translation for translation in response.split("\n") if translation.strip() != '']
    if len(translations) < len(languages):
        raise ValueError(
            f"Expected {len(languages)} translated lines for the {field}, got {len(translations)}: {response!r}"
        )
    for lang, translated_line in zip(languages, translations):
        # Each line is expected to look like "<label>: <translated text>"
        _label, separator, translated_text = translated_line.partition(': ')
        if not separator:
            raise ValueError(f"Cannot find '<label>: ' in the {lang} {field} line: {translated_line!r}")
        # Use separate keys for descriptions and titles
        responses[f'{field}_{lang}'] = translated_text

# Assign the translated values to variables
description_fr = responses['description_fr']
description_pt = responses['description_pt']
description_es = responses['description_es']
title_fr = responses['title_fr']
title_pt = responses['title_pt']
title_es = responses['title_es']
print(description_es)
print(title_es)

Las reservas de carbono en los sedimentos marinos representan la cantidad de carbono almacenado en el primer metro de sedimento. Se estima que los sedimentos marinos pueden retener hasta dos veces más carbono que los suelos terrestres. Los datos se muestran con una resolución de 1 km.
Carbono de sedimentos marinos


**Because the translation is a literal rendering of the English string, we are going to rephrase it to make it sound more natural**

In [8]:
# Ask the AI to rephrase the translations to make them sound more natural
translations = [description_es, description_fr, description_pt, title_es, title_fr, title_pt]
rephrased_texts = []
for t in translations:
    prompt = f"""Proofread and rephrase the text in its original language 
                to make it sound more natural.```{t}```"""
    # Trim surrounding whitespace and drop every double quote from the reply
    rephrased_texts.append(get_completion(prompt).strip().replace('"', ''))

# Updated variables with rephrased strings (same order as the `translations` list)
description_es, description_fr, description_pt, title_es, title_fr, title_pt = rephrased_texts
for rephrased_text in rephrased_texts:
    print(rephrased_text)


Las reservas de carbono en los sedimentos marinos se refieren a la cantidad de carbono almacenada en el primer metro de sedimento. Se estima que los sedimentos marinos pueden retener hasta el doble de carbono que los suelos terrestres. Los datos se presentan con una resolución de 1 km.
Les stocks de carbone des sédiments marins correspondent à la quantité de carbone stockée dans la couche supérieure d'un mètre de sédiment. On estime que les sédiments marins peuvent contenir jusqu'à deux fois plus de carbone que les sols terrestres. Les données sont présentées avec une résolution de 1 km.
Os estoques de carbono nos sedimentos marinhos representam a quantidade de carbono armazenada nos primeiros 1 metro de sedimento. Estima-se que os sedimentos marinhos possam conter até o dobro de carbono em comparação com os solos terrestres. Os dados são exibidos com uma resolução de 1 km.
Carbono de los sedimentos marinos
Carbone dans les sédiments marins
Carbono proveniente de sedimentos marinhos.


## Publish the translations as new entries in contentful


In [9]:
### Publish the new entries in the different languages

# Define the languages to iterate over
languages = ['es', 'pt', 'fr']

# Translated (description, title) for each language code. An explicit mapping is used
# instead of looking the variables up by name through globals().
translated_texts = {
    'es': (description_es, title_es),
    'pt': (description_pt, title_pt),
    'fr': (description_fr, title_fr),
}

# Read the English entry once. The per-language attributes are built in their own
# variable so `entry_attributes` still holds the English values afterwards (an earlier
# cell reads the English description and title from it).
english_fields = entry_attributes['fields']
# The translated slug is the full English slug plus "_<language>". Taking the full slug
# (rather than the part before the first "_") keeps slugs that contain underscores intact.
base_layer_slug = english_fields['layerSlug']['en-US']

# The translated entries reuse the first group of the English entry's UUID as id prefix
id_prefix = random_id.split('-')[0]

# Repeat the code block for each language
for language in languages:
    translated_description, translated_title = translated_texts[language]

    # Create new entry attributes for the specific language
    # (translated values are stored under the 'en-US' key, like the English entry)
    translated_attributes = {
        'content_type_id': 'metadataProd',
        'fields': {
            'layerSlug': {'en-US': f"{base_layer_slug}_{language}"},
            'description': {'en-US': translated_description},
            'source': {'en-US': english_fields['source']['en-US']},
            'molLogo': {'en-US': english_fields['molLogo']['en-US']},
            'hasAdditionalContent': {'en-US': english_fields['hasAdditionalContent']['en-US']},
            'title': {'en-US': translated_title},
            'language': {'en-US': language},
        }
    }

    # Generate a new entry ID using the id prefix and the current language
    new_entry_id = f'{id_prefix}_{language}'

    # Create the new entry with the custom ID and attributes
    new_entry = client.entries(cnt_space, 'master').create(
        new_entry_id,
        translated_attributes
    )
    new_entry.publish() # with this command the entries are published, otherwise they are added just as drafts in content type

---
### Get the information from the last published entry in contentful

**NOTE:** If you have not published the new entry (metadata in English) using the notebook but directly in Contentful, you can run the following code to retrieve the id of the last entry in contentful


**Get the id from the last entry published in contentful**

In [None]:
# # Set up the Contentful API endpoint and credentials
# api_url = 'https://cdn.contentful.com'
# space_id = cnt_space
# access_token = cnt_token

# # Specify the content type ID of the entries you want to query
# content_type_id = 'metadataProd'

# # Build the request URL to fetch the entries
# url = f'{api_url}/spaces/{space_id}/entries?access_token={access_token}&content_type={content_type_id}&order=-sys.createdAt&limit=1'

# # Send the GET request to retrieve the entries
# response = requests.get(url)

# # Check if the request was successful (status code 200)
# if response.status_code == 200:
#     # Access the entry data from the response
#     entry_data = response.json()

#     # Check if any entries were returned
#     if 'items' in entry_data and len(entry_data['items']) > 0:
#         # Retrieve the ID of the last published entry
#         last_entry_id = entry_data['items'][0]['sys']['id']
#     else:
#         print('No published entries found.')

# else:
#     # Display an error message if the request failed
#     print(f'Request failed with status code {response.status_code}')

---
**Get the description and title of the last entry in metadataProd content type**

In [10]:
# # Set up the Contentful API endpoint and credentials
# api_url = 'https://cdn.contentful.com'
# space_id = cnt_space
# access_token = cnt_token

# # Specify the content type ID of the entries you want to query
# content_type_id = 'metadataProd'

# # Specify the entry ID you want to retrieve
# entry_id = random_id # last_entry_id

# # Build the request URL
# url = f'{api_url}/spaces/{space_id}/entries/{entry_id}?access_token={access_token}'

# # Send the GET request to retrieve the entry
# response = requests.get(url)

# # Check if the request was successful (status code 200)
# if response.status_code == 200:
#     # Access the entry data from the response
#     entry_data = response.json()

#     # Access the 'description' field of the entry
#     description_en = entry_data['fields']['description']
#     title_en = entry_data['fields']['title']

#     # Print the description
#     print(description_en)
#     print(title_en)

# else:
#     # Display an error message if the request failed
#     print(f'Request failed with status code {response.status_code}')

Shows areas where there is high anthropogenic pressure, which are areas that have been highly modified or transformed from their natural state. The land use practices that typically result in these human pressures are urban areas, high-intensity agriculture, transportation, energy and extractive resources and human intrusion. Human pressures must be considered when establishing protected areas as some species are less tolerant to human disturbance. 
Test to check how the metadata is translated


------
## Translate metadata for entries that were updated in English. 

To do this, you need to know the `layerSlug` of each entry that was updated. Unfortunately, it is not yet possible to update the translated entries automatically, but the following code generates the translations.

In [5]:
# Specify the content type ID of the entries you want to query
content_type_id = 'metadataProd'

# Specify the field and value you want to search for
field_name = 'fields.layerSlug'
field_value = 'energy_human_pressures'

# List of layerSlugs you want to query
layerSlugs = [
    'energy_human_pressures',
    'transportation_human_pressures',
    'agriculture_human_pressures',
    'urban_human_pressures',
    'intrusion_human_pressures',
    'marine_and_land_human_pressures',
    'marine_ocean_drivers_human_pressures',
    'commercial_fishing_human_pressures',
    'artisanal_fishing_human_pressures',
    'carbon-layer-land'
]

# One {'LayerSlug', 'Description'} record per queried slug
data_list = []

for layerSlug in layerSlugs:
    # Ask the read-only client for the entries carrying this layerSlug
    entries = client2.entries({'content_type': content_type_id, 'fields.layerSlug': layerSlug})

    if entries.total > 0:
        # Only the first match is used
        first_matching_entry = entries.items[0]
        description = first_matching_entry.description
        # An empty description is replaced by an explanatory placeholder
        row_description = description if description else 'No "description" field found for this layerSlug.'
    else:
        row_description = 'No matching entries found for this layerSlug.'

    data_list.append({'LayerSlug': layerSlug, 'Description': row_description})

# Build the table in one go from the collected records
df = pd.DataFrame(data_list)

# Display the DataFrame
df


Unnamed: 0,LayerSlug,Description
0,energy_human_pressures,Energy and extractive resources pressure repre...
1,transportation_human_pressures,Transportation pressure represents the degree ...
2,agriculture_human_pressures,Agriculture pressure represents the degree of ...
3,urban_human_pressures,This layer shows areas where the land is used ...
4,intrusion_human_pressures,Human intrusion pressure represents the degree...
5,marine_and_land_human_pressures,This layer shows areas where there is high ant...
6,marine_ocean_drivers_human_pressures,Taking into account thirteen different types o...
7,commercial_fishing_human_pressures,Taking into account thirteen different types o...
8,artisanal_fishing_human_pressures,Taking into account thirteen different types o...
9,carbon-layer-land,Irrecoverable carbon refers to the vast natura...


In [6]:
# Ask the AI to translate the text
# prompt = "Translate the following English text to French, Brazilian Portuguese and Spanish making it sound natural: {text}"
texts = df['Description']
languages = ['fr', 'pt', 'es']
# Language names written into the prompts. The codes alone are ambiguous for the model,
# and "pt" would lose the Brazilian variant requested by the notebook's other translation prompt.
# The codes are still used for the DataFrame column names.
language_names = {'fr': 'French', 'pt': 'Brazilian Portuguese', 'es': 'Spanish'}

# Each distinct description is translated once; rows sharing the same text share the result
for text in texts.unique():
    print(text)
    translations = {}
    for lang in languages:
        language_name = language_names[lang]

        # First pass: plain translation
        translation_prompt = f"Translate the following English text to {language_name}: {text}"
        translation = get_completion(translation_prompt)
        print(translation)

        # Second pass: proofread/rephrase the translation; this is the version that is kept
        rephrase_prompt = f"Proofread and rephrase the text in {language_name} to make it sound more natural: {translation}"
        rephrased_translation = get_completion(rephrase_prompt)
        translations[lang] = rephrased_translation
        print(rephrased_translation)

    # Assign the translated values to the DataFrame columns of every row with this text
    rows_with_text = df['Description'] == text
    for lang in languages:
        df.loc[rows_with_text, f'Descriptions_{lang}'] = translations[lang]



Energy and extractive resources pressure represent the degree of human modification due to extractive energy production (oil and gas), power plants, mining, and quarrying. Data are shown at a 1 km resolution.
La pression exercée par l'énergie et les ressources extractives représente le degré de modification humaine due à la production d'énergie extractive (pétrole et gaz), aux centrales électriques, à l'exploitation minière et à l'extraction de carrières. Les données sont présentées à une résolution de 1 km.
La pression exercée par l'énergie et les ressources extractives mesure le niveau de modification humaine causée par la production d'énergie (pétrole et gaz), les centrales électriques, l'exploitation minière et l'extraction de carrières. Les données sont présentées avec une résolution de 1 km.
A pressão das energias e recursos extrativos representa o grau de modificação humana devido à produção de energia extrativa (petróleo e gás), usinas de energia, mineração e extração de pedrei

In [163]:
# Save translations in csv
# NOTE(review): absolute, user-specific path; adjust it before running on another machine.
output_csv = '/Users/sofia/Documents/HE_Data/metadata/translated_descriptions_updates20230718.csv'
# `to_csv` returns None when it writes to a path, so the result is not stored anywhere
# (the former `df.csv = ...` only attached a None attribute to the DataFrame).
df.to_csv(output_csv, index=False)



### Use this csv to manually update the "es", "pt" and "fr" entries that need to be modified

----
----

Testing how to update Contentful entries

In [164]:
# ID of the content type you are working with
content_type_id = 'metadataProd'

# List of layerSlugs you want to query
layerSlugs = ['test_es']

# Variable text containing the new description
text = "Otra vez el texto en español, pero es otro texto"

# Look up each layerSlug with the read-only client
for layerSlug in layerSlugs:
    entries = client2.entries({'content_type': content_type_id, 'fields.layerSlug': layerSlug})

    # Nothing is printed for a slug without matching entries
    if entries.total > 0:
        # Only the first match is inspected
        first_matching_entry = entries.items[0]

        # Current description and entry id of that match
        # NOTE(review): `id` shadows the builtin of the same name; the name is kept
        # in case later cells read it.
        description, id = first_matching_entry.description, first_matching_entry.id
        print(description, id, sep="\n")


Mismo texto pero en español
2RDLjmNQVJvP7DROTw1V08
