# Add new metadata into Contentful with corresponding translations in French, Portuguese, and Spanish
**NOTE:** Run with Python 3.9 or higher

## Setup

In [1]:
# Import libraries
import openai
import contentful
import contentful_management
import requests
import uuid


In [2]:
# Get the API keys
# Parse a simple KEY=VALUE file into the `env` dict.
env_path = ".env"
env = {}
with open(env_path, encoding="utf-8") as f:
    for line_number, line in enumerate(f, start=1):
        entry = line.strip()
        # Blank lines and "#" comments carry no setting
        if not entry or entry.startswith("#"):
            continue
        # Split on the first "=" only: a value (e.g. a token) may itself contain "="
        env_key, separator, env_value = entry.partition("=")
        if not separator:
            raise ValueError(f"{env_path}:{line_number}: expected KEY=VALUE")
        # Strip stray spaces / carriage returns so they do not end up inside keys or tokens
        env[env_key.strip()] = env_value.strip()

openai.api_key = env['OPENAI_API_KEY']
cnt_space = env['contentful_space'] # Space in contentful
cnt_token = env['contentful_token'] # This token is only for read-only purposes, it doesn't allow management
cnt_management = env['contentful_personal_token'] # This is the token needed for management purposes

In [3]:
# Query the OpenAI Completion API

def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send ``prompt`` as a single user message to the OpenAI chat API and return the reply text.

    Args:
        prompt: Text used as the content of the only (user) message.
        model: Name of the chat model to query.
        temperature: Temperature value forwarded to the API call.

    Returns:
        The "content" of the message of the first choice in the response.
    """
    chat_response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    first_choice = chat_response.choices[0]
    return first_choice.message["content"]

In [4]:
# Connect to Contentful
# Two clients are created: `client` is built from the personal (management) token and is the one
# used later in this notebook to create and publish entries; `client2` is built from the space id
# and the read-only token.
client = contentful_management.Client(cnt_management) # This allows managing
client2 = contentful.Client(cnt_space, cnt_token) # this only allows queries

## Add new metadata (in English) into Contentful

In [6]:
# Add new entry to contentful (the description must be kept in a single line to be correctly published in contentful)
# English value of each field of the new metadata entry
english_values = {
    'layerSlug': 'test',
    'description': 'Shows areas where there is high anthropogenic pressure, which are areas that have been highly modified or transformed from their natural state. The land use practices that typically result in these human pressures are urban areas, high-intensity agriculture, transportation, energy and extractive resources and human intrusion. Human pressures must be considered when establishing protected areas as some species are less tolerant to human disturbance. ',
    'source': 'Vizzuality',
    'molLogo': 'FALSE',
    'hasAdditionalContent': 'FALSE',
    'title': 'Test to check how the metadata is translated',
    'language': 'en',
}

# Each field value is nested under the 'en-US' key
entry_attributes = {
    'content_type_id': 'metadataProd',
    'fields': {field: {'en-US': value} for field, value in english_values.items()},
}

# The new entry gets a random UUID as its id; it is created in the 'master' environment and then published
random_id = str(uuid.uuid4())
master_entries = client.entries(cnt_space, 'master')
new_entry = master_entries.create(random_id, entry_attributes)
new_entry.publish()

<Entry[metadataProd] id='1f9943b7-6c80-4b04-b63e-90184f8d0a26'>

## Translate description and title to FR, PT, ES

In [7]:
# Pull the English description and title out of the entry: these are the texts to translate
english_fields = entry_attributes['fields']
description_en = english_fields['description']['en-US']
title_en = english_fields['title']['en-US']
print(description_en, title_en, sep='\n')

Shows areas where there is high anthropogenic pressure, which are areas that have been highly modified or transformed from their natural state. The land use practices that typically result in these human pressures are urban areas, high-intensity agriculture, transportation, energy and extractive resources and human intrusion. Human pressures must be considered when establishing protected areas as some species are less tolerant to human disturbance. 
Test to check how the metadata is translated


In [19]:
# Ask the AI to translate the text
prompt = "Translate the following English text to French, Brazilian Portuguese and Spanish making it sound natural: {text}"
# Texts are keyed by field name so each result is stored by name, not by comparing text values
# (comparing values would leave the title untranslated if title and description were identical)
texts = {'description': description_en, 'title': title_en}
languages = ['fr', 'pt', 'es']
responses = {}

for field, text in texts.items():
    response = get_completion(prompt.format(text=text))
    # Divide the response into the three languages: one non-empty line per language is expected,
    # in the same order as the languages are listed in the prompt
    translations = [line for line in response.split("\n") if line.strip()]
    if len(translations) < len(languages):
        raise ValueError(
            f"Expected {len(languages)} translated lines for the {field}, got {len(translations)}: {response!r}"
        )
    for lang, translation in zip(languages, translations):
        # Each line is expected to look like "<label>: <translated text>"; keep what follows the first ": "
        _label, separator, translated_text = translation.partition(': ')
        if not separator:
            raise ValueError(f"Translated line for '{field}_{lang}' has no ': ' separator: {translation!r}")
        responses[f'{field}_{lang}'] = translated_text

# Assign the translated values to variables
description_fr = responses['description_fr']
description_pt = responses['description_pt']
description_es = responses['description_es']
title_fr = responses['title_fr']
title_pt = responses['title_pt']
title_es = responses['title_es']
print(description_es)
print(title_es)

Muestra áreas donde hay alta presión antropogénica, que son áreas que han sido altamente modificadas o transformadas de su estado natural. Las prácticas de uso de la tierra que típicamente resultan en estas presiones humanas son áreas urbanas, agricultura de alta intensidad, transporte, energía y recursos extractivos e intrusión humana. Las presiones humanas deben ser consideradas al establecer áreas protegidas ya que algunas especies son menos tolerantes a la perturbación humana.
Prueba para verificar cómo se traducen los metadatos


**Because the translation is literal from the English string, we are going to rephrase it to make it sound more natural**

In [21]:
# Ask the AI to rephrase the translations to make them sound more natural
# Each text is keyed by the variable it came from, so a rephrased result is written back by name
# and cannot land in the wrong variable if the order of the texts changes.
translations = {
    'description_es': description_es,
    'description_fr': description_fr,
    'description_pt': description_pt,
    'title_es': title_es,
    'title_fr': title_fr,
    'title_pt': title_pt,
}
rephrased = {}
for name, t in translations.items():
    prompt = f"""Proofread and rephrase the text in its original language to make it sound more natural.
    ```{t}```"""
    response = get_completion(prompt)
    # Trim surrounding whitespace and drop every double-quote character from the reply
    rephrased[name] = response.strip().replace('"', '')

# Updated variables with rephrased strings
description_es = rephrased['description_es']
description_fr = rephrased['description_fr']
description_pt = rephrased['description_pt']
title_es = rephrased['title_es']
title_fr = rephrased['title_fr']
title_pt = rephrased['title_pt']

print(description_es)
print(description_fr)
print(description_pt)
print(title_es)
print(title_fr)
print(title_pt)


Este mapa muestra las áreas con alta presión antropogénica, es decir, aquellas que han sido fuertemente modificadas o transformadas de su estado natural. Las prácticas de uso de la tierra que suelen generar estas presiones humanas son la urbanización, la agricultura intensiva, el transporte, la extracción de recursos y la intrusión humana. Al establecer áreas protegidas, es importante tener en cuenta estas presiones humanas, ya que algunas especies son menos tolerantes a la perturbación causada por la actividad humana.
Montrez les zones où la pression anthropique est forte, ce qui signifie qu'elles ont été considérablement modifiées ou transformées par l'homme. Les pratiques d'utilisation des terres qui entraînent généralement ces pressions humaines sont les zones urbaines, l'agriculture intensive, les transports, l'énergie et les ressources extractives, ainsi que l'intrusion humaine. Il est important de prendre en compte ces pressions humaines lors de l'établissement de zones protégée

In [22]:
# Show the French, Portuguese and Spanish descriptions, one per line
print(description_fr, description_pt, description_es, sep='\n')

Montrez les zones où la pression anthropique est forte, ce qui signifie qu'elles ont été considérablement modifiées ou transformées par l'homme. Les pratiques d'utilisation des terres qui entraînent généralement ces pressions humaines sont les zones urbaines, l'agriculture intensive, les transports, l'énergie et les ressources extractives, ainsi que l'intrusion humaine. Il est important de prendre en compte ces pressions humaines lors de l'établissement de zones protégées, car certaines espèces sont moins tolérantes aux perturbations causées par l'homme.
Esta seção mostra áreas com alta pressão antropogênica, ou seja, áreas que foram altamente modificadas ou transformadas de seu estado natural. As práticas de uso da terra que geralmente resultam nessas pressões humanas incluem áreas urbanas, agricultura intensiva, transporte, energia e recursos extrativos, bem como a intrusão humana. Ao estabelecer áreas protegidas, é importante considerar as pressões humanas, pois algumas espécies são

## Publish the translations as new entries in contentful


In [18]:
### Publish the new entries in the different languages

# Define the languages to iterate over
languages = ['es', 'pt', 'fr']

# Translated texts per language (explicit mapping instead of looking variables up through globals())
translated_texts = {
    'es': {'description': description_es, 'title': title_es},
    'pt': {'description': description_pt, 'title': title_pt},
    'fr': {'description': description_fr, 'title': title_fr},
}

# Read the English entry once. `entry_attributes` is deliberately NOT overwritten inside the loop,
# so every iteration (and any cell run afterwards) still sees the English values.
base_fields = entry_attributes['fields']

# The part of the English slug before its first underscore is the base of every translated slug
base_slug = base_fields['layerSlug']['en-US'].split('_', 1)[0]

# The first dash-separated block of the English entry id is the prefix of every translated entry id
existing_id_format = random_id.split('-')[0]

# Repeat the code block for each language
for language in languages:
    # Create new entry attributes for the specific language
    translated_attributes = {
        'content_type_id': 'metadataProd',
        'fields': {
            'layerSlug': {
                'en-US': f"{base_slug}_{language}"
            },
            'description': {
                'en-US': translated_texts[language]['description']
            },
            'source': {
                'en-US': base_fields['source']['en-US']
            },
            'molLogo': {
                'en-US': base_fields['molLogo']['en-US']
            },
            'hasAdditionalContent': {
                'en-US': base_fields['hasAdditionalContent']['en-US']
            },
            'title': {
                'en-US': translated_texts[language]['title']
            },
            'language': {
                'en-US': language
            }
        }
    }

    # Generate a new entry ID using the desired format and the current language
    new_entry_id = f'{existing_id_format}_{language}'

    # Create the new entry with the custom ID and attributes
    new_entry = client.entries(cnt_space, 'master').create(
        new_entry_id,
        translated_attributes
    )
    new_entry.publish() # with this command the entries are published, otherwise they are added just as drafts in content type

---
### Get the information from the last published entry in contentful

**NOTE:** If you have not published the new entry (metadata in English) using the notebook but directly in Contentful, you can run the following code to retrieve the id of the last entry in contentful


**Get the id from the last entry published in contentful**

In [None]:
# # Set up the Contentful API endpoint and credentials
# api_url = 'https://cdn.contentful.com'
# space_id = cnt_space
# access_token = cnt_token

# # Specify the content type ID of the entries you want to query
# content_type_id = 'metadataProd'

# # Build the request URL to fetch the entries
# url = f'{api_url}/spaces/{space_id}/entries?access_token={access_token}&content_type={content_type_id}&order=-sys.createdAt&limit=1'

# # Send the GET request to retrieve the entries
# response = requests.get(url)

# # Check if the request was successful (status code 200)
# if response.status_code == 200:
#     # Access the entry data from the response
#     entry_data = response.json()

#     # Check if any entries were returned
#     if 'items' in entry_data and len(entry_data['items']) > 0:
#         # Retrieve the ID of the last published entry
#         last_entry_id = entry_data['items'][0]['sys']['id']
#     else:
#         print('No published entries found.')

# else:
#     # Display an error message if the request failed
#     print(f'Request failed with status code {response.status_code}')

---
**Get the description and title of the last entry in metadataProd content type**

In [10]:
# # Set up the Contentful API endpoint and credentials
# api_url = 'https://cdn.contentful.com'
# space_id = cnt_space
# access_token = cnt_token

# # Specify the content type ID of the entries you want to query
# content_type_id = 'metadataProd'

# # Specify the entry ID you want to retrieve
# entry_id = random_id # last_entry_id

# # Build the request URL
# url = f'{api_url}/spaces/{space_id}/entries/{entry_id}?access_token={access_token}'

# # Send the GET request to retrieve the entry
# response = requests.get(url)

# # Check if the request was successful (status code 200)
# if response.status_code == 200:
#     # Access the entry data from the response
#     entry_data = response.json()

#     # Access the 'description' field of the entry
#     description_en = entry_data['fields']['description']
#     title_en = entry_data['fields']['title']

#     # Print the description
#     print(description_en)
#     print(title_en)

# else:
#     # Display an error message if the request failed
#     print(f'Request failed with status code {response.status_code}')

Shows areas where there is high anthropogenic pressure, which are areas that have been highly modified or transformed from their natural state. The land use practices that typically result in these human pressures are urban areas, high-intensity agriculture, transportation, energy and extractive resources and human intrusion. Human pressures must be considered when establishing protected areas as some species are less tolerant to human disturbance. 
Test to check how the metadata is translated
