In [1]:
import os

import pandas as pd
import requests

In [2]:
# The API key is read from the MANIFESTO_API_KEY environment variable so the
# credential is not stored in (and shared or committed with) the notebook.
API_KEY = os.environ.get("MANIFESTO_API_KEY", "")
if not API_KEY:
    # Make the misconfiguration visible right away instead of only through
    # error status codes printed by later requests.
    print("MANIFESTO_API_KEY is not set; requests will be sent with an empty api_key.")

# Base URL of the API; endpoint names are appended directly to it, so the
# trailing slash is required.
BASE_URL = "https://manifesto-project.wzb.eu/api/v1/"

In [3]:
from typing import List, Dict

def get_metadata(keys: List[str], version: str, batch_size: int = 50,
                 timeout: float = 60.0) -> List[Dict]:
    """Fetch metadata for the given keys from the ``metadata`` endpoint.

    The keys are sent in consecutive batches of at most ``batch_size`` using
    HTTP POST, and the ``items`` list of every successful response is
    concatenated into a single list.

    Args:
        keys: Keys to request; sent as the ``keys[]`` form field.
        version: Value sent as the ``version`` form field.
        batch_size: Maximum number of keys per request.
        timeout: Seconds passed to ``requests.post`` as ``timeout`` so that a
            stalled server cannot block the notebook indefinitely.

    Returns:
        The ``items`` entries of all batches that answered with HTTP 200, in
        request order. A batch answering with any other status is reported
        with ``print`` and skipped, so the result may be partial.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    url = f"{BASE_URL}metadata"
    all_metadata = []

    for i in range(0, len(keys), batch_size):
        batch_keys = keys[i:i+batch_size]
        data = {'api_key': API_KEY, 'keys[]': batch_keys, 'version': version}
        # Without an explicit timeout, requests waits forever on a silent server.
        response = requests.post(url, data=data, timeout=timeout)

        if response.status_code == 200:
            all_metadata.extend(response.json()['items'])
        else:
            # Best effort: report the failed batch (1-based number) and continue.
            print(f"Error al obtener los metadatos para el lote {i//batch_size + 1}: {response.status_code}")

    return all_metadata

In [5]:
def get_all_texts_and_annotations(keys: List[str], version: str, batch_size: int = 50,
                                  timeout: float = 60.0) -> List[Dict]:
    """Fetch texts and annotations for the given keys, batch by batch.

    The keys are sent in consecutive batches of at most ``batch_size`` as
    query parameters of an HTTP GET to the ``texts_and_annotations`` endpoint,
    and the ``items`` list of every successful response is concatenated.

    Args:
        keys: Keys to request; sent as the ``keys[]`` query parameter.
        version: Value sent as the ``version`` query parameter.
        batch_size: Maximum number of keys per request.
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so that a
            stalled server cannot block the notebook indefinitely.

    Returns:
        The ``items`` entries of all batches that answered with HTTP 200, in
        request order. A batch answering with any other status is reported
        with ``print`` and skipped, so the result may be partial.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    url = f"{BASE_URL}texts_and_annotations"
    all_texts_and_annotations = []

    for i in range(0, len(keys), batch_size):
        batch_keys = keys[i:i+batch_size]
        params = {'api_key': API_KEY, 'keys[]': batch_keys, 'version': version}
        # Without an explicit timeout, requests waits forever on a silent server.
        response = requests.get(url, params=params, timeout=timeout)

        if response.status_code == 200:
            all_texts_and_annotations.extend(response.json()['items'])
        else:
            # Best effort: report the failed batch (1-based number) and continue.
            print(f"Error al obtener los textos y anotaciones para el lote {i//batch_size + 1}: {response.status_code}")

    return all_texts_and_annotations

In [8]:
# Load the manifesto table from the working directory and preview its first
# five rows (the default of head()).
df_manifesto_spanish = pd.read_csv(filepath_or_buffer="Manifesto_spanish.csv")
df_manifesto_spanish.head(n=5)

Unnamed: 0.1,Unnamed: 0,country,countryname,oecdmember,eumember,edate,date,party,partyname,partyabbrev,...,per703_1,per703_2,rile,planeco,markeco,welfare,intpeace,datasetversion,id_perm,key
0,1566,33,Spain,10,0,1977-06-15,197706,33220,Communist Party of Spain,PCE,...,,,-13.953,4.264,0.388,10.853,0.388,2024a,VVUNDW,33220_197706
1,1567,33,Spain,10,0,1977-06-15,197706,33320,Spanish Socialist Workers’ Party,PSOE,...,,,-9.253,0.356,0.712,13.523,1.423,2024a,CAFE6R,33320_197706
2,1568,33,Spain,10,0,1977-06-15,197706,33430,Union of the Democratic Centre/Centrist Bloc,UCD,...,,,-0.469,2.347,6.103,15.493,3.286,2024a,NVYU5M,33430_197706
3,1569,33,Spain,10,0,1977-06-15,197706,33610,Popular Alliance,AP,...,,,12.069,3.448,14.655,20.69,0.0,2024a,BAWT83,33610_197706
4,1570,33,Spain,10,0,1977-06-15,197706,33901,Basque Left,EE,...,,,-23.499,1.044,0.783,8.094,3.133,2024a,74CMWM,33901_197706


In [10]:
# Build the lookup key "<party>_<date>" for every row from the string forms of
# the two columns, store it in the 'key' column, and collect all keys as a list.
party_part = df_manifesto_spanish['party'].astype(str)
date_part = df_manifesto_spanish['date'].astype(str)
df_manifesto_spanish['key'] = party_part + '_' + date_part
keys = df_manifesto_spanish['key'].tolist()

In [12]:
import random

# Fixed seed so the shuffled order (and with it the order of the requests and
# of the rows collected from them) is the same on every run of the notebook.
SHUFFLE_SEED = 42

# Work on a copy so the original list keeps its order.
keys_shuffle = keys.copy()

# Shuffle the copy with a dedicated generator; the global random state is untouched.
random.Random(SHUFFLE_SEED).shuffle(keys_shuffle)

In [13]:
# Sanity check: number of keys that will be requested.
len(keys_shuffle)

502

In [14]:
# Version identifier sent with every API request in this notebook.
metadata_version = '2024-1'
# Fetch the metadata for all shuffled keys (batched inside get_metadata).
metadata = get_metadata(keys=keys_shuffle, version=metadata_version)

In [15]:
# Keep the 'manifesto_id' of every metadata entry whose 'is_primary_doc' flag
# is truthy; entries without the flag are treated as not primary.
available_keys = []
for entry in metadata:
    if entry.get('is_primary_doc', False):
        available_keys.append(entry['manifesto_id'])


In [18]:
# Download texts and annotations for the keys selected from the metadata,
# using the same version identifier as the metadata request.
texts_and_annotations = get_all_texts_and_annotations(
    keys=available_keys,
    version=metadata_version,
)


In [36]:
# Flatten every annotated segment of every returned entry into (text, code)
# pairs, then split the pairs into the two parallel lists used afterwards.
segment_pairs = [
    (segment['text'], segment['cmp_code'])
    for entry in texts_and_annotations
    for segment in entry['items']
]
texts = [text for text, _ in segment_pairs]
cmp_codes = [code for _, code in segment_pairs]

In [41]:
# One row per annotated segment: its text and the code assigned to it.
df = pd.DataFrame(dict(Text=texts, Code=cmp_codes))


In [43]:
# Persist the labelled segments to a CSV file in the working directory.
# NOTE(review): with the default index=True the row index is written as an
# unnamed first column; consider index=False if no consumer relies on it — confirm.
df.to_csv("manifestos_etiquetados.csv")

In [31]:
# Inspect the first annotated segment of the fourth returned entry.
texts_and_annotations[3]['items'][0]

{'text': 'Rumbo a una Bolivia Líder', 'cmp_code': 'H', 'eu_code': 'NA'}