### Manifesto project database 

Notebook para generar un corpus de datos a partir de la Manifesto Project Database.
pip install requests pandas pymanifesto

In [1]:
import os

import pandas as pd
import requests

In [2]:
# The API key is read from the environment so that it is never stored in the
# notebook. The key that used to be hardcoded in this cell should be treated
# as leaked and rotated.
API_KEY = os.environ.get("MANIFESTO_API_KEY", "")
if not API_KEY:
    print("Aviso: define la variable de entorno MANIFESTO_API_KEY antes de llamar a la API.")

# Root of the Manifesto Project API; endpoint names are appended to it.
BASE_URL = "https://manifesto-project.wzb.eu/api/v1/"


In [3]:
def get_core_dataset(version):
    """Download the core dataset and return it as a pandas DataFrame.

    Calls the ``get_core`` endpoint asking for the ``xlsx`` kind. On an
    HTTP 200 response, the ``content`` field of the JSON body is
    base64-decoded and parsed with ``pd.read_excel``.

    Args:
        version: Dataset identifier sent as the ``key`` request parameter.

    Returns:
        The parsed DataFrame, or ``None`` (after printing the status code)
        when the response status is not 200.
    """
    import base64
    import io

    endpoint = f"{BASE_URL}get_core"
    query = {'api_key': API_KEY, 'key': version, 'kind': 'xlsx'}
    response = requests.get(endpoint, params=query)

    # Bail out early on any non-success status
    if response.status_code != 200:
        print(f"Error al obtener el dataset principal: {response.status_code}")
        return None

    # The spreadsheet arrives base64-encoded inside the JSON payload
    payload = response.json()
    workbook_bytes = base64.b64decode(payload['content'])
    return pd.read_excel(io.BytesIO(workbook_bytes))

In [4]:
from typing import List, Dict

def get_metadata(keys: List[str], version: str, batch_size: int = 50) -> List[Dict]:
    """Fetch metadata records for ``keys`` via POST requests, in batches.

    Args:
        keys: Keys to look up; sent as the ``keys[]`` form field.
        version: Value sent as the ``version`` form field.
        batch_size: Maximum number of keys per request.

    Returns:
        The concatenated ``items`` lists of every batch that answered with
        HTTP 200. Failed batches are reported with ``print`` and skipped.
    """
    endpoint = f"{BASE_URL}metadata"
    collected = []

    batch_starts = range(0, len(keys), batch_size)
    for batch_number, start in enumerate(batch_starts, start=1):
        payload = {
            'api_key': API_KEY,
            'keys[]': keys[start:start + batch_size],
            'version': version,
        }
        response = requests.post(endpoint, data=payload)

        # Report the failed batch and move on to the next one
        if response.status_code != 200:
            print(f"Error al obtener los metadatos para el lote {batch_number}: {response.status_code}")
            continue

        collected.extend(response.json()['items'])

    return collected

In [5]:
def get_all_texts_and_annotations(keys: List[str], version: str, batch_size: int = 50) -> List[Dict]:
    """Fetch texts and annotations for ``keys`` via GET requests, in batches.

    Args:
        keys: Keys to look up; sent as the ``keys[]`` query parameter.
        version: Value sent as the ``version`` query parameter.
        batch_size: Maximum number of keys per request.

    Returns:
        The concatenated ``items`` lists of every batch that answered with
        HTTP 200. Failed batches are reported with ``print`` and skipped.
    """
    endpoint = f"{BASE_URL}texts_and_annotations"
    batches = [
        keys[start:start + batch_size]
        for start in range(0, len(keys), batch_size)
    ]

    collected = []
    for batch_number, batch in enumerate(batches, start=1):
        query = {'api_key': API_KEY, 'keys[]': batch, 'version': version}
        response = requests.get(endpoint, params=query)

        if response.status_code == 200:
            collected += response.json()['items']
        else:
            print(f"Error al obtener los textos y anotaciones para el lote {batch_number}: {response.status_code}")

    return collected

In [6]:
def get_texts_and_annotations(keys, version):
    """Fetch texts and annotations for ``keys`` in a single GET request.

    No batching and no status check are performed: the decoded JSON body of
    the response is returned as-is.

    Args:
        keys: Keys sent as the ``keys[]`` query parameter.
        version: Value sent as the ``version`` query parameter.

    Returns:
        Whatever ``response.json()`` yields for the request.
    """
    endpoint = f"{BASE_URL}texts_and_annotations"
    query = {"api_key": API_KEY, "keys[]": keys, "version": version}
    return requests.get(endpoint, params=query).json()

In [7]:
def create_corpus(id_perms, metadata_version, text_version):
    """Build a DataFrame of manifesto metadata with the document text attached.

    Args:
        id_perms: Keys passed to ``get_metadata``.
        metadata_version: Version string used for the metadata request.
        text_version: Version string used for the text request.

    Returns:
        A DataFrame with one row per metadata record and an extra
        ``full_text`` column. The text is the space-joined ``text`` of all
        segments returned for the record's ``manifesto_id``; rows without a
        ``manifesto_id`` or without a returned text get a missing value.
    """
    # get_metadata returns a plain list of record dicts, not a mapping with
    # an 'items' key, so it is used directly.
    metadata = get_metadata(id_perms, metadata_version)

    # Records may carry manifesto_id=None; only real ids can be requested.
    manifesto_ids = [
        record['manifesto_id']
        for record in metadata
        if record.get('manifesto_id')
    ]

    # Use the batched fetcher so long id lists are split across requests.
    documents = get_all_texts_and_annotations(manifesto_ids, text_version) if manifesto_ids else []

    # Each document looks like {'key': ..., 'items': [{'text': ...}, ...]};
    # the full text is rebuilt by joining its segments.
    text_dict = {
        document['key']: " ".join(
            segment['text']
            for segment in document.get('items', [])
            if segment.get('text')
        )
        for document in documents
    }

    df = pd.DataFrame(metadata)
    if 'manifesto_id' in df.columns:
        df['full_text'] = df['manifesto_id'].map(text_dict)
    else:
        # No metadata came back: still expose the column callers expect.
        df['full_text'] = pd.Series(dtype='object')

    return df

In [8]:
def get_manifesto_texts(keys, version="2024-1", translation='en'):
    """Fetch texts and annotations for ``keys``, optionally translated.

    Args:
        keys: Keys sent as the ``keys[]`` query parameter.
        version: Value sent as the ``version`` query parameter.
        translation: Value sent as the ``translation`` query parameter.
            Pass a falsy value (``None`` or ``""``) to omit the parameter.

    Returns:
        The decoded JSON body of the response. No status check is performed.
    """
    # Build the endpoint from BASE_URL like the other helpers, instead of
    # repeating the full URL literal.
    url = f"{BASE_URL}texts_and_annotations"

    params = {
        "api_key": API_KEY,
        "keys[]": keys,
        "version": version,
    }

    # Only send the translation parameter when one was requested
    if translation:
        params["translation"] = translation

    response = requests.get(url, params=params)
    return response.json()

In [9]:
# Identifier sent as the `key` parameter of the get_core request.
core_version = 'MPDS2024a' 
# core_df is None if the request fails (get_core_dataset prints the status code).
core_df = get_core_dataset(core_version)


In [10]:
# Display the downloaded core dataset.
core_df

Unnamed: 0,country,countryname,oecdmember,eumember,edate,date,party,partyname,partyabbrev,parfam,...,per608_3,per703_1,per703_2,rile,planeco,markeco,welfare,intpeace,datasetversion,id_perm
0,11,Sweden,0,0,1944-09-17,194409,11220,Communist Party of Sweden,SKP,20,...,,,,9.600,1.900,1.900,0.000,1.900,2024a,JN1LZH
1,11,Sweden,0,0,1944-09-17,194409,11320,Social Democratic Labour Party,SAP,30,...,,,,-37.800,3.300,2.200,33.400,5.600,2024a,CMR7F6
2,11,Sweden,0,0,1944-09-17,194409,11420,People’s Party,FP,40,...,,,,9.500,3.200,6.400,14.300,1.600,2024a,Z6OL6C
3,11,Sweden,0,0,1944-09-17,194409,11620,Right Party,,60,...,,,,28.000,1.800,22.800,10.600,0.000,2024a,YMKVN2
4,11,Sweden,0,0,1944-09-17,194409,11810,Agrarian Party,,80,...,,,,23.810,0.000,19.048,0.000,4.762,2024a,U4SCRD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5146,181,South Africa,0,0,2019-05-08,201905,181510,African Christian Democratic Party,ACDP,50,...,0.0,5.000,0.0,12.500,0.833,7.500,14.167,0.000,2024a,HSQT6Y
5147,181,South Africa,0,0,2019-05-08,201905,181520,African Transformation Movement,ATM,50,...,0.0,1.042,0.0,-6.771,3.385,1.042,22.135,1.562,2024a,B6QCSE
5148,181,South Africa,0,0,2019-05-08,201905,181710,Freedom Front Plus,FF+,70,...,0.0,3.497,0.0,19.580,0.699,10.664,11.364,0.000,2024a,OS725O
5149,181,South Africa,0,0,2019-05-08,201905,181910,Inkatha Freedom Party,IFP,90,...,0.0,3.289,0.0,-6.579,0.658,0.658,28.947,0.000,2024a,XKM7J7


In [15]:
# Country names used to select Spanish-speaking countries from the core dataset
paises_hispanohablantes = [
    "Argentina",
    "Bolivia",
    "Chile",
    "Colombia",
    "Costa Rica",
    "Cuba",
    "Dominican Republic",
    "Ecuador",
    "El Salvador",
    "Equatorial Guinea",
    "Guatemala",
    "Honduras",
    "Mexico",
    "Nicaragua",
    "Panama",
    "Paraguay",
    "Peru",
    "Spain",
    "Uruguay",
    "Venezuela",
]

In [18]:
# Keep only the rows whose country name is in the Spanish-speaking list
core_df_filtrado = core_df.loc[
    core_df['countryname'].isin(paises_hispanohablantes)
]


In [21]:
# Save the filtered rows to a CSV file in the working directory
# (to_csv is called with default options).
core_df_filtrado.to_csv("Manifesto_spanish.csv")

In [12]:
import random

# NOTE(review): `keys` is not defined in any cell shown above this one; it
# presumably survives from a deleted or out-of-order cell. It must be defined
# explicitly for the notebook to pass Restart & Run All — confirm how it was built.

# Copy the list so the original order of `keys` is left untouched
keys_shuffle = keys.copy()

# Shuffle with a fixed seed on a private generator so the sample taken from
# this list is reproducible and the global random state is not disturbed
random.Random(42).shuffle(keys_shuffle)



In [13]:
# Take the first 1000 shuffled keys as the working sample.
keys_to_be_used = keys_shuffle[0:1000]

In [14]:
# Sanity check: number of keys in the sample.
len(keys_to_be_used)

1000

In [40]:
# Version string sent as the `version` field of the metadata request.
metadata_version = '2024-1' 
# Batches that fail are printed and skipped by get_metadata, so the result
# may contain fewer records than keys requested.
metadata = get_metadata(keys_to_be_used, metadata_version)


In [41]:
# Inspect the metadata records returned by the API.
metadata

[{'party_id': 33905,
  'election_date': '197706',
  'language': None,
  'source': None,
  'has_eu_code': None,
  'is_primary_doc': None,
  'may_contradict_core_dataset': None,
  'manifesto_id': None,
  'md5sum_text': None,
  'url_original': None,
  'md5sum_original': None,
  'annotations': None,
  'handbook': None,
  'is_copy_of': None,
  'title': 'Esquerra de Catalunya. Front Electoral Democratic. Programa',
  'translation_en': None},
 {'party_id': 95711,
  'election_date': '199709',
  'language': None,
  'source': None,
  'has_eu_code': None,
  'is_primary_doc': None,
  'may_contradict_core_dataset': None,
  'manifesto_id': None,
  'md5sum_text': None,
  'url_original': None,
  'md5sum_original': None,
  'annotations': None,
  'handbook': None,
  'is_copy_of': None,
  'title': 'Program srpske radikalne stranke',
  'translation_en': None},
 {'party_id': 41320,
  'election_date': '196109',
  'language': 'german',
  'source': 'CEMP',
  'has_eu_code': False,
  'is_primary_doc': True,
  '

In [42]:
# Collect the manifesto_id of every metadata record flagged as a primary document
available_keys = [
    record['manifesto_id']
    for record in metadata
    if record.get('is_primary_doc', False)
]


In [43]:
# Number of records flagged as primary documents.
len(available_keys)

622

In [46]:
# Fetch texts for the first 10 available keys only; metadata_version is
# reused here as the version for the text request.
texts_and_annotations = get_all_texts_and_annotations(available_keys[0:10], metadata_version)


In [47]:
# Inspect the fetched documents and their annotated segments.
texts_and_annotations

[{'key': '171601_200306',
  'kind': '',
  'items': [{'text': 'Democracia con República.',
    'cmp_code': 'NA',
    'eu_code': 'NA'},
   {'text': 'Acción Nacional revalida su permanente compromiso con la justicia en la democracia en el ámbito de la libertad.',
    'cmp_code': '305',
    'eu_code': 'NA'},
   {'text': 'Ante las elecciones del 2003 para renovar la Cámara de Diputados del Congreso de la Unión, decimos con claridad para qué queremos el voto.',
    'cmp_code': '305',
    'eu_code': 'NA'},
   {'text': 'Nuestra plataforma muestra las acciones legislativas concretas que proponemos para realizar nuestras aspiraciones de justicia y realización del bien común, en el marco de las instituciones democráticas.',
    'cmp_code': '305',
    'eu_code': 'NA'},
   {'text': 'Pedimos el voto renovado de la ciudadanía para producir las leyes que la sociedad necesita, y para consolidar los cambios institucionales que den mayor eficacia a la democracia que las y los mexicanos construimos.',
   