In [1]:
import polars
from itertools import repeat

def _question_ids():
    """Yield the labels 'q0', 'q1', 'q2', ... without end."""
    for position, prefix in enumerate(repeat('q')):
        yield prefix + str(position)


# Shared module-level iterator: every label is handed out exactly once, so the
# numbering keeps advancing for as long as this object lives. Re-run this cell
# to start again from 'q0'.
q_gen = _question_ids()


def get_next_q(*_, **__):
    """Return the next label from ``q_gen``.

    Any positional or keyword arguments are accepted and ignored, so the
    function can serve as a per-element callback that discards the element.
    """
    return next(q_gen)

In [2]:
def _specialize_country(country: str):
    def _(q: str) -> str:
        return str.format(q, COUNTRY=country)
    return _

def specialize_country(country: str):
    """Return a polars expression that fills ``country`` into 'pregunta'.

    Each value of the 'pregunta' column is passed through the formatter built
    by ``_specialize_country(country)``; the result is typed as a string column.
    """
    fill_placeholder = _specialize_country(country)
    question_col = polars.col('pregunta')
    return question_col.map_elements(
        fill_placeholder,
        return_dtype=polars.String,
    )

# (short code, country name) pairs; the short code ends up in output file names.
countries = tuple({
    'arg': 'Argentina',
    'tur': 'Turkey',
    'ukr': 'Ukraine',
    'egy': 'Egypt',
}.items())

# Column header in the raw CSV -> column name used by the rest of the notebook.
colnames_map = {
    'Preguntas': 'pregunta',
    'Etiqueta': 'etiqueta',
    'Testeada': 'testeada',
    'Incluir': 'incluir',
    'Comentarios': 'comentarios',
}

# Human-readable topic label -> snake_case identifier.
etiquetas_map = dict([
    ('Gasto social', 'gasto_social'),
    ('Control de capitales', 'control_capitales'),
    ('Gasto social/conocimiento', 'gasto_social_conocimiento'),
    ('Inflación', 'inflacion'),
    ('Distribución del ingreso', 'distribucion_ingreso'),
])

# 'Sí' / 'No' cell text -> boolean.
bool_map = {'Sí': True, 'No': False}

def parametrize_country(x: str) -> str:
    """Turn a question into a ``str.format`` template with a ``{COUNTRY}`` field.

    Every occurrence of the literal token ``COUNTRY`` becomes the named
    placeholder ``{COUNTRY}``.

    Braces already present in ``x`` are doubled first, so that a later
    ``str.format(..., COUNTRY=...)`` call reproduces them verbatim instead of
    raising on, or misreading, a field that was never intended. Text without
    braces is transformed exactly as a plain token replacement would.

    Args:
        x: Question text, possibly containing the token ``COUNTRY``.

    Returns:
        A template that is safe to pass to ``str.format`` with a ``COUNTRY``
        keyword argument.
    """
    # Escape first: the placeholder inserted below must stay a real field.
    escaped = x.replace('{', '{{').replace('}', '}}')
    return escaped.replace('COUNTRY', '{COUNTRY}')

In [3]:
# Load the raw question sheet, normalise its columns, keep only the questions
# flagged for inclusion that have not been tested yet, and label the survivors
# 'q0', 'q1', ... in row order.
#
# The labels are computed from the row position inside the pipeline rather than
# from a shared Python iterator, so re-running this cell always restarts the
# numbering at 'q0' regardless of what ran before it.
preguntas = (
    polars.scan_csv('../data/preguntas.csv')
    .rename(colnames_map)
    .drop('comentarios')
    .with_columns([
        # Literal token COUNTRY -> '{COUNTRY}' placeholder.
        polars.col('pregunta').map_elements(
            function=parametrize_country,
            return_dtype=polars.String,
        ),
        # 'Sí' / 'No' -> True / False; text missing from bool_map becomes null.
        polars.col('incluir').map_elements(
            function=bool_map.get,
            return_dtype=polars.Boolean,
        ),
        polars.col('testeada').map_elements(
            function=bool_map.get,
            return_dtype=polars.Boolean,
        ),
        # Topic label -> identifier; labels missing from etiquetas_map become null.
        polars.col('etiqueta').map_elements(
            function=etiquetas_map.get,
            return_dtype=polars.String,
        ),
    ])
    .filter(polars.col('incluir') & ~polars.col('testeada'))
    .drop('incluir', 'testeada')
    .with_columns(
        # Numbered after the filter, so labels are consecutive over kept rows.
        polars.format('q{}', polars.int_range(polars.len())).alias('qid')
    )
    .select('qid', 'pregunta', 'etiqueta')  # reorder columns
    .collect()
)

In [4]:
# Write one CSV per country, with the country name filled into every question.
for short_name, long_name in countries:
    localized = preguntas.with_columns(specialize_country(long_name))
    localized.write_csv(f'../data/preguntas_clean_{short_name}.csv')