In [1]:
pip install tableauhyperapi pandas

Collecting tableauhyperapi
  Using cached tableauhyperapi-0.0.19484-py3-none-win_amd64.whl.metadata (1.3 kB)
Collecting pandas
  Using cached pandas-2.2.2-cp39-cp39-win_amd64.whl.metadata (19 kB)
Collecting cffi!=1.14.3,<2,>=1.12.2 (from tableauhyperapi)
  Using cached cffi-1.16.0-cp39-cp39-win_amd64.whl.metadata (1.5 kB)
Collecting numpy>=1.22.4 (from pandas)
  Using cached numpy-2.0.0-cp39-cp39-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting pycparser (from cffi!=1.14.3,<2,>=1.12.2->tableauhyperapi)
  Using cached pycparser-2.22-py3-none-any.whl.metadata (943 bytes)
Using cached tableauhyperapi-0.0.19484-py3-none-win_amd64.whl (53.8 MB)
Using cached pandas-2.2.2-cp39-cp39-win_amd64.whl (11.6 MB)
Using cached cffi-1.16.0-cp39-cp39-win_amd64.whl (181 kB)
Using cached numpy-2.0.0-cp39-

In [3]:
import zipfile
import os
from tableauhyperapi import HyperProcess, Connection, Telemetry, TableName, Inserter, HyperException
import pandas as pd
import requests

In [13]:
# Source workbook URL and the local paths used by the cells below.
twbx_url = "https://public.tableau.com/workbooks/Atenciones_HisMinsa_17005837025700.twb"
twbx_file_path = "Atenciones_HisMinsa.twbx"  # local copy of the downloaded workbook
extracted_folder = "folder_extraido"  # directory the archive is unpacked into
output_csv_path = "tengohambre.csv"  # destination of the CSV export

In [None]:
# Download the .twbx file.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() aborts on an HTTP error so that an error page is never
# written to disk as if it were the workbook.
response = requests.get(twbx_url, timeout=60)
response.raise_for_status()
with open(twbx_file_path, 'wb') as file:
    file.write(response.content)
print(f"Archivo descargado: {twbx_file_path}")

In [14]:
# Unpack the .twbx file (opened as a ZIP archive) into the extraction folder.
with zipfile.ZipFile(twbx_file_path) as workbook_archive:
    workbook_archive.extractall(path=extracted_folder)

In [15]:
# Look for a .hyper or .tde file inside the extracted folder.
def _find_first_extract(folder):
    """Return the path of the first .hyper/.tde file found under `folder`.

    The directory tree is walked with os.walk and the search stops at the
    first match, so a later directory can never overwrite the result.

    Returns:
        The joined path of the first matching file, or None when no file
        with either extension exists under `folder`.
    """
    for root, _dirs, files in os.walk(folder):
        for file_name in files:
            if file_name.endswith(('.hyper', '.tde')):
                return os.path.join(root, file_name)
    return None


data_file_path = _find_first_extract(extracted_folder)
if data_file_path is None:
    # Fail here with a clear message instead of passing None on to the
    # database connection in a later cell.
    raise FileNotFoundError(
        f"No se encontró ningún archivo .hyper o .tde en '{extracted_folder}'"
    )

print(data_file_path)

folder_extraido\Data\TableauTemp\TEMP_0ifl42b0fpp8d811cnnoi11ygtcj.hyper


In [7]:
# Columns we want to keep.
desired_columns = [
    "ambito", "diris", "aniomes", "etapa", "id_genero",
    "eess_n", "eess_c", "eess_r", "fec_aten", "diriss",
    "Departamento", "Provincia", "Distrito", "REGION", "AREA_Res",
    "CCDD", "NOMBDEP", "CCPP", "NOMBPROV", "Area", "Length",
]

# Figures copied by hand from the log (they are not computed here).
num_rows, num_cols = 2188790, 24  # row count, column count
result_size_mb = 63124.1  # result size in MB

# Report the dataset figures, one per line.
print(
    f"Cantidad de filas: {num_rows}",
    f"Cantidad de columnas: {num_cols}",
    f"Tamaño del resultado: {result_size_mb} MB",
    sep="\n",
)



Cantidad de filas: 2188790
Cantidad de columnas: 24
Tamaño del resultado: 63124.1 MB


In [None]:
# Desired output columns.
desired_columns = [
    "ambito", "diris", "aniomes", "etapa", "id_genero", "eess_n", "eess_c", "eess_r",
    "fec_aten", "diriss", "Departamento", "Provincia", "Distrito", "REGION", "AREA_Res",
    "CCDD", "NOMBDEP", "CCPP", "NOMBPROV", "Area", "Length"
]

# Filters for the query.
year_filter = "2023"
etapa_filter = "01"  # Example: "< 01 mes" should be adjusted to the format of the data
region_filter = "AREQUIPA"
genero_filter = "F"

# Build the SQL query with the required filters.
# Every identifier is double-quoted: unquoted names are case-folded by the
# SQL dialect, so mixed-case columns such as "Departamento" or "AREA_Res"
# would not resolve if interpolated bare.
columns_str = ', '.join(f'"{column}"' for column in desired_columns)
query = f"""
SELECT {columns_str} 
FROM "Extract"."Extract" 
WHERE "aniomes" LIKE '{year_filter}%' 
AND "etapa" = '{etapa_filter}' 
AND "Departamento" = '{region_filter}'
AND "id_genero" = '{genero_filter}'
"""


In [26]:
# Desired columns (excluding Geometry, Geometry1 and Geometry2).
desired_columns = [
    "ambito", "diris", "aniomes", "etapa", "id_genero", "eess_n", "eess_c", "eess_r",
    "fec_aten", "diriss", "Departamento", "Provincia", "Distrito", "REGION", "AREA_Res",
    "CCDD", "NOMBDEP", "CCPP", "NOMBPROV", "Area", "Length"
]

# Filters for the query.
year_filter = "2023"
etapa_filter = "0"  # Example stage value
region_filter = "AREQUIPA"
genero_filter = "F"

# Build the SQL query with the required filters.
# Every identifier is double-quoted: unquoted names are case-folded by the
# SQL dialect, so mixed-case columns such as "Departamento" or "AREA_Res"
# would not resolve if interpolated bare.
columns_str = ', '.join(f'"{column}"' for column in desired_columns)
query = f"""
SELECT {columns_str} 
FROM "Extract"."Extract" 
WHERE "aniomes" LIKE '{year_filter}%' 
AND "etapa" = '{etapa_filter}' 
AND "Departamento" = '{region_filter}'
AND "id_genero" = '{genero_filter}'
"""

In [None]:
# Literal query: all columns, at most 100 rows, with hard-coded filters on
# aniomes, etapa, id_genero and diris.
query = (
    "\n"
    "SELECT * \n"
    'FROM "Extract"."Extract" \n'
    "WHERE aniomes LIKE '2023%' \n"
    "AND etapa = '0' \n"
    "AND id_genero = 'F'\n"
    "AND diris = 'AREQUIPA'\n"
    "LIMIT 100\n"
)

In [39]:
# Literal query listing the distinct values of the `etapa` column.
query = (
    "\n"
    "SELECT DISTINCT etapa\n"
    'FROM "Extract"."Extract"\n'
)

In [57]:
# Connect to the .hyper file, list its tables, run `query` and export the
# result to CSV.
with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(endpoint=hyper.endpoint, database=data_file_path) as connection:
        # List the tables available in the .hyper file, collecting them from
        # every schema so the existence check below does not depend on which
        # schema happens to be enumerated last.
        catalog = connection.catalog
        available_tables = []
        for schema in catalog.get_schema_names():
            table_names = catalog.get_table_names(schema=schema)
            print(f"Tablas en el esquema '{schema}': {table_names}")
            available_tables.extend(table_names)

        # Access the identified table.
        table_name = TableName('Extract', 'Extract')  # Use the correct schema and table name
        if table_name in available_tables:
            try:
                with connection.execute_query(query=query) as result:
                    # Take the column labels from the result itself rather
                    # than from `desired_columns`: the labels then always
                    # match whatever `query` selected (e.g. `SELECT *` or a
                    # single-column query), in the order it returned them.
                    column_names = [column.name.unescaped for column in result.schema.columns]
                    rows = [list(row) for row in result]
            except HyperException as e:
                # Report the failure and skip the export, so an empty CSV is
                # not written and reported as a successful save.
                print(f"Error al ejecutar la consulta: {e}")
            else:
                # Convert the collected rows to a DataFrame.
                df = pd.DataFrame(rows, columns=column_names)

                # Save the DataFrame to a CSV file.
                df.to_csv(output_csv_path, index=False)
                print(f"Datos guardados en {output_csv_path}")
        else:
            print(f"La tabla '{table_name}' no existe en el archivo .hyper")

Tablas en el esquema '"public"': []
Tablas en el esquema '"Extract"': [TableName('Extract', 'Extract')]
Datos guardados en archivo_salida.csv


In [56]:
# Desired columns (excluding Geometry, Geometry1 and Geometry2).
desired_columns = [
    "ambito", "diris", "aniomes", "etapa", "id_genero", "eess_n", "eess_c", "eess_r",
    "fec_aten", "diriss", "Departamento", "Provincia", "Distrito", "REGION", "AREA_Res",
    "CCDD", "NOMBDEP", "CCPP", "NOMBPROV", "Area", "Length"
]

# Filters for the query.
# NOTE: only year_filter is applied by the query below; the other three
# values are defined but not used in this cell.
year_filter = "2023"
etapa_filter = "0"  # Stage index for '< 01 mes'
genero_filter = "F"
region_filter = "AREQUIPA"

# Build the SQL query.
# Select exactly the desired columns instead of `*`, so the result excludes
# the geometry columns and its column order matches `desired_columns`.
# Identifiers are double-quoted because unquoted names are case-folded by the
# SQL dialect, which would break mixed-case columns such as "Departamento".
columns_str = ', '.join(f'"{column}"' for column in desired_columns)
query = f"""
SELECT {columns_str} 
FROM "Extract"."Extract" 
WHERE "aniomes" LIKE '{year_filter}%' 
"""
