In [1]:
from google.cloud import bigquery
import re
import os
import pandas as pd
from google.cloud import storage
import io
project_id = "interseguro-data"
client = bigquery.Client(project_id)

In [175]:
query = """
SELECT
TABLE_NAME
,COLUMN_NAME
,COLUMN_ID
,1 AS INDICADOR
FROM `interseguro-data.acsele_data.ALL_TAB_COLUMNS_raw`
--WHERE TABLE_NAME IN ('RECIBIRDOCUMENTO', 'DERIVARCOMERCIAL')
--WHERE TABLE_NAME IN ('APLAZARPROPUESTA','CAMBIARPROPUESTA','OBSERVARPROPUESTA')
WHERE TABLE_NAME IN 
('ENDOSARND','ENDOSARVCASH','EXCLUIRCOBENDOSO'
,'ENDOSARAPLAZADO','ENDOSARRECHAZADO','ENDOSAR','ENDOSARVIDA'
,'ENDOSARINSPECCIONVEH','ENDOSARVP','ENDOSARSALPROSURA','ENDOSARFLEX'
,'ENDOSARVEH','ENDOSARPROPRAPIDA','ENDOSOCAMBIOFONDOFLEX'
,'ENDOSARACC','ENDOSARPROPUESTA','ENDOSARDESISTIDO','ENDOSARVF','ENDOSARVLC'
,'ENDOSARRH','ENDOSARSURA')

ORDER BY TABLE_NAME,COLUMN_ID ASC
"""

In [176]:
def sqlbigquery(filename):
    with open(filename,"r") as file:
        statement = file.readlines()
        statement = " ".join(statement)
    print(f" {filename}")
    return statement

def read_table_bq(sql):
    df = pd.read_gbq(sql, project_id=project_id, dialect='standard')
    return df

def load_data(df,filename):
    par_project_output = 'interseguro-data'
    par_bucket_output = 'interseguro-datalake-prod'
    client_output = storage.Client(par_project_output)
    bucket_output = client_output.bucket(par_bucket_output)
    bytes_writer = io.BytesIO()
    df_bytes = df.to_parquet(bytes_writer,engine="pyarrow")
    data_length = bytes_writer.tell()
    print(f"[INFO] size file : {data_length}")
    bytes_writer.seek(0)
    blob_n = bucket_output.blob(f"SPARK_TABLE/ACSELE/{filename}/{filename}.parquet")
    blob_n.upload_from_string(
        data=bytes_writer.getvalue(),
        content_type='application/octet-stream',
        client=client_output
    )


In [177]:
df = read_table_bq (query)

  df = pd.read_gbq(sql, project_id=project_id, dialect='standard')


In [178]:
pivot_df = df.pivot(index='COLUMN_NAME', columns='TABLE_NAME', values='INDICADOR')
pivot_df = pivot_df.fillna(0)
pivot_df.to_excel('testeo.xlsx')
columnas = pivot_df.columns
print(columnas)
df_temp = pd.read_excel('testeo.xlsx')

Index(['ENDOSAR', 'ENDOSARACC', 'ENDOSARAPLAZADO', 'ENDOSARDESISTIDO',
       'ENDOSARFLEX', 'ENDOSARINSPECCIONVEH', 'ENDOSARND', 'ENDOSARPROPRAPIDA',
       'ENDOSARPROPUESTA', 'ENDOSARRECHAZADO', 'ENDOSARRH',
       'ENDOSARSALPROSURA', 'ENDOSARSURA', 'ENDOSARVCASH', 'ENDOSARVEH',
       'ENDOSARVF', 'ENDOSARVIDA', 'ENDOSARVLC', 'ENDOSARVP',
       'ENDOSOCAMBIOFONDOFLEX', 'EXCLUIRCOBENDOSO'],
      dtype='object', name='TABLE_NAME')


In [186]:
def aplicar_condicion(row,num_filas,columnas):
    resultado = {}
    for col in columnas:
        if row.name == num_filas -1 :
            resultado[col] =  'FROM INTERSEGURO.'+col + '\nUNION ALL ' 
        elif row.name == 0 :
            resultado[col] = f"SELECT \n'{col}' AS ESTADO \n," + ( row['COLUMN_NAME'] if row[col]==1 else 'NULL AS ' + row['COLUMN_NAME'])
        else:
            resultado[col] = ','+ row['COLUMN_NAME'] if row[col]== 1 else ',NULL AS ' + row['COLUMN_NAME']
    return pd.Series(resultado)

In [187]:
def generate_file(df):
    with open('output.sql', 'w') as file:
        for col in columnas:
            for value in df[col]:
                file.write(str(value) + '\n')
                
num_filas = df_temp.shape[0]
#nuevas_columnas = df_temp.apply(aplicar_condicion, axis=1)
nuevas_columnas = df_temp.apply(lambda row: aplicar_condicion(row, num_filas, columnas), axis=1)

df_t = pd.concat([df_temp['COLUMN_NAME'], nuevas_columnas], axis=1)                
generate_file(df_t)            

In [188]:
df_temp

Unnamed: 0,COLUMN_NAME,ENDOSAR,ENDOSARACC,ENDOSARAPLAZADO,ENDOSARDESISTIDO,ENDOSARFLEX,ENDOSARINSPECCIONVEH,ENDOSARND,ENDOSARPROPRAPIDA,ENDOSARPROPUESTA,...,ENDOSARSALPROSURA,ENDOSARSURA,ENDOSARVCASH,ENDOSARVEH,ENDOSARVF,ENDOSARVIDA,ENDOSARVLC,ENDOSARVP,ENDOSOCAMBIOFONDOFLEX,EXCLUIRCOBENDOSO
0,APLAZARENDOSOINPUT,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,APLAZARENDOSOVALUE,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,CONTOIPENDINPUT,0,1,0,0,1,0,0,0,0,...,1,1,1,1,1,1,1,1,0,0
3,CONTOIPENDVALUE,0,1,0,0,1,0,0,0,0,...,1,1,1,1,1,1,1,1,0,0
4,DESISTIRENDOSOINPUT,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128,TIPOENDOSOVALUE,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
129,TIPOVIGENCIAENDACCINPUT,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
130,TIPOVIGENCIAENDACCVALUE,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
131,TIPOVIGENCIAENDOSOINPUT,0,0,0,0,0,0,0,0,0,...,1,1,1,0,1,1,1,1,0,0
