In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

import pydata_google_auth
from google.cloud import bigquery

from sksurv.nonparametric import kaplan_meier_estimator
from sksurv.compare import compare_survival

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [2]:
credentials = pydata_google_auth.get_user_credentials(
    ['https://www.googleapis.com/auth/bigquery'], credentials_cache=pydata_google_auth.cache.NOOP
)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=262006177488-3425ks60hkk80fssi9vpohv88g6q1iqd.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fbigquery&state=I9dmLYy3UjHbj4ePo8rVYU8hcClbuH&prompt=consent&access_type=offline


In [6]:
from google.cloud import bigquery
from google.api_core.exceptions import GoogleAPIError
from datetime import datetime
import pandas as pd


def check_client_connection(client):
    """
    Verifica se o cliente do BigQuery está conectado e pode acessar o serviço corretamente.
    
    Args:
        client (bigquery.Client): Uma instância do cliente BigQuery.
    
    Returns:
        bool: Retorna True se o cliente estiver conectado, False caso contrário.
    """
    try:
        # Tenta listar os datasets disponíveis no projeto para verificar a conexão
        _ = list(client.list_datasets())
        return True
    except GoogleAPIError as e:
        print(f"Erro de conexão: {e}")
        return False


def config_client(project_id):
    project_id = project_id.lower()
    if project_id == 'prod' or project_id == 'petlove-dataeng-prod-01':
        project_id = 'petlove-dataeng-prod-01'
    elif project_id == 'stag' or  project_id == 'petlove-dataeng-stag-01' or project_id == 'staging':
        project_id = 'petlove-dataeng-stag-01'
    else:
        raise ValueError("Este é um nome de projeto inválido!")
    
    client = bigquery.Client(project=project_id)
    return client

In [7]:
project_id = 'petlove-dataeng-prod-01'

client = bigquery.Client(project=project_id)

DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.

In [None]:
sql = f"""
with final_base as (
  select * from (
  with
  setup as (
    with
    base as (
      select
        concat('nf-',usp.id) as id
        , parse_date('%d/%m/%Y', substr(usp.digital_signature, 23, 10)) as data_assinatura_contrato
        , b.dt_microchipagem
      from `petlove-dataeng-prod-01.curated_health.health_user_pet` usp
      left join `petlove-dataeng-prod-01.op_health.beneficiarios` b
        on concat('nf-',usp.id) = b.id_pet_bu
      where 1=1
        and digital_signature is not null
        and usp.refdate <= current_date()
        and substr(usp.digital_signature, 23, 10) <> ''
    )
    select
      id as id_pet_bu
      , case
          when data_assinatura_contrato is null then null
          when dt_microchipagem is null then null
          when data_assinatura_contrato is not null and dt_microchipagem is not null then
            case
              when data_assinatura_contrato > dt_microchipagem then data_assinatura_contrato
              when dt_microchipagem > data_assinatura_contrato then dt_microchipagem
            end
      end as data_setup
    from base
  ),
  vacina as (
    select distinct
      id_pet_bu
      , max(data_origem_date) as data_vacina
    from `petlove-dataeng-prod-01.op_health.custos_medicos`
    where 1=1
      and status = 'Ativo'
      and sistema_origem_custo = 'ERP Nofaro'
      and upper(nome_procedimento) like '%VACINA%'
      and upper(nome_procedimento) not like '%CONSULTA%'
    group by 1
  ),
  compra_add as (
    select
      id_pet_bu
      , max(dt_pagamento) as data_pagamento
    from `petlove-dataeng-prod-01.op_health.faturamento_pet_nofaro`
    where 1=1
      and vlr_carencia <> 0
    group by 1
  ),
  clinico_geral as (
    select
      id_pet_bu
      , max(data_origem_date) as data_atend_clin_geral
    from `petlove-dataeng-prod-01.op_health.custos_medicos`
    where 1=1
      and status = 'Ativo'
      and sistema_origem_custo = 'ERP Nofaro'
      and (upper(nome_procedimento) like '%CONSULTA%' and upper(nome_procedimento) like '%GERAL%' and upper(nome_procedimento) not like '%VACINA%')
    group by id_pet_bu
  ),
  pedido_ecom as (
    select
      b.id_pet_bu
      , b.nome_pet
      , case
          when max(fpf1.chv_data_emissao_nota_fiscal) > max(fpf2.chv_data_emissao_nota_fiscal) then max(fpf1.chv_data_emissao_nota_fiscal)
          when max(fpf1.chv_data_emissao_nota_fiscal) < max(fpf2.chv_data_emissao_nota_fiscal) then max(fpf2.chv_data_emissao_nota_fiscal)
          when max(fpf1.chv_data_emissao_nota_fiscal) = max(fpf2.chv_data_emissao_nota_fiscal) then max(fpf1.chv_data_emissao_nota_fiscal)
      end as dt_pedido_ecom
    from `petlove-dataeng-prod-01.op_health.beneficiarios` b
    left join `petlove-dataeng-prod-01.dw_corporativo.dim_cliente` dc1
      on dc1.cpf_aberto = b.documento
    left join `petlove-dataeng-prod-01.dw_corporativo.dim_cliente` dc2
        on dc2.email_aberto = lower(trim(b.email))
    left join `petlove-dataeng-prod-01.dw_corporativo.ft_pedido_faturado` fpf1
      on dc1.chv_cliente = fpf1.chv_cliente
    left join `petlove-dataeng-prod-01.dw_corporativo.ft_pedido_faturado` fpf2
      on dc2.chv_cliente = fpf2.chv_cliente
    group by 1,2
  ),
  beneficiarios as (
    select
      id_tutor,
      idade_pet_venda,
      idade_pet_anos,
      cast(id_pet as int64) as id_pet,
      nome_pet,
      dt_inclusao_pet,
      date_diff( coalesce(dt_desligamento, current_date),dt_inclusao_pet, day) as tempo_ativo,
      fl_ativo,
      especie,
      status_microchip,
      coalesce(dt_desligamento, current_date) as dt_desligamento_nn,
      case
        when lower(nome_plano_dts_pet) like '%leve%' then 'leve'
        when lower(nome_plano_dts_pet) like '%tranquilo%' then 'tranquilo'
        when lower(nome_plano_dts_pet) like '%ideal%' then 'ideal'
        when lower(nome_plano_dts_pet) like '%completo%' then 'completo'
        when lower(nome_plano_dts_pet) like '%essencial%' then 'essencial'
        else 'outros' 
      end as plano,
      fl_inside_sales
    from `op_health.beneficiarios`
    where 1=1
      and fl_contrato_migrado=0
      and fl_pet_sem_inclusao=0
      and fl_colaborador=0
      and dt_inclusao_pet>='2022-01-01'
      and idade_pet_venda is not null 
      and idade_pet_venda>=0
      AND empresa = 'Nofaro'
      AND especie is not null
      AND (motivo_desligamento not in ('Óbito', 'defaulting_wating', 'defaulting_failed', 'death', 'defaulting', 'Morte de pet', 'Inadimplência', 'Inadimplência: aguardando', 'Inadimplência: falha no p')
      or motivo_desligamento is null)
  )
  , simultaneos as (
    select
      b1.id_tutor,
      b1.id_pet,
      count(distinct b2.id_pet) as pets_simultaneos_ativos
    from beneficiarios b1
    left join beneficiarios b2
      on b1.id_tutor = b2.id_tutor
      and ((b1.dt_desligamento_nn > b2.dt_inclusao_pet and b1.dt_desligamento_nn <= b2.dt_desligamento_nn)
      or (b1.dt_inclusao_pet > b2.dt_inclusao_pet and b1.dt_inclusao_pet <= b2.dt_desligamento_nn))
    group by 1, 2
  )
  select
    b.id_tutor,
    b.idade_pet_venda,
    b.idade_pet_anos,
    b.id_pet,
    b.nome_pet,
    b.dt_inclusao_pet,
    (case when b.tempo_ativo<0 then 0 else b.tempo_ativo end) as tempo_ativo,
    cast(b.fl_ativo as bool) as fl_ativo,
    b.plano,
    b.especie,
    b.status_microchip,
    b.fl_inside_sales,
    s.pets_simultaneos_ativos,
    st.data_setup as dt_setup,
    v.data_vacina as dt_max_vacina,
    ca.data_pagamento as dt_max_compra_add,
    cg.data_atend_clin_geral as dt_max_clin_geral,
    pe.dt_pedido_ecom as dt_max_pedido_ecom
  from beneficiarios b
  join simultaneos s
    on b.id_tutor = s.id_tutor 
    and b.id_pet = s.id_pet
  left join setup st
    on concat('nf-',b.id_pet) = st.id_pet_bu
  left join vacina v
    on concat('nf-',b.id_pet) = v.id_pet_bu
  left join compra_add ca
    on concat('nf-',b.id_pet) = ca.id_pet_bu
  left join clinico_geral cg
    on concat('nf-',b.id_pet) = cg.id_pet_bu
  left join pedido_ecom pe
    on concat('nf-',b.id_pet) = pe.id_pet_bu
    where b.plano not in ('outros')
  )
  unpivot(data for habito in (dt_inclusao_pet as 'inclusao', dt_setup as 'setup', dt_max_vacina as 'vacina', dt_max_compra_add as 'add', dt_max_clin_geral as 'clinico', dt_max_pedido_ecom as 'ecom'))
)
select
  f.id_tutor
  , f.idade_pet_venda
  , f.idade_pet_anos
  , f.id_pet
  , b.dt_inclusao_pet
  , coalesce(b.dt_desligamento, current_date()) as dt_desligamento
  , f.nome_pet
  , f.tempo_ativo
  , case
      when f.fl_ativo = true then false
      when f.fl_ativo = false then true
  end as fl_ativo
  , f.plano
  , f.especie
  , f.status_microchip
  , f.fl_inside_sales
  , f.pets_simultaneos_ativos
  , string_agg(f.habito order by data) as mix_procedimentos_data
  , string_agg(f.habito order by f.habito) as mix_procedimentos_alf
  , split(string_agg(f.habito order by data desc), ',')[offset(0)] ultimo_evento
  , max(f.data) as data_ultimo_evento
  , case when date_diff(coalesce(b.dt_desligamento, current_date()),max(f.data), day)<0 then 0
      else date_diff(coalesce(b.dt_desligamento, current_date()),max(f.data), day) end as survival_in_days
from final_base f
left join `petlove-dataeng-prod-01.op_health.beneficiarios` b
  on concat('nf-',f.id_pet) = b.id_pet_bu
group by 1,2,3,4,5,6,7,8,9,10,11,12,13,14, b.dt_desligamento
order by 4 asc
"""

df = client.query(sql).to_dataframe()

In [None]:
df.info()