# Contar doses de vacinas da COVID-19

In [4]:
import pandas as pd
from google.colab import auth
auth.authenticate_user()
print('Authenticated')
## Defina o id do seu projeto no bigquery!!!!!
project_id = 'enap-331414' # Defina o id do seu projeto no bigquery!!!!!
## Defina o id do seu projeto no bigquery!!!!!

Authenticated


### Dica: Restrinja seus testes aos estados do AC, AP e RR para diminuir o tamanho da sua Query.

In [None]:


df_vacina = pd.io.gbq.read_gbq('''
SELECT 
    sigla_uf
    ,vacina
    ,count(*) as qt_total
    ,sum(case when regexp_contains(dose,'1ª Dose$|^Dose$|Inicial') then 1 else 0 end) as qt_D1
    ,sum(case when regexp_contains(dose,'2ª Dose$') then 1 else 0 end) as qt_D2
    ,sum(case when regexp_contains(dose,'Reforço') then 1 else 0 end) as qt_Reforco
    ,sum(case when regexp_contains(dose,'Dose Adicional|3ª Dose') then 1 else 0 end) as qt_Adicional
    ,sum(case when regexp_contains(dose,'Única') then 1 else 0 end) as qt_Unica
FROM `basedosdados.br_ms_vacinacao_covid19.microdados_vacinacao`
where vacina in ('85','86','87','88','89') and sigla_uf in ('AC','AP','RR')
group by sigla_uf, vacina;
''', project_id=project_id)

df_vacina.head()

### Calcular a semana epidemiológica (domingo a sábado) a partir da data de vacinação

In [13]:
df_vacina = pd.io.gbq.read_gbq('''
SELECT 
    sigla_uf
    ,vacina
    ,(case 
        when vacina='86' then 'Coronavac'
        when vacina='87' then 'Pfizer'
        when vacina='88' then 'Janssen'
        else 'Astrazeneca'
      end
    ) as vacina_apelido
    ,DATE_TRUNC(data_aplicacao, WEEK(SUNDAY)) as semana
    ,DATE_TRUNC(data_aplicacao, MONTH) as mes
    ,count(*) as qt_total
    ,sum(case when regexp_contains(dose,'1ª Dose$|^Dose$|Inicial') then 1 else 0 end) as qt_D1
    ,sum(case when regexp_contains(dose,'2ª Dose$') then 1 else 0 end) as qt_D2
    ,sum(case when regexp_contains(dose,'Reforço') then 1 else 0 end) as qt_Reforco
    ,sum(case when regexp_contains(dose,'Dose Adicional|3ª Dose') then 1 else 0 end) as qt_Adicional
    ,sum(case when regexp_contains(dose,'Única') then 1 else 0 end) as qt_Unica
FROM `basedosdados.br_ms_vacinacao_covid19.microdados_vacinacao`
where vacina in ('85','86','87','88','89') 
group by sigla_uf, vacina, semana, mes
''', project_id=project_id)

df_vacina.head()


Unnamed: 0,sigla_uf,vacina,vacina_apelido,semana,mes,qt_total,qt_D1,qt_D2,qt_Reforco,qt_Adicional,qt_Unica
0,RO,85,Astrazeneca,2021-01-31,2021-02-01,6870,6857,13,0,0,0
1,MS,85,Astrazeneca,2021-09-05,2021-09-01,11662,241,11421,0,0,0
2,MS,85,Astrazeneca,2021-01-24,2021-01-01,12107,12089,18,0,0,0
3,PE,85,Astrazeneca,2021-10-24,2021-10-01,64944,305,64576,43,20,0
4,SP,85,Astrazeneca,2021-07-18,2021-07-01,955784,397788,557966,4,26,0


In [14]:
df_vacina.to_gbq("enapdatasets.vacinacao",
  project_id=project_id,
  chunksize=40000,
  if_exists='replace',
  )

1it [00:03,  3.66s/it]


## Query para projeção de 2a dose da vacina

In [22]:
df_vacina_proj = pd.io.gbq.read_gbq('''
SELECT v.sigla_uf, v.vacina_apelido, v.semana, v.mes, v.qt_total, v.qt_D1, v.qt_D2, v.qt_Reforco, v.qt_Adicional, v.qt_Unica, vp.qt_D2_Proj, vp.semana_proj, vp.sigla_uf_proj, vp. vacina_apelido_proj
FROM `enap-331414.enapdatasets.vacinacao` v
JOIN (
    SELECT sigla_uf as sigla_uf_proj, vacina_apelido as vacina_apelido_proj, qt_D1 as qt_D2_Proj, DATE_ADD(semana, INTERVAL 56 DAY) as semana_proj 
    FROM `enap-331414.enapdatasets.vacinacao`
    ) as vp 
ON v.sigla_uf=vp.sigla_uf_proj and v.vacina_apelido=vp.vacina_apelido_proj and vp.semana_proj=v.semana 
order by v.sigla_uf, v.vacina, v.semana, vp.semana_proj, vp.sigla_uf_proj, vp.vacina_apelido_proj
''', project_id=project_id)

df_vacina_proj.head()

Unnamed: 0,sigla_uf,vacina_apelido,semana,mes,qt_total,qt_D1,qt_D2,qt_Reforco,qt_Adicional,qt_Unica,qt_D2_Proj,semana_proj
0,,,NaT,NaT,,,,,,,11.0,2021-10-31 00:00:00+00:00
1,,,NaT,NaT,,,,,,,771.0,2021-11-21 00:00:00+00:00
2,,,NaT,NaT,,,,,,,217.0,2021-11-21 00:00:00+00:00
3,,,NaT,NaT,,,,,,,73.0,2022-01-09 00:00:00+00:00
4,,,NaT,NaT,,,,,,,318.0,2021-11-21 00:00:00+00:00


In [23]:
df_vacina_proj['semana_proj'].max()

Timestamp('2022-01-09 00:00:00+0000', tz='UTC')

### Query com parâmetro



```
SELECT v.sigla_uf, v.vacina_apelido, v.semana, v.mes, v.qt_total, v.qt_D1, v.qt_D2, v.qt_Reforco, v.qt_Adicional, v.qt_Unica, vp.qt_D2_Proj, vp.semana_proj, vp.sigla_uf_proj, vp. vacina_apelido_proj
FROM `enap-331414.enapdatasets.vacinacao` v
JOIN (
    SELECT sigla_uf as sigla_uf_proj, vacina_apelido as vacina_apelido_proj, qt_D1 as qt_D2_Proj, DATE_ADD(semana, INTERVAL @qtd_dias_proj_d2 DAY) as semana_proj 
    FROM `enap-331414.enapdatasets.vacinacao`
    ) as vp 
ON v.sigla_uf=vp.sigla_uf_proj and v.vacina_apelido=vp.vacina_apelido_proj and vp.semana_proj=v.semana 
order by v.sigla_uf, v.vacina, v.semana, vp.semana_proj, vp.sigla_uf_proj, vp.vacina_apelido_proj
```



### Corrigindo anos menores do que 2021

In [26]:
df_vacina = pd.io.gbq.read_gbq('''
SELECT 
    sigla_uf
    ,vacina
    ,(case 
        when vacina='86' then 'Coronavac'
        when vacina='87' then 'Pfizer'
        when vacina='88' then 'Janssen'
        else 'Astrazeneca'
      end
    ) as vacina_apelido
    ,(case when EXTRACT(YEAR FROM data_aplicacao) >= 2021 then DATE_TRUNC(data_aplicacao, WEEK(SUNDAY)) else PARSE_DATE('%d/%m/%Y',  '03/01/2021') end) as semana
    ,DATE_TRUNC(data_aplicacao, MONTH) as mes
    ,count(*) as qt_total
    ,sum(case when regexp_contains(dose,'1ª Dose$|^Dose$|Inicial') then 1 else 0 end) as qt_D1
    ,sum(case when regexp_contains(dose,'2ª Dose$') then 1 else 0 end) as qt_D2
    ,sum(case when regexp_contains(dose,'Reforço') then 1 else 0 end) as qt_Reforco
    ,sum(case when regexp_contains(dose,'Dose Adicional|3ª Dose') then 1 else 0 end) as qt_Adicional
    ,sum(case when regexp_contains(dose,'Única') then 1 else 0 end) as qt_Unica
FROM `basedosdados.br_ms_vacinacao_covid19.microdados_vacinacao`
where vacina in ('85','86','87','88','89') 
group by sigla_uf, vacina, semana, mes
''', project_id=project_id)


df_vacina.to_gbq("enapdatasets.vacinacao",
  project_id=project_id,
  chunksize=40000,
  if_exists='replace',
  )
df_vacina.head()


1it [00:04,  4.31s/it]


Unnamed: 0,sigla_uf,vacina,vacina_apelido,semana,mes,qt_total,qt_D1,qt_D2,qt_Reforco,qt_Adicional,qt_Unica
0,MS,85,Astrazeneca,2021-08-22,2021-08-01,46451,182,46269,0,0,0
1,RO,85,Astrazeneca,2021-07-25,2021-07-01,47228,28063,19165,0,0,0
2,RO,85,Astrazeneca,2021-05-02,2021-05-01,23157,16791,6366,0,0,0
3,DF,85,Astrazeneca,2021-04-18,2021-04-01,38761,35249,3512,0,0,0
4,DF,85,Astrazeneca,2021-05-02,2021-05-01,70975,51711,19264,0,0,0


## Query de Projeção com FULL OUTER JOIN (BigQuery)

In [27]:
df_vacina_proj = pd.io.gbq.read_gbq('''
SELECT v.sigla_uf, v.vacina_apelido, v.semana, v.mes, v.qt_total, v.qt_D1, v.qt_D2, v.qt_Reforco, v.qt_Adicional, v.qt_Unica, vp.qt_D2_Proj, vp.semana_proj, vp.sigla_uf_proj, vp. vacina_apelido_proj
FROM `enap-331414.enapdatasets.vacinacao` v
FULL OUTER JOIN (
    SELECT sigla_uf as sigla_uf_proj, vacina_apelido as vacina_apelido_proj, qt_D1 as qt_D2_Proj, DATE_ADD(semana, INTERVAL 56 DAY) as semana_proj 
    FROM `enap-331414.enapdatasets.vacinacao`
    ) as vp ON v.sigla_uf=vp.sigla_uf_proj and v.vacina_apelido=vp.vacina_apelido_proj and vp.semana_proj=v.semana 
order by v.sigla_uf, v.vacina, v.semana, vp.semana_proj, vp.sigla_uf_proj, vp.vacina_apelido_proj

''', project_id=project_id)

df_vacina_proj.head()

Unnamed: 0,sigla_uf,vacina_apelido,semana,mes,qt_total,qt_D1,qt_D2,qt_Reforco,qt_Adicional,qt_Unica,qt_D2_Proj,semana_proj
0,,,NaT,NaT,,,,,,,31.0,2021-12-12 00:00:00+00:00
1,,,NaT,NaT,,,,,,,1384.0,2022-01-02 00:00:00+00:00
2,,,NaT,NaT,,,,,,,4.0,2021-11-14 00:00:00+00:00
3,,,NaT,NaT,,,,,,,11.0,2021-12-12 00:00:00+00:00
4,,,NaT,NaT,,,,,,,0.0,2022-01-02 00:00:00+00:00


```
SELECT v.sigla_uf, v.vacina_apelido, v.semana, v.mes, v.qt_total, v.qt_D1, v.qt_D2, v.qt_Reforco, v.qt_Adicional, v.qt_Unica, vp.qt_D2_Proj, vp.semana_proj, vp.sigla_uf_proj, vp. vacina_apelido_proj
FROM `enap-331414.enapdatasets.vacinacao` v
FULL OUTER JOIN (
    SELECT sigla_uf as sigla_uf_proj, vacina_apelido as vacina_apelido_proj, qt_D1 as qt_D2_Proj, DATE_ADD(semana, INTERVAL @qtd_dias_proj_d2 DAY) as semana_proj 
    FROM `enap-331414.enapdatasets.vacinacao`
    ) as vp ON v.sigla_uf=vp.sigla_uf_proj and v.vacina_apelido=vp.vacina_apelido_proj and vp.semana_proj=v.semana 
order by v.sigla_uf, v.vacina, v.semana, vp.semana_proj, vp.sigla_uf_proj, vp.vacina_apelido_proj

```

### Daria para calcular o conjunto de todas as semanas no python/pandas?