Obtenção dos dados

In [18]:
import ee

# Initialize the Earth Engine client. The original order (Initialize, then
# Authenticate) fails on a machine without stored credentials, because
# Initialize raises before Authenticate is ever reached. Try to initialize
# first and only fall back to the authentication flow when that fails.
try:
    ee.Initialize()
except Exception:  # no usable stored credentials: authenticate, then retry
    ee.Authenticate()
    ee.Initialize()

In [82]:
import os
import sys
import urllib.request

sys.path.append("../src") 

import pandas as pd

from data.make_ndci_ndvi import add_s2a_ndci_ndvi
from data.make_ndci_ndvi import add_id_date
from data.make_ndci_ndvi import mask_s2a_clouds

In [83]:
# Outer ring of the study-area polygon. The ring is closed: its first and last
# vertices are identical. Pairs are presumably [longitude, latitude] - confirm
# against the ee.Geometry.Polygon documentation.
_region_ring = [
    [-51.21646818059103, -30.013957938688517],
    [-51.21572789090292, -30.013855747123724],
    [-51.21486958401815, -30.013298336735033],
    [-51.214097107821864, -30.011923377716094],
    [-51.21371086972372, -30.011217310272695],
    [-51.214086378985805, -30.01047407595533],
    [-51.215556229525966, -30.009888775010992],
    [-51.21617850201742, -30.01054839963767],
    [-51.21665057080404, -30.010920017214005],
    [-51.21725138562338, -30.012257828961395],
    [-51.2176161660494, -30.01333549752507],
    [-51.21691879170553, -30.013744265296616],
    [-51.21646818059103, -30.013957938688517],
]

# Polygon geometry used to filter, clip and reduce the imagery below.
region = ee.Geometry.Polygon([_region_ring])

def clp_region(img):
    """Return ``img`` clipped to the module-level ``region`` polygon."""
    clipped = img.clip(region)
    return clipped

In [84]:
def reduce(img):
    """Extract the mean, median, minimum, maximum and standard deviation of a band.

    The statistics are computed with ``reduceRegions`` over the module-level
    ``region`` at ``scale=20``. Every resulting feature receives the image's
    ``millis`` and ``date`` properties, and ``system:time_start`` is copied
    from the image onto the returned object.
    """
    # Start from the mean and chain the remaining statistics onto it, in the
    # same order as before: min, max, median, stdDev.
    stats_reducer = ee.Reducer.mean()
    for extra in (ee.Reducer.min(), ee.Reducer.max(),
                  ee.Reducer.median(), ee.Reducer.stdDev()):
        stats_reducer = stats_reducer.combine(reducer2=extra, sharedInputs=True)

    per_region = img.reduceRegions(
        collection=region,
        reducer=stats_reducer,
        scale=20,
    )

    def tag_with_image_time(feature):
        # Carry the image's time identifiers onto each output feature.
        tagged = feature.set({"millis": img.get("millis")})
        return tagged.set({"date": img.get("date")})

    per_region = per_region.map(tag_with_image_time)

    return per_region.copyProperties(img, ["system:time_start"])

def create_df(img, band):
    """Create a DataFrame from statistics extracted from an image collection.

    Parameters
    ----------
    img : collection object exposing ``select`` and ``map``; each element is
        passed through ``reduce``.
    band : str
        Name of the band to select; also used as the prefix of the statistic
        columns (``<band>_min``, ``<band>_max``, ...).

    Returns
    -------
    pandas.DataFrame
        Columns ``millis``, ``date`` and one column per statistic, with
        ``date`` parsed as ``%Y-%m-%d``.
    """
    stat_names = ["min", "max", "mean", "median", "stdDev"]
    properties = ["millis", "date"] + stat_names

    # One feature per image/region pair, ordered by ascending date.
    features = img.select(band).map(reduce).flatten()
    features = features.sort("date", True).select(properties)

    # Gather the selected properties into a single list of rows.
    rows = features.reduceColumns(
        ee.Reducer.toList(len(properties)), properties
    ).values().get(0)

    column_names = ["millis", "date"] + [band + "_" + stat for stat in stat_names]
    df = pd.DataFrame(rows.getInfo(), columns=column_names)

    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")

    return df

In [87]:
# Build the image collection for the study area: keep scenes that intersect
# the region and report at most 30 in CLOUDY_PIXEL_PERCENTAGE, then apply the
# project helpers (cloud mask, clip, NDCI/NDVI bands, id/date properties).
s2a_amostragem = (
    ee.ImageCollection("COPERNICUS/S2_SR")
    .filterBounds(region)
    .filter(ee.Filter.lte("CLOUDY_PIXEL_PERCENTAGE", 30))
    .map(mask_s2a_clouds)
    .map(clp_region)
    .map(add_s2a_ndci_ndvi)
    .map(add_id_date)
)

# One statistics table per index, joined on the shared time keys.
df_ndvi = create_df(s2a_amostragem, "NDVI")
df_ndci = create_df(s2a_amostragem, "NDCI")
df_gee = pd.merge(df_ndvi, df_ndci, on=["millis", "date"])

In [106]:
import urllib

# Source archive and local cache directory for the Vigilancia dataset.
VIGILANCIA_URL = "https://sage.saude.gov.br/dados/sisagua/controle_mensal_demais_parametros.zip"
DOWNLOAD_DIR = os.path.join("..", "data", "external", "vigilancia")
# Default filters applied to the "Município" and
# "Nome do manancial superficial" columns.
MUNICIPIO = "PORTO ALEGRE"
MANANCIAL = "GUAIBA"

def read_vigilancia(download_dir=DOWNLOAD_DIR, municipio=MUNICIPIO, manancial=MANANCIAL, url=VIGILANCIA_URL):
    """Read the Vigilancia data (monthly control, other parameters).

    The zip archive is looked up in ``download_dir`` and downloaded from
    ``url`` only when it is not already there.

    Parameters
    ----------
    download_dir : str
        Directory holding (or receiving) the zip archive. Created if missing.
    municipio : str
        Value the "Município" column must equal.
    manancial : str
        Value the "Nome do manancial superficial" column must equal.
    url : str
        Location the archive is downloaded from when it is not cached.

    Returns
    -------
    pandas.DataFrame
        The rows matching both ``municipio`` and ``manancial``.
    """
    # Use the caller-supplied directory; the original always read the global
    # DOWNLOAD_DIR and silently ignored this parameter.
    filename = os.path.join(download_dir, "controle_mensal_demais_parametros.zip")

    if not os.path.isfile(filename):
        # urlretrieve does not create missing directories.
        if download_dir:
            os.makedirs(download_dir, exist_ok=True)
        # Download to a temporary name and rename on success, so that an
        # interrupted transfer is never mistaken for a valid cached archive.
        partial = filename + ".part"
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, filename)

    vigilancia = pd.read_csv(
        filename,
        compression="zip",
        sep=";",
        decimal=",",
        encoding="latin-1",
        low_memory=False,
        parse_dates=["Data de preenchimento do relatório mensal",
                     "Data da coleta"],
    )

    matches = (
        (vigilancia["Município"] == municipio)
        & (vigilancia["Nome do manancial superficial"] == manancial)
    )

    return vigilancia.loc[matches, :]

In [107]:
# Load the Vigilancia records using the default directory, URL and
# municipality / water-source filters of read_vigilancia.
df_vigi = read_vigilancia()

In [110]:
# Keep only the rows whose "Parâmetro" is "Cianobactérias" and whose
# "Unidade" is "Total de cianobactérias".
is_cyano = df_vigi["Parâmetro"] == "Cianobactérias"
is_total = df_vigi["Unidade"] == "Total de cianobactérias"
ciano_vigi = df_vigi.loc[is_cyano & is_total]

In [113]:
# Write both processed tables to CSV without the DataFrame index.
for frame, csv_path in (
    (df_gee, "../data/processed/gee_coleta.csv"),
    (ciano_vigi, "../data/processed/ciano_vigi.csv"),
):
    frame.to_csv(csv_path, index=False)