## Generación de la base de datos del Parque Nacional Cerro Saroche

**PROYECTO:** SISTEMA PARA EL SEGUIMIENTO DE ECOSISTEMAS VENEZOLANOS \
**AUTOR:** Javier Martinez

In [1]:
import rioxarray 
import xarray

import geopandas
from pyproj.crs import CRS

import pandas as pd

import os

Cambiando directorio de trabajo

In [2]:
# Move one level up so that the relative paths used below resolve.
# NOTE: this cell is not idempotent - every re-run climbs one more directory,
# so run it exactly once per kernel session.
print('> Directorio actual: ', os.getcwd())  
os.chdir('../')
print('> Directorio actual: ', os.getcwd()) 

> Directorio actual:  /media/javier/Compartida28/doctorado/gee-metview/cerro_saroche/code
> Directorio actual:  /media/javier/Compartida28/doctorado/gee-metview/cerro_saroche


### Proyección

In [3]:
# Geographic CRS of the precipitation grid, built from its WKT definition.
# The WKT is split over several adjacent literals purely for readability;
# Python concatenates them into the same single string.
precipitacion_crs = CRS.from_wkt(
    'GEOGCS["Coordinate System imported from GRIB file",'
    'DATUM["unnamed",SPHEROID["Sphere",6367470,0]],'
    'PRIMEM["Greenwich",0],'
    'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],'
    'AXIS["Latitude",NORTH],AXIS["Longitude",EAST]]'
)

### Polígonos

In [4]:
# Shapefiles read into GeoDataFrames: one under polygons/park and one under
# polygons/rectangle (paths are relative to the current working directory).
park_poligono = geopandas.read_file('./polygons/park/cerro_saroche.shp')
parque_rectangulo = geopandas.read_file('./polygons/rectangle/rectangulo_cerro_saroche.shp')

### Paths

In [6]:
# Input NetCDF files, given relative to the current working directory.
path_precipitacion = './cdsapi/precipitacion_regrilla.nc'
path_elevacion = './SRTMGL3/elevacion_regrilla.nc'
path_ndvi = './ndvi/ndvi_regrilla.nc'

### Precipitación

In [7]:
# Open the precipitation NetCDF with rioxarray (masked=True requests masked
# nodata values) and display the resulting object.
precipitacion_rds = rioxarray.open_rasterio(path_precipitacion, masked=True)
precipitacion_rds

In [8]:
def id_point_format(x,y):
  """
  Return the id (1-15) of the grid centroid located at longitude ``x`` and
  latitude ``y``, or ``None`` when the point is not one of the 15 centroids.

  Ids are numbered row by row: from north (10.31) to south (10.11) and,
  inside each row, from west (-69.83) to east (-69.43).

  Coordinates are rounded to two decimals before the lookup, so values that
  carry floating-point noise (e.g. -69.4300001) still resolve to their
  centroid instead of silently returning ``None``.
  """
  longitudes = (-69.83, -69.73, -69.63, -69.53, -69.43)  # west -> east
  latitudes = (10.31, 10.21, 10.11)                      # north -> south

  try:
    lon, lat = round(x, 2), round(y, 2)
  except TypeError:
    # Non-numeric input (e.g. None) cannot match any centroid.
    return None

  # NaN never compares equal to a grid value, so it falls through to None.
  if lon not in longitudes or lat not in latitudes:
    return None

  return latitudes.index(lat) * len(longitudes) + longitudes.index(lon) + 1

In [9]:
# Columns kept from the table derived from the precipitation raster.
columns_precipitacion = ['time', 'x', 'y', 'precipitacion_mm']

# Flatten the raster into one row per (time, x, y), fix the dtypes and trim
# the coordinates to six decimals.
pd_precipitacion = (
    precipitacion_rds
    .to_dataframe()
    .reset_index()[columns_precipitacion]
    .astype({'time': int, 'x': float, 'y': float})
    .round({'x': 6, 'y': 6})
)

# Grid-cell id looked up from the coordinates rounded to two decimals.
pd_precipitacion['id_point'] = (
    pd_precipitacion[['x', 'y']]
    .round(2)
    .apply(lambda fila: id_point_format(x=fila.x, y=fila.y), axis=1)
    .astype(int)
)
pd_precipitacion['precipitacion_mm'] = pd_precipitacion['precipitacion_mm'].astype(float)

# Discard rows with any missing value.
pd_precipitacion = pd_precipitacion.dropna()

pd_precipitacion.head(10)

Unnamed: 0,time,x,y,precipitacion_mm,id_point
0,719163,-69.83,10.31,0.913065,1
1,719163,-69.73,10.31,0.958915,2
2,719163,-69.63,10.31,1.026073,3
3,719163,-69.53,10.31,1.095035,4
4,719163,-69.43,10.31,1.203287,5
5,719163,-69.83,10.21,1.033501,6
6,719163,-69.73,10.21,1.10399,7
7,719163,-69.63,10.21,1.159936,8
8,719163,-69.53,10.21,1.212156,9
9,719163,-69.43,10.21,1.276562,10


In [10]:
from datetime import datetime

# First and last period of the precipitation series; `time` holds proleptic
# Gregorian ordinals, so fromordinal turns them back into dates.
print(
    datetime.fromordinal(pd_precipitacion.time.min()),
    datetime.fromordinal(pd_precipitacion.time.max()),
    sep='\n',
)

1970-01-01 00:00:00
2022-05-01 00:00:00


### Elevación

In [11]:
# Open the elevation NetCDF with rioxarray (masked=True requests masked
# nodata values) and display the resulting object.
elevacion_rds = rioxarray.open_rasterio(path_elevacion, masked=True)
elevacion_rds

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


In [12]:
# Columns kept from the table derived from the elevation raster.
columns_elevacion = ['x', 'y', 'elevacion_media', 'elevacion_mediana', 'elevacion_maxima']

# Flatten the raster into one row per (x, y); every kept column is cast to
# float and the coordinates are trimmed to six decimals.
# NOTE(review): the rendered output of this cell shows x/y values such as
# 0.5, 1.5, ... rather than longitudes/latitudes, so a later join on (x, y)
# against the precipitation table cannot match - confirm the file's
# georeferencing.
pd_elevacion = (
    elevacion_rds
    .to_dataframe()
    .reset_index()[columns_elevacion]
    .astype(float)
    .round({'x': 6, 'y': 6})
)

pd_elevacion.head(10)

Unnamed: 0,x,y,elevacion_media,elevacion_mediana,elevacion_maxima
0,0.5,0.5,508.541046,491.0,921.0
1,1.5,0.5,625.942932,614.0,1000.0
2,2.5,0.5,731.954834,731.0,1025.0
3,3.5,0.5,761.12915,737.0,1103.0
4,4.5,0.5,726.967285,709.0,1202.0
5,0.5,1.5,604.697083,558.0,1151.0
6,1.5,1.5,652.016235,636.0,1114.0
7,2.5,1.5,850.282715,839.0,1161.0
8,3.5,1.5,769.925049,764.0,1025.0
9,4.5,1.5,919.076721,895.0,1321.0


### NDVI

In [13]:
# Open the NDVI NetCDF with rioxarray (masked=True requests masked nodata
# values) and display the resulting object.
ndvi_rds = rioxarray.open_rasterio(path_ndvi, masked=True)
ndvi_rds

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


In [14]:
# Columns kept from the table derived from the NDVI raster.
columns_ndvi = ['time', 'x', 'y', 'ndvi_media', 'ndvi_mediana', 'ndvi_maxima']

# Flatten the raster into one row per (time, x, y): `time` becomes an integer,
# everything else a float, and the coordinates are trimmed to six decimals.
# NOTE(review): the rendered output of this cell shows x/y values such as 0.5
# rather than longitudes/latitudes, so a later join on (time, x, y) against the
# precipitation table cannot match - confirm the file's georeferencing.
pd_ndvi = (
    ndvi_rds
    .to_dataframe()
    .reset_index()[columns_ndvi]
    .astype({columna: (int if columna == 'time' else float) for columna in columns_ndvi})
    .round({'x': 6, 'y': 6})
)

pd_ndvi.head(10)

Unnamed: 0,time,x,y,ndvi_media,ndvi_mediana,ndvi_maxima
0,734503,0.5,0.5,,,
1,734534,0.5,0.5,,,
2,734563,0.5,0.5,,,
3,734594,0.5,0.5,,,
4,734624,0.5,0.5,,,
5,734655,0.5,0.5,,,
6,734685,0.5,0.5,,,
7,734716,0.5,0.5,,,
8,734747,0.5,0.5,,,
9,734777,0.5,0.5,,,


#### Integrando Bases

In [15]:
from datetime import datetime
import warnings

# Left-join NDVI onto precipitation by period and coordinates, append the NDVI
# rows that are newer than the last precipitation period, then attach the
# (time-invariant) elevation by coordinates.
# NOTE(review): the rows appended from pd_ndvi carry no id_point, so the
# id_point filter below removes them again - confirm whether they should be
# given an id_point instead.
pd_integracion = pd.merge(pd_precipitacion, pd_ndvi, on = ['time','x','y'], how='left')
pd_integracion = pd.concat([ pd_integracion, pd_ndvi[pd_ndvi.time > pd_precipitacion.time.max()] ])\
                   .merge(pd_elevacion, on = ['x','y'], how='left')\
                   .rename(columns={"x": "longitud", "y": "latitud"})

# Elevation does not depend on time, so an entirely empty column means that the
# coordinate join matched nothing (e.g. one table holds pixel coordinates and
# the other longitude/latitude). Report it instead of failing silently.
if len(pd_integracion) and pd_integracion['elevacion_media'].isna().all():
    warnings.warn(
        "No elevation value was matched on (x, y): check that pd_elevacion and "
        "pd_ndvi use the same longitude/latitude coordinates as pd_precipitacion."
    )

# .copy() gives an independent frame, so the column assignments below do not
# act on a slice of the previous one (avoids SettingWithCopyWarning).
pd_integracion = pd_integracion[pd_integracion.id_point.notna()].copy()
pd_integracion['id_point'] = pd_integracion['id_point'].astype(int)
# Ordinal of the day on which this table was generated.
pd_integracion['time_actualizacion'] = int(datetime.today().toordinal())
pd_integracion['park'] = 'cerro_saroche'

# Calendar date matching the ordinal stored in `time`.
pd_integracion['periodo'] = pd_integracion['time'].apply(datetime.fromordinal)

pd_integracion.head(10)

Unnamed: 0,time,longitud,latitud,precipitacion_mm,id_point,ndvi_media,ndvi_mediana,ndvi_maxima,elevacion_media,elevacion_mediana,elevacion_maxima,time_actualizacion,park,periodo
0,719163,-69.83,10.31,0.913065,1,,,,,,,738506,cerro_saroche,1970-01-01
1,719163,-69.73,10.31,0.958915,2,,,,,,,738506,cerro_saroche,1970-01-01
2,719163,-69.63,10.31,1.026073,3,,,,,,,738506,cerro_saroche,1970-01-01
3,719163,-69.53,10.31,1.095035,4,,,,,,,738506,cerro_saroche,1970-01-01
4,719163,-69.43,10.31,1.203287,5,,,,,,,738506,cerro_saroche,1970-01-01
5,719163,-69.83,10.21,1.033501,6,,,,,,,738506,cerro_saroche,1970-01-01
6,719163,-69.73,10.21,1.10399,7,,,,,,,738506,cerro_saroche,1970-01-01
7,719163,-69.63,10.21,1.159936,8,,,,,,,738506,cerro_saroche,1970-01-01
8,719163,-69.53,10.21,1.212156,9,,,,,,,738506,cerro_saroche,1970-01-01
9,719163,-69.43,10.21,1.276562,10,,,,,,,738506,cerro_saroche,1970-01-01


### Interpolación

In [16]:
# One frame per grid point: sorted chronologically, with the gaps in
# ndvi_media filled by linear interpolation (the other NDVI columns are left
# as they are).
list_interpolate = []

# groupby yields the points in ascending id_point order; the loop variable is
# deliberately not called `id`, which would shadow the builtin for the rest of
# the kernel session.
for id_point, pd_punto in pd_integracion.groupby('id_point', sort=True):

    pd_punto = pd_punto.sort_values('periodo', ascending=True)
    pd_punto['ndvi_media'] = pd_punto['ndvi_media'].interpolate(method="linear")

    list_interpolate.append(pd_punto)

# Stack the per-point frames and keep only the columns that are exported
# (the helper column `periodo` is dropped).
pd_interpolate = pd.concat(list_interpolate)[['time',
                                            'longitud',
                                            'latitud',
                                            'precipitacion_mm',
                                            'id_point',
                                            'ndvi_media',
                                            'ndvi_mediana',
                                            'ndvi_maxima',
                                            'elevacion_media',
                                            'elevacion_mediana',
                                            'elevacion_maxima',
                                            'time_actualizacion',
                                            'park']]

pd_interpolate.head()

Unnamed: 0,time,longitud,latitud,precipitacion_mm,id_point,ndvi_media,ndvi_mediana,ndvi_maxima,elevacion_media,elevacion_mediana,elevacion_maxima,time_actualizacion,park
0,719163,-69.83,10.31,0.913065,1,,,,,,,738506,cerro_saroche
15,719194,-69.83,10.31,0.081278,1,,,,,,,738506,cerro_saroche
30,719222,-69.83,10.31,0.413783,1,,,,,,,738506,cerro_saroche
45,719253,-69.83,10.31,0.895653,1,,,,,,,738506,cerro_saroche
60,719283,-69.83,10.31,2.90945,1,,,,,,,738506,cerro_saroche


In [17]:
# Sanity check: number of non-null values of every column per grid point.
pd_interpolate.groupby(['longitud','latitud']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,time,precipitacion_mm,id_point,ndvi_media,ndvi_mediana,ndvi_maxima,elevacion_media,elevacion_mediana,elevacion_maxima,time_actualizacion,park
longitud,latitud,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
-69.83,10.11,629,629,629,0,0,0,0,0,0,629,629
-69.83,10.21,629,629,629,0,0,0,0,0,0,629,629
-69.83,10.31,629,629,629,0,0,0,0,0,0,629,629
-69.73,10.11,629,629,629,0,0,0,0,0,0,629,629
-69.73,10.21,629,629,629,0,0,0,0,0,0,629,629
-69.73,10.31,629,629,629,0,0,0,0,0,0,629,629
-69.63,10.11,629,629,629,0,0,0,0,0,0,629,629
-69.63,10.21,629,629,629,0,0,0,0,0,0,629,629
-69.63,10.31,629,629,629,0,0,0,0,0,0,629,629
-69.53,10.11,629,629,629,0,0,0,0,0,0,629,629


In [18]:
# Sanity check: number of non-null values of every column per period.
pd_interpolate.groupby(['time']).count()

Unnamed: 0_level_0,longitud,latitud,precipitacion_mm,id_point,ndvi_media,ndvi_mediana,ndvi_maxima,elevacion_media,elevacion_mediana,elevacion_maxima,time_actualizacion,park
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
719163,15,15,15,15,0,0,0,0,0,0,15,15
719194,15,15,15,15,0,0,0,0,0,0,15,15
719222,15,15,15,15,0,0,0,0,0,0,15,15
719253,15,15,15,15,0,0,0,0,0,0,15,15
719283,15,15,15,15,0,0,0,0,0,0,15,15
...,...,...,...,...,...,...,...,...,...,...,...,...
738156,15,15,15,15,0,0,0,0,0,0,15,15
738187,15,15,15,15,0,0,0,0,0,0,15,15
738215,15,15,15,15,0,0,0,0,0,0,15,15
738246,15,15,15,15,0,0,0,0,0,0,15,15


In [19]:
# First and last period present in the final table.
print(
    datetime.fromordinal(pd_interpolate.time.min()),
    datetime.fromordinal(pd_interpolate.time.max()),
    sep='\n',
)

1970-01-01 00:00:00
2022-05-01 00:00:00


In [20]:
from datetime import datetime

def diff_month(d1, d2):
    """Count the calendar months from ``d2`` up to ``d1``, both end months included.

    Only the ``year`` and ``month`` attributes of the arguments are used.
    """
    years_apart = d1.year - d2.year
    months_apart = d1.month - d2.month
    return 12 * years_apart + months_apart + 1

# Months spanned by the final table, first and last period included.
diff_month(
    datetime.fromordinal(pd_interpolate.time.max()),
    datetime.fromordinal(pd_interpolate.time.min()),
)

629

In [21]:
# Confirm the working directory before writing files through relative paths.
print('> Directorio actual: ', os.getcwd()) 

> Directorio actual:  /media/javier/Compartida28/doctorado/gee-metview/cerro_saroche


In [22]:
import json

# Wrap the records (already serialised by pandas) in an envelope that also
# names the park. The pieces are joined as text, so the numbers keep exactly
# the formatting produced by to_json.
json_data = ''.join([
    '{ "data":',
    pd_interpolate.to_json(orient="records"),
    ',"park" : "cerro_saroche"',
    "}",
])

# Write the payload to disk.
with open('./data/json_data.json', 'w') as outfile:
    outfile.write(json_data)

In [23]:
# Documents for the MongoDB collection: round-tripping through to_json yields
# plain JSON-compatible Python values (missing values come back as None).
documentos = json.loads( pd_interpolate.to_json(orient="records") )

documentos[-1]

{'time': 738276,
 'longitud': -69.43,
 'latitud': 10.11,
 'precipitacion_mm': 0.8790558204,
 'id_point': 15,
 'ndvi_media': None,
 'ndvi_mediana': None,
 'ndvi_maxima': None,
 'elevacion_media': None,
 'elevacion_mediana': None,
 'elevacion_maxima': None,
 'time_actualizacion': 738506,
 'park': 'cerro_saroche'}

### Conexión MONGODB

Cambiando directorio

In [24]:
# Move one more level up; the next cell opens ./config.yml relative to it.
# NOTE: this cell is not idempotent - every re-run climbs one more directory,
# so run it exactly once per kernel session.
print('> Directorio actual: ', os.getcwd())  
os.chdir('../')
print('> Directorio actual: ', os.getcwd()) 

> Directorio actual:  /media/javier/Compartida28/doctorado/gee-metview/cerro_saroche
> Directorio actual:  /media/javier/Compartida28/doctorado/gee-metview


In [25]:
# Configuration
import yaml

# Load the settings from the YAML file; safe_load builds only plain Python
# objects (dicts, lists, scalars).
with open('./config.yml') as stream:
    config = yaml.safe_load(stream)

In [26]:
import pymongo
from urllib.parse import quote_plus

# Credentials and cluster name come from the configuration file.
username = config['MONGO_USER']
password = config['MONGO_PASSWORD']
cluster = config['MONGO_CLUSTER']

# User name and password must be percent-escaped before being embedded in a
# MongoDB URI; otherwise characters such as '@', ':' or '/' make it invalid.
# NOTE(review): assumes config.yml stores the raw (not already escaped) values.
conn_str = (
    f"mongodb+srv://{quote_plus(str(username))}:{quote_plus(str(password))}"
    f"@{cluster}.wsg1gnp.mongodb.net/?retryWrites=true&w=majority"
)
# Give up after 5 s if no server can be selected.
client = pymongo.MongoClient(conn_str, serverSelectionTimeoutMS=5000)

In [27]:
# Select the database (MongoDB creates it lazily, on the first write)
db = client['SSEV']
db.name

'SSEV'

In [28]:
# Handle to the collection that receives the documents
coleccion = db['meteorological']

In [28]:
# coleccion.create_index([("time", pymongo.DESCENDING), 
#                         ("park", pymongo.DESCENDING)],
#                         background=True)

In [29]:
# Upsert the documents: each one is keyed by (time, id_point, park), so a
# re-run updates the existing records instead of duplicating them.
# All upserts travel in one bulk request rather than one round trip per
# document; they are independent of each other, hence ordered=False.
operaciones = [
    pymongo.UpdateOne(
        {"time": doc.get('time'),
         "id_point": doc.get('id_point'),
         "park": "cerro_saroche"},
        {"$set": doc},
        upsert=True,
    )
    for doc in documentos
]

# bulk_write rejects an empty list of operations.
if operaciones:
    coleccion.bulk_write(operaciones, ordered=False)