# Test reading a set of rainfields netCDF rain_rate files from the MongoDB database

In [1]:
import datetime
import os
import gridfs as fs
from pymongo import MongoClient
from netCDF4 import Dataset
from package import rain_base_fs


In [2]:
def _utc_epoch_seconds(iso_text):
    """Parse an ISO-8601 string and return whole seconds since the Unix epoch."""
    moment = datetime.datetime.fromisoformat(iso_text)
    if moment.tzinfo is None:
        # Naive timestamps are interpreted as UTC; an explicit offset in the
        # string is honoured rather than overwritten.
        moment = moment.replace(tzinfo=datetime.timezone.utc)
    return int(moment.timestamp())


def get_files(**kwargs):
    """Return the GridFS filenames that match the search criteria.

    Keyword Args:
        config: Mapping with the keys ``START_DATE`` and ``END_DATE``
            (ISO-8601 strings, treated as UTC when they carry no offset),
            ``STN_ID`` (convertible to ``int``), ``VARIABLE`` and
            ``DB_NAME``.

    Returns:
        A list of filenames ordered by ascending ``metadata.valid_time``.
        The list is empty (and "None found" is printed) when nothing matches.

    Raises:
        TypeError: If ``config`` is not supplied.
        KeyError: If a required key is missing from ``config``.
        ValueError: If a date or the station id cannot be parsed.
    """
    config = kwargs.get("config")
    if config is None:
        raise TypeError("get_files() requires a 'config' keyword argument")

    # Get the configuration parameters and build the query before opening a
    # connection, so a malformed config never leaves a client open.
    query = {
        "metadata.valid_time": {
            "$gte": _utc_epoch_seconds(config["START_DATE"]),
            "$lte": _utc_epoch_seconds(config["END_DATE"]),
        },
        "metadata.station_id": int(config["STN_ID"]),
        "metadata.variable": config["VARIABLE"],
    }
    db_name = config["DB_NAME"]

    client = MongoClient()
    try:
        cursor = client[db_name].fs.files.find(query).sort(
            [("metadata.valid_time", 1)])
        # find() always returns a cursor (never None), so "nothing found"
        # can only be detected after the cursor has been consumed.
        file_list = [doc["filename"] for doc in cursor]
    finally:
        # Release the connection even if the query or iteration fails.
        client.close()

    if not file_list:
        print("None found")
    return file_list


In [3]:
# Search settings for the notebook: a one-hour window of rain_rate files
# for station 310 in the "radar" database.
config = dict(
    START_DATE="2021-01-28T00:00:00",
    END_DATE="2021-01-28T01:00:00",
    PRODUCT="prcp-crate.nc",
    STN_ID="310",
    DB_NAME="radar",
    VARIABLE="rain_rate",
)


In [4]:
# Get the list of file names that match the search described by `config`,
# then print it so the matches can be checked by eye.
file_list = get_files(config=config)
print(file_list)

['310_20210128_000000.prcp-crate.nc', '310_20210128_000500.prcp-crate.nc', '310_20210128_001000.prcp-crate.nc', '310_20210128_001500.prcp-crate.nc', '310_20210128_002000.prcp-crate.nc', '310_20210128_002500.prcp-crate.nc', '310_20210128_003000.prcp-crate.nc', '310_20210128_003500.prcp-crate.nc', '310_20210128_004000.prcp-crate.nc', '310_20210128_004500.prcp-crate.nc', '310_20210128_005000.prcp-crate.nc', '310_20210128_005500.prcp-crate.nc', '310_20210128_010000.prcp-crate.nc']


In [7]:
# Loop over the filenames, pull each file's bytes out of GridFS and open
# them as an in-memory netCDF dataset to print a few identifying fields.
client = MongoClient()
try:
    db = client[config["DB_NAME"]]
    radar_fs = fs.GridFS(db)
    for filename in file_list:
        file = radar_fs.find_one({"filename": filename})
        if file is None:
            # find_one returns None when no stored file has this name;
            # report it and carry on with the remaining files.
            print(f"{filename}: not found in GridFS, skipping")
            continue
        data = file.read()

        # With memory= set, netCDF4 reads from the supplied bytes rather
        # than from a file on disk. The context manager closes the dataset
        # even if reading an attribute fails.
        with Dataset(filename, mode="r", memory=data) as ncFile:
            station_id = int(ncFile.getncattr("station_id"))
            station_name = str(ncFile.getncattr("station_name"))
            vtime = int(ncFile['valid_time'][0].item())
            valid_time = datetime.datetime.fromtimestamp(
                vtime, tz=datetime.timezone.utc)
            print(f"{station_id=}, {station_name=},{valid_time.isoformat()}")
finally:
    # Always release the database connection, including on error.
    client.close()


station_id=310, station_name='Ausm310',2021-01-28T00:00:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:05:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:10:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:15:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:20:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:25:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:30:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:35:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:40:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:45:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:50:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T00:55:00+00:00
station_id=310, station_name='Ausm310',2021-01-28T01:00:00+00:00
