# This code maps the geographical coordinates of each data point to the name of the region that contains it

In this example, a GeoJSON file containing the boundaries of the French regions is used.

* Step 1: read the GRIB (.grib) files, a format commonly used to store meteorological data

In [None]:
import pygrib
import pandas as pd

def read_grib(path):
    """Read a GRIB file and return its messages as a pandas DataFrame.

    For every message in the file, the validity date, parameter name,
    parameter units, values and latitude/longitude grids are collected into
    one row. The rows are then expanded with ``row_converter``.

    Args:
        path: Path of the GRIB file to read (a ``str``).

    Returns:
        The ``pandas.DataFrame`` produced by concatenating the output of
        ``row_converter`` for every row. Its exact layout depends on
        ``row_converter``, which is defined outside this function.

    Raises:
        ValueError: Raised by ``pd.concat`` when the file contains no
            message, because there is nothing to concatenate.
    """
    # Escape non-ASCII characters before handing the path to pygrib.
    # NOTE(review): an escaped path no longer names the same file when the
    # original contains non-ASCII characters -- confirm this is intended.
    encoded_file_path = path.encode('unicode_escape').decode('ascii')

    grbs = pygrib.open(encoded_file_path)
    try:
        data = []
        for grb in grbs:
            # latlons() returns the (latitudes, longitudes) pair; compute it
            # once per message instead of once per coordinate.
            lats, lons = grb.latlons()
            data.append({
                'validity_date': grb.validityDate,
                'parameter_name': grb.parameterName,
                'parameter_units': grb.parameterUnits,
                'value': grb.values,
                'latitude': lats,
                'longitude': lons,
                # Add more fields as needed
            })
    finally:
        # Always release the underlying file handle, even if a message
        # cannot be decoded.
        grbs.close()

    df = pd.DataFrame(data)

    # Expand rows containing lists of values.
    # NOTE(review): the expansion is applied twice, exactly as before;
    # presumably one pass per array dimension of the gridded values --
    # confirm against row_converter before removing a pass.
    for _ in range(2):
        df = pd.concat(
            [row_converter(row) for _, row in df.iterrows()],
            ignore_index=True,
        )

    return df


* Step 2: map each data point to its region

In [None]:
def get_regions(
    path,
    region_path="../data/fichiers_open_data/meteo/region.geojson",
):
    """Attach the name of the enclosing region to every point of a GRIB file.

    The GRIB file is read with ``read_grib``, each latitude/longitude pair is
    turned into a point geometry (EPSG:4326) and spatially joined (left join)
    with the region polygons loaded from ``region_path``.

    Args:
        path: Path of the GRIB file, forwarded to ``read_grib``.
        region_path: Path of the GeoJSON file describing the regions. It is
            expected to provide the ``code`` and ``nom`` columns. Defaults to
            the French regions file used so far.

    Returns:
        A data frame without geometry column in which ``date``, ``value``,
        ``nom`` and ``parameter_units`` are renamed to ``DATE``, ``VALUE``,
        ``REGION`` and ``UNITS``, with the point coordinates stored in
        ``LONGITUDE`` and ``LATITUDE``. Rows containing any missing value
        (for instance points that matched no region) are dropped and the
        index is reset.
    """
    meteo = read_grib(path)

    # Build one point per row; shapely expects (x, y) = (longitude, latitude).
    geometry = [Point(xy) for xy in zip(meteo['longitude'], meteo['latitude'])]

    # The coordinates now live in the geometry, so the raw columns can go.
    meteo = meteo.drop(["latitude", "longitude"], axis=1)

    # validity_date is parsed as YYYYMMDD.
    meteo["Date"] = pd.to_datetime(meteo["validity_date"], format="%Y%m%d")

    meteo = GeoDataFrame(meteo, crs="EPSG:4326", geometry=geometry)

    # Lowercase column names so the renaming below is case-independent.
    meteo.columns = [col_name.lower() for col_name in meteo.columns]

    # Sanity check: the column manipulation must not have downgraded the type.
    assert isinstance(meteo, geopandas.geodataframe.GeoDataFrame)

    region = geopandas.read_file(region_path)
    assert isinstance(region, geopandas.geodataframe.GeoDataFrame)

    # Left join: every meteo point is kept, with region attributes when the
    # point matches a region (geopandas' default join predicate is used).
    meteo_region = geopandas.sjoin(
        left_df=meteo,
        right_df=region,
        how="left",
    )

    # Extract the coordinates before the geometry column is dropped.
    points = meteo_region["geometry"]

    meteo_region = (
        meteo_region
        .drop(["index_right", "code"], axis=1)
        .rename(columns={
            "date": "DATE",
            "value": "VALUE",
            "nom": "REGION",
            "parameter_units": "UNITS",
        })
        .assign(LONGITUDE=points.x, LATITUDE=points.y)
        .drop(["geometry"], axis=1)
        .dropna()
        .reset_index(drop=True)
    )
    return meteo_region
