In [1]:
import numpy as np
import pandas as pd
import geopandas as gp
import requests
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from cartopy.util import add_cyclic_point
import cartopy.io.shapereader as shpreader
import cartopy.feature as cfeature
from datetime import datetime, timedelta
from matplotlib import pyplot
import matplotlib.pyplot as plt
from matplotlib.tri import Triangulation
import shapely.speedups
from shapely import Polygon
import subprocess
from dataclasses import dataclass
import xarray as xr
import cfgrib
import zipfile
import os
import shutil
import rasterio
from rasterio.transform import from_origin
from rasterio.transform import from_bounds
from rasterio.crs import CRS
#import h3
from pyproj import Transformer
import json
import pprint
#import dask
import eccodes
import pygrib
import psycopg2
from osgeo import gdal
from collections import defaultdict
from scipy.spatial import KDTree

@dataclass
class DateTimeParts:
    """Calendar date and clock time held as separate integer fields."""

    # The field order below is also the positional order of the generated __init__.
    year: int
    month: int
    day: int
    hour: int
    minute: int

    @classmethod
    def from_datetime(cls, dt: datetime):
        """Build an instance from the same-named attributes of ``dt``.

        Only year, month, day, hour and minute are copied; anything finer
        than the minute is not stored.
        """
        parts = (dt.year, dt.month, dt.day, dt.hour, dt.minute)
        return cls(*parts)
    
def windCalc(u,v):
    """Return the wind speed in miles per hour for the components ``u`` and ``v``.

    The speed is ``sqrt(u**2 + v**2)`` scaled by 2.23694 (the m/s -> mph
    factor, so the inputs are presumably in metres per second -- confirm
    against the caller). Works element-wise on NumPy arrays and on scalars.

    Only the speed is computed: a wind direction was never part of the
    return value, and deriving it meant dividing by the speed, which emitted
    divide-by-zero warnings for calm wind (``u == v == 0``).
    """
    wind_abs = np.sqrt(u**2 + v**2)
    return wind_abs * 2.23694  # TO MPH
def K_to_F(temp):
    """Convert a temperature from kelvin to degrees Fahrenheit."""
    celsius = temp - 273.15
    return celsius * (9/5) + 32

def F_to_K(temp):
    """Convert a temperature from degrees Fahrenheit to kelvin."""
    above_freezing = temp - 32
    return above_freezing * 5/9 + 273.15

In [None]:
def _select_byte_ranges(index_lines, conditions):
    """Group the byte ranges of the index lines accepted by ``conditions`` by parameter."""
    byte_ranges = defaultdict(list)
    open_range = None  # range of the previous matching line, still waiting for its end byte

    for line in index_lines:
        fields = line.split(":")
        # Fields used here: [1] = start byte, [3] = parameter, [4:] = level text.
        # Blank or malformed lines are skipped instead of raising IndexError.
        if len(fields) < 4 or not fields[1].strip().isdigit():
            continue
        start_byte = int(fields[1])

        # The message of the previous matching line ends one byte before the
        # next message starts (HTTP byte ranges are inclusive on both ends).
        if open_range is not None and start_byte > open_range['start']:
            open_range['end'] = start_byte - 1
            open_range = None

        param = fields[3].strip()
        level = ":".join(fields[4:]).strip()  # everything after the parameter

        if any(accepts(param, level) for accepts in conditions.values()):
            print(f'Matched {param} {level}: Start byte {start_byte}')
            open_range = {'start': start_byte, 'end': None}
            byte_ranges[param].append(open_range)

    return byte_ranges


def getFilterGrib2(runDate, indexHour, model='NBM'):
    """Download only the TMIN/TMAX messages of one NBM forecast hour.

    The ``.idx`` file that sits next to the GRIB2 file is fetched, the lines
    whose parameter is TMIN or TMAX and whose level text ends with an empty
    ``:``-separated field are selected, and the byte range of each selected
    message is downloaded and concatenated into
    ``data/gribs/<model>/maxmin/<model>-<fff>.grb2`` (relative to the working
    directory). An existing file at that path is removed first.

    Parameters
    ----------
    runDate : object with ``year``, ``month``, ``day`` and ``hour`` attributes
        The model run (a ``datetime`` or ``DateTimeParts``).
    indexHour : int
        Forecast hour; zero-padded to three digits in the URL and file name.
    model : str
        Only ``'NBM'`` (any letter case) is supported.

    Returns
    -------
    (index_list, gribFile)
        ``index_list`` holds the requested ranges as ``"start-end"`` strings,
        grouped by parameter; a message that is last in the index yields the
        open-ended form ``"start-"``. ``gribFile`` is the output path, which
        only exists when at least one message matched.

    Raises
    ------
    ValueError
        For an unsupported ``model``.
    requests.HTTPError
        When the index or a byte-range request does not succeed.
    """
    request_timeout = 60  # seconds, so a stalled connection cannot hang the notebook

    runTime_str = str(runDate.hour).zfill(2)
    fcsthr_str = str(indexHour).zfill(3)
    run_ymd = f'{runDate.year}{str(runDate.month).zfill(2)}{str(runDate.day).zfill(2)}'
    model_key = str(model).lower()

    if model_key == 'nbm':
        url_name = (
            f'https://noaa-nbm-grib2-pds.s3.amazonaws.com/blend.{run_ymd}/{runTime_str}'
            f'/core/blend.t{runTime_str}z.core.f{fcsthr_str}.co.grib2'
        )
    else:
        raise ValueError('Invalid model type')

    index_url = f"{url_name}.idx"
    print(f"Index URL: {index_url}")

    response = requests.get(index_url, timeout=request_timeout)
    # Fail on a missing index instead of trying to parse the error document.
    response.raise_for_status()
    index_content = response.text.splitlines()

    # Selection rules: parameter name plus a level text whose last
    # ':'-separated field is empty.
    # NOTE(review): presumably this excludes entries carrying a trailing
    # qualifier after the forecast period -- confirm against a real index.
    conditions = {
        'TMIN': lambda param, level: param == 'TMIN' and level.split(":")[-1] == '',
        'TMAX': lambda param, level: param == 'TMAX' and level.split(":")[-1] == '',
    }

    byte_ranges = _select_byte_ranges(index_content, conditions)

    # Flatten to "start-end" strings, keeping the per-parameter grouping.
    index_list = []
    for ranges in byte_ranges.values():
        for byte_range in ranges:
            end_text = '' if byte_range['end'] is None else str(byte_range['end'])
            index_list.append(f"{byte_range['start']}-{end_text}")

    gribFile = f"data/gribs/{model_key}/maxmin/{model_key}-{fcsthr_str}.grb2"

    if os.path.exists(gribFile):
        os.remove(gribFile)

    if index_list:
        os.makedirs(os.path.dirname(gribFile), exist_ok=True)
        with open(gribFile, 'wb') as grib_out:
            for byte_range in index_list:
                print(f"Downloading byte range: {byte_range}")
                chunk = requests.get(
                    url_name,
                    headers={'Range': f'bytes={byte_range}'},
                    timeout=request_timeout,
                )
                chunk.raise_for_status()
                grib_out.write(chunk.content)
    else:
        print(f'no matches for forecast hour {fcsthr_str} ')

    return index_list, gribFile

""" modelrun = datetime(2024,10,17,18,0,0)
indexHour = 6
model = 'nbm'
datetime_parts = DateTimeParts.from_datetime(modelrun)
print('getFilterGrib2')
index_list, gribFile = getFilterGrib2(datetime_parts, indexHour)
print(index_list) """

In [None]:
def condition_func(param, level):
    """Scratch copy of the WIND selection rule, defined here so the cell runs on a fresh kernel.

    True when ``param`` is 'WIND', the last ':'-separated field of ``level``
    is empty, and the first field does not start with 's'.
    """
    parts = level.split(":")
    # startswith() instead of [0][0] so an empty first field cannot raise IndexError.
    return param == 'WIND' and parts[-1] == '' and not parts[0].startswith('s')


if condition_func('WIND', ':10 m above ground:6 hour fcst'):
    print('yup')
else:
    print('nope')

In [None]:
# Run-wide settings (identical for every forecast hour).
dir_root = ''
model = 'NBM'
modelrun = datetime(2024, 10, 19, 12, 0, 0)
datetime_parts = DateTimeParts.from_datetime(modelrun)

modelrunTimes, modelforecastTimes, modelforecastSteps = [], [], []

# Fetch forecast hours 12, 18, ..., 240 and keep the paths that exist on disk afterwards.
result_list = []
for indexHour in range(12, 241, 6):
    print('getFilterGrib')
    index_list, result = getFilterGrib2(datetime_parts, indexHour)
    if not os.path.exists(result):
        continue
    result_list.append(result)
    print('result', result)

# Space-separated list of the downloaded files, echoed as it grows.
str_list = ''
for name in result_list:
    str_list = f'{str_list} {name}' if str_list else str_list + name
    print(str_list)

# Clear any merged file left over from a previous run.
latestGrb = f'{dir_root}data/gribs/{model.lower()}/maxmin/{model.lower()}-latest.grb2'
if os.path.exists(latestGrb):
    os.remove(latestGrb)

In [None]:
print(result_list)
latestGrb = f'data/gribs/{model.lower()}/maxmin/{model.lower()}-latest.grb2'

# Start the merged file empty. The append loop that follows walks the whole
# of result_list, so seeding the file with a copy of result_list[1] stored
# that file's messages twice (and raised IndexError when fewer than two
# forecast hours had been downloaded).
os.makedirs(os.path.dirname(latestGrb), exist_ok=True)
with open(latestGrb, 'wb'):
    pass


In [None]:
# Every entry of result_list is appended, so show exactly that list.
print(result_list)
for grib_single in result_list:
    print(grib_single)
    # Argument list instead of a shell string: no quoting problems with paths.
    status = subprocess.call(['wgrib2', '-append', grib_single, '-grib', latestGrb])
    if status != 0:
        # Stop here rather than continue with an incomplete merged file.
        raise RuntimeError(f'wgrib2 exited with status {status} while appending {grib_single}')

In [None]:
# Open the merged max/min file and list every message it contains.
model = 'nbm'
latestGrb = f'data/gribs/{model.lower()}/maxmin/{model.lower()}-latest.grb2'
gribfile = latestGrb

grbs = pygrib.open(gribfile)  # left open: later cells read from `grbs`
for grb in grbs:
    print(grb)

In [None]:
# Re-open the file and pull the maximum / minimum temperature messages.
grbs = pygrib.open(gribfile)
tgrb_max = grbs.select(name='Maximum temperature')
tgrb_min = grbs.select(name='Minimum temperature')
tempgrbs_max, tempgrbs_min = tgrb_max, tgrb_min

# Maximum-temperature messages are listed first, then the minimum ones.
for tmp in [*tempgrbs_max, *tempgrbs_min]:
    print(tmp)

In [None]:
#tgrb = grbs.select(shortName = '2t')
def _dump_message(message, names):
    """Print ``message`` as a header, then ``name, message[name]`` for every name.

    Names that cannot be read as keys are reported as ' does not exist'.
    """
    print(message)
    for name in names:
        try:
            print(name, message[name])
        except Exception:
            # Exception rather than a bare except: a kernel interrupt still stops the loop.
            print(name, ' does not exist')


# Compare two of the 'Maximum temperature' messages key by key.
tgrb = grbs.select(name = 'Maximum temperature')
tsd = tgrb[3]
tsd4 = tgrb[4]
print(tsd)
print(tsd4)
print(dir(tsd))
print(tsd.validDate)

tsd_list = list(dir(tsd))
tsd_keys = tsd.keys()
tsd4_list = list(dir(tsd4))
tsd4_keys = tsd4.keys()

# Each dump is headed by the message itself, so the header always describes
# the message that was actually selected.
_dump_message(tsd, tsd_list)
print('****************************************************')
_dump_message(tsd4, tsd4_list)


In [None]:
# Take the grid geometry from one message of the open file.
grb1 = grbs[1]
print(grb1)
lats, lons = grb1.latlons()

# One (lat, lon) row per grid cell, in flattened grid order, so a row index
# can be used directly on a flattened data array.
grid_points = np.column_stack([lats.reshape(-1), lons.reshape(-1)])

# Nearest-neighbour index over the grid points.
# NOTE(review): distances are plain Euclidean distances in degrees, and this
# assumes the grid longitudes use the same convention as the station file -- confirm.
tree = KDTree(grid_points)

In [8]:

df_coords = pd.read_csv('stations.csv')
coordinates = df_coords[['lat', 'lon']].values

# For every station, the row index of the closest grid point in the KDTree.
nearest_indices = []
for lat, lon in coordinates:
    distance, grid_index = tree.query([lat, lon])
    nearest_indices.append(grid_index)
#nearest_indices = [tree.query([lat, lon])[1] for lat, lon in coordinates]

# Initialize an empty dictionary to hold the time series data
#data = {coord: [] for coord in coordinates}


In [None]:
def createOutput(var, nearest_indices, df_coords):
    """Export a per-station time series of one GRIB variable as CSV and GeoJSON.

    Every message named ``var`` in the notebook-level ``gribfile`` is read,
    converted with ``K_to_F`` and rounded to whole degrees, and sampled at
    each station's nearest grid point.

    Parameters
    ----------
    var : str
        pygrib message name to select, e.g. 'Maximum temperature'.
    nearest_indices : sequence of int
        Index into the flattened grid for each row of ``df_coords``, in row order.
    df_coords : pandas.DataFrame
        Station table with the columns ICAO, lat, lon, CITY and STATE.

    Writes ``pointfcst_<model>.csv`` (``model`` is a notebook-level name),
    ``output_data_<suffix>.geojson`` in the working directory, and a copy of
    the GeoJSON under /var/www/fapi/app/static/data/point/nbm/. The suffix is
    'maxtemp', 'mintemp', or empty for any other ``var``.

    NOTE(review): the CSV name does not depend on ``var``, so calling this
    for a second variable overwrites the CSV of the first.
    """
    # Station metadata, looked up once per ICAO (first row wins on duplicates)
    # instead of re-scanning the DataFrame for every message and station.
    station_meta = {}
    for icao, lat, lon, city, state in zip(df_coords['ICAO'], df_coords['lat'], df_coords['lon'],
                                           df_coords['CITY'], df_coords['STATE']):
        station_meta.setdefault(icao, {'lat': lat, 'lon': lon, 'city': city, 'state': state})

    data = {icao: [] for icao in df_coords['ICAO']}

    # Read from the file opened here (not from whatever handle an earlier
    # cell left in the notebook namespace) and always close it again.
    grbs_all = pygrib.open(gribfile)
    try:
        for grb in grbs_all.select(name=var):
            print(grb)
            var_name = grb.name
            valid_time = grb.validDate

            # Flattened so that nearest_indices can index it directly.
            data_values = K_to_F(grb.values.ravel()).round(0)

            for icao, idx in zip(df_coords['ICAO'], nearest_indices):
                data[icao].append({
                    'time': valid_time,
                    'variable': var_name,
                    'value': int(data_values[idx]),
                    **station_meta[icao],
                })
    finally:
        grbs_all.close()

    # Long-format table: one row per station and valid time.
    trows = [
        {
            'ICAO': icao,
            'time': record['time'],
            'variable': record['variable'],
            'value': record['value'],
            'lat': record['lat'],
            'lon': record['lon'],
            'CITY': record['city'],
            'STATE': record['state'],
        }
        for icao, records in data.items()
        for record in records
    ]
    result_df = pd.DataFrame(trows)
    result_df.to_csv(f'pointfcst_{model}.csv', index=False)
    print(result_df)

    # One GeoJSON feature per station, carrying its whole time series.
    features = []
    for icao, records in data.items():
        meta = station_meta[icao]
        time_series = [{
            'time': str(record['time']),
            'variable': str(record['variable']),
            'value': str(record['value'])
        } for record in records]

        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                # GeoJSON positions must be numbers ([lon, lat]), not strings.
                "coordinates": [float(meta['lon']), float(meta['lat'])]
            },
            "properties": {
                "ICAO": icao,
                "CITY": meta['city'],
                "STATE": meta['state'],
                "time_series": time_series
            }
        })

    geojson_data = {
        "type": "FeatureCollection",
        "features": features
    }

    # File-name suffix for the two supported variables.
    if var == 'Maximum temperature':
        var_out = 'maxtemp'
    elif var == 'Minimum temperature':
        var_out = 'mintemp'
    else:
        var_out = ''

    # Same document twice: working directory first, then the web app's static folder.
    output_paths = (
        f'output_data_{var_out}.geojson',
        f'/var/www/fapi/app/static/data/point/nbm/output_data_{var_out}.geojson',
    )
    for output_path in output_paths:
        with open(output_path, 'w') as f:
            json.dump(geojson_data, f, indent=4)

    print("GeoJSON with time series exported successfully!")

# Reload the stations, map each one to its nearest grid point, then export
# the maximum and the minimum temperature series (in that order).
df_coords = pd.read_csv('stations.csv')
coordinates = df_coords[['lat', 'lon']].values
nearest_indices = [tree.query([lat, lon])[1] for lat, lon in coordinates]

for temperature_field in ('Maximum temperature', 'Minimum temperature'):
    createOutput(temperature_field, nearest_indices, df_coords)

In [None]:
# NOTE(review): `data` is not assigned at notebook level in the cells shown
# here (only inside createOutput) -- confirm where it is expected to come from.
# Expected shape: {ICAO: [record, ...]}, each record holding
# 'lat', 'lon', 'time', 'variable' and 'value'.

# One feature per ICAO; the position is taken from the station's first record.
features = [
    {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [str(records[0]['lon']), str(records[0]['lat'])]
        },
        "properties": {
            "ICAO": icao,
            "time_series": [
                {
                    'time': str(record['time']),
                    'variable': str(record['variable']),
                    'value': str(record['value'])
                }
                for record in records
            ]
        }
    }
    for icao, records in data.items()
]

geojson_data = {"type": "FeatureCollection", "features": features}

with open('output_data5.geojson', 'w') as f:
    json.dump(geojson_data, f, indent=4)

print("GeoJSON with time series exported successfully!")

In [None]:
# NOTE(review): `result_df` is not assigned at notebook level in the cells
# shown here (only inside createOutput) -- confirm where it is expected to come from.
result_df.to_csv('output_data.csv', index=False)

# One point feature per DataFrame row (i.e. per station and time step).
features = [
    {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [row['lon'], row['lat']]
        },
        "properties": {
            "ICAO": row['ICAO'],
            "time": str(row['time']),
            "variable": row['variable'],
            "value": row['value']
        }
    }
    for row_label, row in result_df.iterrows()
]

geojson_data = {"type": "FeatureCollection", "features": features}

with open('output_data.geojson', 'w') as f:
    json.dump(geojson_data, f, indent=4)

print("Data exported successfully as CSV and GeoJSON!")

In [None]:
# Read the exported file back to check its structure.
with open('output_data.geojson', 'r') as f:
    geojson_data = json.loads(f.read())

# All features, then the properties of the last one.
print(geojson_data['features'])
print(geojson_data['features'][-1]['properties'])

In [None]:
# Load the CSV file into a DataFrame
df = pd.read_csv('stations2.csv')

# Print the original DataFrame
print("Original DataFrame:")
print(df)

# Define the custom order for the REGION column
regions = ['SOUTH', 'CENTRAL', 'NORTH', 'WEST']

# Convert the 'REGION' column to a categorical type with the specified order
df['REGION'] = pd.Categorical(df['REGION'], categories=regions, ordered=True)

# Sort the DataFrame by 'REGION' (custom order) and 'CITY' (alphabetically)
df_sorted = df.sort_values(['REGION', 'CITY'])

# Print the sorted DataFrame
print("Sorted DataFrame by REGION and CITY:")
print(df_sorted)