In [1]:
import h5py, os, shutil
import numpy as np
from scipy.interpolate import NearestNDInterpolator
from sklearn.cluster import KMeans, DBSCAN, MeanShift, OPTICS
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pandas as pd
from tqdm.notebook import tqdm

import warnings
from tables import NaturalNameWarning
# SILENCES *ALL* WARNINGS FROM HERE ON. THE INTENT IS TO HIDE THE SKLEARN KMEANS AND
# NaturalNameWarning MESSAGES, BUT NO CATEGORY OR MODULE IS PASSED, SO EVERY WARNING IS IGNORED.
warnings.filterwarnings('ignore') 

In [3]:
# INPUT / OUTPUT LOCATIONS. THESE ARE ABSOLUTE, MACHINE-SPECIFIC PATHS -- ADJUST BEFORE RUNNING ELSEWHERE.
ras_base_path = r"D:\Desktop\Roughness\hecras\000_V2\NLCD\\"
file_path     = os.path.join(ras_base_path, "D100-00-00.g01.hdf")
ras_out_path  = r"D:\Desktop\Roughness\hecras\000_V2\NLCD_comp\\"
ras_geo_path  = os.path.join(ras_out_path, "D100-00-00.g01")

z_scale          = 2     # SCALING FACTOR FOR Z-AXIS DURING CLUSTERING
plot             = True  # DO WE EXPORT PLOTS FOR EACH XS?
use_mannings     = False # DO NOT USE. NOT IMPLEMENTED
min_num_clusters = 3     # THE MINIMUM NUMBER OF MANNING'S ROUGHNESSES FOR EACH CROSS SECTION
mann_read_sp     = 800   # SPAN FOR MANNINGS REGION. DETERMINES HOW OFTEN WE CLUSTER PER XS (num_clusters = xs_len // mann_read_sp + 1). 
                         # SET TO NONE TO USE MINIMUM FOR ALL XS.

# OPEN THE GEOMETRY HDF READ-ONLY. THE VISIBLE CELLS ONLY READ FROM IT, AND AN EXPLICIT MODE
# AVOIDS DEPENDING ON THE DEFAULT OF THE INSTALLED h5py VERSION.
file = h5py.File(file_path, "r")

# CROSS SECTION ATTRIBUTE TABLE, READ SEPARATELY THROUGH PANDAS FROM THE SAME HDF FILE
file_attributes = pd.read_hdf(file_path, "Geometry/Cross Sections/Attributes")

In [4]:
# LOAD THE CROSS SECTION DATASETS INTO MEMORY. EACH `[:]` READS THE WHOLE DATASET.
# THE *_info ARRAYS ARE PASSED TOGETHER WITH THE VALUE ARRAYS TO getElevations / getMannings LATER ON.
elev_info = file["Geometry/Cross Sections/Station Elevation Info"][:]
elev      = file["Geometry/Cross Sections/Station Elevation Values"][:]
mann_info = file["Geometry/Cross Sections/Manning's n Info"][:]
mann      = file["Geometry/Cross Sections/Manning's n Values"][:]

In [5]:
# DUPLICATE THE PROJECT FOLDER SO EDITS GO TO THE COPY (copy_folder IS DEFINED OUTSIDE THE
# CELLS SHOWN HERE -- PRESUMABLY A RECURSIVE FOLDER COPY; CONFIRM).
copy_folder(ras_base_path, ras_out_path)

# READ THE COPIED TEXT GEOMETRY FILE LINE BY LINE.
# A DEDICATED HANDLE NAME IS USED SO THE OPEN h5py HANDLE `file` IS NOT CLOBBERED,
# WHICH KEEPS THE DATASET-LOADING CELL RE-RUNNABLE AND THE HDF HANDLE CLOSABLE.
with open(ras_geo_path, 'r') as geo_file:
    lines = geo_file.readlines()

In [6]:
# FOR EVERY CROSS SECTION (XS): CLUSTER ITS STATION/ELEVATION POINTS WITH KMEANS, COMPOSE ONE
# MANNING'S N PER CLUSTER, AND COLLECT THE GEOMETRY-FILE TEXT FOR THAT XS IN output_text_lines
# (ONE ENTRY PER XS, IN XS ORDER).
if plot: 
    plot_path = os.path.join(ras_out_path, "xs_plots/")
    os.makedirs(plot_path, exist_ok=True)

# ONLY THE (NOT IMPLEMENTED) use_mannings PATH WOULD NEED THIS; KEPT SO THE NAME STAYS DEFINED
scaler     = StandardScaler()

MAX_MANNING_REGIONS = 19  # HEC-RAS MAX NUMBER OF CLUSTERS (VALUE TAKEN FROM THE ORIGINAL CODE)
KMEANS_SEED         = 0   # FIXED SEED SO A RE-RUN PRODUCES THE SAME REGIONS

indices = []
values  = []

output_text_lines = []

for idx in tqdm(range(elev_info.shape[0])):
    
    elevations = getElevations(idx, elev_info, elev)
    mannings   = getMannings(idx, mann_info, mann)
    
    xs_len = np.max(elevations[:, 0])

    # CALCULATE HOW MANY CLUSTERS WE NEED
    if mann_read_sp is not None:
        num_clusters = int(xs_len // mann_read_sp + 1)
    else:
        num_clusters = min_num_clusters

    # ENFORCE THE MINIMUM, THEN THE HEC-RAS MAXIMUM (THE CAP NOW ALSO APPLIES WHEN mann_read_sp IS NONE)
    num_clusters = min(max(num_clusters, min_num_clusters), MAX_MANNING_REGIONS)

    # CHECK IF WE ACTUALLY NEED TO SUMMARIZE ANYTHING
    if mannings.shape[0] <= num_clusters:
        print(f"{title(idx, file_attributes)} only has {mannings.shape[0]} Manning's n values, determined {num_clusters} clusters with {xs_len:.2f} XS length. Skipping.")
        text_line = create_Manning_lines(mannings[:, 0].round(3).tolist(), mannings[:, 1].tolist())
        output_text_lines.append(text_line)
        continue
    
    clustering = KMeans(n_clusters=num_clusters, random_state=KMEANS_SEED)
    
    # INTERPOLATION TO FILL ALL CROSS SECTION COORDINATES WITH MANNING'S N
    interpolator = NearestNDInterpolator(transform(mannings[:, 0]), mannings[:, 1])
    elev_mannings = interpolator(transform(elevations[:, 0]))

    if use_mannings:
        # NOT IMPLEMENTED BECAUSE IT MAY LEAD TO MORE XS MANNINGS REGIONS THAN ALLOWED.
        # IT'S DIFFICULT TO CONTROL HOW MANY CONTIGUOUS REGIONS ARE CREATED IF CONSIDERING THIS.
        # RECOMMENDED STRATEGY IS TO CREATE TONS OF REGIONS AND THEN GROUP SIMILAR NEIGHBORS.
        # (THE DRAFT CLUSTERED ON StandardScaler-SCALED X, Z AND MANNING'S N COLUMNS.)
        raise NotImplementedError("Not implemented! Currently, this may result in more XS regions than allowed.")

    # CLUSTER BASED ON X-Z LOCATIONS (Z STRETCHED BY z_scale)
    datastruct = elevations.copy()
    datastruct[:, 1] = datastruct[:, 1] * z_scale
    clustering.fit(datastruct)
    labels = clustering.labels_
    
    # CALCULATE WETTED PERIMETER
    perimeters = calcWettedPerimeter(elevations)
    
    region_mann_n  = []
    region_station = []

    if plot:
        fig, ax = plt.subplots(figsize=(8, 6))
    
    # FOR EACH LABEL GET RIVER STATION AND COMPOSE MANNING'S
    for label in np.unique(labels):
        in_cluster = labels == label

        # SEPARATE NAME SO THE XS-LEVEL `mannings` TABLE IS NOT OVERWRITTEN
        curr_mannings = elev_mannings[in_cluster]
        curr_coords   = elevations[in_cluster]
        curr_pers     = perimeters[in_cluster]
        
        region_mann_n.append(composeManningsN(curr_pers, curr_mannings))
        region_station.append(np.min(curr_coords[:, 0]))
        if plot:
            ax.scatter(curr_coords[:, 0], curr_coords[:, 1], label=f'Cluster {label}', 
                       s=10, color=np.random.rand(3,))
            ax.text(curr_coords[:, 0].mean(), curr_coords[:, 1].mean(), f"{region_mann_n[-1]:.3f}")
    if plot:
        mytitle = title(idx, file_attributes)
        ax.set_title(mytitle + f" Clusters: {num_clusters}")
        ax.set_xlabel("XS Station")
        ax.set_ylabel("Elevation")
        # EXPLICIT EXTENSION: WITHOUT IT, A "." INSIDE THE TITLE (E.G. A DECIMAL RIVER STATION)
        # WOULD BE TAKEN AS THE FILE FORMAT BY savefig
        fig.savefig(os.path.join(plot_path, mytitle.replace(":", "-").replace(" ", "_") + ".png"))
        plt.close(fig)

    # SORT LEAST TO GREATEST XS STATION (KMEANS LABEL ORDER IS NOT STATION ORDER)
    sorted_list = sorted(zip(region_station, region_mann_n))

    # EXTEND
    values.extend(sorted_list)
    
    # GET MANNING LINES FROM THE *SORTED* REGIONS
    sorted_station = [station for station, _ in sorted_list]
    sorted_mann_n  = [mann_n for _, mann_n in sorted_list]
    text_line = create_Manning_lines(sorted_station, sorted_mann_n)
    output_text_lines.append(text_line)

  0%|          | 0/387 [00:00<?, ?it/s]

River: D100-00-00 RS: 157812 only has 3 Manning's n values, determined 3 clusters with 640.96 XS length. Skipping.
River: D100-00-00 RS: 157586 only has 2 Manning's n values, determined 3 clusters with 640.96 XS length. Skipping.


In [7]:
def get_matching_line_indices(lines, starting_string):
    """Return the indices of all entries in `lines` that begin with `starting_string`.

    Indices are returned in ascending order; an empty list means no entry matched.
    """
    matches = []
    for position, text in enumerate(lines):
        if text.startswith(starting_string):
            matches.append(position)
    return matches

# LINE INDICES OF EVERY "#Mann=" LINE AND EVERY "Bank Sta=" LINE IN THE GEOMETRY TEXT.
# THE FOLLOWING CELL REPLACES THE SPAN FROM EACH "#Mann=" LINE UP TO (NOT INCLUDING) THE PAIRED "Bank Sta=" LINE.
start_lines, end_lines = (
    get_matching_line_indices(lines, marker) for marker in ("#Mann=", "Bank Sta=")
)

In [8]:
# SPLICE THE REGENERATED MANNING'S N TEXT INTO `lines` AND WRITE THE GEOMETRY FILE BACK.
# NOTE: `lines` IS EDITED IN PLACE, SO start_lines / end_lines ARE STALE AFTERWARDS. RE-READ THE FILE
# AND RE-DETECT THE MARKERS BEFORE RUNNING THIS CELL A SECOND TIME.

# EVERY "#Mann=" BLOCK NEEDS EXACTLY ONE "Bank Sta=" END MARKER AND ONE GENERATED TEXT BLOCK.
# FAIL BEFORE TOUCHING ANYTHING INSTEAD OF WRITING A MISALIGNED GEOMETRY FILE.
if not (len(start_lines) == len(end_lines) == len(output_text_lines)):
    raise ValueError(
        f"Marker/XS count mismatch: {len(start_lines)} '#Mann=' lines, "
        f"{len(end_lines)} 'Bank Sta=' lines, {len(output_text_lines)} generated blocks."
    )
for start_line, end_line in zip(start_lines, end_lines):
    if end_line < start_line:
        raise ValueError(f"'Bank Sta=' line {end_line} precedes its '#Mann=' line {start_line}.")

# REVERSE ORDER TO NOT AFFECT PREVIOUS DETECTIONS
for i in range(len(start_lines) - 1, -1, -1):
    # REPLACE THE OLD BLOCK WITH THE NEW LINES (SAME AS DELETE + INSERT AT start_line)
    lines[start_lines[i]:end_lines[i]] = output_text_lines[i]

# 'w' TRUNCATES ON OPEN, SO NO SEPARATE truncate(0) IS NEEDED
with open(ras_geo_path, 'w') as geo_file:
    geo_file.writelines(lines)

In [9]:
# DISABLED DRAFT: THE WHOLE CELL IS ONE STRING LITERAL, SO NONE OF IT EXECUTES (THE CELL ONLY ECHOES THE TEXT).
# NOTE(review): AS WRITTEN THE DRAFT WOULD NOT UPDATE THE HDF FILE -- updateField ONLY REBINDS THE LOCAL
# NAME `data` AND NEVER WRITES INTO file[fieldName]; out_file_path IS ALSO NOT DEFINED IN THE VISIBLE CELLS.
'''def updateField(file, fieldName, values):
    data = file[fieldName]
    data = np.array(values)

shutil.copyfile(file_path, out_file_path)
out_file = h5py.File(out_file_path, 'r+')
# out_file["Geometry/Cross Sections/Manning's n Info"] = np.array(indices)
# out_file["Geometry/Cross Sections/Manning's n Values"] = np.array(values)
updateField(out_file, "Geometry/Cross Sections/Manning's n Info", np.array(indices))
updateField(out_file, "Geometry/Cross Sections/Manning's n Values", np.array(values))
out_file.close()'''

'def updateField(file, fieldName, values):\n    data = file[fieldName]\n    data = np.array(values)\n\nshutil.copyfile(file_path, out_file_path)\nout_file = h5py.File(out_file_path, \'r+\')\n# out_file["Geometry/Cross Sections/Manning\'s n Info"] = np.array(indices)\n# out_file["Geometry/Cross Sections/Manning\'s n Values"] = np.array(values)\nupdateField(out_file, "Geometry/Cross Sections/Manning\'s n Info", np.array(indices))\nupdateField(out_file, "Geometry/Cross Sections/Manning\'s n Values", np.array(values))\nout_file.close()'