In [None]:
# Import packages
# Dataframe Packages
import numpy as np
import xarray as xr
import pandas as pd

# Vector Packages
import geopandas as gpd
import shapely
from shapely.geometry import Point, Polygon

# Raster Packages
import rioxarray as rxr
from rioxarray.merge import merge_arrays
import rasterstats as rs
import gdal

# Data Access Packages
import earthaccess as ea
import h5py
import pickle
from tensorflow.keras.models import load_model

# General Packages
import os
import re
import shutil
from datetime import datetime
import glob
from pprint import pprint
from typing import Union
from pathlib import Path
from tqdm import tqdm
import time
import requests

In [5]:
import NSIDC_Data

class ASODataTool:
    """Search for and download NSIDC granules via the ``NSIDC_Data`` helper module.

    The instance remembers the search parameters and caches the list of
    granule URLs returned by the first successful search in ``url_list``.
    """

    # Default location of the pickled region table read by ``get_bounding_box``.
    # Machine-specific; pass ``regions_path`` to ``get_bounding_box`` to override.
    DEFAULT_REGIONS_PATH = "C:\\Users\\VISH NU\\National_snow_model\\National-Snow-Model\\Data\\Processed\\RegionVal.pkl"

    def __init__(self, short_name, version, polygon='', filename_filter=''):
        """Store the dataset identifiers and build the CMR query prefix.

        Args:
            short_name: Dataset short name forwarded to ``NSIDC_Data.cmr_search``.
            version: Dataset version forwarded to ``NSIDC_Data.cmr_search``.
            polygon: Optional polygon filter forwarded to the search.
            filename_filter: Optional filename filter forwarded to the search.
        """
        self.short_name = short_name
        self.version = version
        self.polygon = polygon
        self.filename_filter = filename_filter
        # Fallback bounding box used by cmr_search when none is passed in;
        # subclasses may overwrite it (e.g. after an interactive selection).
        self.bounding_box = ''
        self.url_list = []
        self.CMR_URL = 'https://cmr.earthdata.nasa.gov'
        self.CMR_PAGE_SIZE = 2000
        self.CMR_FILE_URL = (f'{self.CMR_URL}/search/granules.json?provider=NSIDC_ECS'
                             '&sort_key[]=start_date&sort_key[]=producer_granule_id'
                             f'&scroll=true&page_size={self.CMR_PAGE_SIZE}')

    def cmr_search(self, time_start, time_end, bounding_box):
        """Return the granule URLs for the given time window and bounding box.

        The search runs only while ``url_list`` is empty; later calls return
        the cached list even if the arguments differ.

        Args:
            time_start: Start of the temporal window, forwarded unchanged.
            time_end: End of the temporal window, forwarded unchanged.
            bounding_box: Bounding box string; when empty, ``self.bounding_box``
                is used instead.

        Returns:
            The list stored in ``self.url_list``.

        Raises:
            SystemExit: If the search is interrupted with Ctrl-C.
        """
        if not self.url_list:
            search_box = bounding_box or self.bounding_box
            try:
                self.url_list = NSIDC_Data.cmr_search(
                    self.short_name, self.version, time_start, time_end,
                    bounding_box=search_box, polygon=self.polygon,
                    filename_filter=self.filename_filter, quiet=False)
            except KeyboardInterrupt:
                # Same effect as the interactive-only ``quit()`` helper, but
                # does not depend on the ``site`` module being loaded.
                raise SystemExit(0) from None
        return self.url_list

    def cmr_download(self, directory):
        """Download every URL in ``url_list`` into *directory*, creating it if needed."""
        os.makedirs(directory, exist_ok=True)

        NSIDC_Data.cmr_download(self.url_list, directory, False)

    @staticmethod
    def get_bounding_box(region, regions_path=None):
        """Return the bounding box of *region* as ``"minx,miny,maxx,maxy"``.

        Args:
            region: Key into the pickled region table.
            regions_path: Path of the pickle to read; defaults to
                ``ASODataTool.DEFAULT_REGIONS_PATH``.

        Returns:
            The total bounds of the region's ``Long``/``Lat`` points, joined
            with commas.
        """
        if regions_path is None:
            regions_path = ASODataTool.DEFAULT_REGIONS_PATH

        # SECURITY: unpickling executes arbitrary code; only load trusted files.
        regions = pd.read_pickle(regions_path)

        # presumably a mapping of region name -> table with Long/Lat columns;
        # verify against the code that writes RegionVal.pkl
        region_df = regions[region].copy()
        region_points = gpd.GeoDataFrame(
            region_df,
            geometry=gpd.points_from_xy(region_df.Long, region_df.Lat, crs="EPSG:4326"))
        min_x, min_y, max_x, max_y = region_points.total_bounds

        return f"{min_x},{min_y},{max_x},{max_y}"


class ASODownload(ASODataTool):
    """``ASODataTool`` with an interactive prompt for choosing the search region."""

    def __init__(self, short_name, version, polygon='', filename_filter=''):
        """Initialise the base tool and the list of selectable region names."""
        super().__init__(short_name, version, polygon, filename_filter)
        # Name of the region picked by select_region(); None until a valid pick.
        self.selected_region = None
        self.region_list = [
            'N_Sierras',
            'S_Sierras',
            'Greater_Yellowstone',
            'N_Co_Rockies',
            'SW_Mont',
            'SW_Co_Rockies',
            'GBasin',
            'N_Wasatch',
            'N_Cascade',
            'S_Wasatch',
            'SW_Mtns',
            'E_WA_N_Id_W_Mont',
            'S_Wyoming',
            'SE_Co_Rockies',
            'Sawtooth',
            'Ca_Coast',
            'E_Or',
            'N_Yellowstone',
            'S_Cascade',
            'Wa_Coast',
            'Greater_Glacier',
            'Or_Coast',
        ]

    def select_region(self):
        """Prompt for a region index and store the region's bounding box.

        On a valid choice ``self.selected_region`` and ``self.bounding_box``
        are set. On invalid input a message is printed and nothing is changed.

        Returns:
            The selected region name, or ``None`` when the input was invalid.
        """
        print("Select a region by entering its index:")
        for i, region in enumerate(self.region_list, start=1):
            print(f"{i}. {region}")

        # Only the int() conversion is guarded, so a ValueError raised while
        # computing the bounding box is not misreported as bad user input.
        try:
            user_input = int(input("Enter the index of the region: "))
        except ValueError:
            print("Invalid input. Please enter a valid index.")
            return None

        if not 1 <= user_input <= len(self.region_list):
            print("Invalid index. Please select a valid index.")
            return None

        selected_region = self.region_list[user_input - 1]
        self.bounding_box = self.get_bounding_box(selected_region)
        self.selected_region = selected_region
        print(f"You selected: {selected_region}")
        print(f"Bounding Box: {self.bounding_box}")
        return selected_region
            
if __name__ == "__main__":
    # Interactive entry point: pick a region, search CMR, download the granules.
    short_name = 'ASO_50M_SWE'
    version = '1'
    time_start = '2013-04-02T00:00:00Z'
    time_end = '2019-07-19T23:59:59Z'
    directory = "SWE_Data"

    # select_region() is an instance method of ASODownload and stores the
    # bounding box on that instance, so the same object must run the search.
    data_tool = ASODownload(short_name, version)
    data_tool.select_region()

    # select_region() leaves bounding_box unset/empty when the input is invalid.
    bounding_box = getattr(data_tool, "bounding_box", None)
    if not bounding_box:
        raise SystemExit("No valid region selected; nothing to download.")

    print(f"Fetching file URLs in progress for bounding box {bounding_box} from {time_start} to {time_end}")
    url_list = data_tool.cmr_search(time_start, time_end, bounding_box)
    data_tool.cmr_download(directory)

Enter start date (YYYY-MM-DD): 2013-04-02
Enter end date (YYYY-MM-DD): 2019-07-19
Select a region by entering its index:
1. N_Sierras
2. S_Sierras
3. Greater_Yellowstone
4. N_Co_Rockies
5. SW_Mont
6. SW_Co_Rockies
7. GBasin
8. N_Wasatch
9. N_Cascade
10. S_Wasatch
11. SW_Mtns
12. E_WA_N_Id_W_Mont
13. S_Wyoming
14. SE_Co_Rockies
15. Sawtooth
16. Ca_Coast
17. E_Or
18. N_Yellowstone
19. S_Cascade
20. Wa_Coast
21. Greater_Glacier
22. Or_Coast
Enter the index of the region: 2
You selected: S_Sierras
Bounding Box: -120.3763448720203,36.29256774541929,-118.292253412863,38.994985247736324
Fetching file URLs in progress for S_Sierras from 2013-04-02T00:00:00Z to 2019-07-19T23:59:59Z
Querying for data:
	https://cmr.earthdata.nasa.gov/search/granules.json?provider=NSIDC_ECS&sort_key[]=start_date&sort_key[]=producer_granule_id&scroll=true&page_size=2000&short_name=ASO_50M_SWE&version=001&version=01&version=1&temporal[]=2013-04-02T00:00:00Z,2019-07-19T23:59:59Z&bounding_box=-120.3763448720203,36.292

Downloading 188 files to C:\Users\VISH NU\NSM_SSWEET\SWE_Data...
Earthdata Login Username: vishnugindi
Earthdata Login Password: ········
[REDACTED: base64-encoded Earthdata Login credentials were echoed here; remove from shared outputs and rotate the password]
001/188: C:\Users\VISH NU\NSM_SSWEET\SWE_Data\ASO_50M_SWE_USCATB_20130403.tif
002/188: C:\Users\VISH NU\NSM_SSWEET\SWE_Data\ASO_50M_SWE_USCATB_20130403.tif.xml


In [3]:
import pandas

# Flatten one downloaded ASO SWE GeoTIFF into a table and export it as CSV.
rds = rxr.open_rasterio(
    r"C:\Users\VISH NU\NSM_SSWEET\SWE_Data\ASO_50M_SWE_USCACE_20160401.tif")
# drop_vars replaces the deprecated name-based DataArray.drop; the two
# coordinates are removed so they do not become DataFrame columns.
rds = rds.squeeze().drop_vars(["spatial_ref", "band"])
rds.name = "data"  # becomes the value column name in the DataFrame
df = rds.to_dataframe().reset_index()
# Only rows with non-negative values are written; `df` itself keeps all rows.
df[df.data >= 0.0].to_csv("out.csv", index=False)

In [4]:
df

Unnamed: 0,y,x,data
0,4.234823e+06,242740.000051,-9999.0
1,4.234823e+06,242790.000152,-9999.0
2,4.234823e+06,242840.000253,-9999.0
3,4.234823e+06,242890.000354,-9999.0
4,4.234823e+06,242940.000455,-9999.0
...,...,...,...
1451338,4.178923e+06,307340.130658,-9999.0
1451339,4.178923e+06,307390.130759,-9999.0
1451340,4.178923e+06,307440.130860,-9999.0
1451341,4.178923e+06,307490.130961,-9999.0


In [None]:
##Sample Code##
# Reproject a raster to EPSG:4326 with bilinear resampling, print its
# geotransform and projection, then load the result into a DataFrame.
# NOTE(review): `ds` is not defined in this cell; presumably it is a GDAL
# dataset (or path) left over from earlier notebook state — confirm before running.
# NOTE(review): Warp writes "output_tiff.tif" relative to the working directory,
# while the read below uses an absolute path — this assumes the notebook runs
# from that Model folder; verify.
Reprj_ds = gdal.Warp("output_tiff.tif", ds, dstSRS = "EPSG:4326", xRes = 0.01, yRes = -0.01, 
                            resampleAlg = "bilinear")
print(Reprj_ds.GetGeoTransform())
print(Reprj_ds.GetProjection())
rds = rxr.open_rasterio(
    r"C:\Users\VISH NU\National_snow_model\National-Snow-Model\Model\output_tiff.tif")
# NOTE(review): name-based `.drop` is deprecated in xarray in favour of `drop_vars`.
rds = rds.squeeze().drop("spatial_ref").drop("band")
rds.name = "data"  # becomes the value column name in the DataFrame
df = rds.to_dataframe().reset_index()
# Bare expression: displays the DataFrame as the notebook cell output.
df

In [None]:
class ASODataProcessing:
    """Turn downloaded ASO SWE GeoTIFFs into per-cell CSV tables.

    All methods are static; the class only groups the pipeline steps:
    GeoTIFF -> reprojected GeoTIFF -> point CSV -> cell-id CSV -> one wide CSV.
    """

    @staticmethod
    def _date_from_filename(filename):
        """Return the text after the last underscore of *filename*, without its extension."""
        return os.path.splitext(filename)[0].split("_")[-1]

    @staticmethod
    def processing_tiff(input_file, output_res):
        """Reproject *input_file* to EPSG:4326 and return its pixels as a DataFrame.

        The warped raster is written to ``<cwd>/Processed_Data/ASO_100M_<date>.tif``
        (same folder name as the one used by ``convert_tiff_to_csv``).

        Args:
            input_file: Path of the source GeoTIFF; the date is taken from the
                part of the name after the last underscore.
            output_res: Value passed to ``gdal.Warp`` as ``xRes`` and (negated)
                ``yRes``. GDAL interprets it in units of the target SRS.
                NOTE(review): with EPSG:4326 that means degrees, not metres —
                confirm callers pass a suitable value.

        Returns:
            A DataFrame built from the warped raster, or ``None`` if anything
            failed (the error is printed).
        """
        try:
            date = ASODataProcessing._date_from_filename(input_file)

            output_folder = os.path.join(os.getcwd(), "Processed_Data")
            os.makedirs(output_folder, exist_ok=True)
            output_file = os.path.join(output_folder, f"ASO_100M_{date}.tif")

            ds = gdal.Open(input_file)
            if ds is None:
                print(f"Failed to open '{input_file}'. Make sure the file is a valid GeoTIFF file.")
                return None

            warped = gdal.Warp(output_file, ds, dstSRS="EPSG:4326", xRes=output_res,
                               yRes=-output_res, resampleAlg="bilinear")
            if warped is None:
                print(f"Failed to reproject '{input_file}'.")
                return None
            # Releasing the GDAL handles closes the datasets, so the output
            # file is fully written before it is read back below.
            warped = None
            ds = None

            rds = rxr.open_rasterio(output_file)
            # drop_vars replaces the deprecated name-based DataArray.drop.
            rds = rds.squeeze().drop_vars(["spatial_ref", "band"])
            rds.name = "data"
            return rds.to_dataframe().reset_index()

        except Exception as e:
            # Deliberate best-effort: one bad file must not abort a batch run.
            print(f"An error occurred: {str(e)}")
            return None

    @staticmethod
    def convert_tiff_to_csv(input_folder, cwd, output_res):
        """Convert every ``.tif`` in ``cwd/input_folder`` to a CSV file.

        CSVs are written to ``cwd/Processed_Data/SWE_csv/ASO_SWE_<date>.csv``.
        Files that ``processing_tiff`` cannot handle are skipped.

        Args:
            input_folder: Folder name, relative to *cwd*, holding the GeoTIFFs.
            cwd: Base directory for both input and output.
            output_res: Resolution forwarded to ``processing_tiff``.
        """
        folder_path = os.path.join(cwd, input_folder)
        if not os.path.isdir(folder_path):
            print(f"The folder '{input_folder}' does not exist.")
            return

        entries = os.listdir(folder_path)
        if not entries:
            print(f"The folder '{input_folder}' is empty.")
            return

        tiff_files = [filename for filename in entries if filename.endswith(".tif")]
        csv_folder = os.path.join(cwd, "Processed_Data", "SWE_csv")

        for tiff_filename in tiff_files:
            tiff_filepath = os.path.join(folder_path, tiff_filename)
            # Static methods have no implicit access to siblings; qualify the call.
            df = ASODataProcessing.processing_tiff(tiff_filepath, output_res)
            if df is None:
                continue

            date = ASODataProcessing._date_from_filename(tiff_filename)
            csv_filename = f"ASO_SWE_{date}.csv"
            os.makedirs(csv_folder, exist_ok=True)
            df.to_csv(os.path.join(csv_folder, csv_filename), index=False)

            print(f"Converted '{tiff_filename}' to '{csv_filename}'")

    @staticmethod
    def extract_cellids(metadata_path, aso_swe_path, output_path):
        """Attach grid-cell metadata to ASO points and write the result as CSV.

        Each metadata row is turned into a polygon from its BL/BR/UR/UL corner
        coordinates; each ASO row becomes a point from its ``x``/``y`` columns.
        Points lying within a polygon are kept, the corner and terrain columns
        listed below are dropped, and ``data`` is renamed to ``swe``.

        Args:
            metadata_path: CSV with the cell corner-coordinate columns.
            aso_swe_path: CSV with ``x``, ``y`` and ``data`` columns.
            output_path: Destination CSV path.
        """
        metadata_df = pd.read_csv(metadata_path)

        cell_polygons = [
            Polygon([(bl_x, bl_y), (br_x, br_y), (ur_x, ur_y), (ul_x, ul_y)])
            for bl_x, bl_y, br_x, br_y, ur_x, ur_y, ul_x, ul_y in zip(
                metadata_df['BL_Coord_Long'], metadata_df['BL_Coord_Lat'],
                metadata_df['BR_Coord_Long'], metadata_df['BR_Coord_Lat'],
                metadata_df['UR_Coord_Long'], metadata_df['UR_Coord_Lat'],
                metadata_df['UL_Coord_Long'], metadata_df['UL_Coord_Lat'])
        ]
        cells = gpd.GeoDataFrame(metadata_df, geometry=cell_polygons)

        aso_swe_df = pd.read_csv(aso_swe_path)
        points = gpd.GeoDataFrame(
            aso_swe_df, geometry=gpd.points_from_xy(aso_swe_df['x'], aso_swe_df['y']))

        # `predicate` is the current name of the former `op` keyword.
        joined = gpd.sjoin(points, cells, how='inner', predicate='within')

        columns_to_drop = ['geometry', 'index_right', 'y', 'x', 'BR_Coord_Long', 'BR_Coord_Lat', 'UR_Coord_Long', 'UR_Coord_Lat',
                           'UL_Coord_Long', 'UL_Coord_Lat', 'BL_Coord_Long', 'BL_Coord_Lat', 'BLaspect_L', 'UL_Elevation_m',
                           'UL_slope_Deg', 'ULaspect_L', 'UR_Elevation_m', 'UR_slope_Deg', 'URaspect_L', 'BR_Elevation_m',
                           'BR_slope_Deg', 'BRaspect_L']
        final_df = joined.drop(columns_to_drop, axis=1).rename(columns={'data': 'swe'})

        final_df.to_csv(output_path, index=False)

        return

    @staticmethod
    def process_folder(input_folder, metadata_path, output_folder):
        """Run ``extract_cellids`` on every ``.csv`` in *input_folder*.

        Each result is written to *output_folder* under the same file name;
        the output folder is created if it does not exist.
        """
        os.makedirs(output_folder, exist_ok=True)
        csv_files = [f for f in os.listdir(input_folder) if f.endswith('.csv')]

        for csv_file in csv_files:
            input_aso_path = os.path.join(input_folder, csv_file)
            output_aso_path = os.path.join(output_folder, csv_file)

            ASODataProcessing.extract_cellids(metadata_path, input_aso_path, output_aso_path)
            print(f"Processed {csv_file}")

    @staticmethod
    def converting_ASO_to_standardized_format(input_folder, output_csv):
        """Merge per-date CSVs into one wide CSV keyed by ``cell_id``.

        For each ``.csv`` in *input_folder* the SWE column (``swe`` as written
        by ``extract_cellids``, or ``SWE``) is renamed to the date token taken
        from the file name, then all tables are outer-merged on ``cell_id``.
        Files are processed in sorted name order so the column order is stable.

        NOTE(review): if a file holds several rows per ``cell_id`` the merge
        multiplies rows; confirm whether values should be aggregated per cell.

        Args:
            input_folder: Folder containing the per-date CSV files.
            output_csv: Path of the merged CSV to write.
        """
        final_df = None

        for filename in sorted(os.listdir(input_folder)):
            if not filename.endswith(".csv"):
                continue

            # e.g. "ASO_SWE_20130403.csv" -> "20130403"
            time_frame = filename.split('_')[-1].split('.')[0]

            df = pd.read_csv(os.path.join(input_folder, filename))
            df = df.rename(columns={'swe': time_frame, 'SWE': time_frame})

            if final_df is None:
                final_df = df
            else:
                final_df = pd.merge(final_df, df, on='cell_id', how='outer')

        if final_df is None:
            # No CSV inputs: still produce an (empty) output file.
            final_df = pd.DataFrame()

        parent = os.path.dirname(output_csv)
        if parent:
            os.makedirs(parent, exist_ok=True)
        final_df.to_csv(output_csv, index=False)
        
if __name__ == "__main__":
    # Pipeline entry point: GeoTIFFs -> point CSVs -> cell-id CSVs -> one wide CSV.
    data_processor = ASODataProcessing()
    folder_name = "SWE_Data"
    cwd = os.getcwd()
    # NOTE(review): processing_tiff hands this value to gdal.Warp as xRes/yRes
    # together with dstSRS="EPSG:4326", so GDAL reads it in degrees; 100 looks
    # like a metre value — confirm the intended resolution.
    output_res = 100
    data_processor.convert_tiff_to_csv(folder_name, cwd, output_res)

    # Read the point CSVs from the folder convert_tiff_to_csv writes to.
    input_folder = os.path.join(cwd, "Processed_Data", "SWE_csv")
    metadata_path = r"C:\Users\VISH NU\NSM_EvaluationTool\Standardized-Snow-Water-Equivalent-Evaluation-Tool\SSWEET\Provided_Data\Prediction_Location_Metadata.csv"
    output_folder = "output_folder"
    os.makedirs(output_folder, exist_ok=True)
    data_processor.process_folder(input_folder, metadata_path, output_folder)

    # The merge is keyed on cell_id, which only the process_folder outputs
    # contain, so it must read output_folder rather than the raw point CSVs.
    output_csv = "Final_result.csv"
    data_processor.converting_ASO_to_standardized_format(output_folder, output_csv)