In [3]:
try:
     import xarray
except:
      !pip install xarray
      import xarray

try:
     import h5netcdf
except:
      !pip install h5netcdf
      import h5netcdf

from pathlib import Path
from tempfile import TemporaryDirectory

from scipy.ndimage import gaussian_filter1d
from matplotlib import animation
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import glob
import pandas as pd
import os
import re

def export_specific_datasets_to_csv(input_directory, output_directory, variable_set1,
                                    view_index=2, n_bands=400, first_wavelength=380):
    """Export selected variables of every ``.nc`` file in a directory to CSV.

    For each ``*.nc`` file the root group, the ``observation_data`` group and
    the ``geolocation_data`` group are opened and merged. For every requested
    variable and every band index ``n`` in ``range(n_bands)`` the slice
    ``variable[:, :, view_index, n]`` is flattened to one column. The columns
    are joined with the ``lat``/``lon`` columns, rows containing NaN are
    dropped, and the table is written to ``<output_directory>/<stem>.csv``
    without the index.

    Parameters
    ----------
    input_directory : str or path-like
        Directory scanned (non-recursively) for files ending in ``.nc``.
    output_directory : str or path-like
        Directory receiving one CSV per input file; created if missing.
    variable_set1 : sequence of str
        Names of the variables to export. Each must be indexable as
        ``[:, :, view, band]``, i.e. have four dimensions.
    view_index : int, optional
        Index along the third dimension. The default ``2`` is what the
        original code used, described there as the 0 viewing zenith angle.
    n_bands : int, optional
        Number of indices exported along the fourth dimension (default 400).
    first_wavelength : int, optional
        Label of band 0; band ``n`` is labelled ``first_wavelength + n``
        (presumably nanometres at 1 nm spacing -- confirm against the product).

    Raises
    ------
    ValueError
        If ``variable_set1`` is empty.
    KeyError
        If a requested variable is not present in a file.

    Notes
    -----
    With a single variable the band columns are named ``"380"``, ``"381"``, ...
    exactly as before. With several variables the columns are prefixed
    (``"<variable>_380"``) so they no longer collide and get dropped by the
    duplicate-column filter.
    """
    if not variable_set1:
        raise ValueError("variable_set1 must name at least one variable to export")

    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Band labels only need a variable prefix when they would otherwise clash.
    prefix_with_variable = len(variable_set1) > 1

    # Iterate through all files in the input directory
    for filename in os.listdir(input_directory):
        if not filename.endswith(".nc"):
            continue

        file_path = os.path.join(input_directory, filename)
        # splitext removes only the trailing extension, not every ".nc" substring.
        csv_file_name = f"{os.path.splitext(filename)[0]}.csv"
        csv_file_path = os.path.join(output_directory, csv_file_name)

        # Context managers guarantee the three file handles are released even
        # if a variable is missing or an index is out of range.
        with xr.open_dataset(file_path) as prod, \
                xr.open_dataset(file_path, group="observation_data") as obs, \
                xr.open_dataset(file_path, group="geolocation_data") as geo:
            dataset = xr.merge(
                (prod, obs.squeeze(), geo.set_coords(["longitude", "latitude"]))
            )
            # Look each variable up once instead of once per band.
            arrays = {variable: dataset[variable] for variable in variable_set1}

            dataframes = []
            for n in range(n_bands):
                for variable in variable_set1:
                    band_label = f"{first_wavelength + n}"
                    if prefix_with_variable:
                        band_label = f"{variable}_{band_label}"

                    band_df = arrays[variable][:, :, view_index, n].to_dataframe()
                    # Rename by name rather than by position so lat/lon cannot
                    # be swapped if the coordinate order differs.
                    band_df = band_df.rename(
                        columns={"latitude": "lat", "longitude": "lon", variable: band_label}
                    )
                    # Appended inside the inner loop so every variable is
                    # kept, not just the last one.
                    dataframes.append(band_df)

            combined_df = pd.concat(dataframes, axis=1)
            # Every band frame repeats lat/lon; keep the first copy only.
            duplicate_mask = combined_df.columns.duplicated(keep='first')
            df_cleaned = combined_df.loc[:, ~duplicate_mask].dropna()

        df_cleaned.to_csv(csv_file_path, index=False)
               

# Specify the input and output directories.
# The defaults are the original local paths; set the SPEX_IFCB_INPUT_DIR /
# SPEX_IFCB_OUTPUT_DIR environment variables to run this cell on another
# machine without editing the notebook.
input_directory = os.environ.get('SPEX_IFCB_INPUT_DIR', '/Users/yizhen/Desktop/spex_ifcb')
output_directory = os.environ.get('SPEX_IFCB_OUTPUT_DIR', '/Users/yizhen/Desktop/spex_ifcb_output')

# Specify the datasets you want to export
variable_set1 = ['i']

# export the datasets
export_specific_datasets_to_csv(input_directory, output_directory, variable_set1)




In [4]:
try:
     import haversine
except:
      !pip install haversine
      import haversine

from haversine import haversine, Unit
import h5py
import pandas as pd
import numpy as np
import math
import os
import re

def find_nearest_point(lats, lons, target_lat, target_lon):
    """Return the index of the point closest to a target location.

    Closeness is measured along the sphere (haversine formula) rather than as
    a straight-line distance in degree space. A degree of longitude shrinks
    with latitude, so the planar measure can pick the wrong point away from
    the equator, and it fails across the +/-180 degree meridian.

    Parameters
    ----------
    lats, lons : array-like of float
        Latitudes and longitudes in decimal degrees; both must have the same
        shape. NumPy arrays, pandas Series and plain lists are accepted.
    target_lat, target_lon : float
        Target location in decimal degrees.

    Returns
    -------
    int
        Index of the nearest point, with ``np.argmin`` semantics (an index
        into the flattened input for multi-dimensional arrays). Points with a
        NaN coordinate are never selected unless every point is NaN, in which
        case 0 is returned.

    Raises
    ------
    ValueError
        If the inputs are empty (raised by ``np.argmin``).
    """
    lat_rad = np.radians(np.asarray(lats, dtype=float))
    lon_rad = np.radians(np.asarray(lons, dtype=float))
    target_lat_rad = np.radians(target_lat)
    target_lon_rad = np.radians(target_lon)

    # Haversine term. It grows monotonically with great-circle distance, so
    # its argmin is the nearest point; no Earth radius or arcsin is needed.
    hav = (
        np.sin((lat_rad - target_lat_rad) / 2.0) ** 2
        + np.cos(lat_rad) * np.cos(target_lat_rad)
        * np.sin((lon_rad - target_lon_rad) / 2.0) ** 2
    )

    # np.argmin returns the position of a NaN if one is present; push invalid
    # points to +inf so they can never win.
    hav = np.where(np.isnan(hav), np.inf, hav)

    # Find index of min distance
    closest_idx = np.argmin(hav)

    return closest_idx

def extract_date_from_filename(filename):
    """Return the first timestamp-like token found in ``filename``.

    The token is eight digits, one ASCII letter, then six digits, for example
    ``20240706T172756``. An empty string is returned when the name contains
    no such token.
    """
    timestamp_pattern = r'\d{8}[a-zA-Z]\d{6}'
    found = re.search(timestamp_pattern, filename)
    return found.group(0) if found else ''

def export_specific_datasets_to_one_csv(input_directory, output_directory,
                                        target_lat=41.36, target_lon=-70.52):
    """Collect, from each CSV in a directory, the row nearest a target point.

    Every ``.csv`` file in ``input_directory`` is read. A ``distance`` column
    (distance of each row's ``lat``/``lon`` from the target) and a ``time``
    column (timestamp token taken from the file name) are added. The row with
    the smallest distance is kept, and the rows from all files are written to
    ``<output_directory>/ifcb_spex_new.csv`` without the index.

    Parameters
    ----------
    input_directory : str or path-like
        Directory holding the per-file CSVs; each needs ``lat`` and ``lon``
        columns.
    output_directory : str or path-like
        Directory for the combined CSV; created if missing.
    target_lat, target_lon : float, optional
        Target location in decimal degrees. The defaults are the values that
        were hard-coded before, labelled there as "the location of WaveCIS
        site". NOTE(review): confirm that site name matches these coordinates.

    Returns
    -------
    None
        Files that cannot be read, lack ``lat``/``lon``, or have no row with
        valid coordinates are reported and skipped. If nothing usable is
        found, a message is printed and no file is written.
    """
    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Resolved before the loop so it exists even when the directory is empty.
    csv_file_name = 'ifcb_spex_new.csv'
    csv_file_path = os.path.join(output_directory, csv_file_name)

    nearest_rows = []
    # Sorted so the order of rows in the output is deterministic.
    for filename in sorted(os.listdir(input_directory)):
        # Only CSV inputs; this skips hidden/system entries such as .DS_Store.
        if not filename.lower().endswith('.csv'):
            continue

        input_file_path = os.path.join(input_directory, filename)
        if not os.path.isfile(input_file_path):
            continue
        # Never re-ingest a previous combined export when both directories coincide.
        if os.path.abspath(input_file_path) == os.path.abspath(csv_file_path):
            continue

        # The latin-1 retry is nested inside the outer try so a failure of the
        # fallback read is reported and skipped too, rather than escaping.
        try:
            try:
                df = pd.read_csv(input_file_path, encoding='windows-1252')
            except UnicodeDecodeError:
                print(f"Could not decode {filename} with windows-1252. Trying latin-1.")
                df = pd.read_csv(input_file_path, encoding='latin-1')
        except Exception as e:
            print(f"An error occurred while reading {filename}: {e}")
            continue  # Skip to the next file if an error occurs

        if not {'lat', 'lon'}.issubset(df.columns):
            print(f"Skipping {filename}: missing 'lat'/'lon' columns.")
            continue

        # Planar distance in degree space (not kilometres), unchanged because
        # it is written to the output CSV.
        # NOTE(review): this over-weights longitude differences away from the
        # equator; consider a great-circle distance if the exact nearest
        # pixel matters.
        df['distance'] = np.sqrt((df['lat'] - target_lat)**2 + (df['lon'] - target_lon)**2)

        # Covers both an empty file and one whose coordinates are all NaN;
        # idxmin has no valid answer in either case.
        if df['distance'].isna().all():
            print(f"Skipping {filename}: no rows with valid coordinates.")
            continue

        df['time'] = extract_date_from_filename(filename)

        nearest_rows.append(df.loc[df['distance'].idxmin()])

    if not nearest_rows:
        print(f"No usable CSV files found in {input_directory}; nothing exported.")
        return

    # One row per input file
    dis_df = pd.DataFrame(nearest_rows)

    # Export to CSV
    dis_df.to_csv(csv_file_path, index=False)
    print(f"Exported datasets over specific location from {input_directory} to {csv_file_path}")

# Specify the input and output directories.
# The defaults are the original local paths; set SPEX_IFCB_OUTPUT_DIR (the
# per-file CSVs read here) and SPEX_IFCB_COMBINED_DIR (where the combined CSV
# is written) to run this cell on another machine without editing the notebook.
input_directory = os.environ.get('SPEX_IFCB_OUTPUT_DIR', '/Users/yizhen/Desktop/spex_ifcb_output')
output_directory = os.environ.get('SPEX_IFCB_COMBINED_DIR', '/Users/yizhen/Desktop/spex_ifcb_output_in_one')

export_specific_datasets_to_one_csv(input_directory, output_directory)




Exported datasets over specific location from /Users/yizhen/Desktop/spex_ifcb_output/PACE_SPEXONE.20240706T172756.L1C.V3.5km.csv to /Users/yizhen/Desktop/spex_ifcb_output_in_one/ifcb_spex_new.csv
