In [None]:
import ftplib
import os
from getpass import getpass
import xarray as xr
from glob import glob

def ftp_list_available_passes(username=None, password=None):
    """
    List all available pass numbers dynamically from the FTP server.

    Every entry of the SWOT L3 LR SSH Expert (v1_0) product directory is
    visited, its files are listed, and the pass number encoded in each file
    name is collected.

    Parameters:
    - username: FTP username. Prompted for interactively when empty.
    - password: FTP password. Prompted for interactively when empty.

    Returns:
    - A sorted list of the distinct pass numbers (as strings). An empty list
      is returned, after printing the error, if any FTP operation fails.
    """
    ftpAVISO = 'ftp-access.aviso.altimetry.fr'
    base_dir = '/swot_products/l3_karin_nadir/l3_lr_ssh/v1_0/Expert/'

    try:
        # Prompt for username and password if not provided
        if not username:
            username = input("Enter FTP username: ")
        if not password:
            password = getpass(f"Enter password for {username}: ")

        # Connect to FTP
        with ftplib.FTP(ftpAVISO) as ftp:
            ftp.login(username, password)
            print(f"Connection established: {ftp.getwelcome()}")

            # The product directory is expected to hold one 'cycle_*'
            # directory per cycle.
            ftp.cwd(base_dir)
            available_cycles = ftp.nlst()

            available_passes = set()
            for cycle in available_cycles:
                ftp.cwd(f'{base_dir}{cycle}')
                for file in ftp.nlst():
                    # File names follow
                    #   SWOT_L3_LR_SSH_Expert_<cycle>_<pass>_<start>_<end>_<version>.nc
                    # so after splitting on '_' field 5 is the cycle number
                    # and field 6 is the pass number.
                    fields = file.rsplit('/', 1)[-1].split('_')
                    if len(fields) > 6:
                        available_passes.add(fields[6])
                    # Entries that do not follow the naming scheme are
                    # ignored instead of aborting the whole listing.

            available_passes = sorted(available_passes)
            print("Available pass numbers:")
            for pass_num in available_passes:
                print(pass_num)

            return available_passes

    except Exception as e:
        # Best-effort listing: report the problem and return nothing.
        print(f"Error: {e}")
        return []

def ftp_list_available_cycles(pass_num, username=None, password=None):
    """
    List all available cycles for a given pass number from the FTP server.

    A cycle is reported when at least one file in its directory carries the
    requested pass number in the pass field of its name.

    Parameters:
    - pass_num: Pass number as a string, exactly as it appears in the file
      names (e.g. '001').
    - username: FTP username.
    - password: FTP password.

    Returns:
    - A list of the cycle directory names that contain the pass. An empty
      list is returned, after printing the error, if any FTP operation fails.
    """
    ftpAVISO = 'ftp-access.aviso.altimetry.fr'
    base_dir = '/swot_products/l3_karin_nadir/l3_lr_ssh/v1_0/Expert/'
    wanted_pass = str(pass_num)

    try:
        # Connect to FTP
        with ftplib.FTP(ftpAVISO) as ftp:
            ftp.login(username, password)
            ftp.cwd(base_dir)

            available_cycles = ftp.nlst()
            available_cycles_for_pass = []

            # Iterate over cycles and check if pass_num is present in files
            for cycle in available_cycles:
                ftp.cwd(f'{base_dir}{cycle}')
                for file in ftp.nlst():
                    # File names follow
                    #   SWOT_L3_LR_SSH_Expert_<cycle>_<pass>_<start>_<end>_<version>.nc
                    # Compare the pass field (index 6) only: a plain
                    # substring test on '_<pass>_' would also hit the cycle
                    # field and report cycle_NNN for pass NNN.
                    fields = file.rsplit('/', 1)[-1].split('_')
                    if len(fields) > 6 and fields[6] == wanted_pass:
                        available_cycles_for_pass.append(cycle)
                        break  # We only need one match per cycle

            print(f"Available cycles for pass {pass_num}: {available_cycles_for_pass}")
            return available_cycles_for_pass

    except Exception as e:
        # Best-effort listing: report the problem and return nothing.
        print(f"Error: {e}")
        return []

def ftp_data_access(cycle_list, pass_num, username=None, password=None, local_filepath=None):
    """
    Download SWOT data from FTP server based on cycles and pass numbers.

    For every cycle in `cycle_list` the files whose name starts with
    'SWOT_L3_LR_SSH_Expert_<cycle number>_<pass_num>_' are downloaded into
    `local_filepath`. Each file is first written under a temporary '.part'
    name and renamed once the transfer has completed, so an interrupted
    transfer never leaves a truncated file under its final name.

    Parameters:
    - cycle_list: List of cycle numbers (e.g., ['cycle_474', 'cycle_475']).
    - pass_num: Pass number as a string.
    - username: FTP username.
    - password: FTP password.
    - local_filepath: Directory to save downloaded files. Created if missing;
      prompted for interactively when empty.

    Returns:
    - None. Any error is printed and stops the remaining downloads.
    """
    ftpAVISO = 'ftp-access.aviso.altimetry.fr'

    try:
        # Connect to FTP
        with ftplib.FTP(ftpAVISO) as ftp:
            ftp.login(username, password)

            for cycle in cycle_list:
                ftp_path = f'/swot_products/l3_karin_nadir/l3_lr_ssh/v1_0/Expert/{cycle}/'
                ftp.cwd(ftp_path)

                # Find files matching the pattern for the cycle and pass
                # ('cycle_474' -> '474' is the cycle field of the file name).
                file_pattern = f'SWOT_L3_LR_SSH_Expert_{cycle.split("_")[1]}_{pass_num}_'
                matching_files = [f for f in ftp.nlst() if f.startswith(file_pattern)]

                if not local_filepath:
                    local_filepath = input("Enter local directory to save the files: ")

                os.makedirs(local_filepath, exist_ok=True)

                # Download matching files
                for filename in matching_files:
                    local_file = os.path.join(local_filepath, filename)
                    partial_file = local_file + '.part'
                    try:
                        with open(partial_file, 'wb') as f:
                            ftp.retrbinary(f"RETR {filename}", f.write)
                        # Publish the file only after a complete transfer.
                        os.replace(partial_file, local_file)
                    finally:
                        # Still present only when the transfer failed.
                        if os.path.exists(partial_file):
                            os.remove(partial_file)
                    print(f"Downloaded: {filename}")

    except Exception as e:
        # Best-effort download: report the problem instead of raising.
        print(f"Error: {e}")

def cube(passn, out_dir, localbox=(0, 360, 30, 70), var_to_drop=None, nadir_cube=False, input_dir='L3'):
    """
    Processes SWOT files for a given pass, subsets them by geographical region,
    drops unnecessary variables, and saves the combined dataset into a NetCDF file.

    Each matching file found in `input_dir` is opened, its 'num_nadir'
    dimension is dropped, a scalar 'timec' coordinate holding the mean of its
    'time' variable is attached, the data are masked to the requested
    longitude/latitude box and the unwanted variables are removed. The
    per-file results are concatenated along 'timec' and written to
    '<out_dir>/<passn>_cube.nc'. A file that fails to process is reported and
    skipped.

    Parameters:
    - passn: Pass number (as a string).
    - out_dir: Directory to save the output NetCDF file (created if missing).
    - localbox: Longitude and latitude bounds for subsetting, as
      [lon_min, lon_max, lat_min, lat_max] (default: (0, 360, 30, 70)).
    - var_to_drop: List of variables to drop (default: None, which selects a
      built-in list).
    - nadir_cube: Flag to process nadir variables (not implemented in this version).
    - input_dir: Directory searched for the input files (default: 'L3').

    Returns:
    - None. Nothing is written when no file is found or none can be processed.
    """
    if var_to_drop is None:
        var_to_drop = ['mdt', 'ssha_unedited', 'ocean_tide', 'mss', 'dac', 'calibration', 'ugos', 'vgos']

    # Find all matching files for the given pass number
    files = sorted(glob(os.path.join(input_dir, f'SWOT_L3_LR_SSH_Expert_???_{passn}*.nc')))
    if not files:
        print(f"No files found for pass {passn}.")
        return

    lon_min, lon_max, lat_min, lat_max = localbox[:4]

    datasets = []
    for file in files:
        try:
            # The context manager closes the file even if a step below fails.
            with xr.open_dataset(file) as ds_file:
                ds_expert = ds_file.drop_dims('num_nadir')

                # Compute mean time for the dataset and assign it as a new coordinate
                mean_time = ds_expert['time'].mean().values
                ds_expert = ds_expert.assign_coords(timec=mean_time)

                # Subset based on geographical location (longitude, latitude)
                in_box = (
                    (ds_expert.longitude >= lon_min) &
                    (ds_expert.longitude <= lon_max) &
                    (ds_expert.latitude >= lat_min) &
                    (ds_expert.latitude <= lat_max)
                )
                ds_expert_sub = ds_expert.where(in_box, drop=True)

                # Drop unnecessary variables
                ds_expert_sub = ds_expert_sub.drop_vars(var_to_drop)

                # Load into memory so the data stay usable once the file
                # is closed on leaving the 'with' block.
                datasets.append(ds_expert_sub.load())

        except Exception as e:
            print(f"Error processing file {file}: {e}")

    if not datasets:
        # Files were found, but every one of them failed above.
        print(f"No usable data for pass {passn}: all {len(files)} file(s) failed to process.")
        return

    # Concatenate all datasets along the new 'timec' dimension
    combined_ds = xr.concat(datasets, dim='timec')

    # Save the aggregated data to a new NetCDF file
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    output_fn = os.path.join(out_dir, f'{passn}_cube.nc')
    combined_ds.to_netcdf(output_fn)
    print(f"Combined dataset saved to {output_fn}")

def main():
    """
    Main workflow to list passes, download, process, and clean data.

    Prompts for the FTP credentials, lists every available pass number, and
    for each pass: looks up the cycles that contain it, downloads the matching
    files into `local_filepath`, then builds the per-pass cube in `out_dir`.
    """
    local_filepath = '/mnt/flow/swot/Matt_Analysis/calval_orbit_expert_v1.0/science_orbit/'
    out_dir = '/mnt/flow/swot/Matt_Analysis/calval_orbit_expert_v1.0/science_orbit/cube/'

    # Step 1: List all available pass numbers
    username = input("Enter your FTP username: ")
    password = getpass("Enter your FTP password: ")

    available_passes = ftp_list_available_passes(username, password)
    if not available_passes:
        print("No available pass numbers found. Exiting.")
        return

    # Make sure the output directory exists before any cube is written.
    os.makedirs(out_dir, exist_ok=True)

    # Step 2: Loop through each pass number
    for pass_num in available_passes:
        # Step 3: List all available cycles for the current pass number
        available_cycles = ftp_list_available_cycles(pass_num, username, password)
        if not available_cycles:
            print(f"No available cycles found for pass {pass_num}. Skipping to the next pass.")
            continue

        # Step 4: Download data for the listed cycles
        ftp_data_access(available_cycles, pass_num, username, password, local_filepath)

        # Step 5: Process the downloaded data and create a cube
        # NOTE(review): cube() looks for its input files under the relative
        # 'L3/' directory, whereas the download above saves into
        # local_filepath - confirm the two locations coincide.
        cube(pass_num, out_dir)
        
# Run the workflow only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

In [3]:
# Open a single SWOT L3 Expert NetCDF file from a local path; the returned
# dataset is not stored in a variable (presumably displayed as the notebook
# cell output).
xr.open_dataset('/home/tchonang/python_program/calval/SWOT_vs_ADCP/L3/SWOT_L3_LR_SSH_Expert_478_001_20230401T212436_20230401T221541_v1.0.nc')

In [None]:
import ftplib
import os
from getpass import getpass
import xarray as xr
from glob import glob

def extract_pass_num(file_name):
    """
    Extracts the pass number from the SWOT file name.

    File names follow
        SWOT_L3_LR_SSH_Expert_<cycle>_<pass>_<start>_<end>_<version>.nc
    so, after splitting on underscores, field 5 is the cycle number and
    field 6 is the pass number. Any leading directory part is ignored.

    Parameters:
    - file_name: The name of the SWOT file.

    Returns:
    - The pass number as a string, or None (after printing a message) when
      the name has too few underscore-separated fields.
    """
    # Strip a possible directory prefix so underscores in the path do not
    # shift the field positions.
    fields = file_name.rsplit('/', 1)[-1].split('_')
    if len(fields) > 6:
        return fields[6]

    print(f"Could not extract pass number from file name: {file_name}")
    return None

def ftp_list_available_files(cycle='cycle_006', username=None, password=None):
    """
    Print and return the names of the files stored for one cycle on the FTP server.

    Parameters:
    - cycle: Name of the cycle directory to list (default: cycle_006).
    - username: FTP username.
    - password: FTP password.

    Returns:
    - The list of names reported by the server for that cycle directory, or
      an empty list (after printing the error) if anything goes wrong.
    """
    host = 'ftp-access.aviso.altimetry.fr'

    try:
        with ftplib.FTP(host) as session:
            # Authenticate and show the server greeting
            session.login(username, password)
            greeting = session.getwelcome()
            print(f"Connection established: {greeting}")

            # Move into the directory of the requested cycle
            remote_dir = f'/swot_products/l3_karin_nadir/l3_lr_ssh/v1_0/Expert/{cycle}/'
            session.cwd(remote_dir)

            # Fetch the listing and echo it, one name per line
            listing = session.nlst()
            print(f"Available files for {cycle}:")
            for entry in listing:
                print(entry)

            return listing

    except Exception as err:
        print(f"Error: {err}")
        return []

def ftp_data_access(cycle='cycle_006', pass_num=None, username=None, password=None, local_filepath=None):
    """
    Download SWOT data from FTP server based on cycle and pass number.

    All files of the cycle directory are downloaded, or only those whose
    pass field equals `pass_num` when one is given. Each file is first
    written under a temporary '.part' name and renamed once the transfer has
    completed, so an interrupted transfer never leaves a truncated file under
    its final name.

    Parameters:
    - cycle: The cycle number (e.g., 'cycle_006').
    - pass_num: Pass number to filter files (optional), exactly as it appears
      in the file names (e.g. '001').
    - username: FTP username.
    - password: FTP password.
    - local_filepath: Directory to save downloaded files. Created if missing;
      prompted for interactively when empty.

    Returns:
    - None. Any error is printed and stops the remaining downloads.
    """
    ftpAVISO = 'ftp-access.aviso.altimetry.fr'

    try:
        # Connect to FTP
        with ftplib.FTP(ftpAVISO) as ftp:
            ftp.login(username, password)
            ftp.cwd(f'/swot_products/l3_karin_nadir/l3_lr_ssh/v1_0/Expert/{cycle}/')

            # List files for the specific cycle
            files = ftp.nlst()

            # Filter files by pass number if provided
            if pass_num:
                # File names follow
                #   SWOT_L3_LR_SSH_Expert_<cycle>_<pass>_<start>_<end>_<version>.nc
                # Compare the pass field (index 6) only: a substring test on
                # '_<pass>_' would also match the cycle field, e.g. pass
                # '006' would select every file of cycle_006.
                wanted_pass = str(pass_num)
                selected = []
                for name in files:
                    fields = name.rsplit('/', 1)[-1].split('_')
                    if len(fields) > 6 and fields[6] == wanted_pass:
                        selected.append(name)
                files = selected

            if not files:
                print(f"No files found for cycle {cycle} and pass number {pass_num}.")
                return

            if not local_filepath:
                local_filepath = input("Enter local directory to save the files: ")

            os.makedirs(local_filepath, exist_ok=True)

            # Download matching files
            for filename in files:
                local_file = os.path.join(local_filepath, filename)
                partial_file = local_file + '.part'
                try:
                    with open(partial_file, 'wb') as f:
                        ftp.retrbinary(f"RETR {filename}", f.write)
                    # Publish the file only after a complete transfer.
                    os.replace(partial_file, local_file)
                finally:
                    # Still present only when the transfer failed.
                    if os.path.exists(partial_file):
                        os.remove(partial_file)
                print(f"Downloaded: {filename}")

    except Exception as e:
        # Best-effort download: report the problem instead of raising.
        print(f"Error: {e}")

def cube(passn, out_dir, localbox=(0, 360, 30, 70), var_to_drop=None, nadir_cube=False, input_dir='L3'):
    """
    Processes SWOT files for a given pass, subsets them by geographical region,
    drops unnecessary variables, and saves the combined dataset into a NetCDF file.

    Each matching file found in `input_dir` is opened, its 'num_nadir'
    dimension is dropped, a scalar 'timec' coordinate holding the mean of its
    'time' variable is attached, the data are masked to the requested
    longitude/latitude box and the unwanted variables are removed. The
    per-file results are concatenated along 'timec' and written to
    '<out_dir>/<passn>_cube.nc'. A file that fails to process is reported and
    skipped.

    Parameters:
    - passn: Pass number (as a string).
    - out_dir: Directory to save the output NetCDF file (created if missing).
    - localbox: Longitude and latitude bounds for subsetting, as
      [lon_min, lon_max, lat_min, lat_max] (default: (0, 360, 30, 70)).
    - var_to_drop: List of variables to drop (default: None, which selects a
      built-in list).
    - nadir_cube: Flag to process nadir variables (not implemented in this version).
    - input_dir: Directory searched for the input files (default: 'L3').

    Returns:
    - None. Nothing is written when no file is found or none can be processed.
    """
    if var_to_drop is None:
        var_to_drop = ['mdt', 'ssha_unedited', 'ocean_tide', 'mss', 'dac', 'calibration', 'ugos', 'vgos']

    # Find all matching files for the given pass number
    files = sorted(glob(os.path.join(input_dir, f'SWOT_L3_LR_SSH_Expert_???_{passn}*.nc')))
    if not files:
        print(f"No files found for pass {passn}.")
        return

    lon_min, lon_max, lat_min, lat_max = localbox[:4]

    datasets = []
    for file in files:
        try:
            # The context manager closes the file even if a step below fails.
            with xr.open_dataset(file) as ds_file:
                ds_expert = ds_file.drop_dims('num_nadir')

                # Compute mean time for the dataset and assign it as a new coordinate
                mean_time = ds_expert['time'].mean().values
                ds_expert = ds_expert.assign_coords(timec=mean_time)

                # Subset based on geographical location (longitude, latitude)
                in_box = (
                    (ds_expert.longitude >= lon_min) &
                    (ds_expert.longitude <= lon_max) &
                    (ds_expert.latitude >= lat_min) &
                    (ds_expert.latitude <= lat_max)
                )
                ds_expert_sub = ds_expert.where(in_box, drop=True)

                # Drop unnecessary variables
                ds_expert_sub = ds_expert_sub.drop_vars(var_to_drop)

                # Load into memory so the data stay usable once the file
                # is closed on leaving the 'with' block.
                datasets.append(ds_expert_sub.load())

        except Exception as e:
            print(f"Error processing file {file}: {e}")

    if not datasets:
        # Files were found, but every one of them failed above.
        print(f"No usable data for pass {passn}: all {len(files)} file(s) failed to process.")
        return

    # Concatenate all datasets along the new 'timec' dimension
    combined_ds = xr.concat(datasets, dim='timec')

    # Save the aggregated data to a new NetCDF file
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    output_fn = os.path.join(out_dir, f'{passn}_cube.nc')
    combined_ds.to_netcdf(output_fn)
    print(f"Combined dataset saved to {output_fn}")

def main():
    """
    Main workflow to list passes, download, process, and clean data.

    Prompts for the FTP credentials, lists the files of cycle_006, and for
    each distinct pass number found in those file names downloads the
    matching files into `local_filepath` and builds the per-pass cube in
    `out_dir`.
    """
    local_filepath = '/mnt/flow/swot/Matt_Analysis/calval_orbit_expert_v1.0/science_orbit/pass_006/'
    out_dir = '/mnt/flow/swot/Matt_Analysis/calval_orbit_expert_v1.0/science_orbit/pass_006/cube/'

    username = input("Enter your FTP username: ")
    password = getpass("Enter your FTP password: ")

    # Step 1: List available files for cycle_006
    cycle = 'cycle_006'
    files = ftp_list_available_files(cycle, username, password)
    if not files:
        print(f"No files found for cycle {cycle}. Exiting.")
        return

    # Make sure the output directory exists before any cube is written.
    os.makedirs(out_dir, exist_ok=True)

    # Step 2: Loop through files, extract pass number, and download data
    handled_passes = set()
    for file in files:
        pass_num = extract_pass_num(file)
        if not pass_num:
            print(f"Could not determine pass number for file {file}. Skipping.")
            continue

        # Several files can map to the same pass number; download and
        # process each pass only once.
        if pass_num in handled_passes:
            continue
        handled_passes.add(pass_num)

        print(f"Processing pass number {pass_num}...")

        # Step 3: Download data for this pass
        ftp_data_access(cycle, pass_num, username, password, local_filepath)

        # Step 4: Process the downloaded data and create a cube
        # NOTE(review): cube() looks for its input files under the relative
        # 'L3/' directory, whereas the download above saves into
        # local_filepath - confirm the two locations coincide.
        cube(pass_num, out_dir)

# Run the workflow only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()