In [1]:
pip install netCDF4 pandas

^C
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import pandas as pd
import netCDF4 as nc

def create_folder_if_not_exists(folder_path):
    """Make sure ``folder_path`` exists as a directory and report what happened.

    Missing parent directories are created as well.

    Args:
        folder_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder_path`` exists but is not a directory
            (for example a regular file with the same name).
    """
    # isdir (not exists) so that a plain file at this path is not mistaken
    # for an already-existing folder.
    if os.path.isdir(folder_path):
        print(f"Folder already exists: {folder_path}")
        return

    # exist_ok=True tolerates the folder being created by someone else between
    # the check above and this call; it still raises if the path is a file.
    os.makedirs(folder_path, exist_ok=True)
    print(f"Created folder: {folder_path}")

def netcdf_to_csv(input_folder, output_folder):
    """Convert every ``.nc`` file in ``input_folder`` into a CSV file.

    Each non-empty variable of a NetCDF file is flattened to one dimension and
    becomes one CSV column named after the variable. The columns are placed
    side by side by row position, so variables of different lengths are padded
    with missing values at the bottom. The CSV is written to ``output_folder``
    under the same base name as the NetCDF file.

    Args:
        input_folder: Directory that is scanned (non-recursively) for files
            whose name ends in ``.nc``.
        output_folder: Directory that receives the CSV files; it is created
            if it does not exist.
    """
    # Sorted so the processing order (and the log output) is deterministic
    files = sorted(f for f in os.listdir(input_folder) if f.endswith('.nc'))

    create_folder_if_not_exists(output_folder)

    for file in files:
        data_frames = []

        # The context manager closes the file handle even if reading a
        # variable fails part-way through.
        with nc.Dataset(os.path.join(input_folder, file)) as dataset:
            for var_name, variable in dataset.variables.items():
                if variable.size == 0:  # Skip variables with no data
                    continue

                # Data must be read while the dataset is still open
                data_frames.append(pd.DataFrame({var_name: variable[:].flatten()}))

        # pd.concat raises on an empty list, so report and move on instead
        if not data_frames:
            print(f'Skipped {file}: no variables with data')
            continue

        # Place the per-variable columns side by side (aligned on row position)
        df = pd.concat(data_frames, axis=1)

        # Write the combined table next to the other outputs
        output_filename = os.path.splitext(file)[0] + '.csv'
        output_path = os.path.join(output_folder, output_filename)
        df.to_csv(output_path, index=False)

        print(f'Processed {file} -> {output_filename}')

# Source directory with the .nc files and destination directory for the CSVs;
# change both to match your own layout.
input_folder = 'SeaData/NetCDF'
output_folder = 'SeaData/csv'

# Run the batch conversion
netcdf_to_csv(input_folder=input_folder, output_folder=output_folder)


In [None]:
import os
import pandas as pd
import netCDF4 as nc

# NetCDF file whose header is inspected in this cell
input_file = "SeaData/NetCDF/S_tau10d_seagrid.nc"

# Open the file with netCDF4 and print its summary
dataset = nc.Dataset(input_file)
print(dataset)

In [None]:
import os
import math
import numpy
import xarray
import csv

# NetCDF file to inspect
input_file = 'SeaData/NetCDF/S_tau10d_seagrid.nc'

# Open the file and show its summary once
dsub = xarray.open_dataset(input_file)
print(dsub)

# Take the lengths from the dataset's dimension metadata instead of pulling
# every array into memory with `.values` just to call len() on it.
# assumes 'lat', 'lon' and 'time' are 1-D dimension coordinates — TODO confirm
lat = dsub.sizes['lat']
lon = dsub.sizes['lon']
time = dsub.sizes['time']
# len() of a multi-dimensional array is the length of its first axis, so this
# is the number of entries along the leading dimension of 'dbiastg'.
biastg = dsub['dbiastg'].shape[0]
print(lat)
print(lon)
print(time)
print(biastg)

In [None]:
import os, sys
import pandas as pd
import numpy as np
from numpy import ma
from matplotlib import pyplot as plt
# from mpl_toolkits.basemap import Basemap as bm

# from eofs.standard import Eof

# Directory that holds the NetCDF input files
input_folder = 'SeaData/NetCDF/'
dpath = input_folder
print(dpath)


######################
# load the SST data
######################
import xarray
#print(xarray.__version__)

ncfname = os.path.join(dpath, 'S_tau10d_seagrid.nc')
dset = xarray.open_dataset(ncfname)
print(dset.head(5))
print(dset)
print(dset.lat)

##############################################################
# select the year 2021 over the 40S-40N, 120E-290E domain
##############################################################
# NOTE(review): the label slice on `lat` runs from -40 up to 40 without a
# step; this assumes the coordinate is stored in ascending order
# (-90 ... 90) — confirm. The former slice(-40, 40, -1) asked for a negative
# step, which is not expected to pick out that band on an ascending axis.
dsub = dset.sel(time=slice('2021', '2021'), lat=slice(-40, 40), lon=slice(120, 290))

# Read the arrays from the subset (not from the full dataset) so the
# selection above actually takes effect.
lat = dsub['lat'].values
print(lat)
lon = dsub['lon'].values
# squeeze() drops any length-1 axes from the array
sst = dsub['dbiastg'].values.squeeze()
print(sst.shape)
print(sst)

In [None]:
pip install pyproj -i https://pypi.tuna.tsinghua.edu.cn/simple

In [None]:
pip install basemap-1.3.3-cp310-cp310-win_amd64.whl

In [9]:
import xarray as xr

# NetCDF file to inspect; point this at your own file
netcdf_file = 'SeaData/NetCDF/S_tau10d_seagrid.nc'

# Open the file as an xarray.Dataset and take its data-variable mapping
dataset = xr.open_dataset(netcdf_file)
data_variables = dataset.data_vars

# Report every data variable together with its xarray summary
print("Data variables:")
for name in data_variables:
    print(f"{name}: {data_variables[name]}")


Data variables:
dbiastg: <xarray.DataArray 'dbiastg' (time: 516, lat: 73, lon: 144)>
[5424192 values with dtype=float32]
Coordinates:
  * time     (time) datetime64[ns] 2021-01-11 2021-01-12 ... 2022-06-10
  * lon      (lon) float32 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5
  * lat      (lat) float32 -90.0 -87.5 -85.0 -82.5 -80.0 ... 82.5 85.0 87.5 90.0
Attributes:
    long_name:  SST


In [None]:
import xarray as xr
import pandas as pd
import os

def create_folder_if_not_exists(folder_path):
    """Make sure ``folder_path`` exists as a directory and report what happened.

    Missing parent directories are created as well.

    Args:
        folder_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder_path`` exists but is not a directory
            (for example a regular file with the same name).
    """
    # isdir (not exists) so that a plain file at this path is not mistaken
    # for an already-existing folder.
    if os.path.isdir(folder_path):
        print(f"Folder already exists: {folder_path}")
        return

    # exist_ok=True tolerates the folder being created by someone else between
    # the check above and this call; it still raises if the path is a file.
    os.makedirs(folder_path, exist_ok=True)
    print(f"Created folder: {folder_path}")

# Input NetCDF file; point this at your own file
netcdf_file = "SeaData/NetCDF/S_tau10d_seagrid.nc"

# Directory that receives one CSV per grid point
output_folder = "SeaData/csv"

# Open the file as an xarray.Dataset and pick out the 'dbiastg' variable
dataset = xr.open_dataset(netcdf_file)
dbiastg = dataset['dbiastg']

create_folder_if_not_exists(output_folder)

# Write one time-series CSV for every (lat, lon) combination on the grid
for lat_value in dbiastg.lat.values:
    for lon_value in dbiastg.lon.values:
        # 'dbiastg' along time at this grid point
        dbiastg_values = dbiastg.sel(lat=lat_value, lon=lon_value)

        # Two columns: the time axis and the values at this point
        df = pd.DataFrame(
            {'time': dbiastg.time.values, 'dbiastg': dbiastg_values.values}
        )

        # The file name encodes the coordinates of the grid point
        output_filename = f"dbiastg_lat_{lat_value}_lon_{lon_value}.csv"
        df.to_csv(os.path.join(output_folder, output_filename), index=False)

        print(f"Created {output_filename} with {len(df)} rows")


In [None]:
import xarray as xr
import numpy as np

# This cell uses `os` and `pd` below; import them here so it does not rely on
# an earlier cell having done so. `create_folder_if_not_exists` is still
# expected to be defined by an earlier cell of this notebook.
import os
import pandas as pd

# Path to the NetCDF file
netcdf_file = "SeaData/NetCDF/S_tau10d_seagrid.nc"

# Path to the output folder
output_folder = "SeaData/csv2"


# Open the NetCDF file as an xarray.Dataset
dataset = xr.open_dataset(netcdf_file)

# Select the 'dbiastg' variable
dbiastg = dataset['dbiastg']

create_folder_if_not_exists(output_folder)

# np.interp works on plain numbers, not datetime64, so express the time axis
# as seconds elapsed since the first timestamp.
# assumes 'time' is datetime64 and sorted ascending (np.interp needs
# increasing x-coordinates) — TODO confirm
time_values = dbiastg.time.values
time_seconds = (time_values - time_values[0]) / np.timedelta64(1, 's')

# Fill NaN values in 'dbiastg' by interpolating along time for each
# (lat, lon) pair, and write the resulting series to its own CSV file
for lat_value in dbiastg.lat.values:
    for lon_value in dbiastg.lon.values:
        # Get 'dbiastg' values at the specified lat and lon
        dbiastg_values = dbiastg.sel(lat=lat_value, lon=lon_value)
        series = dbiastg_values.values

        # Boolean mask of the samples that hold a usable (finite) value
        non_nan_indices = np.isfinite(series)

        if non_nan_indices.any():
            # Interpolate over the gaps; cast back so the result keeps the
            # dtype of the stored variable
            interpolated_dbiastg_values = np.interp(
                time_seconds,
                time_seconds[non_nan_indices],
                series[non_nan_indices],
            ).astype(series.dtype)

            # Replace the original 'dbiastg' values with the interpolated values
            dbiastg.loc[dict(lat=lat_value, lon=lon_value)] = interpolated_dbiastg_values
        else:
            print(f"No non-NaN values available for lat={lat_value}, lon={lon_value}")
            # Nothing to interpolate from: export the series unchanged
            interpolated_dbiastg_values = series

        # Build the table from the interpolated array itself; the selection in
        # `dbiastg_values` was taken before the assignment above and is not
        # guaranteed to reflect it.
        data = {'time': time_values, 'dbiastg': interpolated_dbiastg_values}
        df = pd.DataFrame(data)

        # Save the DataFrame to a CSV file
        output_filename = f"dbiastg_lat_{lat_value}_lon_{lon_value}.csv"
        output_path = os.path.join(output_folder, output_filename)
        df.to_csv(output_path, index=False)

        print(f"Created {output_filename} with {len(df)} rows")

print("Finished replacing NaN values in 'dbiastg' using numpy.interp")
