In [25]:
## this script is to convert SAM index txt file to nc file so i can plot this against DOT, WSC, OSC ... then look at correlation etc

import numpy as np
import pandas as pd
import xarray as xr

# Load the data
txt_file = "/Users/iw2g24/PycharmProjects/SSH_project/Data/newsam.1957.2007.txt"  # file name
df = pd.read_csv(txt_file, delimiter=r"\s+", index_col=0)  #  first column (years) as the index

# Get years from index and months from columns
years = df.index.values
months = df.columns.values

# Create the date range (first of each month)
dates = pd.to_datetime([f"{year}-{month}-01" for year in years for month in months], format='%Y-%b-%d')
# set to 12:00:00 and nanoseconds precision
dates = pd.to_datetime([f"{date.strftime('%Y-%m-%d')}T12:00:00.000000000" for date in dates])


# Flatten the SAM values
sam_values = df.values.flatten()

# Create the xarray Dataset
ds = xr.Dataset(
    {
        "SAM": ("time", sam_values)  # 'SAM' variable with time dimension
    },
    coords={
        "time": dates  # Using the generated dates as the time coordinate
    }
)

ds.to_netcdf("monthly_sam.nc")

print(ds)



<xarray.Dataset> Size: 13kB
Dimensions:  (time: 828)
Coordinates:
  * time     (time) datetime64[ns] 7kB 1957-01-01T12:00:00 ... 2025-12-01T12:...
Data variables:
    SAM      (time) float64 7kB -0.87 -2.27 0.07 -1.97 -2.5 ... nan nan nan nan


In [35]:
import pandas as pd
import numpy as np
import xarray as xr

# Load the data
txt_file = "/Users/iw2g24/PycharmProjects/SSH_project/Data/newsam.1957.2007.seas.txt"  # Change this to your file name
df = pd.read_csv(txt_file, delimiter=r"\s+", index_col=0)  # Set the first column (years) as the index

# Get years
years = df.index.values  # [1957, 1958, ...]

# Define seasonal time coordinates
date_mapping = {
    'ANN': '12-01',  # Annual = December 1st
    'AUT': '09-01',  # Autumn = September 1st
    'WIN': '12-01',  # Winter = December 1st
    'SPR': '03-01',  # Spring = March 1st
    'SUM': '06-01',  # Summer = June 1st
}

# Create time coordinates for each season
time_coords = {season: pd.to_datetime([f"{year}-{date_mapping[season]}T12:00:00.000000000" for year in years])
               for season in df.columns}

# Create an xarray Dataset with separate coordinates for each season
ds = xr.Dataset(
    {
        "ANN_sam": (["year"], df["ANN"].values),
        "AUT_sam": (["year"], df["AUT"].values),
        "WIN_sam": (["year"], df["WIN"].values),
        "SPR_sam": (["year"], df["SPR"].values),
        "SUM_sam": (["year"], df["SUM"].values),
    },
    coords={
        "year": years,          # Year coordinate
        "ANN_time": ("year", time_coords["ANN"]),
        "AUT_time": ("year", time_coords["AUT"]),
        "WIN_time": ("year", time_coords["WIN"]),
        "SPR_time": ("year", time_coords["SPR"]),
        "SUM_time": ("year", time_coords["SUM"]),
    }
)

# Save the dataset to a NetCDF file
ds.to_netcdf("seas_sam.nc")
print("Conversion complete! File saved as seas.nc")

print(ds)

Conversion complete! File saved as seas.nc
<xarray.Dataset> Size: 6kB
Dimensions:   (year: 68)
Coordinates:
  * year      (year) int64 544B 1957 1958 1959 1960 1961 ... 2021 2022 2023 2024
    ANN_time  (year) datetime64[ns] 544B 1957-12-01T12:00:00 ... 2024-12-01T1...
    AUT_time  (year) datetime64[ns] 544B 1957-09-01T12:00:00 ... 2024-09-01T1...
    WIN_time  (year) datetime64[ns] 544B 1957-12-01T12:00:00 ... 2024-12-01T1...
    SPR_time  (year) datetime64[ns] 544B 1957-03-01T12:00:00 ... 2024-03-01T1...
    SUM_time  (year) datetime64[ns] 544B 1957-06-01T12:00:00 ... 2024-06-01T1...
Data variables:
    ANN_sam   (year) float64 544B -4.24 -1.27 0.47 1.52 ... 3.03 4.18 2.04 1.63
    AUT_sam   (year) float64 544B -2.52 -2.97 -0.94 0.74 ... 0.34 1.93 0.74 2.07
    WIN_sam   (year) float64 544B -0.68 -0.92 -1.22 0.32 ... 0.03 -0.11 -1.21
    SPR_sam   (year) float64 544B -3.55 1.42 3.2 1.0 0.85 ... 2.8 4.25 0.25 1.81
    SUM_sam   (year) float64 544B -2.52 1.12 0.31 -0.08 ... 3.25 3.83 