# AMOCatlas conversion & compliance checker

The purpose of this notebook is to demonstrate the OceanSITES format(s) from `AMOCatlas`.

The demo is organised to show

- Step 1: Loading and plotting a sample dataset

- Step 2: Converting one dataset to a standard format

- Step 3: Checking the converted file for compliance with the AC1 specification

Note that when you submit a pull request, you should `clear all outputs` from your Python notebook for a cleaner merge.

In [None]:
import pathlib
import sys

# Resolve the current working directory and its parent. The parent is added
# to the module search path (so the `amocatlas` import below can be resolved
# from there) and is also used as the base for the data directory.
script_dir = pathlib.Path.cwd()
parent_dir = script_dir.parent
# Only extend sys.path once, so that re-running this cell does not pile up
# duplicate entries.
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))

import importlib

import xarray as xr
import os
from amocatlas import readers, plotters, standardise, utilities

In [None]:
# Directory used for writing data files: the "data" folder under parent_dir,
# kept as a plain string.
data_path = os.fspath(parent_dir / "data")

### Step 1: Load RAPID 26°N

In [None]:
# Quick start (data/moc_transports): read the sample dataset, then standardise
# it using the source file name recorded in its own attributes.
ds_rapid = readers.load_sample_dataset()
ds_rapid = standardise.standardise_rapid(
    ds_rapid,
    ds_rapid.attrs["source_file"],
)

# Full dataset (data/moc_transports): load every transport dataset and
# standardise each one the same way, keeping the results in load order.
datasetsRAPID = readers.load_dataset("rapid", transport_only=True)
standardRAPID = list(
    standardise.standardise_rapid(item, item.attrs["source_file"])
    for item in datasetsRAPID
)



In [None]:
# Plot the RAPID timeseries for the first standardised dataset, requesting
# both the raw series and the monthly resampling.
plotters.plot_amoc_timeseries(
    title="RAPID 26°N",
    data=[standardRAPID[0]],
    varnames=["moc_mar_hc10"],
    labels=[""],
    plot_raw=True,
    resample_monthly=True,
)

### Step 2: Convert to AC1 Format

The next step is to convert the standardised dataset to AC1 format, which follows OceanSITES conventions.

**Note**: This conversion currently fails because the `standardise.py` step doesn't add proper units to the `TIME` coordinate. This demonstrates the architectural principle that `convert.py` validates rather than assigns units.

In [None]:
from amocatlas import convert, writers, compliance_checker

# Attempt to convert the first standardised RAPID dataset to AC1 format.
# The whole workflow is wrapped in try/except so that a failure is reported
# in the output instead of stopping the notebook.
print("🔄 Attempting to convert RAPID data to AC1 format...")

try:
    ac1_datasets = convert.to_AC1(standardRAPID[0])
    ac1_ds = ac1_datasets[0]

    # Prefer the 'suggested_filename' attribute; if it is absent, fall back
    # to the '<id>.nc' naming used by the manual-fix demonstration later in
    # this notebook.
    ac1_filename = ac1_ds.attrs.get("suggested_filename")
    if ac1_filename is None:
        ac1_filename = f"{ac1_ds.attrs['id']}.nc"

    print("✅ Conversion successful!")
    print(f"  Suggested filename: {ac1_filename}")
    # `.sizes` is the name -> length mapping; `Dataset.dims` is deprecated
    # for that use.
    print(f"  Dimensions: {dict(ac1_ds.sizes)}")
    print(f"  Variables: {list(ac1_ds.data_vars.keys())}")

    # Save the dataset
    output_file = os.path.join(data_path, ac1_filename)
    success = writers.save_dataset(ac1_ds, output_file)

    if success:
        print(f"💾 Saved AC1 file: {output_file}")

        # Run compliance check on the file that was just written
        print("\n🔍 Running compliance check...")
        result = compliance_checker.validate_ac1_file(output_file)

        print(f"Status: {'✅ PASS' if result.passed else '❌ FAIL'}")
        print(f"Errors: {len(result.errors)}")
        print(f"Warnings: {len(result.warnings)}")

        if result.errors:
            # Only show the first three errors to keep the output short
            print("\nFirst few errors:")
            for i, error in enumerate(result.errors[:3], 1):
                print(f"  {i}. {error}")
    else:
        # Report a failed save explicitly instead of ending silently
        print("❌ Failed to save file")

except Exception as e:
    print(f"❌ Conversion failed: {e}")
    print("\nThis is expected because standardise.py needs to be updated to provide proper units.")
    print("The convert.py module validates that units are present rather than assigning them.")

In [None]:
# Show the attributes of the converted dataset. `ac1_ds` is only bound when
# the conversion attempt above succeeded, so look it up defensively to avoid
# a NameError on a fresh kernel when that conversion fails.
if globals().get("ac1_ds") is None:
    print("❌ No AC1 dataset available: the conversion above did not succeed.")
    ac1_attributes = None
else:
    ac1_attributes = plotters.show_attributes(ac1_ds)
# Leave the result as the last expression so the notebook displays it.
ac1_attributes

### Demonstration: Working conversion with manual units fix

To demonstrate what a successful conversion would look like, let's temporarily fix the TIME units and run the complete workflow:

In [None]:
# Temporarily fix the TIME units to demonstrate successful conversion
# (This would normally be done in standardise.py)
# Work on a copy so the units attribute is set on `demo_ds` rather than on
# the dataset held in `standardRAPID`.
demo_ds = standardRAPID[0].copy()
demo_ds['TIME'].attrs['units'] = 'seconds since 1970-01-01T00:00:00Z'

print("🔄 Converting RAPID data to AC1 format (with TIME units fixed)...")

try:
    ac1_datasets = convert.to_AC1(demo_ds)
    ac1_ds = ac1_datasets[0]

    print("✅ Conversion successful!")
    print(f"  Suggested filename: {ac1_ds.attrs['id']}.nc")
    print(f"  Dimensions: {dict(ac1_ds.sizes)}")
    print(f"  Variables: {list(ac1_ds.data_vars.keys())}")
    print(f"  TIME units: {ac1_ds.TIME.attrs.get('units')}")
    print(f"  TRANSPORT units: {ac1_ds.TRANSPORT.attrs.get('units')}")

    # Inspect the structure
    print("\n📊 Dataset structure:")
    print(f"  TRANSPORT shape: {ac1_ds.TRANSPORT.shape}")
    print(f"  Component names: {list(ac1_ds.TRANSPORT_NAME.values)}")
    print(f"  Global attributes: {len(ac1_ds.attrs)} attributes")

    # Save the dataset using the writers module; the file name is built from
    # the dataset's 'id' attribute.
    output_file = os.path.join(data_path, ac1_ds.attrs['id'] + ".nc")
    print(f"\n💾 Saving to: {output_file}")
    success = writers.save_dataset(ac1_ds, output_file)

    if success:
        print("✅ Successfully saved AC1 file!")

        # File size check
        file_size = os.path.getsize(output_file)
        print(f"  File size: {file_size:,} bytes")

    else:
        print("❌ Failed to save file")

except Exception as e:
    # Report the failure and print the full traceback, without stopping the
    # notebook.
    print(f"❌ Conversion failed: {e}")
    import traceback
    traceback.print_exc()

### Step 3: Compliance Checking

Run the AC1 compliance checker to validate the converted file against the specification:

In [None]:
# Run compliance check on the created file.
# Only proceed when an earlier cell defined `output_file` and that file
# actually exists on disk.
if 'output_file' in locals() and os.path.exists(output_file):
    print("🔍 Running AC1 compliance check...")

    result = compliance_checker.validate_ac1_file(output_file)

    print("\n📊 Compliance Results:")
    print(f"  Status: {'✅ PASS' if result.passed else '❌ FAIL'}")
    print(f"  File Type: {result.file_type}")
    print(f"  Errors: {len(result.errors)}")
    print(f"  Warnings: {len(result.warnings)}")

    if result.errors:
        # List at most five errors, then summarise how many were left out
        print(f"\n❌ Errors ({len(result.errors)} total):")
        for i, error in enumerate(result.errors[:5], 1):
            print(f"  {i}. {error}")
        if len(result.errors) > 5:
            print(f"  ... and {len(result.errors) - 5} more errors")

    if result.warnings:
        # List at most three warnings, then summarise how many were left out
        print(f"\n⚠️  Warnings ({len(result.warnings)} total):")
        for i, warning in enumerate(result.warnings[:3], 1):
            print(f"  {i}. {warning}")
        if len(result.warnings) > 3:
            print(f"  ... and {len(result.warnings) - 3} more warnings")

    # Show validation categories
    print("\n🔧 What the compliance checker validates:")
    print("  ✓ Filename pattern (OceanSITES conventions)")
    print("  ✓ Required dimensions and variables")
    print("  ✓ Variable attributes (units, standard_name, vocabulary)")
    print("  ✓ Global attributes (conventions, metadata)")
    print("  ✓ Data value ranges (coordinates, valid_min/max)")
    print("  ✓ CF convention compliance (dimension ordering)")

else:
    print("❌ No AC1 file available for compliance checking")
    print("Please ensure the conversion step above succeeded first.")