In [1]:
import os
import xarray
import re
import pandas as pd
import json
import tempfile
import boto3
import rasterio
from datetime import datetime
from dateutil.relativedelta import relativedelta

%env AWS_PROFILE='veda-smce-mfa'

env: AWS_PROFILE='veda-smce-mfa'


In [2]:
# Where the rasters are listed from and where the rewritten copies are uploaded.
bucket_name = 'ghgc-data-store-dev'
prefix = 'geos-oco2/'
collection_name = 'oco2geos-co2-daygrid-v10r'
new_cog_folder = 'updated_with_nodata'

# boto3 client used for listing and uploading objects.
session = boto3.Session(profile_name='veda-smce-mfa')
s3_client = session.client('s3')

# rasterio environment created with the same profile name; entered later
# as a context manager around the raster reads and writes.
raster_io_session = rasterio.env.Env(profile_name='veda-smce-mfa')

In [3]:
def get_all_s3_keys(bucket, prefix, client=None):
    """Return the keys of all ``.tif`` objects under ``prefix`` in an S3 bucket.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix the listing is restricted to.
        client: Object exposing ``list_objects_v2`` (e.g. a boto3 S3 client).
            Defaults to the notebook-level ``s3_client``.

    Returns:
        list[str]: Matching keys in listing order; empty when nothing matches.
    """
    if client is None:
        client = s3_client

    keys = []
    kwargs = {"Bucket": bucket, "Prefix": prefix}
    while True:
        resp = client.list_objects_v2(**kwargs)
        # "Contents" is missing from the response when no object matches.
        for obj in resp.get("Contents", []):
            if obj["Key"].endswith(".tif"):
                keys.append(obj["Key"])

        # The listing is paginated: continue until no continuation token is returned.
        token = resp.get("NextContinuationToken")
        if token is None:
            break
        kwargs["ContinuationToken"] = token

    return keys

In [4]:
# Collect every GeoTIFF key under the prefix, then show the first file name.
keys = get_all_s3_keys(bucket_name, prefix)
keys[0].rsplit('/', 1)[-1]

'oco2_GEOS_XCO2PREC_L3CO2_day_B10206Ar_20150101.tif'

In [5]:
# Value written as the "no data" marker in every rewritten raster.
NEW_NODATA = -9999

with raster_io_session:
    for key in keys:
        # Read pixels and metadata, then release the S3 handle before writing/uploading.
        with rasterio.open(f's3://{bucket_name}/{key}') as src:
            data = src.read()
            meta = src.meta.copy()
            old_nodata = src.nodata

        # Advertise the new "no data" value in the output metadata.
        # NOTE(review): src.meta presumably carries no creation options (tiling,
        # compression), so the output may not be a COG — confirm if that matters.
        meta.update(nodata=NEW_NODATA)

        # Replace the original "no data" pixels with the new value.
        if old_nodata is not None:
            if old_nodata != old_nodata:
                # NaN never compares equal to anything, so `data == nan` would
                # match no pixel; select NaN pixels via self-inequality instead.
                stale = data != data
            else:
                stale = data == old_nodata
            data[stale] = NEW_NODATA

        # Keep the path below the third folder level when the key is that deep;
        # otherwise fall back to the file name so that shallow keys do not all
        # collapse onto the same destination key and overwrite each other.
        parts = key.split('/')
        relative_key = '/'.join(parts[3:]) or parts[-1]
        destination_key = f"{new_cog_folder}/{collection_name}/{relative_key}"

        # Reserve a temporary path; the file is removed explicitly below.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file_path = temp_file.name

        try:
            # Write the updated data to the temporary file
            with rasterio.open(temp_file_path, "w", **meta) as dst:
                dst.write(data)

            # Upload the temporary file to S3
            s3_client.upload_file(
                Filename=temp_file_path,
                Bucket=bucket_name,
                Key=destination_key,
            )
        finally:
            # Clean up the temporary file even when the write or upload fails.
            os.remove(temp_file_path)