## Retrieving data from S3 buckets

### Getting a whole object

#### Disk-based access

In [1]:
import earthkit.data as ekd

# Request describing a single S3 object: endpoint host, bucket and object key.
req = {
    "endpoint": "object-store.os-api.cci1.ecmwf.int",
    "bucket": "earthkit-test-data-public",
    "objects": "test6.grib",
}

# Disk-based access: no stream option is given, so the whole object is
# retrieved (see the download progress bar in the output) before it is read.
# anon=True: the bucket is public, so no credentials are passed.
ds = ekd.from_source("s3", req, anon=True)
ds.ls()

test6.grib:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
1,ecmf,u,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
2,ecmf,v,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
3,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
4,ecmf,u,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
5,ecmf,v,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll


#### Reading as a stream

In [2]:
# Same single-object request as above; `req` is reused by the next two cells.
req = {
    "endpoint": "object-store.os-api.cci1.ecmwf.int",
    "bucket": "earthkit-test-data-public",
    "objects": "test6.grib",
}

# stream=True: read the object as a stream and iterate over it field by field.
ds = ekd.from_source("s3", req, stream=True, anon=True)

for field in ds:
    # field is a GribField object. It gets deleted when going out of scope.
    print(field)


GribField(t,1000,20180801,1200,0,0)
GribField(u,1000,20180801,1200,0,0)
GribField(v,1000,20180801,1200,0,0)
GribField(t,850,20180801,1200,0,0)
GribField(u,850,20180801,1200,0,0)
GribField(v,850,20180801,1200,0,0)


In [3]:
# Re-create the stream, reusing `req` from the previous cell.
ds = ekd.from_source("s3", req, stream=True, anon=True)

# batched(2) yields the stream in groups of two fields.
for batch in ds.batched(2):
    # batch is a fieldlist holding the fields of the current group
    print(f"len={len(batch)}")
    for field in batch:
        print(f" {field}")

len=2
 GribField(t,1000,20180801,1200,0,0)
 GribField(u,1000,20180801,1200,0,0)
len=2
 GribField(v,1000,20180801,1200,0,0)
 GribField(t,850,20180801,1200,0,0)
len=2
 GribField(u,850,20180801,1200,0,0)
 GribField(v,850,20180801,1200,0,0)


In [4]:
# read_all=True: the stream is consumed in full, so the result can be listed
# with ls() like the disk-based fieldlist (note: no download progress bar in
# the output). Reuses `req` defined in an earlier cell.
ds = ekd.from_source(
    "s3",
    req,
    stream=True,
    read_all=True,
    anon=True,
)
ds.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
1,ecmf,u,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
2,ecmf,v,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
3,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
4,ecmf,u,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
5,ecmf,v,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll


### Getting multiple objects

In [5]:
# "objects" may be a list of keys to fetch several objects from one bucket.
req = {
    "endpoint": "object-store.os-api.cci1.ecmwf.int",
    "bucket": "earthkit-test-data-public",
    "objects": ["test6.grib", "tuv_pl.grib"],
}

# The result is a single fieldlist; its length counts the fields of both objects.
ds = ekd.from_source("s3", req, anon=True)
len(ds)

  0%|          | 0/2 [00:00<?, ?it/s]

tuv_pl.grib:   0%|          | 0.00/4.22k [00:00<?, ?B/s]

24

### Getting parts of an object

In [6]:
# An object can be given as a dict with a "parts" entry to fetch only a byte
# range. The part here is an (offset, length) pair: 480 bytes starting at
# byte 240, which the listing below shows to be the second and third messages.
req = {
    "endpoint": "object-store.os-api.cci1.ecmwf.int",
    "bucket": "earthkit-test-data-public",
    "objects": {"object": "test6.grib", "parts": (240, 480)},
}

ds = ekd.from_source("s3", req, anon=True)
ds.ls()

test6.grib:   0%|          | 0.00/480 [00:00<?, ?B/s]

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,u,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
1,ecmf,v,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll


In [7]:
# "parts" may also be a list of (offset, length) pairs. Here two separate
# 240-byte ranges are requested, at offsets 0 and 480; the listing below shows
# they correspond to the first and third messages of the object.
req = {
    "endpoint": "object-store.os-api.cci1.ecmwf.int",
    "bucket": "earthkit-test-data-public",
    "objects": {"object": "test6.grib", "parts": [(0, 240), (480, 240)]},
}

ds = ekd.from_source("s3", req, anon=True)
ds.ls()

test6.grib:   0%|          | 0.00/480 [00:00<?, ?B/s]

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
1,ecmf,v,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll


### Getting parts of multiple objects

In [8]:
# Combine both features: a list of objects, each with its own "parts" given as
# an (offset, length) pair in bytes.
req = {
    "endpoint": "object-store.os-api.cci1.ecmwf.int",
    "bucket": "earthkit-test-data-public",
    "objects": [
        {"object": "test6.grib", "parts": (0, 240)},
        {"object": "tuv_pl.grib", "parts": (2400, 240)},
    ],
}

# One field is read from each object, in the order the objects were listed.
ds = ekd.from_source("s3", req, anon=True)
ds.ls()

  0%|          | 0/2 [00:00<?, ?it/s]

tuv_pl.grib:   0%|          | 0.00/240 [00:00<?, ?B/s]

test6.grib:   0%|          | 0.00/240 [00:00<?, ?B/s]

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,1000,20180801,1200,0,an,0,regular_ll
1,ecmf,u,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


### Using parts with a stream

In [9]:
# Byte-range request: 480 bytes of test6.grib starting at offset 240.
req = {
    "endpoint": "object-store.os-api.cci1.ecmwf.int",
    "bucket": "earthkit-test-data-public",
    "objects": {"object": "test6.grib", "parts": (240, 480)},
}

# "parts" works together with stream=True: only the fields inside the
# requested range are yielded by the stream.
ds = ekd.from_source("s3", req, stream=True, anon=True)

for field in ds:
    # field is a GribField object. It gets deleted when going out of scope.
    print(field)

GribField(u,1000,20180801,1200,0,0)
GribField(v,1000,20180801,1200,0,0)
