In [1]:
import boto3
import pandas as pd
import io

# Build a boto3 session from a named credential profile.
# NOTE(review): the original note pointed at ./.aws/credentials; boto3 normally reads the
# shared credentials file from the user's home directory (~/.aws/credentials) - confirm.
# Assumes a credential profile named 'dev' exists.
session = boto3.Session(profile_name='dev')
# S3 client created from that session; the cells below use it for every S3 call
s3_client = session.client('s3')
# change this to the bucket you want to read from / write to:
s3_bucket = 'drb-estuary-salinity'

# Writing data to S3

## Write from a local file

In [2]:
# Path of the local file to upload to S3
local_fpath = './data/usgs_nwis_01477050.csv'
# Key (location within the S3 bucket) under which the file will be saved
s3_fpath = '00_shared_data/usgs_nwis_01477050_fromfile.csv'
# Upload the file. Opening it in a `with` block guarantees the local file handle
# is closed once the upload finishes, even if put_object raises.
with open(local_fpath, 'rb') as local_file:
    response = s3_client.put_object(Body=local_file, Bucket=s3_bucket, Key=s3_fpath)
# Read the version id from the 'x-amz-version-id' HTTP response header
# (the final .get() yields None when that header is absent)
version_fromfile = response.get("ResponseMetadata").get('HTTPHeaders').get('x-amz-version-id')
print(f'file version: {version_fromfile}')

file version: WSymHRVvZhDELGYs_ycyoKpkH619xf60


## Write from pandas df

In [3]:
# Read a local CSV into a pandas DataFrame to use as the example dataset
local_fpath = './data/usgs_nwis_01477050.csv'
df = pd.read_csv(local_fpath)
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV was
# saved together with its index - confirm whether that column should be kept.
# Preview the first rows
df.head()

Unnamed: 0,datetime,121609_00010,121607_00095,121610_00300,121612_00301,121608_00400,243095_99133,243094_99134
0,2019-04-01,8.822222,316.944444,11.289189,97.361111,7.557576,,
1,2019-04-02,8.54375,314.114583,11.205208,96.0,7.50625,,
2,2019-04-03,8.834375,313.635417,11.20625,96.677083,7.519792,,
3,2019-04-04,9.365625,311.552083,11.161458,97.552083,7.503125,,
4,2019-04-05,9.278022,307.648352,11.161538,97.318681,7.5,,


In [4]:
# Key (location within the S3 bucket) under which the DataFrame will be saved
s3_fpath = '00_shared_data/usgs_nwis_01477050_fromdf.csv'
# Serialize the DataFrame to CSV text (index omitted) and upload that text as the object body
response = s3_client.put_object(
    Bucket=s3_bucket,
    Key=s3_fpath,
    Body=df.to_csv(index=False),
)
# Pull the version id out of the HTTP response headers
response_headers = response.get("ResponseMetadata").get('HTTPHeaders')
version_fromdf = response_headers.get('x-amz-version-id')
print(f'file version: {version_fromdf}')

file version: CPVYosH_Y3XI5hEo2h8zQEW7Tdp_ODzB


# Reading data from S3

## List files in S3

In [5]:
# Key prefix to look for inside the bucket
prefix = '00_shared_data'
# Walk every page of results: a single list_objects_v2 call returns only one
# page of keys, so a paginator is needed to see all matching objects.
paginator = s3_client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=s3_bucket, Prefix=prefix):
    # 'Contents' is missing from the response when nothing matches the prefix;
    # default to an empty list instead of raising KeyError.
    for obj in page.get('Contents', []):
        print(obj['Key'])

00_shared_data/
00_shared_data/usgs_nwis_01477050_fromdf.csv
00_shared_data/usgs_nwis_01477050_fromfile.csv


## Read data and save to a local file

In [6]:
# Key of the object in the S3 bucket to retrieve
s3_fpath = '00_shared_data/usgs_nwis_01477050_fromfile.csv'
# Local path the downloaded copy is written to
local_fpath = './data/usgs_nwis_01477050_downloaded.csv'
# Download the object to the local path
s3_client.download_file(
    Bucket=s3_bucket,
    Key=s3_fpath,
    Filename=local_fpath,
)

## Read data into a pandas df

In [7]:
# Key of the object in the S3 bucket to retrieve
s3_fpath = '00_shared_data/usgs_nwis_01477050_fromfile.csv'
# Fetch the object; its "Body" entry is handed directly to pandas for CSV parsing
obj = s3_client.get_object(
    Bucket=s3_bucket,
    Key=s3_fpath,
)
df = pd.read_csv(obj.get("Body"))
# Preview the first rows
df.head()

Unnamed: 0,datetime,121609_00010,121607_00095,121610_00300,121612_00301,121608_00400,243095_99133,243094_99134
0,2019-04-01,8.822222,316.944444,11.289189,97.361111,7.557576,,
1,2019-04-02,8.54375,314.114583,11.205208,96.0,7.50625,,
2,2019-04-03,8.834375,313.635417,11.20625,96.677083,7.519792,,
3,2019-04-04,9.365625,311.552083,11.161458,97.552083,7.503125,,
4,2019-04-05,9.278022,307.648352,11.161538,97.318681,7.5,,


# Versioning

## Get the list of available file versions

In [8]:
# Key of the object in the S3 bucket whose versions should be listed
s3_fpath = '00_shared_data/usgs_nwis_01477050_fromfile.csv'
# Request the versions; Prefix is a prefix match, so the response may also
# contain versions of other keys that merely start with s3_fpath
versions = s3_client.list_object_versions(Bucket=s3_bucket, Prefix=s3_fpath)
# Keep only the entries for exactly this key; fall back to an empty list when
# the response has no 'Versions' entry
file_versions = [
    version_info
    for version_info in versions.get('Versions', [])
    if version_info['Key'] == s3_fpath
]
display(file_versions)

[{'ETag': '"07de0739f00600345b46fa387f74d89a"',
  'Size': 24897,
  'StorageClass': 'STANDARD',
  'Key': '00_shared_data/usgs_nwis_01477050_fromfile.csv',
  'VersionId': 'WSymHRVvZhDELGYs_ycyoKpkH619xf60',
  'IsLatest': True,
  'LastModified': datetime.datetime(2021, 11, 19, 21, 57, 4, tzinfo=tzutc()),
  'Owner': {'DisplayName': 'gs-chs-dev-wma',
   'ID': '71818ee6aab503bfe46fc0d15fedb47b628eb41497ecfbc6b80b5096535847f3'}},
 {'ETag': '"07de0739f00600345b46fa387f74d89a"',
  'Size': 24897,
  'StorageClass': 'STANDARD',
  'Key': '00_shared_data/usgs_nwis_01477050_fromfile.csv',
  'VersionId': 'qXkbGuRVaNCZ6V8Z5GZN88MBD5h2dOah',
  'IsLatest': False,
  'LastModified': datetime.datetime(2021, 11, 19, 20, 47, 52, tzinfo=tzutc()),
  'Owner': {'DisplayName': 'gs-chs-dev-wma',
   'ID': '71818ee6aab503bfe46fc0d15fedb47b628eb41497ecfbc6b80b5096535847f3'}},
 {'ETag': '"07de0739f00600345b46fa387f74d89a"',
  'Size': 24897,
  'StorageClass': 'STANDARD',
  'Key': '00_shared_data/usgs_nwis_01477050_fromf

## Read a specific version and save to a local file

In [9]:
# Object key and the version id of that object to retrieve
s3_fpath = '00_shared_data/usgs_nwis_01477050_fromfile.csv'
version = '35l6y2wEVuYpYn7ap1XscXCIXXeDpHtw'
# Local destination path; the version id is embedded in the file name
local_fpath = f'./data/usgs_nwis_01477050_{version}.csv'
# Download that specific version of the object
s3_client.download_file(
    Bucket=s3_bucket,
    Key=s3_fpath,
    Filename=local_fpath,
    ExtraArgs={'VersionId': version},
)

## Read a specific version into a pandas df

In [10]:
# Object key and the version id of that object to retrieve
s3_fpath = '00_shared_data/usgs_nwis_01477050_fromfile.csv'
version = '35l6y2wEVuYpYn7ap1XscXCIXXeDpHtw'
# Fetch that specific version of the object
obj = s3_client.get_object(Bucket=s3_bucket, Key=s3_fpath, VersionId=version)
# Hand the "Body" entry of the response to pandas and parse it as UTF-8 CSV
data = obj.get("Body")
df = pd.read_csv(data, encoding='utf8')
# Preview the first rows
df.head()

Unnamed: 0,datetime,121609_00010,121607_00095,121610_00300,121612_00301,121608_00400,243095_99133,243094_99134
0,2019-04-01,8.822222,316.944444,11.289189,97.361111,7.557576,,
1,2019-04-02,8.54375,314.114583,11.205208,96.0,7.50625,,
2,2019-04-03,8.834375,313.635417,11.20625,96.677083,7.519792,,
3,2019-04-04,9.365625,311.552083,11.161458,97.552083,7.503125,,
4,2019-04-05,9.278022,307.648352,11.161538,97.318681,7.5,,
