# LIST ALL OBJECTS IN THE BUCKET

In [1]:
from google.cloud import storage
import os

# Settings are read from the environment; os.getenv returns None when a
# variable is not set.
# NOTE(review): GCP_PROJ is not referenced in the visible part of this notebook.
GCP_PROJ = os.getenv("GCP_PROJECT_ID")
# Handed to storage.Client.from_service_account_json by the helper functions
# below (presumably the path to a service-account JSON key file -- confirm).
GCP_KEY_FILE = os.getenv("GCP_KEY_FILE")

# Name of the bucket that the listing and metadata cells below operate on.
bucket_name = "cgpdata"

In [2]:
def list_blobs(bucket_name):
    """Print the name of every blob stored in a bucket.

    A storage client is built from the service-account file referenced by
    ``GCP_KEY_FILE`` and used to enumerate the bucket's contents.

    Args:
        bucket_name: Name of the bucket to enumerate, e.g. "your-bucket-name".

    Returns:
        None. Blob names are written to standard output, one per line.
    """
    client = storage.Client.from_service_account_json(GCP_KEY_FILE)

    # Client.list_blobs needs google-cloud-storage >= 1.17.0. The response is
    # only fetched once the returned iterator is consumed, which the loop
    # below does.
    for entry in client.list_blobs(bucket_name):
        print(entry.name)

In [3]:
# Print the name of every object in the configured bucket.
list_blobs(bucket_name)

gold/
hist/
hist/2022-8-SPY_5min_alphavantage.csv
hist/2022-9-SPY_5min_alphavantage.csv
hist/2023-1-SPY_5min_alphavantage.csv
hist/2023-10-SPY_5min_alphavantage.csv
hist/2023-11-SPY_5min_alphavantage.csv
hist/2023-12-SPY_5min_alphavantage.csv
hist/2023-2-SPY_5min_alphavantage.csv
hist/2023-3-SPY_5min_alphavantage.csv
hist/2023-4-SPY_5min_alphavantage.csv
hist/2023-5-SPY_5min_alphavantage.csv
hist/2023-6-SPY_5min_alphavantage.csv
hist/2023-7-SPY_5min_alphavantage.csv
hist/2023-8-SPY_5min_alphavantage.csv
hist/2023-9-SPY_5min_alphavantage.csv
raw/
raw/2000-01-SPY_5min_alphavantage.csv
raw/2025-02-SPY-5min-alphavantage.csv
raw/2025-03-SPY-5min-alphavantage.csv
raw/2025-03-^GSPC-5min-yfinance.csv
silver/


## LIST OBJECTS WITH A PREFIX (i.e. folder name)

In [4]:

def list_blobs_with_prefix(bucket_name, prefix, delimiter=None):
    """Print the blobs in a bucket whose names begin with ``prefix``.

    Useful for listing the contents of a "folder" such as "public/".

    Without a delimiter the whole tree below the prefix is listed. With a
    delimiter, only the "files" directly inside the "folder" are listed and
    the "subfolders" are reported separately. Given these blobs:

        a/1.txt
        a/b/2.txt

    ``prefix='a/'`` with no delimiter prints both names:

        a/1.txt
        a/b/2.txt

    ``prefix='a/'`` with ``delimiter='/'`` prints only the blob directly
    under 'a/':

        a/1.txt

    followed by the entries of the listing's ``prefixes`` attribute, i.e. the
    "subfolders" under 'a/':

        a/b/

    Args:
        bucket_name: Name of the bucket to search.
        prefix: Leading part of the blob names to match.
        delimiter: Optional separator (typically "/") that limits the listing
            to one "folder" level. When falsy, no "Prefixes:" section is
            printed.

    Returns:
        None. Results are written to standard output.
    """
    client = storage.Client.from_service_account_json(GCP_KEY_FILE)

    # Client.list_blobs needs google-cloud-storage >= 1.17.0.
    listing = client.list_blobs(bucket_name, prefix=prefix, delimiter=delimiter)

    # The response is only fetched once the iterator is consumed, so the
    # blobs are walked first and `prefixes` is read afterwards.
    print("Blobs:")
    for entry in listing:
        print(entry.name)

    if not delimiter:
        return

    print("Prefixes:")
    for subfolder in listing.prefixes:
        print(subfolder)


In [5]:
# No trailing slash here: every object whose name begins with "raw" matches.
prefix = "raw"
list_blobs_with_prefix(bucket_name, prefix)

Blobs:
raw/
raw/2000-01-SPY_5min_alphavantage.csv
raw/2025-02-SPY-5min-alphavantage.csv
raw/2025-03-SPY-5min-alphavantage.csv
raw/2025-03-^GSPC-5min-yfinance.csv


In [6]:
# When this cell was run nothing was stored under this prefix, so only the
# "Blobs:" header appears in the output.
prefix = "raw/yf_sp500/"
list_blobs_with_prefix(bucket_name, prefix)

Blobs:


In [11]:
# Full object name, including the "raw/" folder prefix.
blob_name = "raw/2000-01-SPY_5min_alphavantage.csv"

In [None]:
# blob name must include prefix
def blob_metadata(bucket_name, blob_name):
    """Prints out a blob's metadata.

    A storage client is built from the service-account file referenced by
    ``GCP_KEY_FILE``; the blob is then fetched and its metadata fields are
    printed one per line.

    Args:
        bucket_name: Name of the bucket holding the object,
            e.g. 'your-bucket-name'.
        blob_name: Full object name including any "folder" prefix,
            e.g. 'raw/your-object-name'.

    Returns:
        None. The metadata is written to standard output. If the object does
        not exist, a single "not found" line is printed instead.
    """
    storage_client = storage.Client.from_service_account_json(GCP_KEY_FILE)
    bucket = storage_client.bucket(bucket_name)

    # Retrieve a blob, and its metadata, from Google Cloud Storage.
    # Note that `get_blob` differs from `Bucket.blob`, which does not
    # make an HTTP request.
    blob = bucket.get_blob(blob_name)

    # `get_blob` returns None for a missing object; report that explicitly
    # instead of failing with an AttributeError on the first print below.
    if blob is None:
        print(f"Blob {blob_name!r} not found in bucket {bucket_name!r}.")
        return

    print(f"Blob: {blob.name}")
    print(f"Bucket: {blob.bucket.name}")
    print(f"Storage class: {blob.storage_class}")
    print(f"ID: {blob.id}")
    print(f"Size: {blob.size} bytes")
    print(f"Updated: {blob.updated}")
    print(f"Generation: {blob.generation}")
    print(f"Metageneration: {blob.metageneration}")
    print(f"Etag: {blob.etag}")
    print(f"Owner: {blob.owner}")
    print(f"Component count: {blob.component_count}")
    print(f"Crc32c: {blob.crc32c}")
    print(f"md5_hash: {blob.md5_hash}")
    print(f"Cache-control: {blob.cache_control}")
    print(f"Content-type: {blob.content_type}")
    print(f"Content-disposition: {blob.content_disposition}")
    print(f"Content-encoding: {blob.content_encoding}")
    print(f"Content-language: {blob.content_language}")
    print(f"Metadata: {blob.metadata}")
    print(f"Medialink: {blob.media_link}")
    print(f"Custom Time: {blob.custom_time}")
    print("Temporary hold: ", "enabled" if blob.temporary_hold else "disabled")
    print(
        "Event based hold: ",
        "enabled" if blob.event_based_hold else "disabled",
    )
    print(f"Retention mode: {blob.retention.mode}")
    print(f"Retention retain until time: {blob.retention.retain_until_time}")
    # Only shown when a retention expiration time is set on the object.
    if blob.retention_expiration_time:
        print(
            f"retentionExpirationTime: {blob.retention_expiration_time}"
        )

In [13]:
# Fetch and print the metadata of the object named by blob_name.
blob_metadata(bucket_name, blob_name)

Blob: raw/2000-01-SPY_5min_alphavantage.csv
Bucket: cgpdata
Storage class: STANDARD
ID: cgpdata/raw/2000-01-SPY_5min_alphavantage.csv/1741290020391016
Size: 96602 bytes
Updated: 2025-03-06 19:40:20.422000+00:00
Generation: 1741290020391016
Metageneration: 1
Etag: COiQ9+ia9osDEAE=
Owner: None
Component count: None
Crc32c: ApH7Zw==
md5_hash: qlhZiM4qXCEeMJYZ6qY3oA==
Cache-control: None
Content-type: text/csv
Content-disposition: None
Content-encoding: None
Content-language: None
Metadata: None
Medialink: https://storage.googleapis.com/download/storage/v1/b/cgpdata/o/raw%2F2000-01-SPY_5min_alphavantage.csv?generation=1741290020391016&alt=media
Custom Time: None
Temporary hold:  disabled
Event based hold:  disabled
Retention mode: None
Retention retain until time: None
