In [4]:
import os
from google.cloud import storage
from google.oauth2 import service_account
from dotenv import load_dotenv
load_dotenv()

def download_all_10k_texts(
    local_output_folder,
    service_account_path=None,
    *,
    bucket_name="funwai-resume.firebasestorage.app",
    prefix="company_details/EDGAR (US)",
):
    """
    Download every .txt object stored under a Cloud Storage prefix
    and save the files into a local folder.

    Objects are saved flat, under their own file name (the part of the
    object name after the last "/"). If two objects share a file name,
    the later one is saved under its sub-path below ``prefix`` with "/"
    replaced by "__", so it does not overwrite the earlier download.

    Args:
        local_output_folder: Directory the files are written to; it is
            created if it does not exist.
        service_account_path: Path to the service-account key file. When
            None, the FIREBASE_SERVICE_ACCOUNT_JSON environment variable
            is used instead.
        bucket_name: Name of the bucket to read from.
        prefix: Only objects whose name starts with this prefix are
            listed. The match is a plain string prefix, not a folder
            boundary.

    Returns:
        None. Progress and a final summary are printed.

    Raises:
        FileNotFoundError: If no key path is configured or the path does
            not point to an existing file.
    """

    # Fall back to the path configured in the environment / .env file
    if service_account_path is None:
        service_account_path = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")

    # isfile rather than exists: a directory is not a usable key file, so
    # reject it here with the same clear error instead of failing later.
    if not service_account_path or not os.path.isfile(service_account_path):
        raise FileNotFoundError(
            f"❌ Service account file not found at: {service_account_path}"
        )

    # Create the storage client from the service-account credentials
    credentials = service_account.Credentials.from_service_account_file(service_account_path)
    client = storage.Client(credentials=credentials, project=credentials.project_id)
    bucket = client.bucket(bucket_name)

    # Ensure local output folder exists
    os.makedirs(local_output_folder, exist_ok=True)

    print("🔍 Checking for .txt files in bucket...")

    used_names = set()  # local file names already written during this run
    download_count = 0

    for blob in bucket.list_blobs(prefix=prefix):
        if not blob.name.endswith(".txt"):  # Only download .txt files
            continue

        filename = blob.name.rsplit("/", 1)[-1]
        if filename in used_names:
            # Same file name in another sub-folder: keep both by flattening
            # the path below the prefix into the local name.
            filename = blob.name[len(prefix):].strip("/").replace("/", "__")
            print(f"⚠️ Duplicate file name for {blob.name}; saving as {filename}")
        used_names.add(filename)

        local_path = os.path.join(local_output_folder, filename)

        print(f"⬇️ Downloading {filename} ...")

        blob.download_to_filename(local_path)
        download_count += 1

    if download_count == 0:
        print("⚠️ No .txt files found in the specified folder.")
    else:
        print(f"✅ Download complete. {download_count} files saved to {local_output_folder}")


# ---- Fetch all 10-K text files into the local folder ----

# Change the folder below if the files should be saved somewhere else.
download_all_10k_texts(local_output_folder="./clean_10k_texts")


🔍 Checking for .txt files in bucket...
⬇️ Downloading AAPL_2025_10K.txt ...
⬇️ Downloading ABBV_2024_10K.txt ...
⬇️ Downloading ABNB_2024_10K.txt ...
⬇️ Downloading ABT_2024_10K.txt ...
⬇️ Downloading ACN_2025_10K.txt ...
⬇️ Downloading ADBE_2024_10K.txt ...
⬇️ Downloading ADI_2024_10K.txt ...
⬇️ Downloading AEP_2024_10K.txt ...
⬇️ Downloading AJG_2024_10K.txt ...
⬇️ Downloading AMAT_2024_10K.txt ...
⬇️ Downloading AMCR_2025_10K.txt ...
⬇️ Downloading AMD_2024_10K.txt ...
⬇️ Downloading AME_2024_10K.txt ...
⬇️ Downloading AMZN_2024_10K.txt ...
⬇️ Downloading AON_2024_10K.txt ...
⬇️ Downloading A_2024_10K.txt ...
⬇️ Downloading BAC_2024_10K.txt ...
⬇️ Downloading BALL_2024_10K.txt ...
⬇️ Downloading BA_2024_10K.txt ...
⬇️ Downloading BBY_2025_10K.txt ...
⬇️ Downloading BKNG_2024_10K.txt ...
⬇️ Downloading BKR_2024_10K.txt ...
⬇️ Downloading BMY_2024_10K.txt ...
‚¨áÔ∏è Downloading BRK-B_2024_10