In [1]:
from croniter import croniter
from tqdm import tqdm
from dotenv import load_dotenv

import json
import logging
import os
import re
from datetime import datetime, timedelta
import base64
import hashlib
from dataclasses import asdict

from log import UplightLogger
from manifest import ManifestType
from route import Route
from manifest_service import ManifestService
from route import Route
from utils import get_google_oauth_token, extract_bucket_from_path, Event
from gcs import GCS

# Workaround External Manifest File Generation

The PubSub Cloud Scheduler job is not triggering the external manifest generation, so this notebook uses the same codebase to generate the external manifest file and upload it directly to GCP storage.

## Config and Defaults for exelon_comed tenant

In [2]:
# Load variables from a local .env file (python-dotenv) so the os.environ
# lookups later in this cell can see them.
load_dotenv()
# Logger for this manual run; "main-manual" is the name handed to UplightLogger.
logger = UplightLogger("main-manual")

# Event name/step set on the logger before the first log call.
# NOTE(review): presumably UplightLogger attaches these to every record — confirm.
logger.eventName = "manual external manifest"
logger.eventStep = "Init"

# Enable GCP logging for Cloud Environment
# (left disabled here; uncomment when running in the cloud environment)
# UplightLogger.enable_gcp_logging()
logger.log("Initiated")
logger.log("Request received for EXTERNAL MANIFEST file creation")

# Versions and deployment coordinates, all read from the environment.
# Each value is None when the corresponding variable is not set.
MANIFEST_VERSION = os.getenv("manifest_version")
UIS_VERSION = os.getenv("uis_version")
ENV = os.getenv("env")
SOV = os.getenv("sov")

TENANT_NAME = os.getenv("tenant_name")
TENANT_ID = os.getenv("tenant_id")

# Incoming bucket name, built as uplight-<tenant_name>-incoming-<env>-<sov>.
IN_BUCKET = "uplight-{}-incoming-{}-{}".format(TENANT_NAME, ENV, SOV)
DEFAULT_DELIM = "comma"  # comma, tab, pipe, semicolon, other

# Load the route table for the current environment and keep only the routes
# whose "tenant_id" matches TENANT_ID.
# The file is read inside a context manager so the handle is closed right away
# instead of being left open for the lifetime of the kernel.
with open(f"manifest_{ENV}_route.json", "r") as routes_file:
    manifest_routes = [
        route for route in json.load(routes_file) if route["tenant_id"] == TENANT_ID
    ]

# File type and delimiter defaults, keyed by file extension (leading dot included).
FILE_CONFIG = {
    ".csv": dict(file_type="csv", delimiter="comma"),
    ".tsv": dict(file_type="tsv", delimiter="tab"),
}

# Template with every per-file field initialised to an empty string.
# NOTE(review): presumably the shape of one entry in the manifest "files" list — confirm.
file_obj = dict.fromkeys(
    (
        "filename",
        "as_of_datetime",
        "file_type",
        "delimiter",
        "md5_checksum",
        "encryption_algorithm",
        "compression_algorithm",
        "entity_type",
    ),
    "",
)

# Manifest skeleton: version metadata taken from the environment-derived
# constants, plus a "files" list that starts out empty.
manifest_obj = dict(
    metadata=dict(
        manifest_version=MANIFEST_VERSION,
        uis_version=UIS_VERSION,
    ),
    files=[],
)


# Override every route's "destination_bucket" in place so it points at the
# tenant's "unvalidated" bucket for this environment and sov.
for route in manifest_routes:
    route.update(destination_bucket=f"uplight-{TENANT_NAME}-unvalidated-{ENV}-{SOV}")

# Files are read from the tenant's incoming bucket, through a GCS client
# authenticated with a Google OAuth token fetched at this point.
source_bucket = IN_BUCKET
gcs = GCS(oauth_token=get_google_oauth_token())


# Monthly schedule ("0 0 1 * *": 00:00 on the 1st of every month), anchored at now.
monthly_itr = croniter("0 0 1 * *", datetime.now())
# The previous tick of that schedule is used as the "last run time" cutoff.
# NOTE(review): relative to now, get_prev presumably yields the most recent
# 1st-of-month midnight, i.e. the start of the current month rather than a full
# month back — confirm this is the intended window.
last_run_time = monthly_itr.get_prev(datetime)

# Key patterns of all routes kept for this tenant.
file_patterns = [route_cfg["key_pattern"] for route_cfg in manifest_routes]
print(f"fetched following file patterns: {file_patterns}")

fetched following file patterns: ['^customer[a-zA-Z0-9_-]*(\\.csv)', '^interval[a-zA-Z0-9_-]*(\\.csv)', '^billing[a-zA-Z0-9_-]*(\\.csv)']


In [3]:



# Ask GCS for the files in the source bucket that match the route key patterns.
# NOTE(review): presumably only files newer than last_run_time are returned — confirm in GCS.get_latest_files.
latest_files = gcs.get_latest_files(source_bucket, last_run_time, file_patterns)
print(f"Found {len(latest_files)} files to process in {source_bucket}")
print(f"Latest files: {latest_files}")

# Build the event by hand, since this notebook stands in for the scheduler-triggered run.
req_obj = Event(
    bucket=source_bucket,
    files=latest_files,
    event_type="OBJECT_FINALIZE",
    batch_id="1234",  # hard-coded batch id for this manual run
)

logger.log("Request ", level=logging.DEBUG, **asdict(req_obj))

# Disabled in the original notebook; kept for reference.
# manifest_routes = [ManifestService(**route, events=[]) for route in manifest_routes]

# Hand the event to the router for the current environment.
route = Route(ENV)
route.trigger(req_obj)

# Report the outcome; a request that was not processed is logged as an error.
if not route.is_processed:
    logger.log(f"Route not found for {req_obj.bucket}; Skipping", level=logging.ERROR)
else:
    logger.log("Request Processed Successfully")

Found 3 files to process in uplight-exelon-comed-incoming-dev-rotw
Latest files: ['billing_bat-13.csv', 'customer_bat-13.csv', 'interval_bat-13.csv']
Blob /tmp/exelon-comed_external_temp_2023-08-25 19:27:38.json has been copied to exelon-comed_external_manifest_2023-08-25 19:27:41.json
Removing the file /tmp/exelon-comed_external_temp_2023-08-25 19:27:38.json


## References

- [Github: Uplight/up-ds-external-manifest-handler](https://github.com/Uplight-Inc/up-ds-manifest-service)
- [Confluence: Avangrid External Manifest Generation](https://uplightinc.atlassian.net/wiki/spaces/DS/pages/6641288612/Avangrid+External+Manifest+generation)
- [Confluence: Manifest File Creation](https://uplightinc.atlassian.net/wiki/spaces/DS/pages/6544000926/Manifest+file+creation+WIP)
- [Confluence: Connect Ingest Pipeline End to End](https://uplightinc.atlassian.net/wiki/spaces/PE/pages/7029752062/Connect+Ingest+Pipeline+End+to+End)
- [Confluence: External Manifest Creation using Cloud function](https://uplightinc.atlassian.net/wiki/spaces/DS/pages/6625460269/External+Manifest+Creation+using+Cloud+function)