In [28]:
import os
import zipfile
from google.cloud import storage
import utils
import pyarrow.parquet as pq
import pandas as pd
from google.cloud import bigquery_datatransfer
from google.cloud import bigquery
from google.api_core.exceptions import NotFound
import geopandas as gpd
import fiona
import duckdb

In [2]:
# Authenticate against Google Cloud before any client is created.
#
# Locate the gcloud Application Default Credentials (ADC) file for the
# *current* user instead of hardcoding one developer's profile directory:
#   Windows:      %APPDATA%\gcloud\application_default_credentials.json
#   Linux/macOS:  ~/.config/gcloud/application_default_credentials.json
_GCLOUD_CONFIG_DIR = (
    os.path.join(os.environ["APPDATA"], "gcloud")
    if os.environ.get("APPDATA")
    else os.path.join(os.path.expanduser("~"), ".config", "gcloud")
)
CREDENTIALS_PATH = os.path.join(_GCLOUD_CONFIG_DIR, "application_default_credentials.json")

# Point the Google client libraries at that credentials file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = CREDENTIALS_PATH

# Verify credentials. Judging by the recorded output of this cell, the helper
# triggers a gcloud re-login when the file is older than 24 hours -- see the
# project's utils module for the exact rules.
utils.check_and_authenticate(CREDENTIALS_PATH)

Credentials file is older than 24 hours. Re-authenticating...
Trying reauthentication on gcloud server using shell command...
Login window opened...please complete authentication
Waiting for credentials file to update...
Authentication confirmed! Credentials file updated.


In [15]:
# Migration endpoints: objects are read from the source (dev) dataset and
# written to the destination (UAT) dataset.
SOURCE_PROJECT_ID, SOURCE_DATASET_ID = "clgx-gis-app-dev-06e3", "encumbered_parcels"
DESTINATION_PROJECT_ID, DESTINATION_DATASET_ID = "clgx-gis-app-uat-a0e0", "proximity_parcels"

# Shared BigQuery client, bound to the destination project. No explicit
# credentials object is passed here (i.e. this is not the service-account
# style of initialization).
client = bigquery.Client(project=DESTINATION_PROJECT_ID)

# ==============================================================================
#  HELPER FUNCTIONS
# ==============================================================================

def copy_all_tables(source_project, source_dataset, dest_project, dest_dataset):
    """
    Copies all tables from a source dataset to a destination dataset,
    skipping any materialized views.

    Uses the module-level BigQuery ``client``. Every copy job is submitted
    with WRITE_TRUNCATE and awaited before the next one starts.

    Args:
        source_project: Project ID that owns the source dataset.
        source_dataset: Dataset ID to read tables from.
        dest_project: Project ID that owns the destination dataset.
        dest_dataset: Dataset ID to write the copies into.

    Returns:
        None. Progress and errors are reported via print().

    Raises:
        Exception: Any error other than NotFound is printed and re-raised.
            A NotFound error is printed and ends the run without raising.
    """
    print("\n--- Starting Table Copy ---")
    print(f"From: {source_project}.{source_dataset}")
    print(f"To:   {dest_project}.{dest_dataset}")

    source_dataset_ref = f"{source_project}.{source_dataset}"

    # List the dataset exactly once and materialize the result, so the count
    # below does not cost a second API round trip. Only a NotFound raised
    # while listing really means "the source dataset is missing".
    try:
        tables = list(client.list_tables(source_dataset_ref))
    except NotFound:
        print(f"ERROR: Source dataset '{source_dataset_ref}' not found.")
        return
    except Exception as e:
        print(f"An unexpected error occurred during table copy: {e}")
        raise

    print(f"Found {len(tables)} items in source dataset.")

    # The copy configuration is identical for every table, so build it once.
    job_config = bigquery.CopyJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    for table in tables:
        # Check the table type and skip materialized views.
        if table.table_type == "MATERIALIZED_VIEW":
            print(f"  -> Skipping table: {table.table_id} (Type: MATERIALIZED_VIEW)")
            continue

        print(f"  -> Copying table: {table.table_id}...")

        source_table_ref = f"{source_project}.{source_dataset}.{table.table_id}"
        dest_table_ref = f"{dest_project}.{dest_dataset}.{table.table_id}"

        try:
            copy_job = client.copy_table(
                source_table_ref,
                dest_table_ref,
                job_config=job_config,
            )
            copy_job.result()  # Wait for the job to complete
        except NotFound as e:
            # A NotFound here concerns this particular copy (for example a
            # missing destination dataset), so report the tables involved
            # rather than blaming the source dataset. As before, the run
            # stops at the first NotFound without raising.
            print(
                f"ERROR: Copy of '{source_table_ref}' to '{dest_table_ref}' failed; "
                f"a referenced table or dataset was not found: {e}"
            )
            return
        except Exception as e:
            print(f"An unexpected error occurred during table copy: {e}")
            raise

        print(f"     -> SUCCESS: Copied to {dest_table_ref}")


In [16]:
# Run the table migration from the source dataset into the destination dataset.
copy_all_tables(
    source_project=SOURCE_PROJECT_ID,
    source_dataset=SOURCE_DATASET_ID,
    dest_project=DESTINATION_PROJECT_ID,
    dest_dataset=DESTINATION_DATASET_ID,
)


--- Starting Table Copy ---
From: clgx-gis-app-dev-06e3.encumbered_parcels
To:   clgx-gis-app-uat-a0e0.proximity_parcels
Found 19 items in source dataset.
  -> Copying table: all_encumbrance_scores...
     -> SUCCESS: Copied to clgx-gis-app-uat-a0e0.proximity_parcels.all_encumbrance_scores
  -> Copying table: county_boundaries...
     -> SUCCESS: Copied to clgx-gis-app-uat-a0e0.proximity_parcels.county_boundaries
  -> Skipping table: parcels_mv (Type: MATERIALIZED_VIEW)
  -> Copying table: protected_lands_national...
     -> SUCCESS: Copied to clgx-gis-app-uat-a0e0.proximity_parcels.protected_lands_national
  -> Skipping table: protected_lands_national_mv (Type: MATERIALIZED_VIEW)
  -> Copying table: proximity_intersection_protected_lands_national...
     -> SUCCESS: Copied to clgx-gis-app-uat-a0e0.proximity_parcels.proximity_intersection_protected_lands_national
  -> Copying table: proximity_intersection_railways...
     -> SUCCESS: Copied to clgx-gis-app-uat-a0e0.proximity_parcels.p

In [28]:
def copy_all_routines(source_project, source_dataset, dest_project, dest_dataset):
    """
    Copies all routines (stored procedures) from a source dataset to a
    destination dataset, replacing hardcoded project and dataset IDs in the body.

    Uses the module-level BigQuery ``client``. A routine that already exists
    in the destination is deleted and created again so that it really carries
    the rewritten body.

    Args:
        source_project: Project ID that owns the source dataset.
        source_dataset: Dataset ID to read routines from.
        dest_project: Project ID that owns the destination dataset.
        dest_dataset: Dataset ID to create the routines in.

    Returns:
        None. Progress and errors are reported via print().

    Raises:
        Exception: Any error other than NotFound is printed and re-raised.
            A NotFound error is printed and ends the run without raising.
    """
    print("\n--- Starting Stored Procedure (Routine) Copy ---")
    print(f"From: {source_project}.{source_dataset}")
    print(f"To:   {dest_project}.{dest_dataset}")

    source_dataset_ref = f"{source_project}.{source_dataset}"

    # (old, new) substrings rewritten inside every routine body. The pairs do
    # not depend on the routine, so the list is built once.
    replacements = [
        # Replace the primary source project and dataset
        (f"{source_project}.{source_dataset}", f"{dest_project}.{dest_dataset}"),
        # Replace any other hardcoded dev projects with their UAT equivalents
        # ("clgx-idap-bigquery-dev-71f0", "clgx-idap-bigquery-uat-d3f3")
    ]

    # Only a NotFound raised while listing means the source dataset is missing.
    try:
        routine_ids = [routine.routine_id for routine in client.list_routines(source_dataset_ref)]
    except NotFound:
        print(f"ERROR: Source dataset '{source_dataset_ref}' not found.")
        return
    except Exception as e:
        print(f"An unexpected error occurred during routine copy: {e}")
        raise

    print(f"Found {len(routine_ids)} routines to copy.")

    for routine_id in routine_ids:
        print(f"  -> Copying routine: {routine_id}...")

        source_routine_ref = f"{source_project}.{source_dataset}.{routine_id}"
        dest_routine_ref_str = f"{dest_project}.{dest_dataset}.{routine_id}"

        try:
            # Get the full definition of the source routine
            source_routine = client.get_routine(source_routine_ref)

            # Rewrite references in the SQL body. A routine without a body is
            # copied unchanged instead of crashing on None.replace().
            updated_body = source_routine.body
            if updated_body is not None:
                for old_string, new_string in replacements:
                    updated_body = updated_body.replace(old_string, new_string)
                source_routine.body = updated_body

            # Copy via the API representation so every routine property is
            # carried over, then re-point it at the destination.
            resource = source_routine.to_api_repr()
            dest_routine_ref = bigquery.RoutineReference.from_string(dest_routine_ref_str)
            resource['routineReference'] = dest_routine_ref.to_api_repr()
            dest_routine = bigquery.Routine.from_api_repr(resource)

            # create_routine(exists_ok=True) would only suppress the
            # "already exists" error and leave the old routine untouched.
            # Drop any existing routine first so the new definition is the one
            # that ends up in the destination.
            # NOTE: delete + create is not atomic; if the create fails, the
            # routine is absent from the destination until the next run.
            client.delete_routine(dest_routine_ref, not_found_ok=True)
            client.create_routine(dest_routine)
        except NotFound as e:
            # Concerns this routine (for example a missing destination
            # dataset), so do not blame the source dataset. As before, the run
            # stops at the first NotFound without raising.
            print(
                f"ERROR: Could not copy routine '{source_routine_ref}' to '{dest_routine_ref_str}'; "
                f"a referenced routine or dataset was not found: {e}"
            )
            return
        except Exception as e:
            print(f"An unexpected error occurred during routine copy: {e}")
            raise

        print(f"     -> SUCCESS: Recreated routine at {dest_routine_ref_str} with updated references.")

In [29]:
# Run the routine migration from the source dataset into the destination dataset.
copy_all_routines(
    source_project=SOURCE_PROJECT_ID,
    source_dataset=SOURCE_DATASET_ID,
    dest_project=DESTINATION_PROJECT_ID,
    dest_dataset=DESTINATION_DATASET_ID,
)


--- Starting Stored Procedure (Routine) Copy ---
From: clgx-gis-app-dev-06e3.encumbered_parcels
To:   clgx-gis-app-uat-a0e0.proximity_parcels
Found 16 routines to copy.
  -> Copying routine: calculate_intersection_score_for_polygons...
     -> SUCCESS: Recreated routine at clgx-gis-app-uat-a0e0.proximity_parcels.calculate_intersection_score_for_polygons with updated references.
  -> Copying routine: calculate_intersection_score_for_polygons_batch...
     -> SUCCESS: Recreated routine at clgx-gis-app-uat-a0e0.proximity_parcels.calculate_intersection_score_for_polygons_batch with updated references.
  -> Copying routine: calculate_intersection_score_polygons_batch...
     -> SUCCESS: Recreated routine at clgx-gis-app-uat-a0e0.proximity_parcels.calculate_intersection_score_polygons_batch with updated references.
  -> Copying routine: calculate_proximity_for_polygons...
     -> SUCCESS: Recreated routine at clgx-gis-app-uat-a0e0.proximity_parcels.calculate_proximity_for_polygons with upda

In [None]:
def extract_and_delete_zips(folder):
    """
    Extract every zip archive found directly inside ``folder`` into that same
    folder, deleting each archive once it has been extracted successfully.

    The extension check is case-insensitive, so ``.ZIP`` archives are handled
    as well. An archive that fails to extract is reported and left on disk.

    Args:
        folder: Path of the directory to scan (sub-directories are not scanned).

    Returns:
        A ``(extracted, failed)`` tuple of filename lists.
    """
    extracted, failed = [], []

    # sorted() gives a deterministic processing order; the listing is taken
    # before anything is extracted, so newly extracted files are not visited.
    for filename in sorted(os.listdir(folder)):
        # Check if the file is a zip file (any letter case)
        if not filename.lower().endswith(".zip"):
            continue

        zip_path = os.path.join(folder, filename)
        print(f"Found zip file: {filename}")

        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                print(f" -> Extracting '{filename}'...")
                zip_ref.extractall(folder)
                print(" -> Extraction complete.")

            # Only reached when extraction succeeded and the archive handle is
            # closed again, so removing the original is safe.
            os.remove(zip_path)
            print(f" -> Successfully deleted '{filename}'.\n")
            extracted.append(filename)

        except zipfile.BadZipFile:
            print(f" -> ERROR: '{filename}' is not a valid zip file or is corrupted. Skipping.")
            failed.append(filename)
        except Exception as e:
            print(f" -> ERROR: An unexpected error occurred with '{filename}': {e}")
            failed.append(filename)

    return extracted, failed


# The 'r' before the string is important to handle the backslashes correctly.
target_folder = r"C:\Users\eprashar\OneDrive - CoreLogic Solutions, LLC\github\jan_25_proj_infra_parcels\data\Wetlands"

print(f"Scanning for zip files in: {target_folder}\n")

if os.path.isdir(target_folder):
    extracted_zips, failed_zips = extract_and_delete_zips(target_folder)
    if failed_zips:
        # Make failures visible instead of hiding them behind the final banner.
        print(f"WARNING: {len(failed_zips)} zip file(s) could not be processed: {', '.join(failed_zips)}")
    print("--- All zip files processed. ---")
else:
    # Report a missing folder clearly instead of failing with a raw traceback.
    print(f"ERROR: Folder not found: {target_folder}")