In [3]:
import os
import subprocess
from datetime import datetime, timedelta
import logging
import time
from google.cloud import storage
import pyarrow.parquet as pq
import pandas as pd
from google.cloud import bigquery_datatransfer
from google.cloud import bigquery
from google.api_core.exceptions import NotFound
import geopandas as gpd

In [4]:
# Functions to check authentication key
# Function to check google authentication token and re-generate if it is expired/doesn't exist
def check_and_authenticate(json_path):
    '''
    Function to check google authentication token and re-generate if it is expired/doesn't exist
    '''
    try:
        if not os.path.exists(json_path):
            raise FileNotFoundError("Credentials file not found")
        # Get modification time of the file
        file_mod_time = datetime.fromtimestamp(os.path.getmtime(json_path))
        current_time = datetime.now()

        # Check if the file is older than 24 hours
        if current_time - file_mod_time > timedelta(hours=24):
            print("Credentials file is older than 24 hours. Re-authenticating...")

            # Re-authenticate
            try:
                print(f"Trying reauthentication on gcloud server using shell command...")
                subprocess.run("start cmd /c gcloud auth application-default login", shell=True, check=True)
                print('Login window opened...please complete authentication')
                
                # Poll for file modification
                print("Waiting for credentials file to update...")
                max_wait = 300  # seconds
                check_interval = 2  # seconds
                start_time = datetime.now()

                while (datetime.now() - start_time).total_seconds() < max_wait:
                    new_mod_time = datetime.fromtimestamp(os.path.getmtime(json_path))
                    if new_mod_time > file_mod_time:
                        print("Authentication confirmed! Credentials file updated.")
                        break
                    time.sleep(check_interval)
                else:
                    print("Timed out waiting for credentials file update.")

            except subprocess.CalledProcessError as e:
                print(f"Error during re-authentication: {e}")
            except Exception as e:
                print(f'Authentication failed because of {e}')
        else:
            print("Credentials file is valid.")
    except Exception as e:
        print(f"Error: {e}")

In [5]:
# Uploading to GCS
# First, validate the authentication token
CREDENTIALS_PATH =  r"C:\Users\eprashar\AppData\Roaming\gcloud\application_default_credentials.json"
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(CREDENTIALS_PATH)

# Check and authenticate
check_and_authenticate(CREDENTIALS_PATH)

Credentials file is older than 24 hours. Re-authenticating...
Trying reauthentication on gcloud server using shell command...
Login window opened...please complete authentication
Waiting for credentials file to update...
Authentication confirmed! Credentials file updated.


In [None]:
# If not using a service account, initialize the client like this:
SOURCE_PROJECT_ID = "clgx-gis-app-uat-a0e0"
SOURCE_DATASET_ID = "proximity_parcels"

DESTINATION_PROJECT_ID = "clgx-gis-app-prd-364d"
DESTINATION_DATASET_ID = "proximity_parcels"
client = bigquery.Client(project=DESTINATION_PROJECT_ID)

# ==============================================================================
# Function to copy all tables from source dataset to destination dataset
# ==============================================================================
def copy_all_tables(
        source_project, 
        source_dataset, 
        dest_project, 
        dest_dataset,
        tables=None,
        overwrite=False
        ):
    """
   Copies tables from a source dataset to a destination dataset.
    - If 'table_list' is provided, it copies only those tables.
    - If 'table_list' is None, it copies all tables from the source.
    - It will automatically skip any materialized views.
    - If 'overwrite' is False, it will skip tables that already exist in the destination.
    """
    print(f"\n--- Starting Table Copy ---")
    print(f"From: {source_project}.{source_dataset}")
    print(f"To:   {dest_project}.{dest_dataset}")

    source_dataset_ref = f"{source_project}.{source_dataset}"
    
    try:
        # Determine which tables to process
        tables_to_process = []
        if tables:
            print(f"Processing a provided list of {len(tables)} tables.")
            tables_to_process = tables
        else:
            print("No table list provided. Fetching all tables from source dataset.")
            all_items = client.list_tables(source_dataset_ref)
            tables_to_process = [item.table_id for item in all_items]
            print(f"Found {len(tables_to_process)} items in source dataset.")

        for table_id in tables_to_process:
            source_table_ref_str = f"{source_project}.{source_dataset}.{table_id}"
        
            # Get the full table object to check its type
            try:
                table_obj = client.get_table(source_table_ref_str)
            except NotFound:
                print(f"  -> WARNING: Table '{table_id}' not found in source dataset. Skipping.")
                continue

            # Check the table_type
            if table_obj.table_type == "MATERIALIZED_VIEW":
                print(f"  -> Skipping: {table_id} (Type: MATERIALIZED_VIEW)")
                continue
            
            print(f"  -> Copying table: {table_id}...")
            dest_table_ref_str = f"{dest_project}.{dest_dataset}.{table_id}"

            # Additional check for overwrite functionality
            if not overwrite:
                try:
                    client.get_table(dest_table_ref_str)
                    # If get_table succeeds, the table exists.
                    print(f"  -> Skipping: Destination table '{dest_table_ref_str}' already exists and overwrite is False.")
                    continue
                except NotFound:
                    # Table doesn't exist, so we can proceed with the copy.
                    pass

            # Configure and start the copy job
            job_config = bigquery.CopyJobConfig(write_disposition="WRITE_TRUNCATE")
            copy_job = client.copy_table(
                source_table_ref_str,
                dest_table_ref_str,
                job_config=job_config,
            )
            copy_job.result()  # Wait for the job to complete
            print(f"      -> SUCCESS: Copied to {dest_table_ref_str}")

    except NotFound:
        print(f"ERROR: Source dataset '{source_dataset_ref}' not found.")
    except Exception as e:
        print(f"An unexpected error occurred during table copy: {e}")
        raise

In [17]:
# Copy tables from GIS UAT to PROD
tables_to_copy = [
    'roadways',
    'railways',
    'transmission_lines',
    'protected_lands_national',
    'wetlands'
    ]

# Run copy all tables
copy_all_tables(
    SOURCE_PROJECT_ID, 
    SOURCE_DATASET_ID, 
    DESTINATION_PROJECT_ID, 
    DESTINATION_DATASET_ID,
    tables=tables_to_copy,
    overwrite=False
)


--- Starting Table Copy ---
From: clgx-gis-app-uat-a0e0.proximity_parcels
To:   clgx-gis-app-prd-364d.proximity_parcels
Processing a provided list of 5 tables.
  -> Copying table: roadways...
  -> Skipping: Destination table 'clgx-gis-app-prd-364d.proximity_parcels.roadways' already exists and overwrite is False.
  -> Copying table: railways...
  -> Skipping: Destination table 'clgx-gis-app-prd-364d.proximity_parcels.railways' already exists and overwrite is False.
  -> Copying table: transmission_lines...
  -> Skipping: Destination table 'clgx-gis-app-prd-364d.proximity_parcels.transmission_lines' already exists and overwrite is False.
  -> Copying table: protected_lands_national...
  -> Skipping: Destination table 'clgx-gis-app-prd-364d.proximity_parcels.protected_lands_national' already exists and overwrite is False.
  -> Copying table: wetlands...
  -> Skipping: Destination table 'clgx-gis-app-prd-364d.proximity_parcels.wetlands' already exists and overwrite is False.
