<a href="https://colab.research.google.com/github/Birkbeck/msc-projects-2023-4-chorltonm/blob/main/big-query/export_bigquery_objects.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import Libraries
import os
import sys
import json
import pandas as pd
import numpy as np
import importlib
import pandas_gbq

# Import Google Cloud Libraries
from google.cloud import bigquery
from google.oauth2 import service_account
from google.colab import drive
from google.colab import userdata


In [None]:
# Mount Google Drive under /content/drive (force_remount=True is passed so an
# existing mount is not reused).
drive.mount('/content/drive', force_remount=True)

# Change default output directory.
# The folder is created first: os.chdir raises FileNotFoundError when the
# target is missing, and the export cells further down only create it after
# this point.
default_output_dir = '/content/drive/MyDrive/birkbeck_msc-project/output_files'
os.makedirs(default_output_dir, exist_ok=True)
os.chdir(default_output_dir)



In [None]:
# Authentication credentials and keys

# Google Service Account

# Load the JSON key from the service-account file on the mounted Google Drive.
# The context manager closes the file handle as soon as the key is parsed
# instead of leaving it open for the lifetime of the notebook kernel.
with open('/content/drive/MyDrive/service_account.json', 'r', encoding='utf-8') as key_file:
    key = json.load(key_file)

# Authenticate using the loaded key
credentials = service_account.Credentials.from_service_account_info(key)


In [None]:
import os
from google.cloud import bigquery
from google.oauth2 import service_account


def _column_type_ddl(field):
    """Return the DDL type expression for one schema field, without its name.

    Args:
        field: A schema field exposing ``field_type``, ``mode`` and ``fields``
            (as ``bigquery.SchemaField`` does).

    Returns:
        The type expression, e.g. ``INT64 NOT NULL`` or
        ``ARRAY<STRUCT<`a` STRING>>``.
    """
    field_type = field.field_type
    if field_type in ('RECORD', 'STRUCT'):
        # Nested columns have no scalar type name; spell out their members.
        members = ", ".join(
            f"`{sub_field.name}` {_column_type_ddl(sub_field)}"
            for sub_field in field.fields
        )
        ddl_type = f"STRUCT<{members}>"
    elif field_type == 'INTEGER':
        ddl_type = 'INT64'
    elif field_type == 'FLOAT':
        ddl_type = 'FLOAT64'
    else:
        ddl_type = field_type

    if field.mode == 'REPEATED':
        return f"ARRAY<{ddl_type}>"
    if field.mode == 'REQUIRED':
        return f"{ddl_type} NOT NULL"
    return ddl_type


def _write_sql_file(output_directory, object_name, statement):
    """Write ``statement`` to ``<output_directory>/<object_name>.sql``.

    Returns:
        The path of the file that was written.
    """
    file_path = os.path.join(output_directory, f"{object_name}.sql")
    # View and procedure bodies are free text, so the encoding is pinned
    # rather than left to the platform default.
    with open(file_path, 'w', encoding='utf-8') as sql_file:
        sql_file.write(statement)
    return file_path


def save_objects_as_sql(project_id, dataset_id, output_directory, credentials):
    """Write a ``.sql`` file with a CREATE statement for each object in a dataset.

    Views are written as ``CREATE OR REPLACE VIEW ... AS <view query>``, tables
    as ``CREATE OR REPLACE TABLE`` with a column list built from the table
    schema, and stored procedures as ``CREATE OR REPLACE PROCEDURE`` followed
    by the routine definition read from ``INFORMATION_SCHEMA.ROUTINES``.
    Objects of any other table type are skipped.

    Files are named ``<object name>.sql``, so objects that share a name
    overwrite each other when they are written to the same directory.

    Args:
        project_id: Project that owns the dataset; also used for the client.
        dataset_id: Dataset whose tables, views and procedures are exported.
        output_directory: Existing directory that receives the ``.sql`` files.
        credentials: Credentials object passed to ``bigquery.Client``.
    """
    client = bigquery.Client(credentials=credentials, project=project_id)
    # Client.dataset() is deprecated; build the reference directly.
    dataset_ref = bigquery.DatasetReference(project_id, dataset_id)

    # Export tables and views
    for table in client.list_tables(dataset_ref):
        # The listing item has no schema or view query; fetch the full table.
        table_ref = client.get_table(table)
        qualified_name = f"{project_id}.{dataset_id}.{table.table_id}"

        if table_ref.table_type == 'VIEW':
            create_statement = (
                f"CREATE OR REPLACE VIEW `{qualified_name}` AS\n"
                "\n"
                f"{table_ref.view_query}\n"
            )
        elif table_ref.table_type == 'TABLE':
            # One indented line per column, carrying mode and nested structure.
            schema_string = ",\n".join(
                f"    `{field.name}` {_column_type_ddl(field)}"
                for field in table_ref.schema
            )
            create_statement = (
                f"CREATE OR REPLACE TABLE `{qualified_name}`\n"
                "\n"
                "(\n"
                f"{schema_string}\n"
                ")\n"
            )
        else:
            continue  # Skip other types of objects

        file_path = _write_sql_file(output_directory, table.table_id, create_statement)
        print(f"Saved {table_ref.table_type.lower()} definition for {dataset_id} {table.table_id} to {file_path}")

    # Export stored procedures
    query = f"""
    SELECT routine_name, routine_definition
    FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.ROUTINES`
    WHERE routine_type = 'PROCEDURE'
    """
    for row in client.query(query).result():
        procedure_name = row['routine_name']
        procedure_definition = row['routine_definition']

        # NOTE(review): the argument list is always written as "()". If
        # routine_definition holds only the procedure body, parameters of
        # procedures that take arguments are not reproduced -- confirm.
        create_statement = (
            f"CREATE OR REPLACE PROCEDURE `{project_id}.{dataset_id}.{procedure_name}` ()\n"
            "\n"
            f"{procedure_definition}\n"
        )

        file_path = _write_sql_file(output_directory, procedure_name, create_statement)
        print(f"Saved stored procedure definition for {dataset_id} {procedure_name} to {file_path}")

datasets = ['extract_layer', 'preparation_layer', 'analysis_layer']

# The project and destination are the same for every dataset, so they are set
# (and the directory created) once instead of on every iteration.
project_id = 'birkbeck-msc-project-422917'
output_directory = '/content/drive/MyDrive/birkbeck_msc-project/output_files'

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Write the view, table and stored-procedure definitions of each dataset.
for dataset in datasets:
    dataset_id = dataset
    save_objects_as_sql(project_id, dataset_id, output_directory, credentials)

In [None]:
def export_table_data_as_json(project_id, dataset_id, output_directory, credentials):
    """Dump all rows of every table in a dataset to ``<table>_data.json`` files.

    Only objects whose table type is ``'TABLE'`` are exported. Each table is
    read with ``SELECT *`` and held in memory as a list of dicts before being
    written, so very large tables need a correspondingly large amount of RAM.
    Any value ``json`` cannot serialise natively is written as ``str(value)``.

    Args:
        project_id: Project that owns the dataset; also used for the client.
        dataset_id: Dataset whose tables are exported.
        output_directory: Existing directory that receives the JSON files.
        credentials: Credentials object passed to ``bigquery.Client``.
    """
    client = bigquery.Client(credentials=credentials, project=project_id)
    # Client.dataset() is deprecated; build the reference directly.
    dataset_ref = bigquery.DatasetReference(project_id, dataset_id)

    # Export tables
    for table in client.list_tables(dataset_ref):
        # The listing item already carries the table type, so no per-table
        # metadata request is needed just to filter out views.
        if table.table_type != 'TABLE':
            continue

        # Query to get all data from the table
        query = f"SELECT * FROM `{project_id}.{dataset_id}.{table.table_id}`"
        rows = client.query(query).result()

        # Convert results to list of dictionaries
        data = [dict(row) for row in rows]

        # Save as JSON
        file_path = os.path.join(output_directory, f"{table.table_id}_data.json")
        with open(file_path, 'w') as json_file:
            json.dump(data, json_file, default=str, indent=2)

        print(f"Saved table data for {table.table_id} to {file_path}")

datasets = ['extract_layer', 'preparation_layer', 'analysis_layer']

# The project and destination are the same for every dataset, so they are set
# (and the directory created) once instead of on every iteration.
project_id = 'birkbeck-msc-project-422917'
output_directory = '/content/drive/MyDrive/birkbeck_msc-project/output_files'

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Write the row data of every table in each dataset as JSON.
for dataset in datasets:
    dataset_id = dataset
    export_table_data_as_json(project_id, dataset_id, output_directory, credentials)