In [7]:
from google.cloud import bigquery
import pandas as pd
import json

In [5]:
def find_common_columns_json(project_id, dataset_id, table1_id, table2_id):
    """
    Finds the intersection of columns between two BigQuery tables and returns them in JSON format.

    Args:
        project_id (str): The ID of your Google Cloud project.
        dataset_id (str): The ID of the dataset containing the tables.
        table1_id (str): The ID of the first table.
        table2_id (str): The ID of the second table.

    Returns:
        str: A JSON string containing the common columns in the desired format.
        Returns None if there is an error.
    """
    client = bigquery.Client(project=project_id)

    try:
        table1_ref = client.dataset(dataset_id).table(table1_id)
        table2_ref = client.dataset(dataset_id).table(table2_id)

        table1 = client.get_table(table1_ref)
        table2 = client.get_table(table2_ref)

        table1_columns = set([field.name for field in table1.schema])
        table2_columns = set([field.name for field in table2.schema])

        common_columns = list(table1_columns.intersection(table2_columns))
        result = {"metrics_to_compare": common_columns}
        return json.dumps(result, indent=2)  # Convert to JSON with indentation

    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [12]:
def copy_to_clipboard(text):
    """
    Copies the given text to the clipboard.

    Args:
        text (str): The text to copy.
    """
    try:
        pyperclip.copy(text)
        print("Text copied to clipboard.")
    except pyperclip.PyperclipException as e:
        print(f"Error copying to clipboard: {e}")
        print("Pyperclip may not be installed or your system may not have a clipboard mechanism.")
        print("Install pyperclip with: pip install pyperclip")

In [8]:
common_cols_json = find_common_columns_json(project_id, dataset_id, table1_id, table2_id)

if common_cols_json:
    print(common_cols_json)
else:
    print("Could not retrieve common columns")

{
  "metrics_to_compare": [
    "last_touch_channel",
    "raw_page_url",
    "unscrubbed_url",
    "data_source_group",
    "high_value_visit_had_2_pageviews",
    "digital_property",
    "region",
    "last_non_direct_channel",
    "city",
    "engaged_sessions",
    "first_touch_channel",
    "is_high_value_visit",
    "engagement_time",
    "last_non_direct_subchannel",
    "site_locale",
    "first_touch_subchannel",
    "sub_region",
    "contained_auth0_pageview",
    "contained_www_pageview",
    "last_touch_medium",
    "device_browser_version",
    "landing_page_site_version",
    "last_non_direct_channel_category",
    "device_browser",
    "has_null_user_pseudo_id",
    "country",
    "device_language",
    "data_source",
    "last_touch_source_medium",
    "conversions",
    "contained_auth0_docs_pageview",
    "contained_support_signon",
    "contained_auth0_blog_pageview",
    "first_touch_source_medium",
    "is_qualified_session",
    "last_non_direct_campaign",
    "f

In [13]:
!pip install pyperclip



In [11]:
import pyperclip

In [14]:
pyperclip.copy(common_cols_json)

In [4]:
# Example usage:
project_id = "okta-ga-rollup"  # Replace with your project ID
dataset_id = "dbt_prod_ga4_reporting"
table1_id = "ga4__content_with_ua_union"
table2_id = "ga4__traffic_with_ua_union"

common_cols = find_common_columns(project_id, dataset_id, table1_id, table2_id)

if common_cols:
    print("Common columns:")
    for col in common_cols:
        print(col)
else:
    print("Could not retrieve common columns")

#Optional: Display as a pandas DataFrame.
if common_cols:
    df = pd.DataFrame(common_cols, columns = ['Common Columns'])
    display(df)

Common columns:
last_touch_channel
raw_page_url
unscrubbed_url
data_source_group
high_value_visit_had_2_pageviews
digital_property
region
last_non_direct_channel
city
engaged_sessions
first_touch_channel
is_high_value_visit
engagement_time
last_non_direct_subchannel
site_locale
first_touch_subchannel
sub_region
contained_auth0_pageview
contained_www_pageview
last_touch_medium
device_browser_version
landing_page_site_version
last_non_direct_channel_category
device_browser
has_null_user_pseudo_id
country
device_language
data_source
last_touch_source_medium
conversions
contained_auth0_docs_pageview
contained_support_signon
contained_auth0_blog_pageview
first_touch_source_medium
is_qualified_session
last_non_direct_campaign
first_touch_medium
fiscal_month
fiscal_quarter
contained_sec_prop_pageview
last_touch_channel_category
year_month
high_value_visit_had_conversion
state
last_non_direct_medium
visitor_type
fiscal_year
high_value_visits
last_non_direct_source_medium
had_login_click
last_n

Unnamed: 0,Common Columns
0,last_touch_channel
1,raw_page_url
2,unscrubbed_url
3,data_source_group
4,high_value_visit_had_2_pageviews
...,...
65,contained_auth0_quickstarts_pageview
66,metro
67,user_type
68,device_viewport_resolution
