In [12]:
# Import necessary libraries
import os

import pandas as pd
from google.cloud import bigquery
from google.oauth2 import service_account

# --- BigQuery connection settings -------------------------------------------
# Target project, dataset, and table name used to build the queries below.
project_id = 'adamh-wedge-project'
dataset_id = 'wedge_transactions'
table_id = 'trans*'  # trailing * makes this a wildcard table name

# Service-account key file used to authenticate (relative path).
service_account_file = 'adamh-wedge-project-6f59b14d0763.json'

# Load the credentials from the key file and open a client bound to the project.
credentials = service_account.Credentials.from_service_account_file(
    service_account_file
)
client = bigquery.Client(project=project_id, credentials=credentials)

# --- Owner list --------------------------------------------------------------
# SQL: every distinct card_no except the value 3.0.
sql_query = f"""
SELECT DISTINCT card_no
FROM `{project_id}.{dataset_id}.{table_id}`
where card_no != 3.0
"""

# Submit the query, then materialise the result set as a pandas DataFrame.
query_job = client.query(sql_query)
df_owners = query_job.to_dataframe()


In [13]:
# Draw a reproducible random sample of owners, fetch every record that belongs
# to them, save the records to CSV, and report the size of the file.
sample_size = 450  # Adjust this value as needed
random_seed = 42  # Fixed seed so re-running the cell draws the same owners

# DataFrame.sample raises ValueError when n exceeds the number of rows,
# so cap the request at the number of owners actually available.
df_sample_owners = df_owners.sample(
    n=min(sample_size, len(df_owners)),
    random_state=random_seed,
)

# Pass the sampled card numbers as an array query parameter instead of
# formatting them into the SQL text; this also keeps the query valid when
# the sample is empty (an empty `IN ()` list is a syntax error).
# NOTE(review): FLOAT64 is assumed from the `card_no != 3.0` filter used when
# df_owners was built -- confirm against the table schema.
sample_card_nos = [float(card_no) for card_no in df_sample_owners['card_no']]
job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter('card_nos', 'FLOAT64', sample_card_nos),
    ]
)

# Build a SQL query to get all records for the owners in the sample
sql_query = f"""
SELECT *
FROM `{project_id}.{dataset_id}.{table_id}`
WHERE card_no IN UNNEST(@card_nos)
"""

# Execute the query and convert the result into a DataFrame
df_records = client.query(sql_query, job_config=job_config).to_dataframe()

# Print the count of unique owners in the sample
print(f"Number of unique owners in the sample: {len(df_sample_owners)}")

# Save the records to a CSV file
csv_file = 'sample_owner_records.csv'
df_records.to_csv(csv_file, index=False)

# Calculate the size of the CSV file in MB
file_size_MB = os.path.getsize(csv_file) / (1024 * 1024)

# Print the size of the CSV file
print(f"Size of the CSV file: {file_size_MB:.2f} MB")

Number of unique owners in the sample: 450
Size of the CSV file: 228.30 MB
