In [29]:
#!py -m pip install pyodbc pandas db-dtypes
#!py -m pip install google-cloud-bigquery google-auth google-auth-oauthlib pandas_gbq

In [30]:
import pyodbc
import pandas as pd
import pandas_gbq
import json
from google.cloud import bigquery

In [31]:
# Open and read the JSON file
with open('config/mom_db_config.json', 'r') as file:
    mom_db_config = json.load(file)

In [32]:
MOM_CONN_STR = (
    f"{mom_db_config['driver']}"
    f"{mom_db_config['server']}"
    f"{mom_db_config['database']}"
    f"{mom_db_config['user']}"
    f"{mom_db_config['password']}"
)
PROJECT_ID = "sturdy-tome-468208-k2"
DATASET_ID = "dev"
TABLE_IDS = {
    "orders" : "orders"
}

In [33]:
# Create connection
mom_conn = pyodbc.connect(MOM_CONN_STR)

In [34]:
last_order_time  = "2025-08-06 12:27:05.000"
last_order_num = 1588550

query = """
    SELECT
        orderno AS order_id,
        custnum AS customer_id,
        entrytime AS entry_time,
        ord_total AS order_total
        FROM CMS 
        -- WHERE orderno > ?
        ORDER BY order_id ASC
"""

df = pd.read_sql(query, mom_conn)#, params=[last_order_num])

  df = pd.read_sql(query, mom_conn)#, params=[last_order_num])


In [35]:
df.shape

(1510054, 4)

In [36]:
df.tail(10)

Unnamed: 0,order_id,customer_id,entry_time,order_total
1510044,1588610.0,260434.0,2025-08-06 14:57:06,92.0
1510045,1588611.0,605559.0,2025-08-06 15:06:23,100.0
1510046,1588612.0,935508.0,NaT,105.0
1510047,1588613.0,217199.0,2025-08-06 15:05:27,85.0
1510048,1588614.0,612917.0,2025-08-06 15:03:24,95.0
1510049,1588615.0,592428.0,2025-08-06 15:05:02,110.0
1510050,1588616.0,323786.0,2025-08-06 15:08:39,90.0
1510051,1588617.0,381457.0,2025-08-06 15:08:50,35.0
1510052,1588618.0,935854.0,2025-08-06 15:09:15,35.0
1510053,1588619.0,802832.0,2025-08-06 15:10:20,85.0


In [None]:
# We need to fix the null entrytimes, just backwards fill from the previous seen entrytime
df['entry_time'] = pd.to_datetime(df['entry_time'], errors='coerce')  # ensure datetime type
df['entry_time'] = df['entry_time'].fillna(method='ffill')  # forward-fill

  df['entry_time'] = df['entry_time'].fillna(method='bfill')  # forward-fill


In [38]:
df.tail(10)

Unnamed: 0,order_id,customer_id,entry_time,order_total
1510044,1588610.0,260434.0,2025-08-06 14:57:06,92.0
1510045,1588611.0,605559.0,2025-08-06 15:06:23,100.0
1510046,1588612.0,935508.0,2025-08-06 15:05:27,105.0
1510047,1588613.0,217199.0,2025-08-06 15:05:27,85.0
1510048,1588614.0,612917.0,2025-08-06 15:03:24,95.0
1510049,1588615.0,592428.0,2025-08-06 15:05:02,110.0
1510050,1588616.0,323786.0,2025-08-06 15:08:39,90.0
1510051,1588617.0,381457.0,2025-08-06 15:08:50,35.0
1510052,1588618.0,935854.0,2025-08-06 15:09:15,35.0
1510053,1588619.0,802832.0,2025-08-06 15:10:20,85.0


In [39]:
mom_conn.close()

In [40]:
#!gcloud auth application-default login

In [41]:
  # Initialize client (uses default credentials)
client = bigquery.Client(project=PROJECT_ID)

# Configure load job — let schema be auto-detected
job_config = bigquery.LoadJobConfig(
    autodetect=True,              # This makes schema generation automatic
    write_disposition="WRITE_APPEND"  # Or WRITE_TRUNCATE / WRITE_EMPTY
)

# Load DataFrame into BigQuery
#job = client.load_table_from_dataframe(
#    df,
#    f"{DATASET_ID}.{TABLE_IDS['orders']}",
#    job_config=job_config
#)
#job.result()  # Wait for the load to complete

print("Loaded {} rows into {}".format(df.shape[0], TABLE_IDS['orders']))

Loaded 1510054 rows into orders


In [42]:
query = f"""
SELECT * FROM `{DATASET_ID}.{TABLE_IDS['orders']}`
"""

# Run query and return results as a DataFrame
df = client.query(query).to_dataframe()

NotFound: 404 Not found: Dataset sturdy-tome-468208-k2:dev was not found in location US; reason: notFound, message: Not found: Dataset sturdy-tome-468208-k2:dev was not found in location US

Location: US
Job ID: acfde162-4cbe-4d01-aaac-4415f4054707


In [None]:
df.shape

(1510042, 5)

In [None]:
df.head(10)

Unnamed: 0,order_id,customer_id,entry_time,order_total,check_amount
0,2.0,42850.0,2004-03-31 12:52:53,4.0,4.0
1,4.0,42850.0,2004-03-31 15:43:24,11.75,11.75
2,12.0,42873.0,2004-04-01 14:50:33,41.97,41.97
3,14.0,42874.0,2004-04-01 15:52:40,26.97,26.97
4,15.0,42875.0,2004-04-01 16:10:36,26.97,26.97
5,23.0,42886.0,2004-04-02 10:23:12,17.98,17.98
6,26.0,42888.0,2004-04-02 11:45:34,19.99,19.99
7,35.0,42895.0,2004-04-02 14:15:04,26.97,26.97
8,37.0,42898.0,2004-04-02 16:08:21,26.97,26.97
9,38.0,42904.0,2004-04-05 12:09:29,31.98,31.98
