In [3]:
import requests
import json
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta, timezone
import pandas as pd
import polars as pl

# Pull the variables declared in the local .env file into the process environment
load_dotenv()

# OAuth client settings and current token state; each is None when the variable is unset
CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
REDIRECT_URI = os.getenv("REDIRECT_URI")
RESTAURANT_ID = os.getenv("RESTAURANT_ID")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
REFRESH_TOKEN = os.getenv("REFRESH_TOKEN")
EXPIRES_IN = os.getenv("EXPIRES_IN")
EXPIRES_AT = os.getenv("EXPIRES_AT")

# Root URL of the Dinlr API
base_url = "https://api.dinlr.com/v1"

# Bearer-token header sent with the order requests
aheaders = dict(Authorization=f"Bearer {ACCESS_TOKEN}")

# Header declaring a form-encoded request body
rheaders = {"Content-Type": "application/x-www-form-urlencoded"}

# Location ids passed as location_id when listing orders
lTUG_ID = "a7e56eeb-98db-402d-bea1-1ee35d54a4fd"
lEVENT_ID = "6bf38c18-a852-439b-aa54-007547f1bb6a"

# Send the GET request to the locations endpoint
# response = requests.get(f"https://api.dinlr.com/v1/{RESTAURANT_ID}/onlineorder/locations", headers=aheaders)
# data = response.json()

def convert_to_datetime(date_string):
    """Parse a ``YYYY-MM-DDTHH:MM:SS`` string into a naive ``datetime``.

    Raises:
        ValueError: if ``date_string`` does not match that layout exactly.
    """
    timestamp_layout = "%Y-%m-%dT%H:%M:%S"
    parsed = datetime.strptime(date_string, timestamp_layout)
    return parsed

def is_token_expired():
    """Report whether the stored access token has reached its expiry time.

    Compares the current local time with the module-level ``EXPIRES_AT``
    value, parsed by ``convert_to_datetime``.

    Returns:
        bool: True when the current time is at or past ``EXPIRES_AT``, or when
        ``EXPIRES_AT`` is missing/empty; False otherwise.

    Raises:
        ValueError: if ``EXPIRES_AT`` is set but not in the layout that
        ``convert_to_datetime`` expects.
    """
    # EXPIRES_AT is read with os.environ.get(), so it is None when the variable
    # is not set; parsing None would raise TypeError. With no known expiry the
    # token cannot be trusted, so report it as expired.
    if not EXPIRES_AT:
        return True

    return datetime.now() >= convert_to_datetime(EXPIRES_AT)

# try:
#     if is_token_expired():
#         # Step 4: Request a new access token using the refresh token
#         # Define the parameters for the POST request
#         params = {
#             "grant_type": "refresh_token",
#             "client_id": CLIENT_ID,
#             "client_secret": CLIENT_SECRET,
#             "refresh_token": REFRESH_TOKEN
#         }

#         # Send the POST request to the token endpoint
#         response = requests.post(f"{base_url}/{RESTAURANT_ID}/oauth/token", data=params, headers=rheaders)

#         # Parse the JSON response and extract the new access token
#         data = response.json()
#         ACCESS_TOKEN = data["access_token"]
#         REFRESH_TOKEN = data["refresh_token"]
#         EXPIRES_IN = data["expires_in"]

#         # Update the .env file with the new access token and refresh token
#         os.environ["ACCESS_TOKEN"] = ACCESS_TOKEN
#         os.environ["EXPIRES_IN"] = EXPIRES_IN
#         os.environ["REFRESH_TOKEN"] = REFRESH_TOKEN

#         # Declare EXPIRES_AT environment variable
#         os.environ["EXPIRES_AT"] = str(datetime.now() + timedelta(seconds=int(EXPIRES_IN)))
        
#         # export to .env file
#         with open('.env', 'w') as f:
#             for key, value in os.environ.items():
#                 f.write(f"{key}={value}\n")

# except Exception as e:
#     # Handle any exceptions that may occur
#     print(f"An error occurred: {str(e)}")
#     print(f"Error type: {type(e).__name__}")
#     print(f"Error context: {e.args}")


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Define the functions that fetch the list of orders and the details of a single order.

In [2]:
def _collect_order_pages(orders, query, start_page=1):
    """Append every page of one orders-listing query to ``orders``.

    Requests consecutive pages, beginning at ``start_page``, until a page comes
    back with an empty "data" member. Results are appended in place so the
    caller keeps whatever was already fetched if a later request raises.
    """
    page = start_page
    while True:
        url = f"{base_url}/{RESTAURANT_ID}/onlineorder/orders?{query}&page={page}"
        response = requests.get(url, headers=aheaders)
        data = response.json()["data"]

        if not data:
            break

        orders.extend(data)
        page += 1


# Function to get all orders
def get_all_orders(location_id, all=True, update_at_min=None, create_at_min=None, create_at_max=None, page=1):
    """Fetch orders for a location, optionally filtered by update/creation time.

    Each enabled filter is run as its own paginated query and the results are
    concatenated, so enabling several at once (note that ``all`` defaults to
    True) can return the same order more than once.

    Args:
        location_id: value sent as the ``location_id`` query parameter.
        all: when true, list the orders with no time filter. Errors raised
            while doing so are printed and the orders fetched so far are kept.
            (The name shadows the builtin; kept for existing callers.)
        update_at_min: string sent as the ``update_at_min`` query parameter;
            "+" is percent-encoded as "%2B".
        create_at_min: datetime-like object (must support ``strftime`` and
            adding a ``pd.Timedelta``) sent as ``create_at_min``.
        create_at_max: datetime-like upper bound sent as ``create_at_max``;
            defaults to ``create_at_min`` plus 31 days.
        page: page number at which every query starts.

    Returns:
        list: the "data" entries of all fetched pages, in request order.
    """
    orders = []
    location_query = f"location_id={location_id}"

    # Unfiltered listing (best effort: report the error and keep partial results)
    if all:
        try:
            _collect_order_pages(orders, location_query, page)
        except Exception as e:
            print(f"An error occurred: {str(e)}")

    # If update_at_min is provided, also query with the update_at_min filter.
    # The "+" of the UTC offset must be sent as %2B inside a query string.
    if update_at_min:
        update_at_min = update_at_min.replace("+", "%2B")
        _collect_order_pages(orders, f"{location_query}&update_at_min={update_at_min}", page)

    # If create_at_min is provided, query the creation-time window
    # [create_at_min, create_at_max].
    if create_at_min:
        # Default the upper bound to 31 days after create_at_min
        if not create_at_max:
            create_at_max = create_at_min + pd.Timedelta(days=31)

        # Format each bound exactly once; formatting create_at_max twice made a
        # caller-supplied value fail with AttributeError on the second call.
        timestamp_layout = "%Y-%m-%dT%H:%M:%S%z"
        create_at_min = create_at_min.strftime(timestamp_layout).replace("+", "%2B")
        create_at_max = create_at_max.strftime(timestamp_layout).replace("+", "%2B")
        _collect_order_pages(
            orders,
            f"{location_query}&create_at_min={create_at_min}&create_at_max={create_at_max}",
            page,
        )

    return orders

# Function to get order details
def get_order_details(order_id):
    """Fetch a single order by id and return the "data" member of the JSON reply."""
    endpoint = f"{base_url}/{RESTAURANT_ID}/onlineorder/orders/{order_id}"
    payload = requests.get(endpoint, headers=aheaders).json()
    return payload["data"]

In [3]:
# Load the TUG and EVENT order JSON files into pandas DataFrames
# (each file is read in full, not just its last item)
master_TUG = pd.read_json('TUG_orders_2024-02-29T21:01:11+0800.json')
master_EVENT = pd.read_json('EVENT_orders_2024-02-24T23:32:45+0800.json')

In [4]:
# Take the latest 'created_at' value of each master table and add one second;
# last_created_TUG is later passed as create_at_min when fetching newer orders
last_created_TUG = master_TUG['created_at'].max() + pd.Timedelta(seconds=1)
last_created_EVENT = master_EVENT['created_at'].max() + pd.Timedelta(seconds=1)

# convert last_created to string in ISO 8601 format: "2024-02-25T02:00:15+08:00"
# last_created_TUG = last_created_TUG.strftime("%Y-%m-%dT%H:%M:%S%z")
# last_created_EVENT = last_created_EVENT.strftime("%Y-%m-%dT%H:%M:%S%z")
# print(f"Last created TUG: {last_created_TUG}")
# print(f"Last created EVENT: {last_created_EVENT}")

In [4]:
# Fetch TUG orders from the API using last_created_TUG as the create_at_min filter
# (all=False skips the unfiltered full listing)
TUG_orders = get_all_orders(lTUG_ID, all=False, create_at_min=last_created_TUG)


NameError: name 'get_all_orders' is not defined

In [6]:
# Display the fetched TUG orders
TUG_orders

[{'id': 'BBDF75BA-C5D4-4929-BC1E-D1248E7AE729',
  'customer': None,
  'order_no': '240301XVZA',
  'order_ticket': None,
  'dining_option': '6b1c80f3-6184-457e-a568-8d7346742023',
  'subtotal': 30,
  'total': 30,
  'rounding': 0,
  'paid': 30,
  'status': 'closed',
  'financial_status': 'paid',
  'kitchen_status': None,
  'expedite_status': None,
  'updated_at': '2024-03-01T14:12:57+08:00',
  'created_at': '2024-03-01T14:12:32+08:00',
  'objects': [{'object': '27cf2862-1e20-4a43-841d-193cac283daa',
    'name': 'DINE IN'}]},
 {'id': '338A8E27-3AF3-4DFA-B2A1-627B2CA2864A',
  'customer': None,
  'order_no': '240301N0G4',
  'order_ticket': None,
  'dining_option': '6b1c80f3-6184-457e-a568-8d7346742023',
  'subtotal': 13,
  'total': 13,
  'rounding': 0,
  'paid': 13,
  'status': 'closed',
  'financial_status': 'paid',
  'kitchen_status': None,
  'expedite_status': None,
  'updated_at': '2024-03-01T14:24:53+08:00',
  'created_at': '2024-03-01T14:24:24+08:00',
  'objects': [{'object': '27cf286

In [9]:
# Fetch all orders and their details
TUG_orders = get_all_orders(lTUG_ID)
EVENT_orders = get_all_orders(lEVENT_ID)

# One detail request per order id returned by the listing
TUG_all_order_details = [get_order_details(order["id"]) for order in TUG_orders]
EVENT_all_order_details = [get_order_details(order["id"]) for order in EVENT_orders]

# Dump details to json with last_created as suffix in the filename.
# last_created_* are pandas Timestamps: interpolating them directly renders as
# "2024-03-01 14:24:25+08:00", which does not match the
# "TUG_orders_2024-02-29T21:01:11+0800.json" naming of the files read back
# earlier in the notebook, so format them explicitly.
TUG_suffix = last_created_TUG.strftime("%Y-%m-%dT%H:%M:%S%z")
EVENT_suffix = last_created_EVENT.strftime("%Y-%m-%dT%H:%M:%S%z")

with open(f'TUG_orders_{TUG_suffix}.json', 'w') as f:
    json.dump(TUG_all_order_details, f)

with open(f'EVENT_orders_{EVENT_suffix}.json', 'w') as f:
    json.dump(EVENT_all_order_details, f)


# Convert to DataFrame
TUG_df_orders = pd.DataFrame(TUG_all_order_details)
EVENT_df_orders = pd.DataFrame(EVENT_all_order_details)

We first store the raw data as JSON for traceability. After this step, we curate the data into separate tables.

In [44]:
# Serialise each orders frame through pandas' JSON encoder, load the result back
# as plain Python records, and write those records out pretty-printed.
for orders_frame, out_path in (
    (TUG_df_orders, "TUG_orders.json"),
    (EVENT_df_orders, "EVENT_orders.json"),
):
    parsed = json.loads(orders_frame.to_json(orient="records"))
    with open(out_path, "w") as json_file:
        json.dump(parsed, json_file, indent=4)

In [1]:
# # Curating data...
# # Define a function to unnest and create separate tables
# def unnest_json(df, field_name):
#     return df.select(pl.col(field_name).arr.flatten().alias(field_name)).explode(field_name)

# # Define a function to obtain json keys that has nested arrays
# def get_nested_keys(json_data):
#     nested_keys = []
#     for key, value in json_data.items():
#         if isinstance(value, list):
#             nested_keys.append(key)
#     return nested_keys

# def check_nested_keys(json_data, parent_key=None):
#     nested_keys = []
#     for key, value in json_data.items():
#         if isinstance(value, list):
#             nested_keys.append((parent_key, key))
#             for item in value:
#                 nested_keys.extend(check_nested_keys(item, key))
#         elif isinstance(value, dict):
#             nested_keys.append((parent_key, key))
#             nested_keys.extend(check_nested_keys(value, key))
#     return nested_keys

# nested_keys = check_nested_keys(json_data)

# def get_order_schema(all_order_details):
#     # Load the JSON data into Polars
#     df = pl.DataFrame(all_order_details)

#     # Get the nested keys
#     nested_keys = get_nested_keys(all_order_details[0])

#     # Create separate tables for each nested keys
#     tables = {}
#     for key in nested_keys:
#         tables[key] = unnest_json(df, key)

#     return tables



# # Call the function with TUG_all_order_details
# process_order_details(TUG_all_order_details)

NameError: name 'process_order_details' is not defined

In [37]:
# Explode the 'items' column to one element per row, then flatten those
# elements into the columns of a new DataFrame
TUG_df_orders_items = pd.json_normalize(TUG_df_orders['items'].explode())

Save to parquet / json

In [None]:
# # Save to Parquet with GZIP compression
# parquet_file = '/mnt/data/orders.parquet.gzip'
# df_orders.to_parquet(parquet_file, compression='gzip')