# Task 1 - Querying the Metronome API
 - Query the Metronome API to retrieve relevant customer data.
 - Process the retrieved data to generate a summary report (csv).
 - Include essential customer information such as customer name, customer invoice balance, credit balance, etc. 
 - Process the report to a single csv

In [1]:
import pandas as pd
from dotenv import load_dotenv
import os
from utils import get_customers, load_and_process_data, get_customer_invoices, get_credit_balances, models_to_dicts
import json
from pathlib import Path

load_dotenv()

# Data directories
DATA_DIR = Path("data")
RAW_DATA_DIR = DATA_DIR / "raw"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
# Create directories if they don't exist
# Skip if they do
DATA_DIR.mkdir(exist_ok=True)
RAW_DATA_DIR.mkdir(exist_ok=True)
PROCESSED_DATA_DIR.mkdir(exist_ok=True)

customers_csv = PROCESSED_DATA_DIR / "customer_list.csv"
customer_invoices_csvs = PROCESSED_DATA_DIR / "invoices.csv"
customer_credit_balances_csv = PROCESSED_DATA_DIR / "credit_balances.csv"




In [2]:
# Preload customer data for selection tab
customer_list = get_customers()
customer_list_dicts = models_to_dicts(customer_list)
# Save customer list to file
with open(RAW_DATA_DIR / "customer_list.json", "w") as f:
    json.dump(customer_list_dicts, f)
# Save to csv   
customer_list_df = pd.DataFrame(customer_list_dicts)
customer_list_df.to_csv(customers_csv, index=False)

{}


In [3]:
# Get invoices for each customer
all_invoices_all_customers = []
for customer in customer_list:
    invoices = get_customer_invoices(customer.id)
    invoices_dicts = models_to_dicts(invoices)
    # Save invoices to file
    with open(RAW_DATA_DIR / f"{customer.id}_invoices.json", "w") as f:
        json.dump(invoices_dicts, f)
    all_invoices_all_customers.extend(invoices_dicts)
# Save all invoices to single csv
all_invoices_df = pd.DataFrame(all_invoices_all_customers)
all_invoices_df.to_csv(customer_invoices_csvs, index=False)

Fetching invoices for customer: 004747b8-9124-4060-989a-8d1075af2424
Fetching invoices for customer: 0602ebf7-659e-470a-a536-9fbd413fb42b
Fetching invoices for customer: 12184764-5687-4690-8794-35efc5586e72
Fetching invoices for customer: 15b367c9-04b9-4064-9a58-b589928898fd
Fetching invoices for customer: 1715df37-9b9b-4829-b381-e7febaefb102
Fetching invoices for customer: 20ffd2e6-ff2e-4347-8045-9e744ef8a986
Fetching invoices for customer: 2209a058-dbe1-4e8c-8325-fb8daa1cc987
Fetching invoices for customer: 2294f7f7-19a6-44a2-b9f9-4b6b79134d12
Fetching invoices for customer: 2ac3705a-51a6-4149-8f6a-0113941e94e7
Fetching invoices for customer: 2ae68df2-533e-44bf-9b4d-ac766c7ac3da
Fetching invoices for customer: 334ad07b-7bc1-4e3c-8337-a344837e344f
Fetching invoices for customer: 37154c55-bd42-4b7e-a453-51dd005b35b7
Fetching invoices for customer: 3e233dbd-0280-4def-aaa9-011a3a4ba745
Fetching invoices for customer: 40430943-1fa7-48f4-86ba-0c27ad2386a4
Fetching invoices for customer: 5b

In [4]:
# Get credit balances for each customer
customer_ids = [customer.id for customer in customer_list]
credit_balances = get_credit_balances(customer_ids=customer_ids)
credit_balances_dicts = models_to_dicts(credit_balances)
print(len(credit_balances_dicts))
# Save credit balances to file
with open(RAW_DATA_DIR / "credit_balances.json", "w") as f:
    json.dump(credit_balances_dicts, f)

# Save to a single csv
credit_balances_df = pd.DataFrame(credit_balances_dicts)
credit_balances_df.to_csv(customer_credit_balances_csv, index=False)

Fetching balances for 20 customers
5


In [5]:
import duckdb
con = duckdb.connect('customer_data.db')

In [6]:
# Load customers into duckdb
con.execute(f"CREATE TABLE customers AS SELECT * FROM read_csv_auto('{customers_csv}')")

# Load invoices into duckdb
con.execute(f"CREATE TABLE invoices AS SELECT * FROM read_csv_auto('{customer_invoices_csvs}')")

# Load credit balances into duckdb
con.execute(f"CREATE TABLE credit_balances AS SELECT * FROM read_csv_auto('{customer_credit_balances_csv}')")


<duckdb.duckdb.DuckDBPyConnection at 0x128fc4970>

In [7]:
# Converts PGSQL key-value string to JSON 
def convert_kv_to_json(kv_str: str) -> str:
    # Replace all single quotes with double quotes
    kv_str = kv_str.replace("'", '"')
    return kv_str

# Register the function in DuckDB
#con.remove_function("convert_kv_to_json")
con.create_function("convert_kv_to_json", convert_kv_to_json)



<duckdb.duckdb.DuckDBPyConnection at 0x128fc4970>

# Notes
- I'm not 100% confident that I pulled the exact right fields to calculate invoice or credit balance totals. I had to do some Google searching to determine what the right approach to calculating a balance might be and it turned out, of course, that was precalculated as running balance. 
- Also it said "current invoice" -- I took that to mean the newest one, so used a rank function to pull that. 
- Unclear if I should have done the same for credit balance, rank based on some date value. 
- Finally, I only gave myself so much time. I am sure there are areas to be improved. 

In [22]:
con.execute("""
            
WITH ranked_invoices AS (
    SELECT customer_id,
           total,
           end_timestamp,
           ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY end_timestamp DESC) AS rn
    FROM invoices
    WHERE status = 'FINALIZED'
),
            
invoice_totals as (
    SELECT customer_id,
       count(*) as total_invoices,
       CONCAT('$', ROUND(SUM(total/100),2), ' USD') AS total_invoiced
       FROM ranked_invoices
       WHERE rn = 1
       GROUP BY customer_id),
 balance_unpack AS (
    SELECT
        id,
        name,
        customer_id,
        reason,
        JSON(convert_kv_to_json(balance)) AS balance_properties,
        JSON(convert_kv_to_json(deductions)) AS deductions_properties,
        JSON(convert_kv_to_json(grant_amount)) AS grant_amount_properties
    FROM credit_balances
    WHERE balance IS NOT NULL),
            
   balance_adjustments as (select
            id,
            name,
            customer_id,
            reason, 
            deductions_properties[0].amount::double as deductions_amount, 
            deductions_properties[0].running_balance::double as running_balance, 
            balance_properties.including_pending::double as including_pending,
            balance_properties.excluding_pending::double as excluding_pending,
            grant_amount_properties.amount::double as grant_amount
            FROM balance_unpack
            ),
    total_adjustments as (
        SELECT
            customer_id,
            CONCAT('$', ROUND(SUM(running_balance/100),2), ' USD') AS total_balance_credits
        FROM balance_adjustments
        GROUP BY 1
    )
            select c.name,
            i.total_invoiced as current_invoice_balance,
            t.total_balance_credits as credit_balance,
            from customers c
            LEFT JOIN invoice_totals i ON c.id = i.customer_id
            LEFT JOIN total_adjustments t ON c.id = t.customer_id
            ORDER BY c.name
            
            """).fetchdf()#.to_csv("./submissions/task_1_invoicing.csv", index=False)

Unnamed: 0,name,current_invoice_balance,credit_balance
0,AJLUAY Corp.,$1629.04 USD,$0.0 USD
1,BQYGSA Corp.,$508.97 USD,
2,C3 Company,,
3,DJLAPQ Ltd.,$529.12 USD,$826.3 USD
4,Example-Customer-5,,
5,FZR Ltd.,$487.92 USD,$0.0 USD
6,GFRSUWSM LLC,$506.03 USD,
7,HDCEOZSD Inc.,$532.07 USD,
8,HWU LLC,$11202.1 USD,
9,IITGBOGYI Corp.,$11089.78 USD,$0.0 USD


In [37]:
tables = con.execute("SHOW TABLES").fetchall()
print(tables)
# Expose schema for each table
for table in tables:
    table_name = table[0]
    print(f"Table: {table_name}")
    print(con.execute(f"DESCRIBE {table_name}").fetchdf())
    #print(con.execute(f"SELECT * FROM {table_name} LIMIT 5").fetchdf())
    print("\n")

[('credit_balances',), ('customers',), ('invoices',)]
Table: credit_balances
           column_name column_type null   key default extra
0                   id     VARCHAR  YES  None    None  None
1                 name     VARCHAR  YES  None    None  None
2          customer_id     VARCHAR  YES  None    None  None
3       uniqueness_key     VARCHAR  YES  None    None  None
4               reason     VARCHAR  YES  None    None  None
5         effective_at   TIMESTAMP  YES  None    None  None
6           expires_at   TIMESTAMP  YES  None    None  None
7             priority      DOUBLE  YES  None    None  None
8         grant_amount     VARCHAR  YES  None    None  None
9          paid_amount     VARCHAR  YES  None    None  None
10             balance     VARCHAR  YES  None    None  None
11          deductions     VARCHAR  YES  None    None  None
12  pending_deductions     VARCHAR  YES  None    None  None
13       custom_fields     VARCHAR  YES  None    None  None
14   credit_grant_type 