# NameX Daily Stats

Load the libraries this notebook needs in order to query, load, manipulate and view the data.

In [None]:
import os
import pandas as pd
import csv
from datetime import datetime, timedelta

# Load the SQL magic extension that the connection and query cells below rely on.
%load_ext sql
# Limit how many result rows are rendered in a cell's output (display limit of 5).
%config SqlMagic.displaylimit = 5

This creates the connection to the NameX database and prepares the Jupyter SQL magic to use it.

In [None]:
namex_db = 'postgresql://' + \
                os.getenv('PG_USER', '') + ":" + os.getenv('PG_PASSWORD', '') +'@' + \
                os.getenv('PG_HOST', '') + ':' + os.getenv('PG_PORT', '5432') + '/' + os.getenv('PG_DB_NAME', '');

%sql $namex_db


In [None]:
pay_db = 'postgresql://' + \
                os.getenv('PAY_USER', '') + ":" + os.getenv('PAY_PASSWORD', '') +'@' + \
                os.getenv('PAY_HOST', '') + ':' + os.getenv('PAY_PORT', '5432') + '/' + os.getenv('PAY_DB_NAME', '');

%sql $pay_db

Simplest query to run to ensure our libraries are loaded and our DB connection is working

In [None]:
%%sql $namex_db
select now() AT TIME ZONE 'PST' as current_date
-- Connectivity check that returns the current database time converted to the PST zone.
-- NOTE(review) PST is a fixed UTC-8 abbreviation in PostgreSQL and does not follow daylight saving, confirm this is intended.

Daily totals for the specified date range: the queries below filter on a start date computed as today minus a number of days, so 0 means today, 1 means yesterday, 2 means the day before yesterday, and so on.

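Set the number of days we want the report to be run over. The values are read from the `NUMBER_OF_DAYS_NR` and `NUMBER_OF_DAYS_PAYMENT` environment variables and default to 1 when unset.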

In [None]:
def _days_ago_iso(days):
    """Return the date `days` days before now, formatted as YYYY-MM-DD."""
    return (datetime.now() - timedelta(days)).strftime('%Y-%m-%d')


# Look-back window (in days) for the name-request query; defaults to 1.
number_of_days_nr = int(os.getenv('NUMBER_OF_DAYS_NR', '1'))
report_start_date = _days_ago_iso(number_of_days_nr)

# Look-back window (in days) for the payments query; defaults to 1.
number_of_days_payment = int(os.getenv('NUMBER_OF_DAYS_PAYMENT', '1'))
payments_start_date = _days_ago_iso(number_of_days_payment)

## get all duplicate names

In [None]:
%%sql $namex_db name_requests  <<
SELECT DISTINCT
    -- One row per first-choice name whose text occurs more than once in the window,
    -- together with the request details and the applicant contact columns.
    r.id,
    r.nr_num,
    r.priority_cd AS priority,
    r.state_cd AS nr_state,
    r.submitted_date,
    r.source,
    r.previous_request_id AS resubmit,
    n.name,
    a.first_name || ' ' || a.last_name AS customer_name,
    a.phone_number,
    a.email_address
FROM requests r
    INNER JOIN names n ON n.nr_id = r.id
    INNER JOIN applicants a ON a.nr_id = r.id
WHERE r.submitted_date::date >= :report_start_date
    AND r.state_cd <> 'PENDING_DELETION'
    AND r.nr_num NOT LIKE 'NR L%'
    AND n.choice = 1
    AND n.name IN (
        -- Names found on more than one names row in the same window. Every
        -- choice is counted here and only PENDING_DELETION requests are excluded.
        SELECT dup_n.name
        FROM requests dup_r
            INNER JOIN names dup_n ON dup_n.nr_id = dup_r.id
        WHERE dup_r.submitted_date::date >= :report_start_date
            AND dup_r.state_cd NOT IN ('PENDING_DELETION')
        GROUP BY dup_n.name
        HAVING COUNT(dup_n.name) > 1
    )
ORDER BY n.name
;

In [None]:
# Materialise the duplicate-name query result as a pandas DataFrame.
nr_frame = name_requests.DataFrame()

# Target: <cwd>/data/nr_duplicates_namex_<report start date>.csv
# Create the directory first so open() does not fail when it is missing.
data_dir = os.path.join(os.getcwd(), 'data')
os.makedirs(data_dir, exist_ok=True)
filename = os.path.join(data_dir, 'nr_duplicates_namex_' + report_start_date + '.csv')

# The encoding has to be set on open(): pandas does not apply
# to_csv(encoding=...) when it is handed an already-open text handle, so the
# file would otherwise be written in the platform default encoding.
# newline='' leaves line endings to the CSV writer (avoids doubled carriage
# returns on Windows).
with open(filename, 'w', encoding='utf-8', newline='') as f:
    # When the query returned nothing the file is still created, but left empty.
    if not nr_frame.empty:
        nr_frame.to_csv(f, sep=',', index=False)

## get all payments

In [None]:
%%sql $pay_db paid  <<
SELECT
    -- Paid invoices for NR identifiers created on or after the payments start date,
    -- with their invoice reference, payment status and receipt number when present.
    i.business_identifier,
    i.id AS invoice_id,
    i.created_on,
    ir.invoice_number,
    i.invoice_status_code AS invoice_status,
    p.payment_status_code AS pay_status,
    i.total,
    i.paid,
    r.receipt_number
FROM invoices AS i
    LEFT OUTER JOIN invoice_references AS ir
        ON ir.invoice_id = i.id
    LEFT OUTER JOIN payments AS p
        ON p.invoice_number = ir.invoice_number
    LEFT OUTER JOIN receipts AS r
        ON r.invoice_id = i.id
WHERE
    -- Qualified with the invoices alias so the filter stays unambiguous
    -- even if a joined table also has a created_on column.
    i.created_on >= :payments_start_date
    AND i.invoice_status_code = 'PAID'
    AND i.business_identifier LIKE 'NR%'
    -- NOTE(review) 101.5 is an unexplained amount that is filtered out, confirm its meaning.
    AND i.paid <> 101.5
ORDER BY invoice_id ASC;

In [None]:
# Materialise the payments query result as a pandas DataFrame.
paid_frame = paid.DataFrame()
if not paid_frame.empty:
    # Duplicate the identifier under the name 'nr_num', the key the merge step joins on.
    paid_frame['nr_num'] = paid_frame['business_identifier']

# Target: <cwd>/data/nr_duplicates_payment_<payments start date>.csv
# Create the directory first so open() does not fail when it is missing.
data_dir = os.path.join(os.getcwd(), 'data')
os.makedirs(data_dir, exist_ok=True)
payment_filename = os.path.join(data_dir, 'nr_duplicates_payment_' + payments_start_date + '.csv')

# The encoding has to be set on open(): pandas does not apply
# to_csv(encoding=...) when it is handed an already-open text handle.
# newline='' is what the csv module expects for files it writes to.
with open(payment_filename, 'w', encoding='utf-8', newline='') as f:
    if paid_frame.empty:
        # Placeholder row so the file is never empty when there is no data.
        csv.writer(f).writerow(('No Data Retrieved', ''))
    else:
        paid_frame.to_csv(f, sep=',', index=False)

## Merge the Duplicate Names with Payment information

In [None]:
# Combine the duplicate-name rows with their payment rows (when there are any).
if nr_frame.empty:
    # No duplicate names at all: nothing to report.
    result_frame = pd.DataFrame([])
elif paid_frame.empty:
    # Duplicates but no payments: report the name requests without the internal id.
    result_frame = nr_frame.drop(columns=['id'])
else:
    # Left join keeps every duplicate name request, paid or not.
    merged_frame = nr_frame.merge(paid_frame, how='left', on='nr_num')
    unwanted_columns = [
        'id',
        'business_identifier',
        'created_on',
        'invoice_number',
        'total',
        'receipt_number',
    ]
    result_frame = merged_frame.drop(columns=unwanted_columns)

In [None]:
# Target: <cwd>/data/nr_duplicates_<report start date>.csv
# Create the directory first so open() does not fail when it is missing.
data_dir = os.path.join(os.getcwd(), 'data')
os.makedirs(data_dir, exist_ok=True)
merged_filename = os.path.join(data_dir, 'nr_duplicates_' + report_start_date + '.csv')

# The encoding has to be set on open(): pandas does not apply
# to_csv(encoding=...) when it is handed an already-open text handle.
# newline='' is what the csv module expects for files it writes to.
with open(merged_filename, 'w', encoding='utf-8', newline='') as f:
    if result_frame.empty:
        # Placeholder row so the file is never empty when there is no data.
        csv.writer(f).writerow(('No Data Retrieved', ''))
    else:
        result_frame.to_csv(f, sep=',', index=False)