# NameX Daily Stats

We need to load these libraries into our notebook in order to query, load, manipulate and view the data.

In [None]:
import os
import sys
import requests
import logging
import pandas as pd
import csv
import json
import gspread
import sqlalchemy
from sqlalchemy import create_engine
from datetime import datetime, timedelta, tzinfo, timezone
from oauth2client.service_account import ServiceAccountCredentials
from df2gspread import df2gspread as d2g
# Put the report package directory at the front of the import search path
# (only once), presumably so the project-local imports that follow resolve.
module_path = os.path.join(os.getcwd(), "nr_duplicates_report")
if sys.path.count(module_path) == 0:
    sys.path.insert(0, module_path)
from util.token import get_bearer_token
from config import Config

%load_ext sql
%config SqlMagic.displaylimit = 5
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

This will create the connection to the database and prep the jupyter magic for SQL...

In [None]:
# Build the SQLAlchemy engine from the connection URI held in the project Config.
# Later cells open their database connections from this engine.
engine = create_engine(Config.SQLALCHEMY_DATABASE_URI)

Daily totals for the specified date: in the following query, 'current_date - 0' means today, 'current_date - 1' means yesterday, 'current_date - 2' means the day before yesterday, and so on.

Set the number of days we want the report to be run over.

In [None]:
# Look-back window (in days) for the name-request query, and the matching
# start date rendered as YYYY-MM-DD (local time of the kernel).
number_of_days_nr = int(Config.NUMBER_OF_DAYS_NR)
report_start_date = (datetime.now() - timedelta(days=number_of_days_nr)).strftime('%Y-%m-%d')

# Same calculation for the payment look-back window.
number_of_days_payment = int(Config.NUMBER_OF_DAYS_PAYMENT)
payments_start_date = (datetime.now() - timedelta(days=number_of_days_payment)).strftime('%Y-%m-%d')

## Get all duplicate names

In [None]:
# Select the first-choice name (n.choice = 1) of every request submitted on or
# after :report_start_date whose name occurs more than once in that window.
# Requests in PENDING_DELETION and NR numbers matching 'NR L%' are excluded
# from the outer result. Note that the inner duplicate count looks at names of
# every choice, not only the first one.
query = """
SELECT DISTINCT
r.id, r.nr_num, r.priority_cd AS priority, r.state_cd AS nr_state, 
r.submitted_date, r.source, r.previous_request_id AS resubmit,
n.name, 
a.first_name || ' ' || a.last_name AS customer_name, 
a.phone_number, a.email_address
FROM requests r
JOIN names n ON r.id = n.nr_id
JOIN applicants a ON r.id = a.nr_id
WHERE r.submitted_date::date >= :report_start_date
AND r.state_cd <> 'PENDING_DELETION'
AND r.nr_num NOT LIKE 'NR L%'
AND n.choice = 1
AND n.name IN (
    SELECT n.name
    FROM requests r, names n
    WHERE r.id = n.nr_id
    AND r.submitted_date::date >= :report_start_date
    AND r.state_cd NOT IN ('PENDING_DELETION')
    GROUP BY n.name
    HAVING COUNT(n.name) > 1
)
ORDER BY n.name;
"""

# Bind the start date as a named parameter rather than formatting it into the SQL.
statement = sqlalchemy.text(query)
bind_params = {"report_start_date": report_start_date}

with engine.connect() as conn:
    result = conn.execute(statement, bind_params)
    column_names = result.keys()
    # Materialise all rows into a DataFrame while the connection is still open.
    name_requests = pd.DataFrame(result.fetchall(), columns=column_names)

## Get all payments

In [None]:
# Ask the OCP relay for the payments recorded since `payments_start_date`.
PAY_RELAY_URL = Config.OCP_RELAY_URL + '/payments/pay'
headers = {
    "Authorization": f"Bearer {get_bearer_token()}"
}
params = {"payments_start_date": payments_start_date}

response = requests.get(PAY_RELAY_URL, params=params, headers=headers)
if response.ok:
    invoices = response.json()
else:
    # logging.exception() is only meaningful inside an `except` block, and a
    # message without %s placeholders cannot take extra arguments: the log call
    # itself would fail to format. Use logging.error with %-style placeholders.
    logging.error("Error fetching invoices: %s %s", response.status_code, response.text)
    # Fall back to "no payments" so the later cells that read `invoices` still
    # run instead of raising NameError (or silently reusing a stale value left
    # in the kernel by an earlier run).
    invoices = []

## Merge the Duplicate Names with Postgres Payment information

In [None]:
# Left-join the duplicate name requests with the payment records on `nr_num`.
nr_frame = name_requests

paid_frame = pd.DataFrame(invoices)
if invoices:
    # `business_identifier` is used as the NR number on the payment side;
    # copy it into `nr_num` so both frames share the join key.
    paid_frame['nr_num'] = paid_frame['business_identifier']

# Default to the bare name requests; only merge when both sides have rows.
result_frame = nr_frame
if not (nr_frame.empty or paid_frame.empty):
    dropped_columns = ['id', 'business_identifier', 'created_on', 'invoice_number', 'total', 'receipt_number']
    merged_frame = pd.merge(nr_frame, paid_frame, how='left', on=['nr_num'])
    # Remove the identifier/bookkeeping columns that are not wanted in the report.
    result_frame = merged_frame.drop(columns=dropped_columns)

## Get all legacy payments from GlobalP

In [None]:
# Ask the OCP relay for the legacy (COLIN) payments since `payments_start_date`.
COLIN_PAY_RELAY_URL = Config.OCP_RELAY_URL + '/payments/colin'
headers = {
    "Authorization": f"Bearer {get_bearer_token()}"
}
params = {"payments_start_date": payments_start_date}
response = requests.get(COLIN_PAY_RELAY_URL, params=params, headers=headers)

if response.ok:
    invoices = response.json()
    global_payment_frame = pd.DataFrame(invoices)
else:
    # Report through logging, like the other cells of this notebook.
    logging.error("Error fetching invoices: %s %s", response.status_code, response.text)
    # Always define the frame: the following cell reads `global_payment_frame`
    # unconditionally and would otherwise fail with NameError. An empty frame
    # makes that cell skip the legacy-payment merge.
    global_payment_frame = pd.DataFrame()

## Merge the Duplicate Names with Global Payment information

In [None]:
# Lower-case the legacy payment column names before joining on `nr_num`.
# astype(str) keeps the .str accessor usable when the frame has no columns.
global_payment_frame.columns = global_payment_frame.columns.astype(str).str.lower()

# Only merge when both sides have rows; otherwise keep the report as it is.
if not result_frame.empty and not global_payment_frame.empty:
    result_frame = pd.merge(result_frame, global_payment_frame, how='left', on=['nr_num'])

# Write the report next to the notebook's package, creating the data directory
# on first use instead of failing with FileNotFoundError.
result_dir = os.path.join(os.getcwd(), 'nr_duplicates_report', 'data')
os.makedirs(result_dir, exist_ok=True)
result_filename = os.path.join(result_dir, 'nr_duplicates_' + payments_start_date + '.csv')

# The encoding must be set on open(): DataFrame.to_csv ignores its own
# `encoding` argument when handed an already-open text handle. newline=''
# stops the platform layer from doubling the line endings the csv writer emits.
with open(result_filename, 'w', encoding='utf-8', newline='') as f:
    if result_frame.empty:
        # Placeholder content so the file exists even when there is nothing to report.
        f.write('none')
    else:
        result_frame.to_csv(f, sep=',', index=False)

## Upload the data to a Google Sheets spreadsheet.

In [None]:
# Upload the report to the shared spreadsheet, in production only and only
# when there is something to upload.
if Config.ENVIRONMENT == 'prod':
    if not result_frame.empty:
        # Service-account key material assembled from environment variables.
        dictionary = {
            "type": os.getenv('TYPE', ''),
            "project_id": os.getenv('PROJECT_ID', ''),
            "private_key_id": os.getenv('PRIVATE_KEY_ID', ''),
            "private_key": os.getenv('PRIVATE_KEY', ''),
            "client_email": os.getenv('CLIENT_EMAIL', ''),
            "client_id": os.getenv('CLIENT_ID', ''),
            "auth_uri": os.getenv('AUTH_URI', ''),
            "token_uri": os.getenv('TOKEN_URI', ''),
            # Environment variable names are case-sensitive on POSIX. Prefer the
            # consistently upper-cased name and fall back to the original
            # mixed-case spelling so existing deployments keep working.
            "auth_provider_x509_cert_url": (
                os.getenv('AUTH_PROVIDER_X509_CERT_URL')
                or os.getenv('AUTH_PROVIDER_X509_cert_URL', '')
            ),
            "client_x509_cert_url": os.getenv('CLIENT_X509_CERT_URL', '')
        }

        scope = ['https://www.googleapis.com/auth/spreadsheets']
        # Build the credentials straight from the dict. The previous version
        # wrote the key to nr_duplicates_report/data/service_key.json but read
        # it back from data/service_key.json, a different path, and it left
        # the private key on disk beside the report output.
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(dictionary, scope)
        gc = gspread.authorize(credentials)

        # Worksheet is named after the day-of-month of the payment start date.
        wks_name = 'Day - ' + datetime.strftime(datetime.now()-timedelta(number_of_days_payment), '%d')
        spreadsheet_key = '1KFo3oUyzXo9A1aAOSy8cjR5ArxVT2Uvgdbe8NEZNLJU'
        sheet = d2g.upload(result_frame, spreadsheet_key, wks_name, credentials=credentials, col_names=True, row_names=False)
else:
    logging.info('Skipping upload to Google Storage')