# Load prior billables (one-time)

This notebook is meant to be run just once, to load prior billables into the database.

The resulting `prior_billables` table is used to identify duplicate leads in new data, so that clients are not double-billed.

In [51]:
from db_engines import wh_db as DB

import pandas as pd
from pandas import Series as Ser, DataFrame as Df

from pathlib import Path
from os import environ as os_environ
from dotenv import load_dotenv

from sqlalchemy.dialects.postgresql import\
    DATE, TIMESTAMP, VARCHAR, INTEGER, BIGINT
# Load variables from a .env file (if one is found) into the process
# environment. As the last expression of the cell, the call's return value
# is what the cell displays.
load_dotenv()


True

In [52]:
# CONSTANTS / CONFIG

# Path of the source CSV, taken from the environment (KeyError if unset).
SRC_PTH = Path(os_environ['PRMDIA_MM_PRIOR_BILLABLES_SRCPTH'])

# Time zone used when localizing timestamps and in the tz-aware dtype below.
TZ_REPORTS_LOCAL = 'US/Central'

# pandas dtype per column, applied with DataFrame.astype.
ASTYPE = dict(
    callerid='Int64',
    delivery_code='string',
    practice='string',
    caller_name='string',
    how_sent='string',
    toll='Int64',
    call_date='datetime64[ns]',
    lead_delivery_date='datetime64[ns]',
    processed=f'datetime64[ns, {TZ_REPORTS_LOCAL}]',
    af_acct='Int32',
    af_prac_id='Int32',
)

# SQL column type per column, handed to DataFrame.to_sql.
DTYPE = dict(
    callerid=BIGINT,
    delivery_code=VARCHAR,
    practice=VARCHAR,
    caller_name=VARCHAR,
    how_sent=VARCHAR,
    toll=BIGINT,
    call_date=DATE,
    lead_delivery_date=DATE,
    processed=TIMESTAMP(timezone=True),
    af_acct=INTEGER,
    af_prac_id=INTEGER,
)

# Columns parsed with pd.to_datetime.
DATE_COLS = ['call_date', 'lead_delivery_date', 'processed']

# Columns whose timestamps get localized to TZ_REPORTS_LOCAL.
DT_TO_LOC = ['processed']

# Name of the destination table.
TBLNM = 'prior_billables'

# (column to fill, column supplying the fallback values)
DELIV_FILLNA = ('lead_delivery_date', 'call_date')

In [53]:
# EXTRACT
# Read the source CSV, then let pandas choose NA-aware dtypes per column.
df = (
    pd.read_csv(SRC_PTH)
    .convert_dtypes()
)


In [54]:
# CLEAN UP DATES/TS
# Parse every date-like column into pandas datetimes.
for c in DATE_COLS:
    df[c] = pd.to_datetime(df[c])

# Put the listed timestamp columns into the reports' local time zone.
# Naive values are localized; with pandas' defaults, ambiguous or
# non-existent DST wall-clock times raise rather than being guessed.
# Values that are already tz-aware (e.g. this cell was re-run on the same
# kernel) are converted instead, because tz_localize raises TypeError on
# tz-aware data. This keeps the cell safe to re-run.
for c in DT_TO_LOC:
    if df[c].dt.tz is None:
        df[c] = df[c].dt.tz_localize(TZ_REPORTS_LOCAL)
    else:
        df[c] = df[c].dt.tz_convert(TZ_REPORTS_LOCAL)


In [55]:
# FILL IN LEAD DELIVERY DATE
# DELIV_FILLNA is (column to fill, fallback column): missing values in the
# first are taken from the same row of the second.
fill_col, fallback_col = DELIV_FILLNA
df[fill_col] = df[fill_col].fillna(df[fallback_col])


In [56]:
# FINALIZE DTYPES IN DF
# Cast each column to the pandas dtype listed for it in ASTYPE.
df = df.astype(dtype=ASTYPE)


In [57]:
# INSPECT
# Print a summary of the frame (row count, per-column non-null counts and
# dtypes) as a sanity check before loading.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9813 entries, 0 to 9812
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype                     
---  ------              --------------  -----                     
 0   callerid            9813 non-null   Int64                     
 1   delivery_code       9801 non-null   string                    
 2   practice            9809 non-null   string                    
 3   caller_name         9813 non-null   string                    
 4   how_sent            3664 non-null   string                    
 5   toll                9436 non-null   Int64                     
 6   call_date           9813 non-null   datetime64[ns]            
 7   lead_delivery_date  9813 non-null   datetime64[ns]            
 8   processed           8628 non-null   datetime64[ns, US/Central]
 9   af_acct             9813 non-null   Int32                     
 10  af_prac_id          9565 non-null   Int32                     
dtypes: I

In [58]:
# LOAD
# Write the frame to the TBLNM table through a connection from DB.
# if_exists='replace' drops any existing table of that name before inserting;
# the DataFrame index is not written, and column SQL types come from DTYPE.
to_sql_opts = dict(
    name=TBLNM,
    index=False,
    if_exists='replace',
    dtype=DTYPE,
)
with DB.connect() as db_conn:
    df.to_sql(con=db_conn, **to_sql_opts)