Generate exception reports when there are data inconsistencies, such as:
1. Missing pricing models for a transaction
2. Zero or negative transaction amounts
3. Duplicated transactions within a short time period (the same calendar day)
4. Large transaction amounts outside the normal range

In [None]:
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

In [None]:
# Build the connection URL from environment variables so that no credentials
# are stored in the notebook. The template's '{...}' placeholders must be
# filled in before the URL is handed to create_engine.
# NOTE(review): the DB_* variable names are a convention chosen here - adjust
# them to whatever your deployment provides.
DB_URL_TEMPLATE = 'postgresql://{username}:{password}@{host}:{port}/{database}'
engine = create_engine(
    DB_URL_TEMPLATE.format(
        # Percent-encode the credentials: characters such as '@' or '/' would
        # otherwise be read as URL delimiters.
        username=quote_plus(os.environ['DB_USERNAME']),
        password=quote_plus(os.environ['DB_PASSWORD']),
        host=os.environ.get('DB_HOST', 'localhost'),
        port=os.environ.get('DB_PORT', '5432'),
        database=os.environ['DB_DATABASE'],
    )
)

In [16]:
# Pull both source tables in full through the shared engine.
transactions_query = 'SELECT * FROM sales_transaction'
pricing_model_query = 'SELECT * FROM pricing_model'
transactions = pd.read_sql(transactions_query, con=engine)
pricing_model = pd.read_sql(pricing_model_query, con=engine)

In [75]:
# 1. user_name values that occur in transactions but not in pricing_model
has_pricing_model = transactions['user_name'].isin(pricing_model['user_name'])
# .unique() collapses the matching rows to one entry per distinct name.
missing_pricing_model = transactions.loc[~has_pricing_model, 'user_name'].unique()

In [17]:
# 2. Transactions whose amount is zero or negative
non_positive_amount = transactions['amount'].le(0)
error_transaction = transactions.loc[non_positive_amount]
error_transaction

Unnamed: 0,trans_dt_time,cc_num,user_name,category,amount,first_name,last_name,gender,street,city,...,long,city_popn,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud,merch_zipcode


In [24]:
# 3. Transactions repeated on the same calendar day
# Strip the time of day from each timestamp and store the date back as a
# datetime column.
transactions['dup_date'] = pd.to_datetime(transactions['trans_dt_time'].dt.date)
duplicate_keys = ['amount', 'dup_date', 'cc_num', 'user_name']
# keep=False flags every row of a duplicate group, not only the repeats.
is_duplicate = transactions.duplicated(subset=duplicate_keys, keep=False)
dup_trans = transactions[is_duplicate]
dup_trans

Unnamed: 0,trans_dt_time,cc_num,user_name,category,amount,first_name,last_name,gender,street,city,...,city_popn,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud,merch_zipcode,dup_date
907325,2019-12-28 16:05:07,4170689000000000.0,Schmidt and Sons,shopping_net,1.86,Samuel,Frey,M,830 Myers Plaza Apt. 384,Edmond,...,116001.0,Media buyer,1993-05-10,21c8da1164c1c630ffb7773db7092e43,1356711000.0,36.286931,-96.96986,0,73061.0,2019-12-28
907358,2019-12-28 16:12:06,4170689000000000.0,Schmidt and Sons,shopping_net,1.86,Samuel,Frey,M,830 Myers Plaza Apt. 384,Edmond,...,116001.0,Media buyer,1993-05-10,b81aa8661547a4eacaa40c4d53620244,1356711000.0,36.463805,-97.609183,0,73738.0,2019-12-28


In [66]:
# 4. Large transaction amount outside the normal range
def large_trans_cat(row, travel_threshold=10000, default_threshold=5000):
    """Tell whether a transaction's amount reaches the limit for its category.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must support ``row['category']`` and ``row['amount']``.
    travel_threshold : number, default 10000
        Limit used when ``row['category']`` equals ``'travel'``.
    default_threshold : number, default 5000
        Limit used for every other category.

    Returns
    -------
    bool
        Result of ``row['amount'] >= limit``; an amount equal to the limit
        counts as large.
    """
    limit = travel_threshold if row['category'] == 'travel' else default_threshold
    return row['amount'] >= limit

# Evaluate the per-category limit row by row, keep the flag as a column,
# then select the flagged rows.
is_large = transactions.apply(large_trans_cat, axis=1)
transactions['large_trans'] = is_large
large_trans = transactions.loc[transactions['large_trans']]

In [65]:
# Display the transactions selected into large_trans.
large_trans

Unnamed: 0,trans_dt_time,cc_num,user_name,category,amount,first_name,last_name,gender,street,city,...,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud,merch_zipcode,dup_date,large_trans
12085,2019-01-07 23:50:36,3567698000000000.0,Pouros-Haag,shopping_pos,5444.24,John,Stevens,M,428 Morgan River,Hudson,...,Travel agency manager,1998-07-29,e80105badbd35c0f583e5f2910ea951a,1325980000.0,41.588036,-73.799288,0,12533.0,2019-01-07,True
17744,2019-01-11 16:37:15,343472700000000.0,"Schroeder, Wolff and Hermiston",travel,11872.21,Gloria,Wallace,F,234 Bridges Wells Apt. 389,Center Tuftonboro,...,"Optician, dispensing",1971-08-05,4f3ddd38d97af5a9808705a6bd742960,1326300000.0,43.235621,-71.391741,0,3258.0,2019-01-11,True
37315,2019-01-22 15:24:15,2264938000000000.0,"Ritchie, Bradtke and Stiedemann",travel,10776.59,Juan,Sherman,M,5939 Garcia Forges Suite 297,San Antonio,...,Land,1995-10-17,16928015dca994adc051afe4b42b1c54,1327246000.0,29.772325,-98.593405,0,78015.0,2019-01-22,True
48934,2019-01-29 13:37:16,3598634000000000.0,"Watsica, Haag and Considine",shopping_pos,5027.6,David,Hughes,M,707 Butler Parkways Apt. 747,Omaha,...,"Surveyor, land/geomatics",1995-10-10,7a383f0b9524dfcb974e9935ec95d4d4,1327844000.0,41.667116,-95.44668,0,51565.0,2019-01-29,True
59564,2019-02-04 21:22:30,30235270000000.0,"Champlin, Rolfson and Connelly",travel,12788.07,Kenneth,Doyle,M,8614 Reed Glen,West Harrison,...,Lexicographer,1977-08-16,b544af3bb2b803347b8b71995d965acd,1328391000.0,40.269472,-73.482064,0,,2019-02-04,True
105732,2019-03-02 15:36:36,4633065000000000.0,"Champlin, Rolfson and Connelly",travel,11629.34,Jasmine,Wade,F,90662 Lewis Avenue,Providence,...,"Nurse, children's",1995-11-29,8a31df33975e8773d4c8cc34799c5624,1330703000.0,42.504489,-71.379189,0,1741.0,2019-03-02,True
124796,2019-03-10 16:33:32,4302481000000000.0,Kerluke-Abshire,shopping_net,6818.74,David,Rodriguez,M,821 Solis Points,Muskegon,...,Historic buildings inspector/conservation officer,1995-05-25,6e39875e0d0c507b7d620aeb7e670219,1331397000.0,43.30082,-86.432623,0,49445.0,2019-03-10,True
150564,2019-03-22 13:17:34,4450831000000000.0,"Larson, Quitzon and Spencer",travel,15034.18,Donna,Davis,F,6760 Donovan Lakes,Clayton,...,Occupational psychologist,1972-01-20,938cfcbde2c3a64c38b7855eb8288de0,1332422000.0,35.266488,-95.033506,0,74462.0,2019-03-22,True
159575,2019-03-25 17:38:55,372509300000000.0,Tillman LLC,travel,10469.09,Kristen,Hanson,F,26544 Andrea Glen,Goodrich,...,Learning disability nurse,1985-06-18,37d46917137e98e2299a80603c270008,1332697000.0,42.192907,-84.09466,0,48158.0,2019-03-25,True
164881,2019-03-28 16:53:47,630451500000.0,Ankunding-Carroll,travel,14630.68,Rachel,Daniels,F,561 Little Plain Apt. 738,Wetmore,...,Immunologist,1972-06-12,c3fd5aeb603b150ce5c05c7ee38ada97,1332954000.0,46.944452,-85.746696,0,,2019-03-28,True


In [81]:
# Consolidate the four exception checks into one list of report entries:
# each section contributes its title followed by either its CSV text or
# "No exceptions".

# missing_pricing_model is a plain array of names (the result of .unique())
# and has no .to_csv(); wrap it in a Series so it is handled like the
# DataFrame sections.
missing_pricing_model_report = pd.Series(missing_pricing_model, name='user_name')

exception_sections = [
    ("1. Missing pricing models for a transaction", missing_pricing_model_report),
    ("2. Zero or negative transaction amounts", error_transaction),
    (" 3. Duplicated transactions within the day", dup_trans),
    ("  4. Large transaction amount outside the normal range", large_trans),
]

exception_list = []
for section_title, findings in exception_sections:
    exception_list.append(section_title)
    # Only an empty section reports "No exceptions"; a section with findings
    # contributes its rows as CSV text.
    if findings.empty:
        exception_list.append("No exceptions")
    else:
        exception_list.append(findings.to_csv(index=False))

In [83]:
# Save the report to disk, leaving a blank line after every entry.
with open('exception_report.csv', 'w') as report_file:
    report_file.writelines(entry + "\n\n" for entry in exception_list)