This notebook is intended to review average transaction value ranges in order to establish an upper and lower end of average transactions

In [1]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd

# 0. load data

This data set is going to focus on transactions from 2022 forward

In [2]:
START_DATE = "2022-01-01"

In [5]:
q = """select
            t.id,
            t.date,
            t.amount,
            t.recurring,
            t.recurring_origin,
            f.type,
            t.source
        from transactions as t
            left join form as f on t.form=f.id
        where
            t.status='A' and
            t.date>={}""".format(START_DATE)
trans = redshift_query_read(q, schema='production')

In [8]:
print("{:,} transactions".format(len(trans)))
print("Dates: {} to {}".format(trans['date'].min(), trans['date'].max()))
print("{:,} one time transactions".format(len(trans[trans['recurring']==0])))
print("{:,} recurring transactions".format(len(trans[trans['recurring']!=0])))
print("{:,} recurring origin transactions".format(len(trans[trans['recurring_origin']==1])))

13,910,835 transactions
Dates: 2020-01-01 00:00:00 to 2024-10-16 00:00:00
7,936,446 one time transactions
5,974,389 recurring transactions
316,017 recurring origin transactions


# 1. one time transactions

In [11]:
onetime = trans[trans['recurring']==0]

In [12]:
print("All one time transactions:")
print("-"*40)
print("${:,.2f} mean; ${:,.2f} median".format(onetime['amount'].mean(), onetime['amount'].median()))

All one time transactions:
----------------------------------------
$174.18 mean; $50.00 median


In [21]:
print("Value ranges with sample proportion:")
print("-"*40)
ranges = [0, 5, 25, 100, 250, 500, 1000]
for i in range(len(ranges)):
    if i != len(ranges) - 1:
        floor = ranges[i]
        ceil = ranges[i+1]
        sample = len(onetime[(onetime['amount']>=floor)&(onetime['amount']<ceil)]) / len(onetime)
        print("${:,.0f}-${:,.0f}: {:.2f}%".format(floor, ceil, sample * 100.))
    else:
        sample = len(onetime[onetime['amount']>ranges[i]]) / len(onetime)
        print("${:,.0f}+: {:.2f}%".format(ranges[i], sample * 100.))

Value ranges with sample proportion:
----------------------------------------
$0-$5: 15.86%
$5-$25: 12.02%
$25-$100: 34.46%
$100-$250: 23.32%
$250-$500: 6.44%
$500-$1,000: 4.23%
$1,000+: 2.48%


# 2. recurring

In [27]:
rec = trans[trans['recurring']!=0]
rec_origin = rec[rec['recurring_origin']==1]

In [28]:
print("All recurring transactions:")
print("-"*40)
print("${:,.2f} mean; ${:,.2f} median".format(rec['amount'].mean(), rec['amount'].median()))

print()

print("Originating recurring transactions:")
print("-"*40)
print("${:,.2f} mean; ${:,.2f} median".format(rec_origin['amount'].mean(), rec_origin['amount'].median()))

All recurring transactions:
----------------------------------------
$58.26 mean; $25.00 median

Originating recurring transactions:
----------------------------------------
$103.27 mean; $25.99 median


In [29]:
print("Value ranges with sample proportion:")
print("-"*40)
ranges = [0, 5, 25, 100, 250, 500, 1000]
for i in range(len(ranges)):
    if i != len(ranges) - 1:
        floor = ranges[i]
        ceil = ranges[i+1]
        sample = len(rec_origin[(rec_origin['amount']>=floor)&(rec_origin['amount']<ceil)]) / len(rec_origin)
        print("${:,.0f}-${:,.0f}: {:.2f}%".format(floor, ceil, sample * 100.))
    else:
        sample = len(rec_origin[rec_origin['amount']>ranges[i]]) / len(onetime)
        print("${:,.0f}+: {:.2f}%".format(ranges[i], sample * 100.))

Value ranges with sample proportion:
----------------------------------------
$0-$5: 1.39%
$5-$25: 35.26%
$25-$100: 42.70%
$100-$250: 13.84%
$250-$500: 3.19%
$500-$1,000: 2.08%
$1,000+: 0.05%
