In [57]:
import sys, datetime
sys.path.append("../../scripts/")
from s3_support import *

import pandas as pd
import numpy as np

In [67]:
q = '''select
            amount, 
            donations_amt,
            recurring,
            hour,
            zip,
            state,
            year,
            useragent
        from transactions
        where
            status='A' and
            source in ('don_form', 'mobile', 'sms') and
            donations_amt>0'''
trans = redshift_query_read(q, schema='production')

In [68]:
trans['is_recurring'] = trans['recurring']!=0
rec = trans[trans['is_recurring']].groupby('recurring').first().reset_index()
trans = trans[~trans['is_recurring']].append(rec)

In [69]:
trans['is_recurring'].value_counts(normalize=True)

False    0.937777
True     0.062223
Name: is_recurring, dtype: float64

In [70]:
year_rec_trans = trans.groupby(['year', 'is_recurring'])['amount'].count().reset_index()
year_rec_trans.pivot(index='year', columns='is_recurring', values='amount')

is_recurring,False,True
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,241023,9864
2016,252038,13141
2017,312220,18171
2018,374211,27097
2019,441223,31610
2020,824714,47986
2021,741194,53198
2022,318351,31493


## state cleanup

In [46]:
us_states = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 
    'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 
    'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 
    'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 
    'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 
    'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'
]

In [47]:
def state_fix(s):
    state_truncs = {'geor': 'GA', 'ohio': 'OH', 'cali': 'CA', 'fla': 'FL',
                    'flor': 'FL', 'hawa': 'HI', 'virg': 'VA', 'texa': 'TX',
                    'newy': 'NY', 'mich': 'MI', 'arka': 'AR', 'mass': 'MA',
                    'okla': 'OK', 'idah': 'ID', 'loui': 'LA', 'illi': 'IL',
                    'conn': 'CT', 'alas': 'AK', 'colo': 'CO', 'iowa': 'IA',
                    'kans': 'KS', 'wyom': 'WY', 'wisc': 'WI', 'wash': 'WA',
                    'verm': 'VT', 'utah': 'UT', 'tenn': 'TN', 'rhod': 'RI',
                    'oreg': 'OR', 'neva': 'NV', 'nebr': 'NE', 'indi': 'IN',
                    'mont': 'MT', 'alab': 'AL', 'miss': 'MS', 'penn': 'PA',
                    'dela': 'DE'}
    if str(s).lower() in state_truncs.keys():
        return state_truncs[str(s).lower()]
    
    return str(s).upper()

trans['state'] = trans['state'].apply(state_fix)

In [48]:
print("{:,} transactions not US states".format(len(trans[~trans['state'].isin(us_states)])))
print("{:,} tansactions in US states".format(len(trans[trans['state'].isin(us_states)])))

521,507 transactions not US states
5,119,331 tansactions in US states


In [49]:
trans = trans[trans['state'].isin(us_states)]

In [50]:
state_avgs = trans.groupby(['state', 'hour'])[['amount', 'donations_amt']].agg(['mean', 'median']).reset_index()

In [51]:
state_avgs[state_avgs['state']=='FL']

Unnamed: 0_level_0,state,hour,amount,amount,donations_amt,donations_amt
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,median,mean,median
216,FL,0,144.390032,50.0,99.856251,10.0
217,FL,1,142.917715,50.0,104.342415,20.0
218,FL,2,144.599584,50.0,116.00849,25.0
219,FL,3,129.611813,50.0,98.497396,25.0
220,FL,4,137.773137,70.0,104.634185,30.0
221,FL,5,167.513918,84.0,130.604778,50.0
222,FL,6,167.761657,100.0,119.901391,50.0
223,FL,7,163.18431,75.0,120.057464,30.0
224,FL,8,178.427249,75.0,117.464968,20.0
225,FL,9,175.180812,70.0,110.752511,5.0


## useragent cleanup

In [55]:
def tag_platform(u):
    u = str(u)
    if 'iPhone' in u:
        return 'iPhone'
    elif 'iPad' in u:
        return 'iPad'
    elif 'Android' in u:
        return 'Android'
    elif 'IntelMacOSX' in u:
        return 'Mac'
    elif 'Windows' in u:
        return 'Windows'
    else:
        return u

trans['platform'] = trans['useragent'].apply(tag_platform)

In [66]:
tagged_platforms = ['iPhone', 'iPad', 'Android', 'Mac', 'Windows']
print("{:,} tagged transactions".format(len(trans[trans['platform'].isin(tagged_platforms)])))
print("{:,} non-tagged transactions".format(len(trans[~trans['platform'].isin(tagged_platforms)])))

4,818,231 tagged transactions
301,100 non-tagged transactions


In [67]:
trans = trans[trans['platform'].isin(tagged_platforms)]

In [None]:
trans.groupby('platform')['amount'].agg(['mean', 'median']).reset_index()

Unnamed: 0,platform,mean,median
0,Android,106.275805,50.0
1,Mac,240.726001,80.0
2,Windows,204.190843,52.5
3,iPad,181.552374,54.6
4,iPhone,138.456217,50.0


## sample size check

- reduce zip code samples to those with > 100 observations
- when a given zip code/platform pairing has fewer than 100 observations, fail over to zip code median
- when a given zip code has fewer than 100 observations for all platforms, fail over to state/platform medians
- when a given state/platform has fewer than 100 observations, fail over to state median for all platforms

In [None]:
zip_agg = trans.groupby(['zip', 'platform'])['amount'].count().reset_index()
representative_zips = zip_agg[zip_agg['amount']>100]['zip'].tolist()
"all zip codes: {:,}".format(len(zip_agg)), "zip codes with > 100 samples: {:,}".format(len(representative_zips))

('all zip codes: 216,769', 'zip codes with > 100 samples: 10,890')

### building zip table

In [None]:
zip_agg = trans[trans['zip'].isin(representative_zips)].groupby(['zip', 'platform'])['amount'].agg(['count', 'median']).reset_index()
zip_pvt = zip_agg.pivot(index='zip', columns='platform', values=['count', 'median']).reset_index()
zip_pvt.columns = ['zip', 'count_Android', 'count_Mac', 'count_Windows', 'count_iPad', 
                    'count_iPhone', 'median_Android', 'median_Mac', 'median_Windows', 
                    'median_iPad', 'median_iPhone']
zip_pvt['count_all'] = zip_pvt[[c for c in zip_pvt.columns if 'count_' in c]].sum(axis=1)
zip_pvt = zip_pvt.merge(trans[trans['zip'].isin(representative_zips)].groupby('zip')['amount'].median().reset_index(), on='zip')
zip_pvt['median_all'] = zip_pvt['amount']
zip_pvt.drop('amount', axis=1, inplace=True)
zip_pvt.head()

Unnamed: 0,zip,count_Android,count_Mac,count_Windows,count_iPad,count_iPhone,median_Android,median_Mac,median_Windows,median_iPad,median_iPhone,count_all,median_all
0,1002,16.0,190.0,172.0,6.0,31.0,48.5,50.0,50.0,51.0,36.4,415.0,50.0
1,1020,66.0,15.0,119.0,9.0,25.0,10.0,75.0,25.0,35.0,20.6,234.0,25.0
2,1027,12.0,67.0,135.0,9.0,17.0,45.84,50.0,35.0,50.0,26.25,240.0,40.0
3,1028,22.0,32.0,142.0,13.0,38.0,57.875,50.0,100.0,102.75,50.0,247.0,63.0
4,1040,17.0,58.0,172.0,9.0,29.0,10.0,50.0,100.0,100.0,20.14,285.0,70.0


### building state table

In [None]:
state_agg = trans.groupby(['state', 'platform'])['amount'].agg(['count', 'median']).reset_index()
state_pvt = state_agg.pivot(index='state', columns='platform', values=['count', 'median']).reset_index()
state_pvt.columns = ['state', 'count_Android', 'count_Mac', 'count_Windows',
                    'count_iPad', 'count_iPhone', 'median_Android', 'median_Mac',
                     'median_Windows', 'median_iPad', 'median_iPhone']
state_pvt['count_all'] = state_pvt[[c for c in state_pvt.columns if 'count_' in c]].sum(axis=1)
state_pvt = state_pvt.merge(trans.groupby('state')['amount'].median().reset_index(), on='state')
state_pvt['median_all'] = state_pvt['amount']
state_pvt.drop('amount', axis=1, inplace=True)
state_pvt.head()

Unnamed: 0,state,count_Android,count_Mac,count_Windows,count_iPad,count_iPhone,median_Android,median_Mac,median_Windows,median_iPad,median_iPhone,count_all,median_all
0,AK,573.0,1313.0,4013.0,299.0,1298.0,25.0,52.07,35.0,50.0,50.0,7496.0,50.0
1,AL,6366.0,8932.0,35957.0,2456.0,12584.0,42.4,100.0,52.5,53.72,51.25,66295.0,52.0
2,AR,2065.0,2344.0,9489.0,667.0,3729.0,50.0,100.0,95.0,95.0,52.0,18294.0,75.0
3,AZ,10693.0,28222.0,76819.0,4501.0,20870.0,33.0,100.0,65.0,70.0,38.0,141105.0,55.0
4,CA,33347.0,141345.0,252398.0,16454.0,75895.0,50.0,100.0,65.0,70.0,50.0,519439.0,61.8


# scratch

In [None]:
import itertools, math

In [None]:
coins = [25, 10, 1]
combo_sizes = (range(1, len(coins) + 1))
coin_permutations = ((s for s in itertools.combinations(coins, combo_size)) for combo_size in combo_sizes)

least_num_coins = None

for coin_perm in coin_permutations:
    for p in coin_perm:
        coin_count = 0
        cents = 32
        for coin in p:
            coin_count += int(cents / coin)
            cents = cents % coin
        
        if not cents:
            print("Set: {} yields {} coins".format(p, coin_count))
            
            if not least_num_coins or coin_count < least_num_coins[1]:
                least_num_coins = (p, coin_count)
                
print("\nresolution:")
print("Set: {} yields {} coins".format(least_num_coins[0], least_num_coins[1]))

Set: (1,) yields 32 coins
Set: (25, 1) yields 8 coins
Set: (10, 1) yields 5 coins
Set: (25, 10, 1) yields 8 coins

resolution:
Set: (10, 1) yields 5 coins


In [47]:
nums = [1,3,0,4]
#nums = [3,2,1,0,4]

r = farthest = 0
while r <= farthest:
    if farthest >= len(nums) - 1:
        print("True")
        break
    farthest = max(farthest, r + nums[r])
    r += 1
    print("incrementing; r: {}, farthest: {}".format(r, farthest))
    
print("False")

incrementing; r: 1, farthest: 1
incrementing; r: 2, farthest: 4
True
False


In [54]:
from collections import namedtuple

Marks = namedtuple('Marks', 'Physics Chemistry Math CS average')
average = lambda x: sum(x) / len(x)
marks = Marks(90, 85, 95, 100, average)
print(marks)
marks.average()


Marks(Physics=90, Chemistry=85, Math=95, CS=100, average=<function <lambda> at 0x7ffbf013d050>)


TypeError: <lambda>() missing 1 required positional argument: 'self'