# objective

determine if number of items in a given auction influences the amount of bidding. could this skew bidding attention to weaken what would otherwise be successful auction items?

In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *
%matplotlib inline

# load data

In [2]:
# product, # of bids, bid increment, value, start $, end $
q = "select * from auctionitem"
df_ai = redshift_query_read(q, schema='production')

In [3]:
q = '''select
            ta.form,
            ta.product,
            ta.total as price,
            ta.status as status,
            count(distinct(b.ticketholder)) as bidders,
            count(b.ticketholder) as bids,
            count(distinct(ta.id)) as transactions,
            count(distinct(ta.bidder)) as winners
        from bidders as b
            left join transauction as ta on b.product=ta.product
        group by ta.product, ta.total, ta.status, ta.form'''
df_bids = redshift_query_read(q, schema='production')

In [4]:
df = df_ai.drop('id', axis=1).merge(df_bids, on='product')

In [7]:
df = df[(df['value']>0)]

df['price_ratio'] = df['price'] / df['value']
df['bidincrement_ratio'] = df['bidincrement'] / df['value']
df['outperformer'] = df['price'] > df['value']
df['underperformer'] = df['price'] <= df['value']

# analysis

In [19]:
forms = df.groupby('form')['outperformer'].agg(['count', 'median']).reset_index()
forms.columns = ['form', 'items', 'outperformer']

In [20]:
forms.groupby('outperformer')['items'].agg(['count', 'mean', 'median']).reset_index()

Unnamed: 0,outperformer,count,mean,median
0,0.0,433,38.769053,29.0
1,0.5,4,28.0,25.0
2,1.0,31,35.645161,22.0


The sample sizes are rather small here but the data is not so differentiated as to lead me to believe that this will be going somewhere important.  

In [21]:
forms = df.groupby('form')['outperformer'].agg(['count', 'mean']).reset_index()
forms.columns = ['form', 'items', 'outperformer']

In [42]:
forms_op = forms.groupby('outperformer')['items'].agg(['count', 'mean', 'median']).reset_index()

op_ranges = [(0.0, 0.1), (0.1, 0.2), (0.2, 0.3),
            (0.3, 0.4), (0.4, 0.5), (0.5, 0.6),
            (0.6, 0.7), (0.7, 0.8), (0.8, 0.9),
            (0.9, 1.0)]
for r in op_ranges:
    if r[1] == 1.0:
        _df = forms_op[(forms_op['outperformer']>=r[0])&(forms_op['outperformer']<=r[1])]
    else:
        _df = forms_op[(forms_op['outperformer']>=r[0])&(forms_op['outperformer']<r[1])]
    r_count = _df['count'].sum()
    r_mean = _df['mean'].mean()
    r_median = _df['median'].mean()

    print("{}: samples: {}, items: mean {:.2f}, median {:.2f}".format(r, r_count, r_mean, r_median))

(0.0, 0.1): samples: 204, items: mean 50.42, median 50.16
(0.1, 0.2): samples: 108, items: mean 63.24, median 62.67
(0.2, 0.3): samples: 66, items: mean 61.22, median 61.01
(0.3, 0.4): samples: 37, items: mean 58.36, median 57.85
(0.4, 0.5): samples: 18, items: mean 62.50, median 62.14
(0.5, 0.6): samples: 12, items: mean 50.56, median 50.19
(0.6, 0.7): samples: 9, items: mean 57.75, median 57.58
(0.7, 0.8): samples: 2, items: mean 33.00, median 33.00
(0.8, 0.9): samples: 3, items: mean 63.33, median 63.33
(0.9, 1.0): samples: 9, items: mean 21.19, median 19.50
