7. do bidders tend to focus on individual items or spread their bids around to various items?
    1. we want to look for evidence to determine if bidders are spreading funds throughout an auction or are they genuinely trying to win specific items
    2. do bidders generally only bid on a single item?
    3. if bidders bid on multiple items, do they tend to bid the same amount?
    4. if bidders bid on multiple items, will bidders increase bids markedly when they have lost on another item?
    5. do bids per item increase with the decrease in items available to bid on?

In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

In [2]:
q = "select * from bidders"
bids = redshift_query_read(q, schema='production')

In [3]:
print("{:,} entries".format(len(bids)))
print("{:%Y-%m-%d} to {:%Y-%m-%d}".format(bids['date_created'], bids['date_created']))
bids.head(3)

0 entries


KeyError: 'date_created'

In [4]:
print("{:,} bids entries".format(len(bids)))
print("{} to {}".format(bids['date_created'].min(), bids['date_created'].max()))
print("{:,} products, {:,} bidders".format(len(bids['product'].unique()), len(bids['ticketholder'].unique())))

print()
print("Removing NaN ticketholders")
print("{:,} bids entries".format(len(bids[~bids['ticketholder'].isna()])))
print("{} to {}".format(bids[~bids['ticketholder'].isna()]['date_created'].min(), bids[~bids['ticketholder'].isna()]['date_created'].max()))
print("{:,} products, {:,} bidders".format(len(bids[~bids['ticketholder'].isna()]['product'].unique()), len(bids[~bids['ticketholder'].isna()]['ticketholder'].unique())))

print()
print("Removing 0 ticketholder")
print("{:,} bids entries".format(len(bids[bids['ticketholder']!=0])))
print("{} to {}".format(bids[bids['ticketholder']!=0]['date_created'].min(), bids[~bids['ticketholder'].isna()]['date_created'].max()))
print("{:,} products, {:,} bidders".format(len(bids[bids['ticketholder']!=0]['product'].unique()), len(bids[bids['ticketholder']!=0]['ticketholder'].unique())))

1,405,982 bids entries
2019-07-24 11:51:35 to 2021-09-23 20:16:33
22,159 products, 67,140 bidders

Removing NaN ticketholders
1,405,982 bids entries
2019-07-24 11:51:35 to 2021-09-23 20:16:33
22,159 products, 67,140 bidders

Removing 0 ticketholder
1,236,969 bids entries
2019-07-24 11:51:35 to 2021-09-23 20:16:33
637 products, 67,139 bidders


In [17]:
print("{:.2f} mean bids per bidder".format(bids.groupby('ticketholder')['date_created'].nunique().mean()))
print("{:.2f} median bids per bidder".format(bids.groupby('ticketholder')['date_created'].nunique().median()))
print("Ex anonymous bidders:")
print("{:.2f} mean bids per bidder".format(bids[bids['ticketholder']!=0].groupby('ticketholder')['date_created'].nunique().mean()))
print("{:.2f} median bids per bidder".format(bids[bids['ticketholder']!=0].groupby('ticketholder')['date_created'].nunique().median()))
print()
print("{:.2f} mean bids per bidder per product".format(bids.groupby(['ticketholder', 'product'])['date_created'].nunique().mean()))
print("{:.2f} median bids per bidder per product".format(bids.groupby(['ticketholder', 'product'])['date_created'].nunique().median()))
print()
print("{:.2f} mean bids per product".format(bids.groupby('product')['date_created'].count().mean()))
print("{:.2f} median bids per product".format(bids.groupby('product')['date_created'].count().median()))
print()
print("{:.2f} mean products per bidder".format(bids.groupby('ticketholder')['product'].nunique().mean()))
print("{:.2f} median products per bidder".format(bids.groupby('ticketholder')['product'].nunique().median()))
print()
print("{:.2f} mean bidders per product".format(bids.groupby('product')['ticketholder'].nunique().mean()))
print("{:.2f} median bidders per product".format(bids.groupby('product')['ticketholder'].nunique().median()))

20.82 mean bids per bidder
7.00 median bids per bidder
Ex anonymous bidders:
18.42 mean bids per bidder
7.00 median bids per bidder

4.30 mean bids per bidder per product
2.00 median bids per bidder per product

63.45 mean bids per product
5.00 median bids per product

4.86 mean products per bidder
4.00 median products per bidder

14.73 mean bidders per product
1.00 median bidders per product


Prior run:
- 9.18 mean bids per bidder
- 6.00 median bids per bidder

In [6]:
bids_per_bidder = bids.groupby(['ticketholder'])['date_created'].nunique().reset_index()
bids_per_bidder.columns = ['ticketholder', 'bids']

bids_per_bidder['bucket'] = pd.cut(bids_per_bidder['bids'], bins=[1, 2, 3, 5, 10, 15])
bids_per_bidder['bucket'].value_counts(normalize=True)

(5, 10]     0.323130
(3, 5]      0.277421
(10, 15]    0.165032
(2, 3]      0.120135
(1, 2]      0.114283
Name: bucket, dtype: float64

In [7]:
len(bids_per_bidder[bids_per_bidder['bids']>=15]) / len(bids_per_bidder)

0.2758266309204647

In [8]:
bids_per_bidder = bids.groupby(['ticketholder', 'product'])['date_created'].nunique().reset_index()
bids_per_bidder.columns = ['ticketholder', 'product', 'bids']

bids_per_bidder['bucket'] = pd.cut(bids_per_bidder['bids'], bins=[1, 2, 3, 5, 10, 15])
bids_per_bidder['bucket'].value_counts(normalize=True)

(1, 2]      0.454035
(2, 3]      0.174957
(5, 10]     0.170543
(3, 5]      0.145936
(10, 15]    0.054528
Name: bucket, dtype: float64

In [9]:
len(bids_per_bidder[bids_per_bidder['bids']>=15]) / len(bids_per_bidder)

0.035023795587780134

In [10]:
print(bids.groupby('ticketholder')['amount'].agg(['count', 'nunique']).mean())
print(bids.groupby('ticketholder')['amount'].agg(['count', 'nunique']).median())

count      20.941049
nunique    17.326884
dtype: float64
count      7.0
nunique    7.0
dtype: float64


In [11]:
# concurrent product bidding
bidder_concurrency = bids.groupby(['ticketholder', 'product'])['date_created'].agg(['min', 'max', 'nunique']).reset_index()
bidder_concurrency = bidder_concurrency[bidder_concurrency['ticketholder']!=0]
bidder_concurrency.columns = ['ticketholder', 'product', 'first', 'last', 'count']

bidder_product_count = bidder_concurrency.groupby('ticketholder')['product'].count().reset_index()
single_product_bidders = bidder_product_count[bidder_product_count['product']==1]['ticketholder'].tolist()

bidder_concurrency = bidder_concurrency[~bidder_concurrency['ticketholder'].isin(single_product_bidders)]
bidder_concurrency.head(3)

Unnamed: 0,ticketholder,product,first,last,count
21763,2774840,84929,2019-07-24 17:33:17,2019-07-24 17:33:17,1
21764,2774840,84930,2019-07-24 17:16:25,2019-07-24 17:33:14,2
21765,2774841,84870,2019-07-31 09:43:41,2019-07-31 10:58:06,2


In [12]:
bidder_overlap_data = []
for t in bidder_concurrency['ticketholder'].unique():
    these_bids = bidder_concurrency[bidder_concurrency['ticketholder']==t].sort_values('first', ascending=True)
    
    bidder_overlap_data.append((these_bids['last'] - these_bids['first'].shift()).mean())

In [13]:
pd.DataFrame(bidder_overlap_data).agg(['mean', 'median'])

Unnamed: 0,0
mean,91 days 10:39:59.152025973
median,6 days 20:41:55.714285714


In [14]:
bidders = bids.groupby('ticketholder')['date_created'].agg(['min', 'max', 'count']).reset_index()
bidders.columns = ['bidder', 'start', 'end', 'count']
bidders['range'] = bidders['end'] - bidders['start']

products = bids.groupby('ticketholder')['product'].nunique()

In [15]:
bidders.head(3)

Unnamed: 0,bidder,start,end,count,range
0,0,2019-10-05 20:17:10,2021-09-23 20:16:33,169013,718 days 23:59:23
1,2774840,2019-07-24 17:16:25,2019-07-24 17:33:17,3,0 days 00:16:52
2,2774841,2019-07-31 09:43:41,2019-07-31 11:04:12,4,0 days 01:20:31


In [16]:
print("{} mean bid range per bidder".format(bidders['range'].mean()))
print("{} median bid range per bidder".format(bidders['range'].median()))

185 days 04:29:39.308266310 mean bid range per bidder
0 days 15:48:25 median bid range per bidder
