In [123]:
import pandas as pd

import sys, os, requests, json, datetime
sys.path.insert(1, '../../scripts/')
from s3_support import *
%matplotlib inline

# loading data

### auction purchases, donations, and transactions

In [124]:
# Auction purchases: every column of `transauction`, queried with schema="production".
purchases_sql = "select * from transauction"
df_purchases = redshift_query_read(purchases_sql, schema="production")

# Auction donations: every column of `transauctiondonation`, same schema.
donations_sql = "select * from transauctiondonation"
df_donations = redshift_query_read(donations_sql, schema="production")

# Both frames carry a `createddate` column; parse it to datetimes in place.
for auction_df in (df_purchases, df_donations):
    auction_df['createddate'] = pd.to_datetime(auction_df['createddate'])

# Transaction records, loaded from a stored CSV
# (presumably already limited to auction forms, judging by the file name -- TODO confirm).
df_trans = get_dataframe_from_file("trans-records", "trans.auction_forms.csv")

In [125]:
# narrowing transactions dataframe
# Keep only transactions whose form shows up in the purchases or the donations table.
purchase_forms = df_purchases['form'].unique().tolist()
donation_forms = df_donations['form'].unique().tolist()
auction_forms = list(set(purchase_forms).union(donation_forms))

is_auction_form = df_trans['form'].isin(auction_forms)
df_trans = df_trans.loc[is_auction_form]

# (forms known to the auction tables, distinct forms left in the transactions)
len(auction_forms), df_trans['form'].nunique(dropna=False)

(141, 55)

In [126]:
# Transactions whose id is referenced by an auction purchase.
purchase_transactions = df_purchases['transaction'].unique().tolist()
is_purchase = df_trans['id'].isin(purchase_transactions)
df_purch = df_trans.loc[is_purchase].copy()

# Transactions whose id is referenced by an auction purchase or an auction donation.
donation_transactions = df_donations['transaction'].unique().tolist()
purchase_n_don_transactions = list(set(purchase_transactions).union(donation_transactions))
is_purchase_or_donation = df_trans['id'].isin(purchase_n_don_transactions)
df_purch_don = df_trans.loc[is_purchase_or_donation].copy()

### event settings

In [127]:
def fetch_table(table, key=None, timeout=300):
    """Download one table from the Qgiv admin statistics export endpoint.

    Parameters
    ----------
    table : str
        Name of the table to export; sent as the ``table`` form field.
    key : str, optional
        Export key sent as the ``key`` form field. When omitted, the
        ``QGIV_EXPORT_KEY`` environment variable is used, falling back to
        the legacy key that was previously hard-coded here.
    timeout : float or tuple, optional
        Passed straight to ``requests.post`` so a stalled connection cannot
        hang the notebook forever. Defaults to 300 seconds.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    ValueError
        If the response body is not valid JSON.
    """
    url = 'https://secure.qgiv.com/admin/qgivadmin/statistics/export_tables.php'

    if key is None:
        # FIXME(security): the literal fallback is a credential committed to
        # the notebook. It is kept only so existing runs keep working; rotate
        # it and supply the key through QGIV_EXPORT_KEY instead.
        key = os.environ.get('QGIV_EXPORT_KEY', 'DSQR59VwyFhw21PKDF4K')
    payload = {'key': key, 'table': table}

    rsp = requests.post(url, data=payload, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
    rsp.raise_for_status()

    return rsp.json()

In [128]:
# Event settings export; the first element of the payload holds the table rows.
data = fetch_table("eventsettings")
es = pd.DataFrame(data[0]).astype({'form': int})

# Forms that have at least one purchase/donation transaction.
forms_that_processed = df_purch_don['form'].unique().tolist()

# Keep just the event window columns for those forms.
settings = es.loc[es['form'].isin(forms_that_processed), ['form', 'startDate', 'endDate']].copy()
# Unparseable dates become NaT rather than raising.
for date_col in ('startDate', 'endDate'):
    settings[date_col] = pd.to_datetime(settings[date_col], errors='coerce')

In [129]:
# Spot-check the event window stored for one form.
settings.loc[settings['form'] == 952927]

Unnamed: 0,form,startDate,endDate
6958,952927,2020-01-31 21:26:00,2020-01-31 22:00:00


### merging data

In [130]:
def get_start_date(x):
    """Return the ``startDate`` of the first ``settings`` row for form ``x``.

    Reads the notebook-level ``settings`` frame. Returns ``None`` when no
    row has ``form == x``.
    """
    start_dates = settings.loc[settings['form'] == x, 'startDate']
    if start_dates.empty:
        return None
    return start_dates.iloc[0]
    
def get_end_date(x):
    """Return the ``endDate`` of the first ``settings`` row for form ``x``.

    Reads the notebook-level ``settings`` frame. Returns ``None`` when no
    row has ``form == x``.
    """
    end_dates = settings.loc[settings['form'] == x, 'endDate']
    return end_dates.iloc[0] if len(end_dates) > 0 else None

In [131]:
# just purchases
# Transaction timestamp at hour resolution, built from the `date` and `hour`
# columns. Vectorized string concatenation yields the same "<date> <hour>:00:00"
# text as the old row-wise apply, but is far faster and also works on an empty frame.
df_purch['datetime'] = pd.to_datetime(
    df_purch['date'].astype(str) + ' ' + df_purch['hour'].astype(str) + ':00:00'
)

# One row per form (first occurrence wins, matching get_start_date/get_end_date),
# indexed by form so each lookup is a single map instead of a scan of
# `settings` per transaction. Forms without settings map to NaT, so the
# columns stay datetime64 even when nothing matches.
purch_event_windows = settings.drop_duplicates(subset='form').set_index('form')
df_purch['event_start_date'] = df_purch['form'].map(purch_event_windows['startDate'])
df_purch['event_end_date'] = df_purch['form'].map(purch_event_windows['endDate'])

# Positive time_since_start: the transaction happened after the event start.
df_purch['time_since_start'] = df_purch['datetime'] - df_purch['event_start_date']
# Note the operand order: end minus transaction time, so a positive value
# means the transaction happened before the event end.
df_purch['time_since_end'] = df_purch['event_end_date'] - df_purch['datetime']

In [132]:
# purchases & donations
# Transaction timestamp at hour resolution, built from the `date` and `hour`
# columns. Vectorized string concatenation yields the same "<date> <hour>:00:00"
# text as the old row-wise apply, but is far faster and also works on an empty frame.
df_purch_don['datetime'] = pd.to_datetime(
    df_purch_don['date'].astype(str) + ' ' + df_purch_don['hour'].astype(str) + ':00:00'
)

# One row per form (first occurrence wins, matching get_start_date/get_end_date),
# indexed by form so each lookup is a single map instead of a scan of
# `settings` per transaction. Forms without settings map to NaT, so the
# columns stay datetime64 even when nothing matches.
purch_don_event_windows = settings.drop_duplicates(subset='form').set_index('form')
df_purch_don['event_start_date'] = df_purch_don['form'].map(purch_don_event_windows['startDate'])
df_purch_don['event_end_date'] = df_purch_don['form'].map(purch_don_event_windows['endDate'])

# Positive time_since_start: the transaction happened after the event start.
df_purch_don['time_since_start'] = df_purch_don['datetime'] - df_purch_don['event_start_date']
# Note the operand order: end minus transaction time, so a positive value
# means the transaction happened before the event end.
df_purch_don['time_since_end'] = df_purch_don['event_end_date'] - df_purch_don['datetime']

# mean diff times

### just purchases

In [133]:
# just purchases
# Rows whose event start year is 2030 are left out
# (presumably a placeholder date for events without a real schedule -- TODO confirm).
purch_dated = df_purch.loc[df_purch['event_start_date'].dt.year != 2030]
purch_mean_diffs = purch_dated[['time_since_end', 'time_since_start']].mean()

print("MEAN TIME DIFFS FOR ALL TRANSACTIONS")
print(purch_mean_diffs)

MEAN TIME DIFFS FOR ALL TRANSACTIONS
time_since_end     -1 days +07:39:23.271028
time_since_start     3 days 04:37:19.233644
dtype: timedelta64[ns]


In [134]:
# One summary record per form: mean offsets, transaction count, and the share
# of transactions whose offset is below 1 / 6 / 24 hours.
form_data = []
one_hour = (60 * 60)

# Forms whose event start year is 2030 are skipped
# (presumably a placeholder date -- TODO confirm).
purch_has_real_start = df_purch['event_start_date'].dt.year != 2030
for form in df_purch.loc[purch_has_real_start, 'form'].unique():
    this_df = df_purch[df_purch['form'] == form]
    total = float(len(this_df))

    # Offsets in seconds; NaT offsets become NaN and never satisfy `<`.
    secs_since_start = this_df['time_since_start'].dt.total_seconds()
    secs_since_end = this_df['time_since_end'].dt.total_seconds()

    form_data.append({
        'form': form,
        'purchases_time_since_start_mean': this_df['time_since_start'].mean(),
        'purchases_time_since_end_mean': this_df['time_since_end'].mean(),
        'purchases_transaction_count': len(this_df),
        'purchases_perc_complete_1_hour_from_start': float((secs_since_start < one_hour).sum()) / total,
        'purchases_perc_complete_6_hour_from_start': float((secs_since_start < (one_hour * 6)).sum()) / total,
        'purchases_perc_complete_24_hour_from_start': float((secs_since_start < (one_hour * 24)).sum()) / total,
        'purchases_perc_complete_1_hour_from_end': float((secs_since_end < one_hour).sum()) / total,
        'purchases_perc_complete_6_hour_from_end': float((secs_since_end < (one_hour * 6)).sum()) / total,
        'purchases_perc_complete_24_hour_from_end': float((secs_since_end < (one_hour * 24)).sum()) / total
    })

In [135]:
# Per-form purchase summary; the displayed value is the mean of the per-form
# means, over forms that have no missing summary values.
purchases_form_data = pd.DataFrame(form_data)
purchases_mean_cols = ['purchases_time_since_start_mean', 'purchases_time_since_end_mean']
purchases_form_data.dropna().loc[:, purchases_mean_cols].mean()

purchases_time_since_start_mean   -2 days +23:17:53.470016
purchases_time_since_end_mean       2 days 23:03:46.529983
dtype: timedelta64[ns]

### purchases & donations

In [136]:
# Rows whose event start year is 2030 are left out
# (presumably a placeholder date for events without a real schedule -- TODO confirm).
purch_don_dated = df_purch_don.loc[df_purch_don['event_start_date'].dt.year != 2030]
purch_don_mean_diffs = purch_don_dated[['time_since_end', 'time_since_start']].mean()

print("MEAN TIME DIFFS FOR ALL TRANSACTIONS")
print(purch_don_mean_diffs)

MEAN TIME DIFFS FOR ALL TRANSACTIONS
time_since_end     -1 days +06:43:01.296572
time_since_start     2 days 20:24:00.178837
dtype: timedelta64[ns]


In [137]:
# One summary record per form: mean offsets, transaction count, and the share
# of transactions whose offset is below 1 / 6 / 24 hours.
form_data = []
one_hour = (60 * 60)

# Forms whose event start year is 2030 are skipped
# (presumably a placeholder date -- TODO confirm).
purch_don_has_real_start = df_purch_don['event_start_date'].dt.year != 2030
for form in df_purch_don.loc[purch_don_has_real_start, 'form'].unique():
    this_df = df_purch_don[df_purch_don['form'] == form]
    total = float(len(this_df))

    # Offsets in seconds; NaT offsets become NaN and never satisfy `<`.
    secs_since_start = this_df['time_since_start'].dt.total_seconds()
    secs_since_end = this_df['time_since_end'].dt.total_seconds()

    form_data.append({
        'form': form,
        'purchases_n_donations_time_since_start_mean': this_df['time_since_start'].mean(),
        'purchases_n_donations_time_since_end_mean': this_df['time_since_end'].mean(),
        'purchases_n_donations_transaction_count': len(this_df),
        'purchases_n_donations_perc_complete_1_hour_from_start': float((secs_since_start < one_hour).sum()) / total,
        'purchases_n_donations_perc_complete_6_hour_from_start': float((secs_since_start < (one_hour * 6)).sum()) / total,
        'purchases_n_donations_perc_complete_24_hour_from_start': float((secs_since_start < (one_hour * 24)).sum()) / total,
        'purchases_n_donations_perc_complete_1_hour_from_end': float((secs_since_end < one_hour).sum()) / total,
        'purchases_n_donations_perc_complete_6_hour_from_end': float((secs_since_end < (one_hour * 6)).sum()) / total,
        'purchases_n_donations_perc_complete_24_hour_from_end': float((secs_since_end < (one_hour * 24)).sum()) / total
    })

In [138]:
# Per-form purchase+donation summary; the displayed value is the mean of the
# per-form means, over forms that have no missing summary values.
purchases_and_donations_form_data = pd.DataFrame(form_data)
purchases_n_donations_mean_cols = [
    'purchases_n_donations_time_since_start_mean',
    'purchases_n_donations_time_since_end_mean',
]
purchases_and_donations_form_data.dropna().loc[:, purchases_n_donations_mean_cols].mean()

purchases_n_donations_time_since_start_mean   -2 days +23:27:59.559345
purchases_n_donations_time_since_end_mean       2 days 22:53:40.440654
dtype: timedelta64[ns]

### merging 'just purchases' and 'purchases and donations'

In [139]:
# Join the two per-form summaries on `form` (default inner join), then drop
# any form with a missing value in either summary.
mrgd = pd.merge(
    purchases_form_data,
    purchases_and_donations_form_data,
    on='form',
).dropna()

In [140]:
# Export column order: `form`, then the mean/count columns for each metric
# family, then the share-within-window columns for each metric family.
metric_prefixes = ('purchases', 'purchases_n_donations')
summary_suffixes = ('time_since_start_mean', 'time_since_end_mean', 'transaction_count')
window_suffixes = [
    'perc_complete_{}_hour_from_{}'.format(hours, anchor)
    for anchor in ('start', 'end')
    for hours in (1, 6, 24)
]

cols = ['form']
cols += ['{}_{}'.format(prefix, suffix) for prefix in metric_prefixes for suffix in summary_suffixes]
cols += ['{}_{}'.format(prefix, suffix) for prefix in metric_prefixes for suffix in window_suffixes]

In [142]:
# Write the merged per-form summary in the `cols` order, without the index column.
export_df = mrgd.loc[:, cols]
export_df.to_csv("auction.times.csv", index=False)