In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import re

import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "white" style to the matplotlib/seaborn figures drawn below.
sns.set_style("white")
%matplotlib inline

In [2]:
# Load the donations table used by every cell below from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
donations_13to17=pd.read_pickle('donations_13to17.pkl')

### explore distribution of amounts

In [3]:
# Summary statistics of amount_2013 over the rows with a positive 2013 amount.
# The column is selected before describing so only that column is summarised.
donations_13to17.query('amount_2013 > 0')['amount_2013'].describe()

count    320069.000000
mean         95.016273
std        1036.920844
min           0.400000
25%          20.000000
50%          25.000000
75%          75.000000
max      442794.380000
Name: amount_2013, dtype: float64

In [4]:
# Summary statistics of amount_2014 over the rows with a positive 2014 amount.
# The column is selected before describing so only that column is summarised.
donations_13to17.query('amount_2014 > 0')['amount_2014'].describe()

count    382803.000000
mean        108.259304
std        1169.334035
min           1.000000
25%          25.000000
50%          50.000000
75%         100.000000
max      570161.900000
Name: amount_2014, dtype: float64

In [5]:
# Summary statistics of amount_2015 over the rows with a positive 2015 amount.
# The column is selected before describing so only that column is summarised.
donations_13to17.query('amount_2015 > 0')['amount_2015'].describe()

count    446037.000000
mean        113.174654
std        1013.841004
min           0.920000
25%          25.000000
50%          50.000000
75%         100.000000
max      346566.480000
Name: amount_2015, dtype: float64

In [6]:
# Summary statistics of amount_2016 over the rows with a positive 2016 amount.
# The column is selected before describing so only that column is summarised.
donations_13to17.query('amount_2016 > 0')['amount_2016'].describe()

count    555836.000000
mean        112.707242
std        1028.602129
min           0.060000
25%          25.000000
50%          50.000000
75%         100.000000
max      294946.630000
Name: amount_2016, dtype: float64

In [7]:
# Summary statistics of amount_2017 over the rows with a positive 2017 amount.
# The column is selected before describing so only that column is summarised.
donations_13to17.query('amount_2017 > 0')['amount_2017'].describe()

count    574591.000000
mean        126.817661
std        1628.855404
min           1.000000
25%          25.000000
50%          50.000000
75%         100.000000
max      634709.970000
Name: amount_2017, dtype: float64

In [31]:
# One frame per year, each keeping only the rows whose amount for that year
# is strictly positive. The generator yields the frames in year order, so the
# unpacking below binds 2013 -> donations_2013, ..., 2017 -> donations_2017.
(
    donations_2013,
    donations_2014,
    donations_2015,
    donations_2016,
    donations_2017,
) = (
    donations_13to17.query(f'amount_{year} > 0')
    for year in range(2013, 2018)
)

In [None]:
# 20-bin histogram of the 2013 amounts (no KDE curve); the y axis is switched
# to a log scale on the Axes that distplot returns.
ax = sns.distplot(donations_2013['amount_2013'], kde=False, bins=20)
ax.set_yscale('log')

In [None]:
# Interactive version of the 2013 amount histogram: 100 bins, log-scaled y axis.
fig = px.histogram(
    data_frame=donations_2013,
    x="amount_2013",
    nbins=100,
    log_y=True,
)
fig.show()

In [None]:
# The ten largest amount_2013 values (index labels identify the rows).
donations_2013['amount_2013'].nlargest(10)

In [None]:
# Share (in percent) of the 2013 total that comes from the three largest
# amount_2013 values.
amounts_2013 = donations_2013['amount_2013']
amounts_2013.nlargest(3).sum() / amounts_2013.sum() * 100

In [None]:
# Sum of all positive 2013 amounts.
donations_2013['amount_2013'].sum()

In [None]:
# Show the full row(s) holding the largest 2013 amount.
# The maximum is computed from the data instead of being pasted in as a float
# literal, so the lookup keeps working if the underlying data changes and does
# not depend on an exact float match against a hand-typed number.
donations_2013.loc[
    donations_2013['amount_2013'] == donations_2013['amount_2013'].max()
]

In [51]:
# List the column labels available on the 2013 subset.
donations_2013.columns

Index(['id', 'first_gift_year', 'lapsed_count', 'recovered_count',
       'max_consec_giving_years', 'total_giving_years', 'count_2013',
       'count_2014', 'count_2015', 'count_2016',
       ...
       'behavior_2017', 'gainloss_2013', 'gainloss_2014', 'gainloss_2015',
       'gainloss_2016', 'gainloss_2017', 'gainloss_2014_amt',
       'gainloss_2015_amt', 'gainloss_2016_amt', 'gainloss_2017_amt'],
      dtype='object', length=203)

### explore distribution of donation counts

In [45]:
# Year analysed by the count-distribution cells below. Change this single
# value (2013-2017) to switch year; the source frame and the renamed columns
# are all derived from it, so they cannot get out of sync.
ANALYSIS_YEAR = 2016

donations_by_year = {
    2013: donations_2013,
    2014: donations_2014,
    2015: donations_2015,
    2016: donations_2016,
    2017: donations_2017,
}

# rename() returns a new frame, so the per-year frame is left untouched and
# re-running this cell always gives the same result. The year-specific columns
# get generic names so the following cells are year-agnostic.
count_df = donations_by_year[ANALYSIS_YEAR].rename(columns={
    f'count_{ANALYSIS_YEAR}': 'count_col',
    f'amount_{ANALYSIS_YEAR}': 'amount_col',
})

In [46]:
# How many of the largest per-row donation counts to list. The printed label
# is built from this value so the label and the listing always agree.
TOP_N = 50

# number of donors in year (non-null ids)
print(f"# donors:  {count_df['id'].count():,}")

# amount donations in year
print(f"amount of donations:  ${count_df['amount_col'].sum():,.0f}")

# max # donations in year
print('max number donations: ', count_df['count_col'].max())

# the TOP_N largest donation counts, largest first
print(f'{TOP_N} highest numbers of donation: ', count_df['count_col'].nlargest(n=TOP_N))

# donors:  555,836
amount of donations:  $62,646,743
max number donations:  3687
10 highest numbers of donation:  52       3687
2508     3082
176      2534
614      1581
619      1501
137      1425
145      1234
149      1211
159      1114
251      1086
1713     1077
834      1054
615       967
1240      937
2318      936
168       910
1237      858
249       847
1086      773
129       768
2621      729
689       692
1085      686
1214      680
1243      638
142       636
361       635
135       630
1220      568
169       532
621       532
722       525
1645      469
10371     461
2262      459
1555      454
14032     450
12551     449
1552      435
1306      429
191       428
132       424
2823      416
4603      416
2605      415
1247      414
2281      414
248       411
3191      408
3082      402
Name: count_col, dtype: int64


In [47]:
# donors by # contributions
# One line per exact donation count 1..MAX_COUNT, formatted as:
#   count / # rows / share of rows / total amount / share of total amount
MAX_COUNT = 12

# Year-wide totals do not change inside the loop, so compute them once.
total_donors = count_df['count_col'].count()
total_amount = count_df['amount_col'].sum()

for n in range(1, MAX_COUNT + 1):
    donors_n = count_df.loc[count_df['count_col'] == n]
    n_donors = donors_n['count_col'].count()
    amount_n = donors_n['amount_col'].sum()
    print(
        f"{n} / {n_donors:,} / {n_donors / total_donors:.2%} / "
        f"${amount_n:,.0f} / {amount_n / total_amount:.2%}"
    )

1 / 445,766 / 80.20% / $25,998,167 / 41.50%
2 / 60,609 / 10.90% / $8,598,328 / 13.73%
3 / 19,212 / 3.46% / $4,437,224 / 7.08%
4 / 9,331 / 1.68% / $2,872,638 / 4.59%
5 / 5,442 / 0.98% / $2,122,789 / 3.39%
6 / 3,462 / 0.62% / $1,667,315 / 2.66%
7 / 2,362 / 0.42% / $1,275,255 / 2.04%
8 / 1,723 / 0.31% / $1,073,225 / 1.71%
9 / 1,258 / 0.23% / $825,033 / 1.32%
10 / 1,006 / 0.18% / $726,653 / 1.16%
11 / 784 / 0.14% / $688,105 / 1.10%
12 / 691 / 0.12% / $612,916 / 0.98%


In [48]:
# Rows with more than 12 donations in the year, i.e. everything beyond the
# last row of the per-count table printed above.
frequent = count_df['count_col'] > 12

# number who contributed greater than 12 times
# (select the column first: aggregating the whole frame would count/sum every
# column, including non-numeric ones, just to read a single value)
print('# donors contributed > 12x: ', count_df.loc[frequent, 'id'].count())

# amount donated who contributed greater than 12 times
print('amount: ', "${:,.0f}".format(count_df.loc[frequent, 'amount_col'].sum()))

# donors contributed > 12x:  4190
amount:  $11,749,094


In [29]:
# amount donated who contributed greater than 12 times
# Only amount_col is summed; summing the whole frame would also aggregate
# every other column (including non-numeric ones) just to read one value.
count_df.loc[count_df['count_col'] > 12, 'amount_col'].sum()

4852803.7399999965

In [None]:
# Percentage of the 2013 rows (counted by non-null id) with more than four
# donations in 2013.
n_over_4x = donations_2013.query('count_2013 > 4')['id'].count()
n_over_4x / donations_2013['id'].count() * 100

In [None]:
# Overlay the 2013 and 2014 donation-count histograms (20 bins each, no KDE)
# on the same axes, then switch the shared y axis to a log scale.
for frame, column in (
    (donations_2013, 'count_2013'),
    (donations_2014, 'count_2014'),
):
    sns.distplot(frame[column], kde=False, bins=20)
plt.yscale('log')

In [None]:
# 2x2 grid of axes; only the top-left panel is populated so far.
# Each Axes is taken from add_subplot() directly. The previous version passed
# ax=axes[0,0], but `axes` was never defined in this cell, so it either raised
# NameError or silently drew on a stale figure left over from another cell.
fig = plt.figure()

ax_top_left = fig.add_subplot(221)   #top left
sns.distplot(donations_2013['count_2013'], kde=False, bins=20, ax=ax_top_left)

fig.add_subplot(222)   #top right
fig.add_subplot(223)   #bottom left
fig.add_subplot(224)   #bottom right
plt.show()


In [None]:
# 2013 rows with more than one donation in 2013.
gr1_2013 = donations_2013.loc[donations_2013['count_2013'] > 1]

In [None]:
# (rows, columns) of the subset with more than one donation in 2013.
gr1_2013.shape

In [None]:
# Histogram of donation counts in 2014, coloured by behavior_2014, with a
# log-scaled y axis.
# `labels` must be a dict mapping column name -> display label; the bare
# string passed previously was not a valid mapping, so the intended axis
# label was never applied.
fig = px.histogram(
    donations_2014,
    x="count_2014",
    color='behavior_2014',
    title="# of donations per donor",
    labels={'count_2014': 'donations'},
    nbins=50,
    log_y=True,
)
fig.show()

In [None]:
# np.histogram returns (counts, bin_edges) -- in that order -- and there is
# one more edge than there are counts. The previous unpacking had the two
# names swapped, so x and y received arrays of different lengths.
counts, bin_edges = np.histogram(donations_2013.count_2013)

# Place each bar at the centre of its bin so x and y have the same length.
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

fig = px.bar(x=bin_centers, y=counts, labels={'x':'# donations', 'y':'count'})
fig.show()

In [None]:
# 2x2 grid (shared x axis) with one 20-bin distplot of donation counts per
# year, 2013-2016. axes.flat walks the grid row by row, so the panels land
# top-left, top-right, bottom-left, bottom-right in year order.
f, axes = plt.subplots(2, 2, figsize=(7, 7), sharex=True)

panels = (
    (donations_2013['count_2013'], "skyblue"),
    (donations_2014['count_2014'], "olive"),
    (donations_2015['count_2015'], "gold"),
    (donations_2016['count_2016'], "teal"),
)
for panel_ax, (series, color) in zip(axes.flat, panels):
    sns.distplot(series, color=color, ax=panel_ax, bins=20)

In [None]:
# Summary statistics of the 2017 donation counts.
donations_2017['count_2017'].describe()

In [None]:
# Preview the first rows of the 2013 subset.
# NOTE(review): the rendered rows may expose donor-identifying fields --
# confirm before sharing the notebook with outputs.
donations_2013.head()

In [None]:
# Histogram of 2014 donation counts, coloured by behavior_2014, 20 bins,
# log-scaled y axis. The title and labels now describe this dataset; the
# previous ones were left over from an unrelated notebook and referred to
# columns that do not appear in this plot.
px.histogram(
    data_frame=donations_2014,
    x="count_2014",
    color="behavior_2014",
    histfunc="count",
    title="Distribution of # donations per donor in 2014, by behavior",
    labels={'count_2014': '# donations', 'behavior_2014': 'behavior'},
    template='plotly_white',
    nbins=20,
    log_y=True,
)

In [None]:
# Unit-width histogram of the 2013 donation counts over the edges 1, 2, ..., 101.
edges = np.arange(1, 102)
counts, edges = np.histogram(donations_2013['count_2013'], bins=edges)

# Bin midpoint minus one half, i.e. the left edge of each unit bin (1.0 .. 100.0),
# so each bar sits on the integer donation count it represents.
bar_positions = (edges[:-1] + edges[1:]) / 2 - 0.5

fig = px.bar(
    x=bar_positions,
    y=counts,
    labels={'x':'# donations', 'y':'# donors'},
    log_y=True,
)
fig.show()

In [None]:
# Unit-width histogram of the 2014 donation counts over the edges 1, 2, ..., 101.
edges = np.arange(1, 102)
counts, edges = np.histogram(donations_2014['count_2014'], bins=edges)

# Bin midpoint minus one half, i.e. the left edge of each unit bin (1.0 .. 100.0),
# so each bar sits on the integer donation count it represents.
bar_positions = (edges[:-1] + edges[1:]) / 2 - 0.5

fig = px.bar(
    x=bar_positions,
    y=counts,
    labels={'x':'# donations', 'y':'# donors'},
    log_y=True,
)
fig.show()