### Dataframes in this notebook

* donations_df (original read of csv)
* donations (additional columns, converted cents to dollars, etc.)
* donations <-- PICKLE created
* donations_13to17 (only donors who donated 2013-2017; removed other cohorts) <-- PICKLE created
* donations_13to17_sample <-- CSV created 

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("white")
%matplotlib inline

In [2]:
donations_df=pd.read_csv('../data/donation_export_cohort.csv')

In [3]:
donations_df.shape

(2024554, 118)

In [4]:
for col in donations_df.columns:
    print(col)

.id
 .transactions.first_gift_year
 .transactions.lapsed_total_amount
 .transactions.recovered_total_amount
 .transactions.lapsed_count
 .transactions.recovered_count
 .transactions.max_consecutive_giving_years
 .transactions.total_giving_years
 .transactions.amount_in_year.in.2013-01-01
  .transactions.amount_in_year.in.2014-01-01
  .transactions.amount_in_year.in.2015-01-01
  .transactions.amount_in_year.in.2016-01-01
  .transactions.amount_in_year.in.2017-01-01
 .transactions.count_in_year.in.2013-01-01
  .transactions.count_in_year.in.2014-01-01
  .transactions.count_in_year.in.2015-01-01
  .transactions.count_in_year.in.2016-01-01
  .transactions.count_in_year.in.2017-01-01
 .transactions.lapsed_amount_in_year.in.2013-01-01
  .transactions.lapsed_amount_in_year.in.2014-01-01
  .transactions.lapsed_amount_in_year.in.2015-01-01
  .transactions.lapsed_amount_in_year.in.2016-01-01
  .transactions.lapsed_amount_in_year.in.2017-01-01
 .transactions.upgraded_amount_in_year.in.2013-01-01


In [5]:
donations_df.head()

Unnamed: 0,.id,.transactions.first_gift_year,.transactions.lapsed_total_amount,.transactions.recovered_total_amount,.transactions.lapsed_count,.transactions.recovered_count,.transactions.max_consecutive_giving_years,.transactions.total_giving_years,.transactions.amount_in_year.in.2013-01-01,.transactions.amount_in_year.in.2014-01-01,...,.transactions.amount_in_month.in.2017-03-01,.transactions.amount_in_month.in.2017-04-01,.transactions.amount_in_month.in.2017-05-01,.transactions.amount_in_month.in.2017-06-01,.transactions.amount_in_month.in.2017-07-01,.transactions.amount_in_month.in.2017-08-01,.transactions.amount_in_month.in.2017-09-01,.transactions.amount_in_month.in.2017-10-01,.transactions.amount_in_month.in.2017-11-01,.transactions.amount_in_month.in.2017-12-01
0,1f4b5b6e68445c6c4a0509b3aca93f38,2015-01-01,0,0,4,4,4,4,0,0,...,1929073,0,0,0,182290,134853,0,1327669,2615659,74954.0
1,4aaab6d244bf3599682239ed5591af8a,2016-01-01,2500,0,2,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,
2,0b0765dc9c759adc48a07688ba25e94e,2015-01-01,4000,0,3,2,2,2,0,0,...,0,0,0,0,0,0,0,0,0,
3,377944ad61f72d800b25ec1862aec363,2016-01-01,2500,0,2,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,
4,6d5b22d39e68c656071a842732c63a0c,2015-01-01,2500,10000,4,3,2,3,0,0,...,0,0,0,0,0,0,0,0,0,


In [6]:
with pd.option_context('display.max_rows', 10, 'display.max_columns', None):
    print(donations_df)


                                      .id  .transactions.first_gift_year  \
0        1f4b5b6e68445c6c4a0509b3aca93f38                     2015-01-01   
1        4aaab6d244bf3599682239ed5591af8a                     2016-01-01   
2        0b0765dc9c759adc48a07688ba25e94e                     2015-01-01   
3        377944ad61f72d800b25ec1862aec363                     2016-01-01   
4        6d5b22d39e68c656071a842732c63a0c                     2015-01-01   
...                                   ...                            ...   
2024549  17349045834fff2e3a1644a240ada6a5                     2016-01-01   
2024550  07cbfd22c88371d0fc4e5d68799dc4cc                     2016-01-01   
2024551  90449de3dfeeac866d2ec5c88ac9da60                     2016-01-01   
2024552  071677d93e6f46454d8c53daa84a1463                     2016-01-01   
2024553  ca4b55c74c251d3a50311d6d6540d099                     2018-01-01   

          .transactions.lapsed_total_amount  \
0                                       

### 1. Data clean up / create master dataframe

In [7]:
# rename columns
donations_df.columns=[
'id',
'first_gift_year',
'lapsed_amount',
'recovered_amount',
'lapsed_count',
'recovered_count',
'max_consec_giving_years',
'total_giving_years',
'amount_2013',
'amount_2014',
'amount_2015',
'amount_2016',
'amount_2017',
'count_2013',
'count_2014',
'count_2015',
'count_2016',
'count_2017',
'lapsed_amount_2013',
'lapsed_amount_2014',
'lapsed_amount_2015',
'lapsed_amount_2016',
'lapsed_amount_2017',
'upgraded_amount_2013',
'upgraded_amount_2014',
'upgraded_amount_2015',
'upgraded_amount_2016',
'upgraded_amount_2017',
'downgraded_amount_2013',
'downgraded_amount_2014',
'downgraded_amount_2015',
'downgraded_amount_2016',
'downgraded_amount_2017',
'amount_yr0',
'amount_yr1',
'amount_yr2',
'amount_yr3',
'amount_yr4',
'count_yr0',
'count_yr1',
'count_yr2',
'count_yr3',
'count_yr4',
'lapsed_amount_yr0',
'lapsed_amount_yr1',
'lapsed_amount_yr2',
'lapsed_amount_yr3',
'lapsed_amount_yr4',
'upgraded_amount_yr0',
'upgraded_amount_yr1',
'upgraded_amount_yr2',
'upgraded_amount_yr3',
'upgraded_amount_yr4',
'downgraded_amount_yr0',
'downgraded_amount_yr1',
'downgraded_amount_yr2',
'downgraded_amount_yr3',
'downgraded_amount_yr4',
'amount_01_2013',
'amount_02_2013',
'amount_03_2013',
'amount_04_2013',
'amount_05_2013',
'amount_06_2013',
'amount_07_2013',
'amount_08_2013',
'amount_09_2013',
'amount_10_2013',
'amount_11_2013',
'amount_12_2013',
'amount_01_2014',
'amount_02_2014',
'amount_03_2014',
'amount_04_2014',
'amount_05_2014',
'amount_06_2014',
'amount_07_2014',
'amount_08_2014',
'amount_09_2014',
'amount_10_2014',
'amount_11_2014',
'amount_12_2014',
'amount_01_2015',
'amount_02_2015',
'amount_03_2015',
'amount_04_2015',
'amount_05_2015',
'amount_06_2015',
'amount_07_2015',
'amount_08_2015',
'amount_09_2015',
'amount_10_2015',
'amount_11_2015',
'amount_12_2015',
'amount_01_2016',
'amount_02_2016',
'amount_03_2016',
'amount_04_2016',
'amount_05_2016',
'amount_06_2016',
'amount_07_2016',
'amount_08_2016',
'amount_09_2016',
'amount_10_2016',
'amount_11_2016',
'amount_12_2016',
'amount_01_2017',
'amount_02_2017',
'amount_03_2017',
'amount_04_2017',
'amount_05_2017',
'amount_06_2017',
'amount_07_2017',
'amount_08_2017',
'amount_09_2017',
'amount_10_2017',
'amount_11_2017',
'amount_12_2017',
]

In [8]:
donations_df.head()

Unnamed: 0,id,first_gift_year,lapsed_amount,recovered_amount,lapsed_count,recovered_count,max_consec_giving_years,total_giving_years,amount_2013,amount_2014,...,amount_03_2017,amount_04_2017,amount_05_2017,amount_06_2017,amount_07_2017,amount_08_2017,amount_09_2017,amount_10_2017,amount_11_2017,amount_12_2017
0,1f4b5b6e68445c6c4a0509b3aca93f38,2015-01-01,0,0,4,4,4,4,0,0,...,1929073,0,0,0,182290,134853,0,1327669,2615659,74954.0
1,4aaab6d244bf3599682239ed5591af8a,2016-01-01,2500,0,2,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,
2,0b0765dc9c759adc48a07688ba25e94e,2015-01-01,4000,0,3,2,2,2,0,0,...,0,0,0,0,0,0,0,0,0,
3,377944ad61f72d800b25ec1862aec363,2016-01-01,2500,0,2,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,
4,6d5b22d39e68c656071a842732c63a0c,2015-01-01,2500,10000,4,3,2,3,0,0,...,0,0,0,0,0,0,0,0,0,


In [9]:
#replace nans in Dec 2017 amount donated column with zeros
donations_df['amount_12_2017'] = donations_df['amount_12_2017'].fillna(0)

In [10]:
#create new dataframe with all "amount" columns to convert from cents to dollars
amount_columns = donations_df[[
'lapsed_amount',
'recovered_amount',
"amount_2013",
"amount_2014",
"amount_2015",
"amount_2016",
"amount_2017",
"lapsed_amount_2013",
"lapsed_amount_2014",
"lapsed_amount_2015",
"lapsed_amount_2016",
"lapsed_amount_2017",
"upgraded_amount_2013",
"upgraded_amount_2014",
"upgraded_amount_2015",
"upgraded_amount_2016",
"upgraded_amount_2017",
"downgraded_amount_2013",
"downgraded_amount_2014",
"downgraded_amount_2015",
"downgraded_amount_2016",
"downgraded_amount_2017",
'amount_yr0',
'amount_yr1',
'amount_yr2',
'amount_yr3',
'amount_yr4',
'lapsed_amount_yr0',
'lapsed_amount_yr1',
'lapsed_amount_yr2',
'lapsed_amount_yr3',
'lapsed_amount_yr4',
'upgraded_amount_yr0',
'upgraded_amount_yr1',
'upgraded_amount_yr2',
'upgraded_amount_yr3',
'upgraded_amount_yr4',
'downgraded_amount_yr0',
'downgraded_amount_yr1',
'downgraded_amount_yr2',
'downgraded_amount_yr3',
'downgraded_amount_yr4',
"amount_01_2013",
"amount_02_2013",
"amount_03_2013",
"amount_04_2013",
"amount_05_2013",
"amount_06_2013",
"amount_07_2013",
"amount_08_2013",
"amount_09_2013",
"amount_10_2013",
"amount_11_2013",
"amount_12_2013",
"amount_01_2014",
"amount_02_2014",
"amount_03_2014",
"amount_04_2014",
"amount_05_2014",
"amount_06_2014",
"amount_07_2014",
"amount_08_2014",
"amount_09_2014",
"amount_10_2014",
"amount_11_2014",
"amount_12_2014",
"amount_01_2015",
"amount_02_2015",
"amount_03_2015",
"amount_04_2015",
"amount_05_2015",
"amount_06_2015",
"amount_07_2015",
"amount_08_2015",
"amount_09_2015",
"amount_10_2015",
"amount_11_2015",
"amount_12_2015",
"amount_01_2016",
"amount_02_2016",
"amount_03_2016",
"amount_04_2016",
"amount_05_2016",
"amount_06_2016",
"amount_07_2016",
"amount_08_2016",
"amount_09_2016",
"amount_10_2016",
"amount_11_2016",
"amount_12_2016",
"amount_01_2017",
"amount_02_2017",
"amount_03_2017",
"amount_04_2017",
"amount_05_2017",
"amount_06_2017",
"amount_07_2017",
"amount_08_2017",
"amount_09_2017",
"amount_10_2017",
"amount_11_2017",
"amount_12_2017"]].copy()

In [11]:
#create new dataframe with non-amount columns
non_amount_columns = donations_df[[
'id',
'first_gift_year',
"lapsed_count",
"recovered_count",
"max_consec_giving_years",
"total_giving_years",
"count_2013",
"count_2014",
"count_2015",
"count_2016",
"count_2017",
'count_yr0',
'count_yr1',
'count_yr2',
'count_yr3',
'count_yr4']].copy()

In [12]:
#convert cents to dollars in all "amount" columns
amount_columns=amount_columns/100

In [13]:
#merge amount and non_amount dataframes back into one
donations=non_amount_columns.merge(amount_columns, left_index=True, right_index=True)

In [14]:
#create new column "amount_total", which equals a donor's total donation amount 2013-2017
donations['amount_total']=donations['amount_2013'] + donations['amount_2014'] + donations['amount_2015'] + donations['amount_2016'] + donations['amount_2017']

In [15]:
#create new column "count_total", which equals a donor's total number of donations 2013-2017
donations['count_total']=donations['count_2013'] + donations['count_2014'] + donations['count_2015'] + donations['count_2016'] + donations['count_2017']

In [16]:
#create columns for cohort13--amounts donated per month by donor year

donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_01_yr0'] = donations.amount_01_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_02_yr0'] = donations.amount_02_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_03_yr0'] = donations.amount_03_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_04_yr0'] = donations.amount_04_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_05_yr0'] = donations.amount_05_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_06_yr0'] = donations.amount_06_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_07_yr0'] = donations.amount_07_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_08_yr0'] = donations.amount_08_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_09_yr0'] = donations.amount_09_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_10_yr0'] = donations.amount_10_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_11_yr0'] = donations.amount_11_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_12_yr0'] = donations.amount_12_2013
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_01_yr1'] = donations.amount_01_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_02_yr1'] = donations.amount_02_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_03_yr1'] = donations.amount_03_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_04_yr1'] = donations.amount_04_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_05_yr1'] = donations.amount_05_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_06_yr1'] = donations.amount_06_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_07_yr1'] = donations.amount_07_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_08_yr1'] = donations.amount_08_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_09_yr1'] = donations.amount_09_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_10_yr1'] = donations.amount_10_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_11_yr1'] = donations.amount_11_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_12_yr1'] = donations.amount_12_2014
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_01_yr2'] = donations.amount_01_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_02_yr2'] = donations.amount_02_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_03_yr2'] = donations.amount_03_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_04_yr2'] = donations.amount_04_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_05_yr2'] = donations.amount_05_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_06_yr2'] = donations.amount_06_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_07_yr2'] = donations.amount_07_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_08_yr2'] = donations.amount_08_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_09_yr2'] = donations.amount_09_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_10_yr2'] = donations.amount_10_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_11_yr2'] = donations.amount_11_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_12_yr2'] = donations.amount_12_2015
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_01_yr3'] = donations.amount_01_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_02_yr3'] = donations.amount_02_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_03_yr3'] = donations.amount_03_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_04_yr3'] = donations.amount_04_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_05_yr3'] = donations.amount_05_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_06_yr3'] = donations.amount_06_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_07_yr3'] = donations.amount_07_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_08_yr3'] = donations.amount_08_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_09_yr3'] = donations.amount_09_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_10_yr3'] = donations.amount_10_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_11_yr3'] = donations.amount_11_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_12_yr3'] = donations.amount_12_2016
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_01_yr4'] = donations.amount_01_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_02_yr4'] = donations.amount_02_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_03_yr4'] = donations.amount_03_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_04_yr4'] = donations.amount_04_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_05_yr4'] = donations.amount_05_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_06_yr4'] = donations.amount_06_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_07_yr4'] = donations.amount_07_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_08_yr4'] = donations.amount_08_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_09_yr4'] = donations.amount_09_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_10_yr4'] = donations.amount_10_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_11_yr4'] = donations.amount_11_2017
donations.loc[(donations['first_gift_year'] == '2013-01-01'), 'amount_12_yr4'] = donations.amount_12_2017

In [17]:
#create columns for cohort14--amounts donated per month by donor year

donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_01_yr1'] = donations.amount_01_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_02_yr1'] = donations.amount_02_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_03_yr1'] = donations.amount_03_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_04_yr1'] = donations.amount_04_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_05_yr1'] = donations.amount_05_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_06_yr1'] = donations.amount_06_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_07_yr1'] = donations.amount_07_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_08_yr1'] = donations.amount_08_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_09_yr1'] = donations.amount_09_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_10_yr1'] = donations.amount_10_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_11_yr1'] = donations.amount_11_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_12_yr1'] = donations.amount_12_2014
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_01_yr2'] = donations.amount_01_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_02_yr2'] = donations.amount_02_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_03_yr2'] = donations.amount_03_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_04_yr2'] = donations.amount_04_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_05_yr2'] = donations.amount_05_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_06_yr2'] = donations.amount_06_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_07_yr2'] = donations.amount_07_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_08_yr2'] = donations.amount_08_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_09_yr2'] = donations.amount_09_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_10_yr2'] = donations.amount_10_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_11_yr2'] = donations.amount_11_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_12_yr2'] = donations.amount_12_2015
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_01_yr3'] = donations.amount_01_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_02_yr3'] = donations.amount_02_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_03_yr3'] = donations.amount_03_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_04_yr3'] = donations.amount_04_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_05_yr3'] = donations.amount_05_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_06_yr3'] = donations.amount_06_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_07_yr3'] = donations.amount_07_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_08_yr3'] = donations.amount_08_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_09_yr3'] = donations.amount_09_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_10_yr3'] = donations.amount_10_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_11_yr3'] = donations.amount_11_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_12_yr3'] = donations.amount_12_2016
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_01_yr4'] = donations.amount_01_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_02_yr4'] = donations.amount_02_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_03_yr4'] = donations.amount_03_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_04_yr4'] = donations.amount_04_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_05_yr4'] = donations.amount_05_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_06_yr4'] = donations.amount_06_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_07_yr4'] = donations.amount_07_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_08_yr4'] = donations.amount_08_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_09_yr4'] = donations.amount_09_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_10_yr4'] = donations.amount_10_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_11_yr4'] = donations.amount_11_2017
donations.loc[(donations['first_gift_year'] == '2014-01-01'), 'amount_12_yr4'] = donations.amount_12_2017

In [18]:
#create columns for cohort15--amounts donated per month by donor year

donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_01_yr1'] = donations.amount_01_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_02_yr1'] = donations.amount_02_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_03_yr1'] = donations.amount_03_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_04_yr1'] = donations.amount_04_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_05_yr1'] = donations.amount_05_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_06_yr1'] = donations.amount_06_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_07_yr1'] = donations.amount_07_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_08_yr1'] = donations.amount_08_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_09_yr1'] = donations.amount_09_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_10_yr1'] = donations.amount_10_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_11_yr1'] = donations.amount_11_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_12_yr1'] = donations.amount_12_2015
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_01_yr2'] = donations.amount_01_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_02_yr2'] = donations.amount_02_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_03_yr2'] = donations.amount_03_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_04_yr2'] = donations.amount_04_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_05_yr2'] = donations.amount_05_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_06_yr2'] = donations.amount_06_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_07_yr2'] = donations.amount_07_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_08_yr2'] = donations.amount_08_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_09_yr2'] = donations.amount_09_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_10_yr2'] = donations.amount_10_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_11_yr2'] = donations.amount_11_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_12_yr2'] = donations.amount_12_2016
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_01_yr3'] = donations.amount_01_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_02_yr3'] = donations.amount_02_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_03_yr3'] = donations.amount_03_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_04_yr3'] = donations.amount_04_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_05_yr3'] = donations.amount_05_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_06_yr3'] = donations.amount_06_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_07_yr3'] = donations.amount_07_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_08_yr3'] = donations.amount_08_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_09_yr3'] = donations.amount_09_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_10_yr3'] = donations.amount_10_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_11_yr3'] = donations.amount_11_2017
donations.loc[(donations['first_gift_year'] == '2015-01-01'), 'amount_12_yr3'] = donations.amount_12_2017

In [18]:
#create columns for cohort16--amounts donated per month by donor year

donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_01_yr1'] = donations.amount_01_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_02_yr1'] = donations.amount_02_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_03_yr1'] = donations.amount_03_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_04_yr1'] = donations.amount_04_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_05_yr1'] = donations.amount_05_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_06_yr1'] = donations.amount_06_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_07_yr1'] = donations.amount_07_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_08_yr1'] = donations.amount_08_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_09_yr1'] = donations.amount_09_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_10_yr1'] = donations.amount_10_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_11_yr1'] = donations.amount_11_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_12_yr1'] = donations.amount_12_2016
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_01_yr2'] = donations.amount_01_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_02_yr2'] = donations.amount_02_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_03_yr2'] = donations.amount_03_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_04_yr2'] = donations.amount_04_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_05_yr2'] = donations.amount_05_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_06_yr2'] = donations.amount_06_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_07_yr2'] = donations.amount_07_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_08_yr2'] = donations.amount_08_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_09_yr2'] = donations.amount_09_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_10_yr2'] = donations.amount_10_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_11_yr2'] = donations.amount_11_2017
donations.loc[(donations['first_gift_year'] == '2016-01-01'), 'amount_12_yr2'] = donations.amount_12_2017

In [19]:
#create columns for cohort17--amounts donated per month by donor year

donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_01_yr1'] = donations.amount_01_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_02_yr1'] = donations.amount_02_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_03_yr1'] = donations.amount_03_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_04_yr1'] = donations.amount_04_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_05_yr1'] = donations.amount_05_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_06_yr1'] = donations.amount_06_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_07_yr1'] = donations.amount_07_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_08_yr1'] = donations.amount_08_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_09_yr1'] = donations.amount_09_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_10_yr1'] = donations.amount_10_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_11_yr1'] = donations.amount_11_2017
donations.loc[(donations['first_gift_year'] == '2017-01-01'), 'amount_12_yr1'] = donations.amount_12_2017

In [20]:
donations.to_pickle('donations.pkl')

In [2]:
donations=pd.read_pickle('donations.pkl')

In [21]:
#donations.drop(['years_donated1'], axis=1, inplace=True)

### 2. Create dataframe isolating donations made 2013-2017

In [22]:
donations.groupby('first_gift_year').count()['id']

first_gift_year
2012-01-01       141
2013-01-01    320069
2014-01-01    320673
2015-01-01    355137
2016-01-01    445598
2017-01-01    434991
2018-01-01    147945
Name: id, dtype: int64

In [5]:
# Remove donors who did not make donation 2013-2017)
donations_13to17=donations.query('first_gift_year != "2012-01-01" & first_gift_year != "2018-01-01"')

In [6]:
donations_13to17.groupby('first_gift_year').count()['id']

first_gift_year
2013-01-01    320069
2014-01-01    320673
2015-01-01    355137
2016-01-01    445598
2017-01-01    434991
Name: id, dtype: int64

In [7]:
#years_donated
conditions = [(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']> 0),
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']> 0),
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']> 0),
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']== 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']==0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']==0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] ==0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']> 0) |
(donations_13to17.loc[:,'amount_2013'] ==0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']> 0),
(donations_13to17.loc[:,'amount_2013'] >0) & (donations_13to17.loc[:,'amount_2014'] >0) & (donations_13to17.loc[:,'amount_2015']>0) & (donations_13to17.loc[:,'amount_2016']>0) & (donations_13to17.loc[:,'amount_2017']> 0)]

choices = ['1', '2', '3', '4', '5']

donations_13to17.loc[:,'years_donated'] = np.select(conditions, choices, default='0')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [8]:
#giving patterns
conditions=[(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] ==0) & (donations_13to17[('amount_yr2')]==0) & (donations_13to17[('amount_yr3')]==0) & (donations_13to17[('amount_yr4')]== 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] >0) & (donations_13to17[('amount_yr2')]==0) & (donations_13to17[('amount_yr3')]==0) & (donations_13to17[('amount_yr4')]== 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] ==0) & (donations_13to17[('amount_yr2')]>0) & (donations_13to17[('amount_yr3')]==0) & (donations_13to17[('amount_yr4')]== 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] ==0) & (donations_13to17[('amount_yr2')]==0) & (donations_13to17[('amount_yr3')]>0) & (donations_13to17[('amount_yr4')]== 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] ==0) & (donations_13to17[('amount_yr2')]==0) & (donations_13to17[('amount_yr3')]==0) & (donations_13to17[('amount_yr4')]> 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] >0) & (donations_13to17[('amount_yr2')]>0) & (donations_13to17[('amount_yr3')]==0) & (donations_13to17[('amount_yr4')]== 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] >0) & (donations_13to17[('amount_yr2')]==0) & (donations_13to17[('amount_yr3')]>0) & (donations_13to17[('amount_yr4')]== 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] >0) & (donations_13to17[('amount_yr2')]==0) & (donations_13to17[('amount_yr3')]==0) & (donations_13to17[('amount_yr4')]> 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] ==0) & (donations_13to17[('amount_yr2')]>0) & (donations_13to17[('amount_yr3')]>0) & (donations_13to17[('amount_yr4')]== 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] ==0) & (donations_13to17[('amount_yr2')]>0) & (donations_13to17[('amount_yr3')]==0) & (donations_13to17[('amount_yr4')]>0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] ==0) & (donations_13to17[('amount_yr2')]==0) & (donations_13to17[('amount_yr3')]>0) & (donations_13to17[('amount_yr4')]> 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] >0) & (donations_13to17[('amount_yr2')]>0) & (donations_13to17[('amount_yr3')]>0) & (donations_13to17[('amount_yr4')]== 0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] >0) & (donations_13to17[('amount_yr2')]>0) & (donations_13to17[('amount_yr3')]==0) & (donations_13to17[('amount_yr4')]>0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] >0) & (donations_13to17[('amount_yr2')]==0) & (donations_13to17[('amount_yr3')]>0) & (donations_13to17[('amount_yr4')]>0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] ==0) & (donations_13to17[('amount_yr2')]>0) & (donations_13to17[('amount_yr3')]>0) & (donations_13to17[('amount_yr4')]>0) ,
(donations_13to17[('amount_yr0')] >0) & (donations_13to17[('amount_yr1')] >0) & (donations_13to17[('amount_yr2')]>0) & (donations_13to17[('amount_yr3')]>0) & (donations_13to17[('amount_yr4')]>0)]


choices = ['0,',
'0,1,',
'0,2,',
'0,3,',
'0,4,',
'0,1,2,',
'0,1,3,',
'0,1,4,',
'0,2,3,',
'0,2,4,',
'0,3,4',
'0,1,2,3,',
'0,1,2,4,',
'0,1,3,4,',
'0,2,3,4,',
'0,1,2,3,4,']

donations_13to17.loc[:,'cohort_gift_pattern'] = np.select(conditions, choices, default='none')

In [9]:
donations_13to17.loc[:,'years_donated']= donations_13to17.loc[:,'years_donated'].astype(float)

In [10]:
donations_13to17.loc[:,'perc_years_donated'] = donations_13to17.years_donated / 5

In [11]:
#giving patterns
conditions = [(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]> 0),
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]> 0),
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]> 0),
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]== 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]==0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]==0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] ==0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]> 0) ,
(donations_13to17.loc[:,('amount_2013')] ==0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]> 0),
(donations_13to17.loc[:,('amount_2013')] >0) & (donations_13to17.loc[:,('amount_2014')] >0) & (donations_13to17.loc[:,('amount_2015')]>0) & (donations_13to17.loc[:,('amount_2016')]>0) & (donations_13to17.loc[:,('amount_2017')]> 0)
]

choices = ['1,', '2,', 
'3,', 
'4,', 
'5,', 
'1,2,',
'1,3,', 
'1,4,', 
'1,5,', 
'2,3,', 
'2,4,', 
'2,5,', 
'3,4,', 
'3,5,', 
'4,5,', 
'1,2,3,', 
'1,2,4,', 
'1,2,5,', 
'1,3,4,', 
'1,3,5,', 
'1,4,5,', 
'2,3,4,', 
'2,3,5,', 
'2,4,5,', 
'3,4,5,', 
'1,2,3,4,', 
'1,2,3,5,', 
'1,2,4,5,', 
'1,3,4,5,', 
'2,3,4,5,', 
'1,2,3,4,5,' 
]

donations_13to17.loc[:,'gift_pattern'] = np.select(conditions, choices, default='none')

In [12]:
donations_13to17

Unnamed: 0,id,first_gift_year,lapsed_count,recovered_count,max_consec_giving_years,total_giving_years,count_2013,count_2014,count_2015,count_2016,...,amount_07_yr4,amount_08_yr4,amount_09_yr4,amount_10_yr4,amount_11_yr4,amount_12_yr4,years_donated,cohort_gift_pattern,perc_years_donated,gift_pattern
0,1f4b5b6e68445c6c4a0509b3aca93f38,2015-01-01,4,4,4,4,0,0,42,64,...,,,,,,,3.0,0123,0.6,345
1,4aaab6d244bf3599682239ed5591af8a,2016-01-01,2,1,1,1,0,0,0,1,...,,,,,,,1.0,0,0.2,4
2,0b0765dc9c759adc48a07688ba25e94e,2015-01-01,3,2,2,2,0,0,2,2,...,,,,,,,2.0,01,0.4,34
3,377944ad61f72d800b25ec1862aec363,2016-01-01,2,1,1,1,0,0,0,1,...,,,,,,,1.0,0,0.2,4
4,6d5b22d39e68c656071a842732c63a0c,2015-01-01,4,3,2,3,0,0,1,1,...,,,,,,,2.0,013,0.4,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024548,e6a4ef71c2ad1fae992b0ca093b41dad,2016-01-01,2,1,1,1,0,0,0,1,...,,,,,,,1.0,0,0.2,4
2024549,17349045834fff2e3a1644a240ada6a5,2016-01-01,2,1,1,1,0,0,0,1,...,,,,,,,1.0,0,0.2,4
2024550,07cbfd22c88371d0fc4e5d68799dc4cc,2016-01-01,2,1,1,1,0,0,0,1,...,,,,,,,1.0,0,0.2,4
2024551,90449de3dfeeac866d2ec5c88ac9da60,2016-01-01,2,1,1,1,0,0,0,1,...,,,,,,,1.0,0,0.2,4


In [13]:
#donor_profile1: one-time vs repeat
donations_13to17.loc[(donations_13to17['years_donated'] == 1), 'donor_profile'] = 'one_time'
donations_13to17.loc[(donations_13to17['years_donated'] > 1), 'donor_profile'] = 'repeat'

In [14]:
donations_13to17.head()

Unnamed: 0,id,first_gift_year,lapsed_count,recovered_count,max_consec_giving_years,total_giving_years,count_2013,count_2014,count_2015,count_2016,...,amount_08_yr4,amount_09_yr4,amount_10_yr4,amount_11_yr4,amount_12_yr4,years_donated,cohort_gift_pattern,perc_years_donated,gift_pattern,donor_profile
0,1f4b5b6e68445c6c4a0509b3aca93f38,2015-01-01,4,4,4,4,0,0,42,64,...,,,,,,3.0,123,0.6,345,repeat
1,4aaab6d244bf3599682239ed5591af8a,2016-01-01,2,1,1,1,0,0,0,1,...,,,,,,1.0,0,0.2,4,one_time
2,0b0765dc9c759adc48a07688ba25e94e,2015-01-01,3,2,2,2,0,0,2,2,...,,,,,,2.0,1,0.4,34,repeat
3,377944ad61f72d800b25ec1862aec363,2016-01-01,2,1,1,1,0,0,0,1,...,,,,,,1.0,0,0.2,4,one_time
4,6d5b22d39e68c656071a842732c63a0c,2015-01-01,4,3,2,3,0,0,1,1,...,,,,,,2.0,13,0.4,34,repeat


In [15]:
donations_13to17.dtypes

id                          object
first_gift_year             object
lapsed_count                 int64
recovered_count              int64
max_consec_giving_years      int64
                            ...   
years_donated              float64
cohort_gift_pattern         object
perc_years_donated         float64
gift_pattern                object
donor_profile               object
Length: 185, dtype: object

In [16]:
yr_info=donations_13to17

In [17]:
#2013_behavior
yr_info.loc[:,'behavior_2013'] = np.where(yr_info['amount_2013'] > 0, "new", "none")

In [18]:
#2014_behavior
conditions = [(yr_info['amount_2013'] == 0) & (yr_info['amount_2014'] > 0),
              (yr_info['amount_2013'] > 0) & (yr_info['amount_2013'] > 0) & (yr_info['amount_2013'] < yr_info['amount_2014']),
              (yr_info['amount_2013'] > 0) & (yr_info['amount_2014'] > 0) & (yr_info['amount_2013'] > yr_info['amount_2014']),
              (yr_info['amount_2013'] > 0) & (yr_info['amount_2014'] > 0) & (yr_info['amount_2013'] == yr_info['amount_2014']),
              (yr_info['amount_2013'] > 0) & (yr_info['amount_2014'] == 0)]
              
choices = ['new', 'upgrade', 'downgrade', 'maintain', 'lapse_new']

yr_info.loc[:,'behavior_2014'] = np.select(conditions, choices, default='none')

In [19]:
#2015_behavior
conditions = [(yr_info['amount_2013'] == 0) & (yr_info['amount_2014'] == 0) & (yr_info['amount_2015'] > 0),
              (yr_info['amount_2014'] > 0) & (yr_info['amount_2015'] > 0) & (yr_info['amount_2014'] < yr_info['amount_2015']),
              (yr_info['amount_2014'] > 0) & (yr_info['amount_2015'] > 0) & (yr_info['amount_2014'] > yr_info['amount_2015']),
              (yr_info['amount_2014'] > 0) & (yr_info['amount_2015'] > 0) & (yr_info['amount_2014'] == yr_info['amount_2015']),
              (yr_info['amount_2013'] == 0) & (yr_info['amount_2014'] > 0) & (yr_info['amount_2015'] == 0),
              (yr_info['amount_2013'] > 0) & (yr_info['amount_2014'] > 0) & (yr_info['amount_2015'] == 0),              
              (yr_info['amount_2013'] > 0) & (yr_info['amount_2014'] == 0) & (yr_info['amount_2015'] > 0)]
              
choices = ['new', 'upgrade', 'downgrade', 'maintain', 'lapse_new', 'lapse_repeat', 'recover']

yr_info.loc[:,'behavior_2015'] = np.select(conditions, choices, default='none')

In [20]:
#2016_behavior
conditions = [(yr_info['amount_2013'] == 0) & (yr_info['amount_2014'] == 0) & (yr_info['amount_2015'] == 0) & (yr_info['amount_2016'] > 0),
              (yr_info['amount_2015'] > 0) & (yr_info['amount_2016'] > 0) & (yr_info['amount_2015'] < yr_info['amount_2016']),
              (yr_info['amount_2015'] > 0) & (yr_info['amount_2016'] > 0) & (yr_info['amount_2015'] > yr_info['amount_2016']),
              (yr_info['amount_2015'] > 0) & (yr_info['amount_2016'] > 0) & (yr_info['amount_2015'] == yr_info['amount_2016']),
              (yr_info['amount_2013'] == 0) & (yr_info['amount_2014'] == 0) & (yr_info['amount_2015'] > 0) & (yr_info['amount_2016'] == 0),
              ((yr_info['amount_2013'] > 0) | (yr_info['amount_2014'] > 0)) & (yr_info['amount_2015'] > 0) & (yr_info['amount_2016'] == 0),
              ((yr_info['amount_2013'] > 0) | (yr_info['amount_2014'] > 0)) & (yr_info['amount_2015'] == 0) & (yr_info['amount_2016'] > 0)]
              
choices = ['new', 'upgrade', 'downgrade', 'maintain', 'lapse_new', 'lapse_repeat', 'recover']

yr_info.loc[:,'behavior_2016'] = np.select(conditions, choices, default='none')

In [21]:
#2017_behavior
conditions = [(yr_info['amount_2013'] == 0) & (yr_info['amount_2014'] == 0) & (yr_info['amount_2015'] == 0) & (yr_info['amount_2016'] == 0) & (yr_info['amount_2017'] > 0),
              (yr_info['amount_2016'] > 0) & (yr_info['amount_2017'] > 0) & (yr_info['amount_2016'] < yr_info['amount_2017']),
              (yr_info['amount_2016'] > 0) & (yr_info['amount_2017'] > 0) & (yr_info['amount_2016'] > yr_info['amount_2017']),
              (yr_info['amount_2016'] > 0) & (yr_info['amount_2017'] > 0) & (yr_info['amount_2016'] == yr_info['amount_2017']),    
              (yr_info['amount_2013'] == 0) & (yr_info['amount_2014'] == 0) & (yr_info['amount_2015'] == 0) & (yr_info['amount_2016'] > 0) & (yr_info['amount_2017'] == 0),
              ((yr_info['amount_2013'] > 0) | (yr_info['amount_2014'] > 0) | (yr_info['amount_2015'] > 0)) & (yr_info['amount_2016'] > 0) & (yr_info['amount_2017'] == 0),
              ((yr_info['amount_2013'] > 0) | (yr_info['amount_2014'] > 0) | (yr_info['amount_2015'] > 0)) & (yr_info['amount_2016'] == 0) & (yr_info['amount_2017'] > 0)]
              
choices = ['new', 'upgrade', 'downgrade', 'maintain', 'lapse_new', 'lapse_repeat', 'recover']

yr_info.loc[:,'behavior_2017'] = np.select(conditions, choices, default='none')

In [22]:
#gainloss 2013
conditions = [(yr_info['behavior_2013'] == "new") | (yr_info['behavior_2013'] == "upgrade") | (yr_info['behavior_2013'] == "same") | (yr_info['behavior_2013'] == "recover"),
              (yr_info['behavior_2013'] == "downgrade") | (yr_info['behavior_2013'] == "lapse_new") | (yr_info['behavior_2013'] == "lapse_repeat")] 

choices = ['gain', 'loss']

yr_info.loc[:,'gainloss_2013'] = np.select(conditions, choices, default='none')

In [23]:
#gainloss 2014
conditions = [(yr_info['behavior_2014'] == "new") | (yr_info['behavior_2014'] == "upgrade") | (yr_info['behavior_2014'] == "same") | (yr_info['behavior_2014'] == "recover"),
              (yr_info['behavior_2014'] == "downgrade") | (yr_info['behavior_2014'] == "lapse_new") | (yr_info['behavior_2014'] == "lapse_repeat")] 

choices = ['gain', 'loss']

yr_info.loc[:,'gainloss_2014'] = np.select(conditions, choices, default='none')

In [24]:
#gainloss 2015
conditions = [(yr_info['behavior_2015'] == "new") | (yr_info['behavior_2015'] == "upgrade") | (yr_info['behavior_2015'] == "same") | (yr_info['behavior_2015'] == "recover"),
              (yr_info['behavior_2015'] == "downgrade") | (yr_info['behavior_2015'] == "lapse_new") | (yr_info['behavior_2015'] == "lapse_repeat")] 

choices = ['gain', 'loss']

yr_info.loc[:,'gainloss_2015'] = np.select(conditions, choices, default='none')

In [25]:
#gainloss 2016
conditions = [(yr_info['behavior_2016'] == "new") | (yr_info['behavior_2016'] == "upgrade") | (yr_info['behavior_2016'] == "same") | (yr_info['behavior_2016'] == "recover"),
              (yr_info['behavior_2016'] == "downgrade") | (yr_info['behavior_2016'] == "lapse_new") | (yr_info['behavior_2016'] == "lapse_repeat")] 

choices = ['gain', 'loss']

yr_info.loc[:,'gainloss_2016'] = np.select(conditions, choices, default='none')

In [26]:
#gainloss 2017
conditions = [(yr_info['behavior_2017'] == "new") | (yr_info['behavior_2017'] == "upgrade") | (yr_info['behavior_2017'] == "same") | (yr_info['behavior_2017'] == "recover"),
              (yr_info['behavior_2017'] == "downgrade") | (yr_info['behavior_2017'] == "lapse_new") | (yr_info['behavior_2017'] == "lapse_repeat")] 

choices = ['gain', 'loss']

yr_info.loc[:,'gainloss_2017'] = np.select(conditions, choices, default='none')

In [27]:
#gainloss 2014 amount
conditions = [(yr_info['behavior_2014'] == "new") | (yr_info['behavior_2014'] == "recover"), 
              (yr_info['behavior_2014'] == "upgrade") | (yr_info['behavior_2014'] == "downgrade"), 
              (yr_info['behavior_2014'] == "lapse_new") | (yr_info['behavior_2014'] == "lapse_repeat")] 

choices = [yr_info['amount_2014'], yr_info['amount_2014']-yr_info['amount_2013'], yr_info['amount_2013']*-1]

yr_info.loc[:,'gainloss_2014_amt'] = np.select(conditions, choices, default='0')

In [28]:
#gainloss 2015 amounts
conditions = [(yr_info['behavior_2015'] == "new") | (yr_info['behavior_2015'] == "recover"), 
              (yr_info['behavior_2015'] == "upgrade") | (yr_info['behavior_2015'] == "downgrade"), 
              (yr_info['behavior_2015'] == "lapse_new") | (yr_info['behavior_2015'] == "lapse_repeat")] 

choices = [yr_info['amount_2015'], yr_info['amount_2015']-yr_info['amount_2014'], yr_info['amount_2014']*-1]

yr_info.loc[:,'gainloss_2015_amt'] = np.select(conditions, choices, default='0')

In [29]:
#gainloss 2016 amounts
conditions = [(yr_info['behavior_2016'] == "new") | (yr_info['behavior_2016'] == "recover"), 
              (yr_info['behavior_2016'] == "upgrade") | (yr_info['behavior_2016'] == "downgrade"), 
              (yr_info['behavior_2016'] == "lapse_new") | (yr_info['behavior_2016'] == "lapse_repeat")] 

choices = [yr_info['amount_2016'], yr_info['amount_2016']-yr_info['amount_2015'], yr_info['amount_2015']*-1]

yr_info.loc[:,'gainloss_2016_amt'] = np.select(conditions, choices, default='0')

In [30]:
#gainloss 2017 amounts
conditions = [(yr_info['behavior_2017'] == "new") | (yr_info['behavior_2017'] == "recover"), 
              (yr_info['behavior_2017'] == "upgrade") | (yr_info['behavior_2017'] == "downgrade"), 
              (yr_info['behavior_2017'] == "lapse_new") | (yr_info['behavior_2017'] == "lapse_repeat")] 

choices = [yr_info['amount_2017'], yr_info['amount_2017']-yr_info['amount_2016'], yr_info['amount_2016']*-1]

yr_info.loc[:,'gainloss_2017_amt'] = np.select(conditions, choices, default='0')

In [31]:
yr_info.loc[:,'gainloss_2014_amt']= yr_info['gainloss_2014_amt'].astype(float)
yr_info.loc[:,'gainloss_2015_amt']= yr_info['gainloss_2015_amt'].astype(float)
yr_info.loc[:,'gainloss_2016_amt']= yr_info['gainloss_2016_amt'].astype(float)
yr_info.loc[:,'gainloss_2017_amt']= yr_info['gainloss_2017_amt'].astype(float)

In [32]:
donations_13to17=yr_info

In [33]:
donations_13to17.dtypes

id                          object
first_gift_year             object
lapsed_count                 int64
recovered_count              int64
max_consec_giving_years      int64
                            ...   
gainloss_2017               object
gainloss_2014_amt          float64
gainloss_2015_amt          float64
gainloss_2016_amt          float64
gainloss_2017_amt          float64
Length: 199, dtype: object

In [34]:
donations_13to17.to_pickle('donations_13to17.pkl')

#### Create sample dataset of donations_13to17

In [None]:
#donations_13to17_sample=donations_13to17.sample(n=30000)

In [None]:
#donations_13to17_sample.head()

In [None]:
# donations_13to17_sample1=donations_13to17_sample[['id', 'first_gift_year', 'amount_total','amount_2013', 'amount_2014','amount_2015','amount_2016','amount_2017','amount_yr0',
# 'amount_yr1','amount_yr2','amount_yr3',
# 'amount_yr4', 'amount_01_2013', 
# 'amount_02_2013',
# 'amount_03_2013',
# 'amount_04_2013',
# 'amount_05_2013',
# 'amount_06_2013',
# 'amount_07_2013',
# 'amount_08_2013',
# 'amount_09_2013',
# 'amount_10_2013',
# 'amount_11_2013',
# 'amount_12_2013',
# 'amount_01_2014',
# 'amount_02_2014',
# 'amount_03_2014',
# 'amount_04_2014',
# 'amount_05_2014',
# 'amount_06_2014',
# 'amount_07_2014',
# 'amount_08_2014',
# 'amount_09_2014',
# 'amount_10_2014',
# 'amount_11_2014',
# 'amount_12_2014',
# 'amount_01_2015',
# 'amount_02_2015',
# 'amount_03_2015',
# 'amount_04_2015',
# 'amount_05_2015',
# 'amount_06_2015',
# 'amount_07_2015',
# 'amount_08_2015',
# 'amount_09_2015',
# 'amount_10_2015',
# 'amount_11_2015',
# 'amount_12_2015',
# 'amount_01_2016',
# 'amount_02_2016',
# 'amount_03_2016',
# 'amount_04_2016',
# 'amount_05_2016',
# 'amount_06_2016',
# 'amount_07_2016',
# 'amount_08_2016',
# 'amount_09_2016',
# 'amount_10_2016',
# 'amount_11_2016',
# 'amount_12_2016',
# 'amount_01_2017',
# 'amount_02_2017',
# 'amount_03_2017',
# 'amount_04_2017',
# 'amount_05_2017',
# 'amount_06_2017',
# 'amount_07_2017',
# 'amount_08_2017',
# 'amount_09_2017',
# 'amount_10_2017',
# 'amount_11_2017',
# 'amount_12_2017',
# 'years_donated','perc_years_donated','donor_profile','behavior_2013','behavior_2014','behavior_2015','behavior_2016','behavior_2017','gift_pattern','gainloss_2014','gainloss_2015', 'gainloss_2016','gainloss_2017','gainloss_2014_amt','gainloss_2015_amt', 'gainloss_2016_amt', 'gainloss_2017_amt']].copy()

In [None]:
# donations_13to17_sample1.columns = ['id', 'first_gift_year', 'amount_total','amount_2013', 'amount_2014','amount_2015','amount_2016','amount_2017','amount_yr0',
# 'amount_yr1','amount_yr2','amount_yr3',
# 'amount_yr4', '01-01-2013',
# '02-01-2013',
# '03-01-2013',
# '04-01-2013',
# '05-01-2013',
# '06-01-2013',
# '07-01-2013',
# '08-01-2013',
# '09-01-2013',
# '10-01-2013',
# '11-01-2013',
# '12-01-2013',
# '01-01-2014',
# '02-01-2014',
# '03-01-2014',
# '04-01-2014',
# '05-01-2014',
# '06-01-2014',
# '07-01-2014',
# '08-01-2014',
# '09-01-2014',
# '10-01-2014',
# '11-01-2014',
# '12-01-2014',
# '01-01-2015',
# '02-01-2015',
# '03-01-2015',
# '04-01-2015',
# '05-01-2015',
# '06-01-2015',
# '07-01-2015',
# '08-01-2015',
# '09-01-2015',
# '10-01-2015',
# '11-01-2015',
# '12-01-2015',
# '01-01-2016',
# '02-01-2016',
# '03-01-2016',
# '04-01-2016',
# '05-01-2016',
# '06-01-2016',
# '07-01-2016',
# '08-01-2016',
# '09-01-2016',
# '10-01-2016',
# '11-01-2016',
# '12-01-2016',
# '01-01-2017',
# '02-01-2017',
# '03-01-2017',
# '04-01-2017',
# '05-01-2017',
# '06-01-2017',
# '07-01-2017',
# '08-01-2017',
# '09-01-2017',
# '10-01-2017',
# '11-01-2017',
# '12-01-2017','years_donated','perc_years_donated','donor_profile','behavior_2013','behavior_2014','behavior_2015','behavior_2016','behavior_2017','gift_pattern','gainloss_2014','gainloss_2015', 'gainloss_2016','gainloss_2017','gainloss_2014_amt','gainloss_2015_amt', 'gainloss_2016_amt', 'gainloss_2017_amt']

In [None]:
#donations_13to17_sample1.to_csv('unmelted_sample.csv')

In [None]:
# melted_sample=pd.melt(donations_13to17_sample1, id_vars =['id','first_year_gift','years_donated','perc_years_donated','donor_profile','behavior_2013','behavior_2014','behavior_2015','behavior_2016','behavior_2017','gift_pattern','gainloss_2014','gainloss_2015','gainloss_2016','gainloss_2017','gainloss_2014_amt','gainloss_2015_amt','gainloss_2016_amt','gainloss_2017_amt'], value_vars =['01-01-2013',
# '02-01-2013',
# '03-01-2013',
# '04-01-2013',
# '05-01-2013',
# '06-01-2013',
# '07-01-2013',
# '08-01-2013',
# '09-01-2013',
# '10-01-2013',
# '11-01-2013',
# '12-01-2013',
# '01-01-2014',
# '02-01-2014',
# '03-01-2014',
# '04-01-2014',
# '05-01-2014',
# '06-01-2014',
# '07-01-2014',
# '08-01-2014',
# '09-01-2014',
# '10-01-2014',
# '11-01-2014',
# '12-01-2014',
# '01-01-2015',
# '02-01-2015',
# '03-01-2015',
# '04-01-2015',
# '05-01-2015',
# '06-01-2015',
# '07-01-2015',
# '08-01-2015',
# '09-01-2015',
# '10-01-2015',
# '11-01-2015',
# '12-01-2015',
# '01-01-2016',
# '02-01-2016',
# '03-01-2016',
# '04-01-2016',
# '05-01-2016',
# '06-01-2016',
# '07-01-2016',
# '08-01-2016',
# '09-01-2016',
# '10-01-2016',
# '11-01-2016',
# '12-01-2016',
# '01-01-2017',
# '02-01-2017',
# '03-01-2017',
# '04-01-2017',
# '05-01-2017',
# '06-01-2017',
# '07-01-2017',
# '08-01-2017',
# '09-01-2017',
# '10-01-2017',
# '11-01-2017',
# '12-01-2017',
# ], var_name='date', value_name='amount') 

In [None]:
# melted_sample1=melted_sample.query('amount > 0')

In [None]:
# melted_sample1.to_csv('fft_sample.csv')