# Query by Donor Segment #

## Overview ##

This approach will create the following query contexts:

* `ctx_dseg`
* `ctx_dseg_memb`
* `ctx_donor`
* `ctx_indiv`
* `ctx_contrib`

## Notebook Setup ##

* Configure database connect information and options
* Clear potentially interfering context (PostgreSQL doesn't let you replace a view definition with conflicting column names)
* Set styling for notebook

In [1]:
# Connection URL for the %sql magic: PostgreSQL through the psycopg2 driver,
# user "crash", database "fecdb" on localhost
sqlconnect = "postgresql+psycopg2://crash@localhost/fecdb"

# Load the extension that provides the %sql / %%sql magics used in this notebook
%load_ext sql
# Have the SQL magic hand results back as pandas DataFrames
%config SqlMagic.autopandas=True
# Display the value of a cell's last expression or assignment
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'
# Connect using the URL defined above
%sql $sqlconnect

'Connected: crash@fecdb'

In [2]:
# Drop the context views so they can be recreated later in the notebook.
# They are listed from the most derived (ctx_contrib) back to the root (ctx_dseg);
# `if exists` makes a first run safe and `cascade` also drops dependent objects.
%sql drop view if exists ctx_contrib cascade
%sql drop view if exists ctx_indiv cascade
%sql drop view if exists ctx_donor cascade
%sql drop view if exists ctx_dseg_memb cascade
%sql drop view if exists ctx_dseg cascade

 * postgresql+psycopg2://crash@localhost/fecdb
Done.
 * postgresql+psycopg2://crash@localhost/fecdb
Done.
 * postgresql+psycopg2://crash@localhost/fecdb
Done.
 * postgresql+psycopg2://crash@localhost/fecdb
Done.
 * postgresql+psycopg2://crash@localhost/fecdb
Done.


In [3]:
%%html
<style>
  /* Left-align table rows, header cells and data cells; !important overrides
     any alignment set by other style rules on the page. */
  tr,
  th,
  td { text-align: left !important; }
</style>

## Create Donor Segment for Top 314 Donors ##

In [4]:
# Clear existing segment rows and drop derived objects before rebuilding.
# NOTE(review): the delete has no WHERE clause, so it removes every row of
# donor_seg, not only the segment built below. Presumably an intentional full
# reset; confirm before running against shared data.
%sql delete from donor_seg
%sql drop table if exists seg_def
%sql drop materialized view if exists donor_sum
# donor_sum_314 is defined on top of indiv_group, so it is dropped before indiv_group
%sql drop materialized view if exists donor_sum_314
%sql drop materialized view if exists indiv_group

 * postgresql+psycopg2://crash@localhost/fecdb
15 rows affected.
 * postgresql+psycopg2://crash@localhost/fecdb
Done.
 * postgresql+psycopg2://crash@localhost/fecdb
Done.
 * postgresql+psycopg2://crash@localhost/fecdb
Done.
 * postgresql+psycopg2://crash@localhost/fecdb
Done.


In [5]:
%%sql
/* indiv_group buckets rows of indiv_parsed by part1 (exposed as last_name),
   the first three characters of part2, and the first three characters of the
   zip code.  Each bucket stores the number of distinct ids it contains and the
   ids themselves.  Rows are considered only when the name matches the pattern
   below, the zip code is present, there is more than one name part, and part1
   contains no space. */
create materialized view indiv_group as
select
    ip.part1                  as last_name,
    substr(ip.part2, 1, 3)    as first_name_pfx,
    substr(ip.zip_code, 1, 3) as zip_pfx,
    count(distinct ip.id)     as indivs,
    array_agg(distinct ip.id) as indiv_ids
from indiv_parsed ip
where ip.zip_code is not null
  and ip.num_parts > 1
  and ip.name ~ '^[A-Z][^,]'
  and ip.part1 !~ ' '
group by
    ip.part1,
    substr(ip.part2, 1, 3),
    substr(ip.zip_code, 1, 3)

 * postgresql+psycopg2://crash@localhost/fecdb
6738578 rows affected.


Create a view of the `indiv_contrib` records associated with any committee whose name starts with "314". The filter can be amended if other name patterns turn out to represent the same PAC; currently no other committee has "314" elsewhere in its name.

Note that this view serves as a template for creating other segments of contributions&mdash;and hence of the donors (actually, just "individuals" for now) behind them&mdash;when doing a similar type of investigation.

In [6]:
%%sql
/* contrib_to_314 exposes every indiv_contrib row whose committee name starts
   with 314, with the committee name placed in front of the contribution
   columns. */
create or replace view contrib_to_314 as
select
    cm.cmte_nm,
    ic.*
from indiv_contrib ic
inner join cmte cm
    on cm.cmte_id = ic.cmte_id
where cm.cmte_nm like '314%'

 * postgresql+psycopg2://crash@localhost/fecdb
Done.


It is not clear whether it is better to re-aggregate the unnested ids (even though the `distinct` qualifier cannot be omitted) or to select `ig.indiv_ids` and add it to the GROUP BY clause&mdash;going with the former option for now.

In [7]:
%%sql
/* donor_sum_314 ranks indiv_group buckets by their contributions found in
   contrib_to_314 and keeps the top 50.  Each row carries the bucket key, the
   member ids that have matching contributions, the contribution count, the
   total and average amount, and the distinct election cycles involved. */
create materialized view donor_sum_314 as
with indiv_group_memb as (
    -- one row per bucket member, so members can be joined to contributions
    select ig.last_name,
           ig.first_name_pfx,
           ig.zip_pfx,
           unnest(ig.indiv_ids) as indiv_id
      from indiv_group ig
)
select igm.last_name,
       igm.first_name_pfx,
       igm.zip_pfx,
       array_agg(distinct igm.indiv_id)
                                 as indiv_ids,
       count(ct.transaction_amt) as contribs,
       sum(ct.transaction_amt)   as total_amt,
       round(sum(ct.transaction_amt) / count(ct.transaction_amt), 2)
                                 as avg_amt,
       array_agg(distinct ct.elect_cycle)
                                 as elect_cycles
  from indiv_group_memb igm
  join contrib_to_314 ct on ct.indiv_id = igm.indiv_id
 group by igm.last_name,
          igm.first_name_pfx,
          igm.zip_pfx
 -- nulls last keeps buckets whose total is null out of the top slots (desc
 -- sorts nulls first by default), and the bucket key breaks ties so the rows
 -- kept by the limit are the same on every refresh
 order by total_amt desc nulls last,
          contribs desc,
          last_name,
          first_name_pfx,
          zip_pfx
 limit 50

 * postgresql+psycopg2://crash@localhost/fecdb
50 rows affected.


In [8]:
%%sql
/* List the stored donor_sum_314 rows, largest total first and, within equal
   totals, most contributions first. */
select
    d314.last_name,
    d314.first_name_pfx,
    d314.zip_pfx,
    d314.indiv_ids,
    d314.contribs,
    d314.total_amt,
    d314.avg_amt,
    d314.elect_cycles
from donor_sum_314 d314
order by
    d314.total_amt desc,
    d314.contribs desc

 * postgresql+psycopg2://crash@localhost/fecdb
50 rows affected.


Unnamed: 0,last_name,first_name_pfx,zip_pfx,indiv_ids,contribs,total_amt,avg_amt,elect_cycles
0,STOREY,BAY,191,[11659801],21,240000.0,11428.57,[2016]
1,ROSZAK,MAT,600,[10325529],2,204562.0,102281.0,[2018]
2,PROCKOP,DAR,191,[9683572],2,200000.0,100000.0,[2018]
3,PARK,TOD,940,[9188602],2,200000.0,100000.0,[2018]
4,SHENKER,SCO,947,[10979072],7,105500.0,15071.43,"[2014, 2018]"
5,GIRARDI,THO,900,[4336953],4,100000.0,25000.0,[2018]
6,NASH,RIC,598,[8656333],2,100000.0,50000.0,[2018]
7,ABRAMSON,RON,200,[28429],4,50000.0,12500.0,[2018]
8,TAYLOR,DAL,606,[11902785],2,50000.0,25000.0,[2018]
9,LARSEN,CHR,941,[6822374],2,50000.0,25000.0,[2018]


In [9]:
%%sql
/* Wrap the indiv_ids of each donor_sum_314 row in an id_array, collect all of
   them into one array, and pass that array to create_donor_seg together with
   the segment name Top 314 Donors. */
select create_donor_seg(
           array_agg(row(d314.indiv_ids)::id_array),
           'Top 314 Donors'
       )
from donor_sum_314 d314

 * postgresql+psycopg2://crash@localhost/fecdb
1 rows affected.


Unnamed: 0,create_donor_seg
0,64


## Create Context Views ##

### Create `ctx_dseg` ###

In [10]:
%%sql
/* ctx_dseg selects the donor_seg row or rows named Top 314 Donors, exposing
   the id, name and description columns. */
create or replace view ctx_dseg as
select
    ds.id,
    ds.name,
    ds.description
from donor_seg ds
where ds.name = 'Top 314 Donors'

 * postgresql+psycopg2://crash@localhost/fecdb
Done.


In [11]:
%%sql
/* Show what ctx_dseg currently selects. */
select
    dsx.id,
    dsx.name,
    dsx.description
from ctx_dseg dsx

 * postgresql+psycopg2://crash@localhost/fecdb
1 rows affected.


Unnamed: 0,id,name,description
0,64,Top 314 Donors,


In [12]:
%%sql
/* List the indiv rows whose id appears as donor_indiv_id among the members of
   the segment selected by ctx_dseg, sorted by name and then zip code. */
select
    i.id,
    i.name,
    i.city,
    i.state,
    i.zip_code
from ctx_dseg dsx
inner join donor_seg_memb dsm
    on dsm.donor_seg_id = dsx.id
inner join indiv i
    on i.id = dsm.donor_indiv_id
order by
    i.name,
    i.zip_code

 * postgresql+psycopg2://crash@localhost/fecdb
50 rows affected.


Unnamed: 0,id,name,city,state,zip_code
0,28429,"ABRAMSON, RONALD",WASHINGTON,DC,200063807
1,678448,"BASSI, STEVE",CARLSBAD,CA,920081900
2,779629,"BEEUWKES, REINIER",CONCORD,MA,17425322
3,877907,"BERG, FRED",CUSHING,ME,45633307
4,1083690,"BLUE, ALLEN",VENICE,CA,902913830
5,1661575,"BYERS, BROOK",MENLO PARK,CA,940257020
6,2576925,"CUELLAR, CLIFFORD",TACOMA,WA,984053360
7,3678807,"FERSTER, DAVID",WILMETTE,IL,600911553
8,3850042,"FORDE, JAMES",TUSTIN,CA,927806320
9,3993057,"FRIEDMAN, DONNA",MOUNT PLEASANT,SC,294644305


### Create `ctx_dseg_memb` ###

In [13]:
%%sql
/* ctx_dseg_memb exposes the donor_seg_memb rows that belong to the segment
   selected by ctx_dseg. */
create or replace view ctx_dseg_memb as
select dsm.*
from donor_seg_memb dsm
inner join ctx_dseg dsx
    on dsx.id = dsm.donor_seg_id

 * postgresql+psycopg2://crash@localhost/fecdb
Done.


In [14]:
%%sql
/* For each membership row in ctx_dseg_memb, show the segment name next to the
   name, location and election cycles of the matching donor_indiv row. */
select
    ds.name as dseg_name,
    d.name  as indiv_name,
    d.city,
    d.state,
    d.zip_code,
    d.elect_cycles
from ctx_dseg_memb dsmx
inner join donor_seg ds
    on ds.id = dsmx.donor_seg_id
inner join donor_indiv d
    on d.id = dsmx.donor_indiv_id

 * postgresql+psycopg2://crash@localhost/fecdb
50 rows affected.


Unnamed: 0,dseg_name,indiv_name,city,state,zip_code,elect_cycles
0,Top 314 Donors,"STOREY, BAYARD T PH.D.",PHILADELPHIA,PA,191303202,"[2006, 2014, 2016]"
1,Top 314 Donors,"ROSZAK, MATTHEW",WINNETKA,IL,600933630,"[2018, 2020]"
2,Top 314 Donors,"PROCKOP, DARWIN J. MD, PHD",PHILADELPHIA,PA,191063913,"[2016, 2018]"
3,Top 314 Donors,"PARK, TODD",LOS ALTOS HILLS,CA,940223385,"[2018, 2020]"
4,Top 314 Donors,"SHENKER, SCOTT",BERKELEY,CA,947072052,"[2014, 2016, 2018, 2020]"
5,Top 314 Donors,"GIRARDI, THOMAS V.",LOS ANGELES,CA,900171904,"[2014, 2016, 2018, 2020]"
6,Top 314 Donors,"NASH, RICHARD",MISSOULA,MT,598045862,[2018]
7,Top 314 Donors,"ABRAMSON, RONALD",WASHINGTON,DC,200063807,"[2012, 2014, 2016, 2018, 2020]"
8,Top 314 Donors,"TAYLOR, DALE",CHICAGO,IL,606146085,"[2016, 2018, 2020]"
9,Top 314 Donors,"LARSEN, CHRIS",SAN FRANCISCO,CA,941091512,"[2010, 2016, 2018, 2020]"


### Create `ctx_donor` ###

In [15]:
%%sql
/* ctx_donor exposes the donor_indiv rows referenced by the memberships in
   ctx_dseg_memb. */
create or replace view ctx_donor as
select d.*
from donor_indiv d
inner join ctx_dseg_memb dsmx
    on dsmx.donor_indiv_id = d.id

 * postgresql+psycopg2://crash@localhost/fecdb
Done.


In [16]:
%%sql
/* Show identifying columns and election cycles for each row of ctx_donor. */
select
    dx.id,
    dx.name,
    dx.city,
    dx.state,
    dx.zip_code,
    dx.elect_cycles
from ctx_donor dx

 * postgresql+psycopg2://crash@localhost/fecdb
50 rows affected.


Unnamed: 0,id,name,city,state,zip_code,elect_cycles
0,11659801,"STOREY, BAYARD T PH.D.",PHILADELPHIA,PA,191303202,"[2006, 2014, 2016]"
1,10325529,"ROSZAK, MATTHEW",WINNETKA,IL,600933630,"[2018, 2020]"
2,9683572,"PROCKOP, DARWIN J. MD, PHD",PHILADELPHIA,PA,191063913,"[2016, 2018]"
3,9188602,"PARK, TODD",LOS ALTOS HILLS,CA,940223385,"[2018, 2020]"
4,10979072,"SHENKER, SCOTT",BERKELEY,CA,947072052,"[2014, 2016, 2018, 2020]"
5,4336953,"GIRARDI, THOMAS V.",LOS ANGELES,CA,900171904,"[2014, 2016, 2018, 2020]"
6,8656333,"NASH, RICHARD",MISSOULA,MT,598045862,[2018]
7,28429,"ABRAMSON, RONALD",WASHINGTON,DC,200063807,"[2012, 2014, 2016, 2018, 2020]"
8,11902785,"TAYLOR, DALE",CHICAGO,IL,606146085,"[2016, 2018, 2020]"
9,6822374,"LARSEN, CHRIS",SAN FRANCISCO,CA,941091512,"[2010, 2016, 2018, 2020]"


### Create `ctx_indiv` ###

In [17]:
%%sql
/* ctx_indiv exposes every indiv row whose donor_indiv_id points at a donor in
   ctx_donor. */
create or replace view ctx_indiv as
select i.*
from indiv i
inner join ctx_donor dx
    on dx.id = i.donor_indiv_id

 * postgresql+psycopg2://crash@localhost/fecdb
Done.


In [18]:
%%sql
/* Show the ctx_indiv rows together with the donor each one maps to, sorted by
   donor id and then name. */
select
    ix.id,
    ix.name,
    ix.city,
    ix.state,
    ix.zip_code,
    ix.elect_cycles,
    ix.donor_indiv_id
from ctx_indiv ix
order by
    ix.donor_indiv_id,
    ix.name

 * postgresql+psycopg2://crash@localhost/fecdb
54 rows affected.


Unnamed: 0,id,name,city,state,zip_code,elect_cycles,donor_indiv_id
0,28429,"ABRAMSON, RONALD",WASHINGTON,DC,200063807,"[2012, 2014, 2016, 2018, 2020]",28429
1,678448,"BASSI, STEVE",CARLSBAD,CA,920081900,[2018],678448
2,779629,"BEEUWKES, REINIER",CONCORD,MA,17425322,"[2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020]",779629
3,877907,"BERG, FRED",CUSHING,ME,45633307,"[2016, 2018]",877907
4,1083690,"BLUE, ALLEN",VENICE,CA,902913830,"[2016, 2018]",1083690
5,1661575,"BYERS, BROOK",MENLO PARK,CA,940257020,"[2014, 2016, 2018, 2020]",1661575
6,2576925,"CUELLAR, CLIFFORD",TACOMA,WA,984053360,"[2016, 2018, 2020]",2576925
7,3678807,"FERSTER, DAVID",WILMETTE,IL,600911553,"[2014, 2016, 2018, 2020]",3678807
8,3850042,"FORDE, JAMES",TUSTIN,CA,927806320,[2018],3850042
9,3993057,"FRIEDMAN, DONNA",MOUNT PLEASANT,SC,294644305,"[2014, 2016, 2018, 2020]",3993057


### Create `ctx_contrib` ###

In [19]:
%%sql
/* ctx_contrib exposes the indiv_contrib rows made by the individuals in
   ctx_indiv, with the donor_indiv_id of that individual appended as the last
   column. */
create or replace view ctx_contrib as
select
    ic.*,
    ix.donor_indiv_id
from indiv_contrib ic
inner join ctx_indiv ix
    on ix.id = ic.indiv_id

 * postgresql+psycopg2://crash@localhost/fecdb
Done.


In [20]:
%%sql
/* One-row summary of ctx_contrib with the row count, the summed amount and
   the distinct election cycles present. */
select
    count(*)                           as contribs,
    sum(cx.transaction_amt)            as total_amt,
    array_agg(distinct cx.elect_cycle) as elect_cycles
from ctx_contrib cx

 * postgresql+psycopg2://crash@localhost/fecdb
1 rows affected.


Unnamed: 0,contribs,total_amt,elect_cycles
0,4369,6663529.0,"[2002, 2004, 2006, 2008, 2010, 2012, 2014, 201..."


## Query Based on Context ##

### Query using `ctx_dseg` ###

In [21]:
%%sql
/* Per election cycle contribution statistics, reached from ctx_dseg by
   walking segment members, then their indiv rows, then their contributions. */
select
    ic.elect_cycle,
    count(*)                          as cycle_contribs,
    sum(ic.transaction_amt)           as cycle_amount,
    round(avg(ic.transaction_amt), 2) as avg_amount,
    min(ic.transaction_amt)           as min_amount,
    max(ic.transaction_amt)           as max_amount
from ctx_dseg dsx
inner join donor_seg_memb dsm
    on dsm.donor_seg_id = dsx.id
inner join indiv i
    on i.donor_indiv_id = dsm.donor_indiv_id
inner join indiv_contrib ic
    on ic.indiv_id = i.id
group by ic.elect_cycle
order by ic.elect_cycle

 * postgresql+psycopg2://crash@localhost/fecdb
10 rows affected.


Unnamed: 0,elect_cycle,cycle_contribs,cycle_amount,avg_amount,min_amount,max_amount
0,2002,5,3500.0,700.0,250.0,1000.0
1,2004,8,31000.0,3875.0,500.0,25000.0
2,2006,17,14200.0,835.29,200.0,2000.0
3,2008,22,13100.0,595.45,200.0,2300.0
4,2010,18,12250.0,680.56,250.0,2500.0
5,2012,19,21000.0,1105.26,250.0,10000.0
6,2014,322,635002.0,1972.06,-2600.0,100000.0
7,2016,1066,1493514.0,1401.05,-2700.0,73175.0
8,2018,2563,3892299.0,1518.65,-2744.0,102281.0
9,2020,329,547664.0,1664.63,-2800.0,50000.0


### Query using `ctx_dseg_memb` ###

In [22]:
%%sql
/* Per election cycle contribution statistics, reached from ctx_dseg_memb by
   walking to the indiv rows of each member and then their contributions. */
select
    ic.elect_cycle,
    count(*)                          as cycle_contribs,
    sum(ic.transaction_amt)           as cycle_amount,
    round(avg(ic.transaction_amt), 2) as avg_amount,
    min(ic.transaction_amt)           as min_amount,
    max(ic.transaction_amt)           as max_amount
from ctx_dseg_memb dsmx
inner join indiv i
    on i.donor_indiv_id = dsmx.donor_indiv_id
inner join indiv_contrib ic
    on ic.indiv_id = i.id
group by ic.elect_cycle
order by ic.elect_cycle

 * postgresql+psycopg2://crash@localhost/fecdb
10 rows affected.


Unnamed: 0,elect_cycle,cycle_contribs,cycle_amount,avg_amount,min_amount,max_amount
0,2002,5,3500.0,700.0,250.0,1000.0
1,2004,8,31000.0,3875.0,500.0,25000.0
2,2006,17,14200.0,835.29,200.0,2000.0
3,2008,22,13100.0,595.45,200.0,2300.0
4,2010,18,12250.0,680.56,250.0,2500.0
5,2012,19,21000.0,1105.26,250.0,10000.0
6,2014,322,635002.0,1972.06,-2600.0,100000.0
7,2016,1066,1493514.0,1401.05,-2700.0,73175.0
8,2018,2563,3892299.0,1518.65,-2744.0,102281.0
9,2020,329,547664.0,1664.63,-2800.0,50000.0


### Query using `ctx_indiv` ###

In [23]:
%%sql
/* Per election cycle contribution statistics, reached from ctx_indiv by
   joining each individual to its indiv_contrib rows. */
select
    ic.elect_cycle,
    count(*)                          as cycle_contribs,
    sum(ic.transaction_amt)           as cycle_amount,
    round(avg(ic.transaction_amt), 2) as avg_amount,
    min(ic.transaction_amt)           as min_amount,
    max(ic.transaction_amt)           as max_amount
from ctx_indiv ix
inner join indiv_contrib ic
    on ic.indiv_id = ix.id
group by ic.elect_cycle
order by ic.elect_cycle

 * postgresql+psycopg2://crash@localhost/fecdb
10 rows affected.


Unnamed: 0,elect_cycle,cycle_contribs,cycle_amount,avg_amount,min_amount,max_amount
0,2002,5,3500.0,700.0,250.0,1000.0
1,2004,8,31000.0,3875.0,500.0,25000.0
2,2006,17,14200.0,835.29,200.0,2000.0
3,2008,22,13100.0,595.45,200.0,2300.0
4,2010,18,12250.0,680.56,250.0,2500.0
5,2012,19,21000.0,1105.26,250.0,10000.0
6,2014,322,635002.0,1972.06,-2600.0,100000.0
7,2016,1066,1493514.0,1401.05,-2700.0,73175.0
8,2018,2563,3892299.0,1518.65,-2744.0,102281.0
9,2020,329,547664.0,1664.63,-2800.0,50000.0


### Query using `ctx_contrib` ###

In [24]:
%%sql
/* Per election cycle contribution statistics read directly from ctx_contrib,
   with no further joins. */
select
    cx.elect_cycle,
    count(*)                          as cycle_contribs,
    sum(cx.transaction_amt)           as cycle_amount,
    round(avg(cx.transaction_amt), 2) as avg_amount,
    min(cx.transaction_amt)           as min_amount,
    max(cx.transaction_amt)           as max_amount
from ctx_contrib cx
group by cx.elect_cycle
order by cx.elect_cycle

 * postgresql+psycopg2://crash@localhost/fecdb
10 rows affected.


Unnamed: 0,elect_cycle,cycle_contribs,cycle_amount,avg_amount,min_amount,max_amount
0,2002,5,3500.0,700.0,250.0,1000.0
1,2004,8,31000.0,3875.0,500.0,25000.0
2,2006,17,14200.0,835.29,200.0,2000.0
3,2008,22,13100.0,595.45,200.0,2300.0
4,2010,18,12250.0,680.56,250.0,2500.0
5,2012,19,21000.0,1105.26,250.0,10000.0
6,2014,322,635002.0,1972.06,-2600.0,100000.0
7,2016,1066,1493514.0,1401.05,-2700.0,73175.0
8,2018,2563,3892299.0,1518.65,-2744.0,102281.0
9,2020,329,547664.0,1664.63,-2800.0,50000.0


In [25]:
%%sql
/* Per donor contribution statistics over ctx_contrib, with each donor named
   through donor_indiv and the largest total listed first. */
select
    d.id                               as donor_id,
    d.name                             as donor_name,
    count(*)                           as contribs,
    sum(cx.transaction_amt)            as total_amount,
    round(avg(cx.transaction_amt), 2)  as avg_amount,
    min(cx.transaction_amt)            as min_amount,
    max(cx.transaction_amt)            as max_amount,
    array_agg(distinct cx.elect_cycle) as elect_cycles
from ctx_contrib cx
inner join donor_indiv d
    on d.id = cx.donor_indiv_id
group by
    d.id,
    d.name
order by total_amount desc

 * postgresql+psycopg2://crash@localhost/fecdb
50 rows affected.


Unnamed: 0,donor_id,donor_name,contribs,total_amount,avg_amount,min_amount,max_amount,elect_cycles
0,779629,"BEEUWKES, REINIER",433,888258.0,2051.4,-2700.0,100000.0,"[2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020]"
1,6169689,"KARPLUS, BARBARA",340,878285.0,2583.19,-2800.0,73175.0,"[2016, 2018, 2020]"
2,10979072,"SHENKER, SCOTT",241,849800.0,3526.14,-2700.0,50000.0,"[2014, 2016, 2018, 2020]"
3,28429,"ABRAMSON, RONALD",295,471583.0,1598.59,-1700.0,33400.0,"[2012, 2014, 2016, 2018, 2020]"
4,1083690,"BLUE, ALLEN",81,426400.0,5264.2,-2700.0,33900.0,"[2016, 2018]"
5,7863787,"MCEVOY, NION",558,383899.0,687.99,-2500.0,50000.0,"[2002, 2004, 2006, 2008, 2010, 2012, 2014, 201..."
6,11902785,"TAYLOR, DALE",48,258050.0,5376.04,250.0,66100.0,"[2016, 2018, 2020]"
7,4336953,"GIRARDI, THOMAS V.",24,235100.0,9795.83,-2800.0,100000.0,"[2014, 2016, 2018, 2020]"
8,6433238,"KIRK, CLAY",98,211850.0,2161.73,25.0,20000.0,"[2014, 2016, 2018, 2020]"
9,1661575,"BYERS, BROOK",60,194900.0,3248.33,-2700.0,25000.0,"[2014, 2016, 2018, 2020]"


In [26]:
%%sql
/* Per donor and election cycle contribution statistics over ctx_contrib,
   keeping the 50 donor and cycle pairs with the largest summed amount. */
select
    d.id                              as donor_id,
    d.name                            as donor_name,
    cx.elect_cycle,
    count(*)                          as cycle_contribs,
    sum(cx.transaction_amt)           as cycle_amount,
    round(avg(cx.transaction_amt), 2) as avg_amount,
    min(cx.transaction_amt)           as min_amount,
    max(cx.transaction_amt)           as max_amount
from ctx_contrib cx
inner join donor_indiv d
    on d.id = cx.donor_indiv_id
group by
    d.id,
    d.name,
    cx.elect_cycle
order by cycle_amount desc
limit 50

 * postgresql+psycopg2://crash@localhost/fecdb
50 rows affected.


Unnamed: 0,donor_id,donor_name,elect_cycle,cycle_contribs,cycle_amount,avg_amount,min_amount,max_amount
0,10979072,"SHENKER, SCOTT",2018,175,616700.0,3524.0,-2700.0,50000.0
1,6169689,"KARPLUS, BARBARA",2018,261,572685.0,2194.2,5.0,50000.0
2,779629,"BEEUWKES, REINIER",2018,206,493400.0,2395.15,-2700.0,100000.0
3,1083690,"BLUE, ALLEN",2016,62,288000.0,4645.16,500.0,33400.0
4,11902785,"TAYLOR, DALE",2018,38,227200.0,5978.95,1250.0,66100.0
5,28429,"ABRAMSON, RONALD",2018,176,203524.0,1156.39,25.0,33400.0
6,6169689,"KARPLUS, BARBARA",2016,35,189400.0,5411.43,25.0,73175.0
7,779629,"BEEUWKES, REINIER",2016,141,185958.0,1318.85,-2300.0,8100.0
8,7863787,"MCEVOY, NION",2018,325,175294.0,539.37,-2500.0,44300.0
9,9188602,"PARK, TODD",2018,22,142214.0,6464.27,-2700.0,100000.0
