# Setting the Stage

In [2]:
import sqlite3
import pandas as pd

# pandas settings
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)

# create candidates table
db = sqlite3.connect('L20DB.sqlite')
cursor = db.cursor()
cursor.execute("DROP TABLE IF EXISTS candidates")
cursor.execute("DROP TABLE IF EXISTS contributors")
cursor.execute("PRAGMA foreign_keys=1")

cursor.execute('''CREATE TABLE candidates (
               id INTEGER PRIMARY KEY NOT NULL, 
               first_name TEXT, 
               last_name TEXT, 
               middle_init TEXT, 
               party TEXT NOT NULL)''')

db.commit() # Commit changes to the database

# create contributors table
cursor.execute('''CREATE TABLE contributors (
          id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 
          last_name TEXT, 
          first_name TEXT, 
          middle_name TEXT, 
          street_1 TEXT, 
          street_2 TEXT, 
          city TEXT, 
          state TEXT, 
          zip TEXT, 
          amount REAL, 
          date DATETIME, 
          candidate_id INTEGER NOT NULL, 
          FOREIGN KEY(candidate_id) REFERENCES candidates(id))''')

db.commit()

# Step 1

In [3]:
# fill in candidates table
with open ("candidates.txt") as candidates:
    next(candidates) # jump over the header
    for line in candidates.readlines():
        cid, first_name, last_name, middle_name, party = line.strip().split('|')
        vals_to_insert = (int(cid), first_name, last_name, middle_name, party)
        cursor.execute('''INSERT INTO candidates 
                  (id, first_name, last_name, middle_init, party)
                  VALUES (?, ?, ?, ?, ?)''', vals_to_insert)
        
# fill in contributors table
with open ("contributors.txt") as contributors:
    next(contributors)
    for line in contributors.readlines():
        cid, last_name, first_name, middle_name, street_1, street_2, \
            city, state, zip_code, amount, date, candidate_id = line.strip().split('|')
        vals_to_insert = (last_name, first_name, middle_name, street_1, street_2, 
                          city, state, int(zip_code), amount, date, candidate_id)
        cursor.execute('''INSERT INTO contributors (last_name, first_name, middle_name, 
                           street_1, street_2, city, state, zip, amount, date, candidate_id) 
                           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', vals_to_insert)

# Interlude

In [4]:
# table viz
def viz_tables(cols, query):
    q = cursor.execute(query).fetchall()
    framelist = dict()
    for i, col_name in enumerate(cols):
        framelist[col_name] = [col[i] for col in q]
    return pd.DataFrame.from_dict(framelist)

candidate_cols = [col[1] for col in cursor.execute("PRAGMA table_info(candidates)")]
query = '''SELECT * FROM candidates'''
viz_tables(candidate_cols, query)

Unnamed: 0,id,first_name,last_name,middle_init,party
0,16,Mike,Huckabee,,R
1,20,Barack,Obama,,D
2,22,Rudolph,Giuliani,,R
3,24,Mike,Gravel,,D
4,26,John,Edwards,,D
5,29,Bill,Richardson,,D
6,30,Duncan,Hunter,,R
7,31,Dennis,Kucinich,,D
8,32,Ron,Paul,,R
9,33,Joseph,Biden,,D


# Step 2: Various Queries

In [26]:
# just show candidates with middle names
query = '''SELECT * FROM candidates WHERE middle_init <> ""'''
display(viz_tables(candidate_cols, query))

# count them up
print("{} candidates have a middle initial.".format(viz_tables(candidate_cols, query).shape[0]))

# contributors from PA
contributor_cols = [col[1] for col in cursor.execute("PRAGMA table_info(contributors)")]
query = '''SELECT * FROM contributors WHERE state='PA' ''' 
display(viz_tables(contributor_cols, query))
print("{} contributors from PA.".format(viz_tables(contributor_cols, query).shape[0]))

# contributors giving more than $1000
query = '''SELECT * FROM contributors WHERE amount>1000 ''' 
display(viz_tables(contributor_cols, query))
print("{} contributors giving more than $1000.".format(
    viz_tables(contributor_cols, query).shape[0]))

# contributors from UT giving more than $1000.00
query = '''SELECT * FROM contributors WHERE state='UT' AND amount>1000 ''' 
display(viz_tables(contributor_cols, query))
print("{} contributors from UT giving more than $1000.".format(
    viz_tables(contributor_cols, query).shape[0]))

# contributors who didn't list their state
query = '''SELECT * FROM contributors WHERE state='' ''' 
display(viz_tables(contributor_cols, query))
print("{} contributors that didn't list a state".format(
    viz_tables(contributor_cols, query).shape[0]))

# contributors from WA and PA
query = '''SELECT * FROM contributors WHERE state IN ('WA','PA') ''' 
display(viz_tables(contributor_cols, query))
print("{} contributors from WA or PA".format(
    viz_tables(contributor_cols, query).shape[0]))

# ontributors who gave between $100.00 and $200.00
query = '''SELECT * FROM contributors WHERE amount BETWEEN 100 AND 200 ''' 
display(viz_tables(contributor_cols, query))
print("{} contributors that gave between $100 and $200, inclusive.".format(
    viz_tables(contributor_cols, query).shape[0]))

Unnamed: 0,id,first_name,last_name,middle_init,party
0,34,Hillary,Clinton,R.,D
1,39,Christopher,Dodd,J.,D
2,41,Fred,Thompson,D.,R


3 candidates have a middle initial.


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,71,BUCKLEY,WALTER,W.,1635 COUNTRY ROAD,,BETHLEHEM,PA,180155718,-100.0,2008-03-05,22
1,72,BUCKLEY,MARJORIE,B.,1635 COUNTRY ROAD,,BETHLEHEM,PA,180155718,-100.0,2008-03-05,22
2,94,Raught,Philip,M,4714 Plum Way,,Pittsburgh,PA,15201,-1046.0,2008-04-21,32
3,95,Ferrara,Judith,D,1508 Waterford Road,,Yardley,PA,19067,-1100.0,2008-04-21,32
4,166,ABEL,JOHN,H.,422 THOMAS STREET,,BETHLEHEM,PA,180153316,200.0,2008-01-22,37


5 contributors from PA.


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,6,Akin,Mike,,181 Baywood Lane,,Monticello,AR,71655,1500.0,2007-05-18,16
1,10,Allen,John D.,,1052 Cannon Mill Drive,,North Augusta,SC,29860,1300.0,2007-06-29,16
2,14,Altes,R.D.,,8600 Moody Road,,Fort Smith,AR,72903,2300.0,2007-06-21,16
3,16,Anthony,John,,211 Long Island Drive,,Hot Springs,AR,71913,2300.0,2007-06-12,16
4,22,Baker,David,,2550 Adamsbrooke Drive,,Conway,AR,72034,2300.0,2007-04-11,16
5,29,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,2300.0,2007-08-14,20
6,31,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,4600.0,2007-08-14,20
7,34,Buck,Blaine,M,45 Eaton Ave,,Camden,ME,48431752,2300.0,2007-09-30,20
8,46,Buchanan,John,,2025 NW 29th Rd,,Boca Raton,FL,334316303,1300.0,2007-08-09,20
9,136,ABRAMOWITZ,NIRA,,411 HARBOR ROAD,,SOUTHPORT,CT,68901376,2300.0,2007-09-14,35


12 contributors giving more than $1000.


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,29,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,2300.0,2007-08-14,20
1,31,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,4600.0,2007-08-14,20


2 contributors from UT giving more than $1000.


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,126,BOURNE,TRAVIS,,LAGE KAART 77,,BRASSCHATT,,2930,-500.0,2008-11-20,35


1 contributors that didn't list a state


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,63,BURKE,SUZANNE,M.,3401 EVANSTON,,SEATTLE,WA,981038677,-700.0,2008-03-05,22
1,71,BUCKLEY,WALTER,W.,1635 COUNTRY ROAD,,BETHLEHEM,PA,180155718,-100.0,2008-03-05,22
2,72,BUCKLEY,MARJORIE,B.,1635 COUNTRY ROAD,,BETHLEHEM,PA,180155718,-100.0,2008-03-05,22
3,94,Raught,Philip,M,4714 Plum Way,,Pittsburgh,PA,15201,-1046.0,2008-04-21,32
4,95,Ferrara,Judith,D,1508 Waterford Road,,Yardley,PA,19067,-1100.0,2008-04-21,32
5,101,Aaronson,Rebecca,,2000 Village Green Dr Apt 12,,Mill Creek,WA,980125787,100.0,2008-02-08,34
6,107,Aaronson,Rebecca,,2000 Village Green Dr Apt 12,,Mill Creek,WA,980125787,100.0,2008-02-14,34
7,166,ABEL,JOHN,H.,422 THOMAS STREET,,BETHLEHEM,PA,180153316,200.0,2008-01-22,37


8 contributors from WA or PA


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,4,Ahrens,Don,,4034 Rennellwood Way,,Pleasanton,CA,94566,100.0,2007-06-21,16
1,5,Akin,Charles,,10187 Sugar Creek Road,,Bentonville,AR,72712,100.0,2007-06-16,16
2,13,Allison,Rebecca,,3206 Summit Court,,Little Rock,AR,72227,200.0,2007-06-12,16
3,18,Arbogast,Robert,,12900 State Route 56 SE,,Mount Sterling,OH,43143,100.0,2007-06-22,16
4,28,Buckheit,Bruce,,8904 KAREN DR,,FAIRFAX,VA,220312731,100.0,2007-09-19,20
5,32,Buck,Thomas,,4206 Terrace Street,,Kansas City,MO,64111,100.0,2007-09-25,20
6,33,Buck,Jay,K.,1855 Old Willow Rd Unit 322,,Northfield,IL,600932918,200.0,2007-09-12,20
7,38,Bucher,Ida,M,1400 Warnall Ave,,Los Angeles,CA,900245333,100.0,2007-07-10,20
8,47,Buchanan,John,,2025 NW 29th Rd,,Boca Raton,FL,334316303,200.0,2007-08-14,20
9,101,Aaronson,Rebecca,,2000 Village Green Dr Apt 12,,Mill Creek,WA,980125787,100.0,2008-02-08,34


36 contributors that gave between $100 and $200.


# Step 3: Sorting

In [43]:
# Sort candidates by last_name (A-Z)
print('Candidates by last name A-Z')
query = '''SELECT * FROM candidates ORDER BY last_name '''
display(viz_tables(candidate_cols, query))

# contributed amount in decending order where amount is restricted between $1000.00 and $5000.00
print('Contributed between $1000 and $5000, by descending amount')
query = '''SELECT * FROM contributors WHERE amount BETWEEN 1000 AND 5000 ORDER BY amount DESC '''
display(viz_tables(contributor_cols, query))

# contributors who donted between $1000.00 and $5000.00, sorted by candidate_id and then by 
# amount in descending order
print('Contributed between $1000 and $5000, sorted by candidate id, then by descending amount')
query = '''SELECT * FROM contributors WHERE amount BETWEEN 1000 AND 5000 
        ORDER BY candidate_id ASC, amount DESC '''
display(viz_tables(contributor_cols, query))

Candidates by last name A-Z


Unnamed: 0,id,first_name,last_name,middle_init,party
0,33,Joseph,Biden,,D
1,36,Samuel,Brownback,,R
2,34,Hillary,Clinton,R.,D
3,39,Christopher,Dodd,J.,D
4,26,John,Edwards,,D
5,22,Rudolph,Giuliani,,R
6,24,Mike,Gravel,,D
7,16,Mike,Huckabee,,R
8,30,Duncan,Hunter,,R
9,31,Dennis,Kucinich,,D


Contributed between $1000 and $5000, by descending amount


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,31,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,4600.0,2007-08-14,20
1,160,ABATE,MARIA,ELENA,1291 NIGHTINGALE AVENUE,,MIAMI SPRINGS,FL,331663832,2600.0,2008-01-25,37
2,14,Altes,R.D.,,8600 Moody Road,,Fort Smith,AR,72903,2300.0,2007-06-21,16
3,16,Anthony,John,,211 Long Island Drive,,Hot Springs,AR,71913,2300.0,2007-06-12,16
4,22,Baker,David,,2550 Adamsbrooke Drive,,Conway,AR,72034,2300.0,2007-04-11,16
5,29,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,2300.0,2007-08-14,20
6,34,Buck,Blaine,M,45 Eaton Ave,,Camden,ME,48431752,2300.0,2007-09-30,20
7,136,ABRAMOWITZ,NIRA,,411 HARBOR ROAD,,SOUTHPORT,CT,68901376,2300.0,2007-09-14,35
8,6,Akin,Mike,,181 Baywood Lane,,Monticello,AR,71655,1500.0,2007-05-18,16
9,10,Allen,John D.,,1052 Cannon Mill Drive,,North Augusta,SC,29860,1300.0,2007-06-29,16


Contributed between $1000 and $5000, sorted by candidate id, then by descending amount


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,14,Altes,R.D.,,8600 Moody Road,,Fort Smith,AR,72903,2300.0,2007-06-21,16
1,16,Anthony,John,,211 Long Island Drive,,Hot Springs,AR,71913,2300.0,2007-06-12,16
2,22,Baker,David,,2550 Adamsbrooke Drive,,Conway,AR,72034,2300.0,2007-04-11,16
3,6,Akin,Mike,,181 Baywood Lane,,Monticello,AR,71655,1500.0,2007-05-18,16
4,10,Allen,John D.,,1052 Cannon Mill Drive,,North Augusta,SC,29860,1300.0,2007-06-29,16
5,9,Allen,John D.,,1052 Cannon Mill Drive,,North Augusta,SC,29860,1000.0,2007-06-11,16
6,11,Allison,John W.,,P.O. Box 1089,,Conway,AR,72033,1000.0,2007-05-18,16
7,12,Allison,Rebecca,,3206 Summit Court,,Little Rock,AR,72227,1000.0,2007-04-25,16
8,20,Atiq,Omar,,7200 S Hazel Street,,Pine Bluff,AR,71603,1000.0,2007-05-18,16
9,21,Atiq,Omar,,7200 S Hazel Street,,Pine Bluff,AR,71603,1000.0,2007-06-27,16


# Step 4: Selecting Columns

In [56]:
query = '''SELECT DISTINCT last_name, first_name FROM contributors'''
viz_tables(['last_name', 'first_name'], query)

Unnamed: 0,last_name,first_name
0,Agee,Steven
1,Ahrens,Don
2,Akin,Charles
3,Akin,Mike
4,Akin,Rebecca
5,Aldridge,Brittni
6,Allen,John D.
7,Allison,John W.
8,Allison,Rebecca
9,Altes,R.D.


# Step 5: Altering Tables

In [93]:
# add and fill in full_name column to candidates table
candidate_cols = [col[1] for col in cursor.execute("PRAGMA table_info(candidates)")] # regenerate columns with full_name
query = '''SELECT id, last_name, first_name FROM candidates''' # Select a few columns
full_name_and_id = [(attr[1] + ", " + attr[2], attr[0]) for attr in cursor.execute(query).fetchall()] # List of tuples: (full_name, id)

update = '''UPDATE candidates SET full_name = ? WHERE id = ?''' # Update the table
for rows in full_name_and_id:
    cursor.execute(update, rows)

query = '''SELECT * FROM candidates'''
display(viz_tables(candidate_cols, query))

# update full_name column to show eventual winner, loser
print('outcome')
update = '''UPDATE candidates SET full_name = "Eventual Winner" WHERE last_name = "Obama"'''
cursor.execute(update)
update = '''UPDATE candidates SET full_name = "Eventual Loser" WHERE last_name = "Romney"'''
cursor.execute(update)
display(viz_tables(candidate_cols, query))

# add and fill in full_name column to contributors table
print('Contributors')
contributor_cols = [col[1] for col in cursor.execute("PRAGMA table_info(contributors)")] # regenerate columns with full_name
query = '''SELECT id, last_name, first_name FROM contributors''' # Select a few columns
full_name_and_id = [(attr[1] + ", " + attr[2], attr[0]) for attr in cursor.execute(query).fetchall()] # List of tuples: (full_name, id)

update = '''UPDATE contributors SET full_name = ? WHERE id = NULL''' # Update the table
for rows in full_name_and_id:
    cursor.execute(update, rows)

query = '''SELECT * FROM contributors'''
display(viz_tables(contributor_cols, query))

# update full_name column to show contributors giving more than $1000
update = '''UPDATE contributors SET full_name = "Too Much" 
WHERE amount > 1000'''
cursor.execute(update)
display(viz_tables(contributor_cols, query))

Unnamed: 0,id,first_name,last_name,middle_init,party,full_name
0,16,Mike,Huckabee,,R,"Huckabee, Mike"
1,20,Barack,Obama,,D,"Obama, Barack"
2,22,Rudolph,Giuliani,,R,"Giuliani, Rudolph"
3,24,Mike,Gravel,,D,"Gravel, Mike"
4,26,John,Edwards,,D,"Edwards, John"
5,29,Bill,Richardson,,D,"Richardson, Bill"
6,30,Duncan,Hunter,,R,"Hunter, Duncan"
7,31,Dennis,Kucinich,,D,"Kucinich, Dennis"
8,32,Ron,Paul,,R,"Paul, Ron"
9,33,Joseph,Biden,,D,"Biden, Joseph"


outcome


Unnamed: 0,id,first_name,last_name,middle_init,party,full_name
0,16,Mike,Huckabee,,R,"Huckabee, Mike"
1,20,Barack,Obama,,D,Eventual Winner
2,22,Rudolph,Giuliani,,R,"Giuliani, Rudolph"
3,24,Mike,Gravel,,D,"Gravel, Mike"
4,26,John,Edwards,,D,"Edwards, John"
5,29,Bill,Richardson,,D,"Richardson, Bill"
6,30,Duncan,Hunter,,R,"Hunter, Duncan"
7,31,Dennis,Kucinich,,D,"Kucinich, Dennis"
8,32,Ron,Paul,,R,"Paul, Ron"
9,33,Joseph,Biden,,D,"Biden, Joseph"


Contributors


OperationalError: no such column: full_name

# Step 6: Aggregation

In [137]:
# number of contributions above $0
function = '''SELECT *, amount FROM contributors WHERE amount > 1000'''
viz_tables(contributor_cols, function)
print('Number of contributions above $1000:', viz_tables(contributor_cols, function).shape[0])

# average donation
function = '''SELECT *, AVG(amount) FROM contributors WHERE amount > 0'''
print('Averge donation above $0:', viz_tables(contributor_cols, function)['amount'][0])

# average contribution from each state

function = "SELECT state,SUM(amount) FROM contributors GROUP BY state"
viz_tables(contributor_cols, function)

Number of contributions above $1000: 12
Averge donation above $0: 1300.0


IndexError: tuple index out of range