# High level steps to process data Contributor data for enities that contribute to candidates in Minneapolis

1. Query big table data that contains contributor entity data to get Contributor Name , contributor address and other details  
2. Use the contributor name to make lookup to google maps api to get place id & place details. 
3. Combine contributor name along with the json results from place detail and load that into google cloud storage as Json. We will use that to create a table on top of this 

# Step1. Get all the required packages 

In [39]:
%%bash
pip install -q googlemaps
pip install -q prettyprint
pip install -q regex
#pip install -q pyarrow
#pip install -q fastparquet

In [40]:
# get google maps and api key packages 
import googlemaps
# Package for printing better printing 
import pprint 
# get packages for getting address search calls from RapidAPI 
import requests
import json
import time 
import regex as re
import string
# Packages for bigquery 
import google.datalab.bigquery as bq
import pandas as pd
# Packages for google cloud storage
import google.datalab.storage as storage
from google.datalab import Context

# Step2 Create functions that gives placeID and place details

In [41]:
# setup googlemap client
gmaps = googlemaps.Client(key='AIzaSyB8GwLmzNPD2jYkCnD8-hFZ8n2iZlDl9xE')

def getPlaceID(title):
  placeIdDict = gmaps.places(title)
  for place in placeIdDict["results"]:
    placeid=place['place_id']
    return(placeid)

In [42]:
# define fields that we want to be returned from place details 
my_fields = ['address_component','formatted_address','geometry','icon','photo','place_id','type','url','name','formatted_phone_number','price_level','rating','website','user_ratings_total','vicinity']

def getPlaceDtl(placeid):
  place_details = gmaps.place(place_id = placeid,fields = my_fields)
  return(place_details['result'])

# Step3 Create function that parses address component results from place details API to get various parts of address

In [43]:
# This code contains functions that returns various part of geoCoding values

# Place details API from google returns address components that is a list which has dictionary within for various parts of the adddress
# Here are different functions that takes out part of address components 
def getStreetNum(address_components):
  for i in address_components:
    if i['types'] == ['street_number']:
      return(i['long_name'])   

def getRoute(address_components):
  for i in address_components:
    if i['types'] == ['route']:
      return(i['long_name'])   

def getNeighborhood(address_components):
  for i in address_components:
    if i['types'] == ['neighborhood', 'political']:
      return(i['long_name'])       
    
def getCity(address_components):
  for i in address_components:
    if i['types'] == ['locality', 'political']:
      return(i['long_name'])

def getCounty(address_components):
  for i in address_components:
    if i['types'] == ['administrative_area_level_2', 'political']:
      return(i['long_name'])

def getState(address_components):
  for i in address_components:
    if i['types'] == ['administrative_area_level_1', 'political']:
      return(i['long_name'])
    
def getCountry(address_components):
  for i in address_components:
    if i['types'] == ['country', 'political']:
      return(i['long_name'])
    
def getPostalCode(address_components):
  for i in address_components:
    if i['types'] == ['postal_code']:
      return(i['long_name'])

def getPostalCodeSuffix(address_components):
  for i in address_components:
    if i['types'] == [postal_code_suffix]:
      return(i['long_name'])

# Step4 Query the data from bigquery and create datafram that will call the required function to create the final json record

In [44]:
%%bq query -n muni_smallbusiness_dtl
SELECT  trim(replace(replace(contributorName,",",""),".","")) FullName, trim(concat(contributorAddress," ",City, " " , State )) FullAddr, trim(contri.ContributorsEmployer) ContribEmployer, sum(contri.TotalFromSourceYeartoDate ) DonationPotentialAmt, count(distinct contri.CandidateName ) CandidatesSupportedCnt
FROM `campaignanalytics-182101.Munidata.MuniHenContriData` contri
join `campaignanalytics-182101.Munidata.MuniHenCandMst` cand on trim(contri.CandidateName) = trim(cand.Candidate_name)
where cand.Office = 'Council Member' 
and contri.City  = 'Minneapolis'
and ( ( upper(trim(contri.ContributorsEmployer)) <> 'RETIRED') AND (upper(trim(contri.ContributorsEmployer)) <>'N/A') AND contri.ContributorsEmployer is not null AND  trim(contri.ContributorsEmployer) not in ('Minneapolis Public Schools','State of Minnesota','City of Minneapolis'))
group by 1,2,3
order by 5 desc

In [45]:
# Create a dataframe 
df = muni_smallbusiness_dtl.execute(output_options=bq.QueryOutput.dataframe()).result()
df.head(10)

Unnamed: 0,FullName,FullAddr,ContribEmployer,DonationPotentialAmt,CandidatesSupportedCnt
0,Cramer Steve,4832 11th Ave S Minneapolis MN,Downtown Council,225.0,2
1,William Deef,"317 Groveland Ave, Unit 600 Minneapolis MN",Meet Minneapolis,150.0,2
2,Dorian Eder and Cyrus Kalbrener,1914 Taylor St NE Minneapolis MN,Land Stewardship Project,680.0,1
3,Duffenbach Christian,2319 Qunicy St NE Minneapolis MN,Physician's Diagnostics & Rehabilitation,112.48,1
4,Esades Vincent,3134 Benjamin St NE Minneapolis MN,Heins Mills & Olson PLC,235.87,1
5,Barry Clegg and Roberta Swanson,163 Island Ave E Minneapolis MN,"Gray Plant Mooty Mooty & Bennett, PA",450.0,1
6,Jim Harkness,4033 Queen Ave S #3 Minneapolis MN,Self-Employed Consultant,150.0,1
7,Sam and Sylvia Kaplan,510 River St Minneapolis MN,"Kaplan, Strangis & Kaplan / Retired",2300.0,1
8,Rognrud Troy,3510 Valley St NE Minneapolis MN,Do Your Thing Productions,190.0,1
9,Miller Daniel,1959 McKinley St NE Minneapolis MN,Science Museum of MN,150.0,1


In [50]:
# Loop through the table values to get additional details for a given contributor name 

ListAllRows = []

for ContribEmployer in df.ContribEmployer:
  try:
    contriplaceid = getPlaceID(ContribEmployer)
    contriplacedtl = getPlaceDtl(contriplaceid)
    formatted_phone_number = contriplacedtl['formatted_phone_number']
    formatted_full_address = contriplacedtl['formatted_address']
    website = contriplacedtl['website']
    name = contriplacedtl['name']
    lat = contriplacedtl['geometry']['location']['lat']
    lang = contriplacedtl['geometry']['location']['lng']
    housenum = getStreetNum(contriplacedtl['address_components'])
    route = getRoute(contriplacedtl['address_components'])
    address = housenum + ' ' + route
    neighborhood = getNeighborhood(contriplacedtl['address_components'])
    city = getCity(contriplacedtl['address_components'])
    county = getCounty(contriplacedtl['address_components'])
    state = getState(contriplacedtl['address_components'])
    country = getCountry(contriplacedtl['address_components'])
  except:
    pass
  contri_dict = {'EmployerName' : ContribEmployer, 'Name': name, 'Website': website, 'FormattedFullAddress':formatted_full_address, 'FormattedPhoneNumber': formatted_phone_number,
                 'Latitute': lat, 'Longitude': lang, 'HouseNumber': housenum, 'Route': route, 'Address': address, 'Neighborhood': neighborhood, 'City': city, 'County': county,
                'State': state, 'Country': country}
  ListAllRows.append(contri_dict)

In [51]:
print(len(ListAllRows))

337


In [52]:
# Construct a BigQuery client object.
datasetname = 'Munidata' 
tablename = 'MuniHenContriBusinessDtls' 

ListAllRowsDF = pd.DataFrame(ListAllRows)
tableschema = bq.Schema.from_data(ListAllRowsDF)

In [53]:
table = bq.Table('{}.MuniHenContriBusinessDtls'.format(datasetname))
table.insert(ListAllRowsDF)

EmployerName,Name,Website,FormattedFullAddress,FormattedPhoneNumber,Latitute,Longitude,HouseNumber,Route,Address,City,State,Country,Neighborhood,County
BluefieldConsulting LLC,"Bluefield Process Safety, LLC",https://bluefieldsafety.com/,"3101 Cherokee St #2851, St. Louis, MO 63118, USA",(314) 420-9350,38.5945794,-90.2347452,3101,Cherokee Street,3101 Cherokee Street,St. Louis,Missouri,United States,Benton Park West,
Bluefield Consulting LLC,"Bluefield Process Safety, LLC",https://bluefieldsafety.com/,"3101 Cherokee St #2851, St. Louis, MO 63118, USA",(314) 420-9350,38.5945794,-90.2347452,3101,Cherokee Street,3101 Cherokee Street,St. Louis,Missouri,United States,Benton Park West,
"self-employed, artist","Bluefield Process Safety, LLC",https://bluefieldsafety.com/,"3101 Cherokee St #2851, St. Louis, MO 63118, USA",(314) 420-9350,38.5945794,-90.2347452,3101,Cherokee Street,3101 Cherokee Street,St. Louis,Missouri,United States,Benton Park West,
Leidos,Leidos,https://www.leidos.com/,"2337 S Grand Blvd, St. Louis, MO 63104, USA",(314) 865-1769,38.6084848,-90.2414826,2337,South Grand Boulevard,2337 South Grand Boulevard,St. Louis,Missouri,United States,Shaw,
Thrivent,Thrivent Financial-Lutherans,https://www.thrivent.com/,"1 Skinner Ln, Ponca City, OK 74604, USA",(580) 765-5190,36.7126075,-96.9364359,1,Skinner Lane,1 Skinner Lane,Ponca City,Oklahoma,United States,,Kay County
Redmond Associates Inc.,Thrivent Financial-Lutherans,https://www.thrivent.com/,"8014 Redwood Ct, Fox Lake, IL 60020, USA",(847) 973-9273,36.7126075,-96.9364359,1,Skinner Lane,1 Skinner Lane,Ponca City,Oklahoma,United States,,Kay County
Wellington Management,Wellington Management Co L L P,https://www.wellington.com/en/,"222 W Adams St # 2100, Chicago, IL 60606, USA",(312) 726-3764,41.8795708,-87.63477979999999,222,West Adams Street,222 West Adams Street,Chicago,Illinois,United States,Chicago Loop,Cook County
Leamington Co.,Jade Sterling Steel,http://www.jadesterling.com/,"7201 S Leamington Ave, Bedford Park, IL 60638, USA",(708) 496-3975,41.7607192,-87.74988549999999,7201,South Leamington Avenue,7201 South Leamington Avenue,Bedford Park,Illinois,United States,,Cook County
614 Group,"Driggs Design Group, PA",http://www.driggsdesign.com/,"614 Merchant St, Emporia, KS 66801, USA",(785) 320-2136,38.40558540000001,-96.181291,614,Merchant Street,614 Merchant Street,Emporia,Kansas,United States,,Lyon County
Homemaker,Homemakers Furniture,https://www.homemakers.com/,"10215 Douglas Ave, Urbandale, IA 50322, USA",(515) 276-2772,41.630659,-93.759358,10215,Douglas Avenue,10215 Douglas Avenue,Urbandale,Iowa,United States,,Polk County
