# High-level steps to process contributor data for entities that contribute to candidates in Minneapolis

1. Query the BigQuery table that contains contributor entity data to get the contributor name, contributor address, and other details.
2. Use the contributor name to look up the place ID and place details through the Google Maps API.
3. Combine the contributor name with the JSON results from the place details and load that into Google Cloud Storage as JSON. We will use that to create a table on top of it.

# Step1. Get all the required packages 

In [1]:
%%bash
# Install the third-party packages used by this notebook (-q keeps pip output quiet).
pip install -q googlemaps
# NOTE(review): the notebook imports the standard-library pprint module; confirm whether the prettyprint package is actually needed.
pip install -q prettyprint
pip install -q regex
#pip install -q pyarrow
#pip install -q fastparquet

In [2]:
# get google maps and api key packages 
import googlemaps
# Package for printing better printing 
import pprint 
# get packages for getting address search calls from RapidAPI 
import requests
import json
import time 
import regex as re
import string
# Packages for bigquery 
import google.datalab.bigquery as bq
import pandas as pd
# Packages for google cloud storage
import google.datalab.storage as storage
from google.datalab import Context

# Step2 Create functions that return the place ID and place details

In [3]:
# Set up the Google Maps client.
# SECURITY: the API key is read from the GOOGLE_MAPS_API_KEY environment variable
# instead of being embedded in the notebook, so the credential is never committed
# with the source. Any key that was ever stored in this file should be rotated.
# A missing variable raises KeyError here, before any API call is attempted.
import os

gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])

def getPlaceID(title):
  """Search Google Places for `title` and return the first result's place_id.

  Returns None when the search response contains no results.
  """
  search_response = gmaps.places(title)
  matches = search_response["results"]
  # Only the first match is used; the remaining results are ignored.
  return next((match['place_id'] for match in matches), None)

In [4]:
# Fields requested from the Place Details call made in getPlaceDtl().
my_fields = [
  'address_component',
  'formatted_address',
  'geometry',
  'icon',
  'photo',
  'place_id',
  'type',
  'url',
  'name',
  'formatted_phone_number',
  'price_level',
  'rating',
  'website',
  'user_ratings_total',
  'vicinity',
]

def getPlaceDtl(placeid):
  """Fetch place details for `placeid`, limited to the fields in `my_fields`.

  Returns the value stored under the 'result' key of the API response.
  """
  response = gmaps.place(place_id=placeid, fields=my_fields)
  details = response['result']
  return details

# Step3 Create functions that parse the address components field to get the various components of the address

In [5]:
# This cell contains functions that return various parts of the geocoding values.

# The Place Details API from Google returns the address components as a list of dictionaries, one for each part of the address.
# Each function below extracts one part from the address components.
def getStreetNum(address_components):
  """Return the long_name of the street-number component, or None if absent.

  A component matches only when its 'types' list is exactly ['street_number'].
  """
  wanted = ['street_number']
  hits = (part['long_name'] for part in address_components if part['types'] == wanted)
  return next(hits, None)

def getRoute(address_components):
  """Return the long_name of the route (street) component, or None if absent.

  A component matches only when its 'types' list is exactly ['route'].
  """
  wanted = ['route']
  hits = (part['long_name'] for part in address_components if part['types'] == wanted)
  return next(hits, None)

def getNeighborhood(address_components):
  """Return the long_name of the neighborhood component, or None if absent.

  A component matches only when its 'types' list is exactly
  ['neighborhood', 'political'].
  """
  wanted = ['neighborhood', 'political']
  hits = (part['long_name'] for part in address_components if part['types'] == wanted)
  return next(hits, None)
    
def getCity(address_components):
  """Return the long_name of the city (locality) component, or None if absent.

  A component matches only when its 'types' list is exactly
  ['locality', 'political'].
  """
  wanted = ['locality', 'political']
  hits = (part['long_name'] for part in address_components if part['types'] == wanted)
  return next(hits, None)

def getCounty(address_components):
  """Return the long_name of the county component, or None if absent.

  A component matches only when its 'types' list is exactly
  ['administrative_area_level_2', 'political'].
  """
  wanted = ['administrative_area_level_2', 'political']
  hits = (part['long_name'] for part in address_components if part['types'] == wanted)
  return next(hits, None)

def getState(address_components):
  """Return the long_name of the state component, or None if absent.

  A component matches only when its 'types' list is exactly
  ['administrative_area_level_1', 'political'].
  """
  wanted = ['administrative_area_level_1', 'political']
  hits = (part['long_name'] for part in address_components if part['types'] == wanted)
  return next(hits, None)
    
def getCountry(address_components):
  """Return the long_name of the country component, or None if absent.

  A component matches only when its 'types' list is exactly
  ['country', 'political'].
  """
  wanted = ['country', 'political']
  hits = (part['long_name'] for part in address_components if part['types'] == wanted)
  return next(hits, None)
    
def getPostalCode(address_components):
  """Return the long_name of the postal-code component, or None if absent.

  A component matches only when its 'types' list is exactly ['postal_code'].
  """
  wanted = ['postal_code']
  hits = (part['long_name'] for part in address_components if part['types'] == wanted)
  return next(hits, None)

def getPostalCodeSuffix(address_components):
  """Return the long_name of the postal-code-suffix component, or None if absent.

  A component matches only when its 'types' list is exactly
  ['postal_code_suffix'].
  """
  # The type name must be a string literal; as a bare name it raised NameError
  # on the first component examined.
  for i in address_components:
    if i['types'] == ['postal_code_suffix']:
      return(i['long_name'])

# Step3 Create a function that loads the final JSON file to Google Cloud Storage

In [6]:
# Step4: settings and function for writing data into Google Cloud Storage.

# Project ID taken from the default Datalab context.
project = Context.default().project_id
# Name of the bucket that loaddata() writes to.
bucket_name = 'birchoo-munihen-contributor-entity-dtls'
# bucket_folder = 'AptDtlGoogle/'

def loaddata(filename, obj):
  """Write `obj` to the object named `<filename>.json` in the `bucket_name` bucket."""
  target = storage.Bucket(bucket_name).object(filename + '.json')
  # NOTE(review): 'json' is passed as the content type; confirm whether a full
  # MIME type such as 'application/json' is what the storage API expects.
  target.write_stream(obj, 'json')


# Step4 Query the data from BigQuery and create a DataFrame that is passed to the functions above to build the final JSON records

In [22]:
%%bq query -n muni_entity_dtl
-- Named query (muni_entity_dtl): contributor association rows that the next cell executes into a DataFrame.
SELECT 
AssociationName, AssociationFullAddr, DonationTotal, PreviousYrDonationMax, PreviousYrDonationMax_1, CountOfCandidate
FROM `campaignanalytics-182101.Munidata.MuniHenAssocMst` 
-- Row limit left commented out; presumably used to keep test runs small.
--limit 5

In [23]:
# Execute the named query muni_entity_dtl and fetch its result as a pandas DataFrame.
df = muni_entity_dtl.execute(output_options=bq.QueryOutput.dataframe()).result()
# Preview the first 10 rows.
df.head(10)

Unnamed: 0,AssociationName,AssociationFullAddr,DonationTotal,PreviousYrDonationMax,PreviousYrDonationMax_1,CountOfCandidate
0,Minneapolis Downtown Council Political Action Co,,0.0,$700.00,,1
1,BEW Local 292,,200.0,,,1
2,Association,,250.0,,,1
3,Dominium Politcal Fund,2905 Northwest Blvd Ste 150 Plymouth MN,250.0,,,1
4,Lawrence Redmond,,250.0,,,1
5,North Central Council of Carpenters,,250.0,,,1
6,Police Officers Federation,,250.0,,,1
7,IBEW Local 2929,312 Central Ave SE Minneapolis MN,500.0,,,1
8,Lawrence Schwanke,,500.0,,,1
9,IBEW Local 292,"312 Central Ave SE, Ste 292 Minneapolis MN",650.0,$400.00,,2


In [19]:
# Exploratory pass: print the raw place details found for each association name.
# Lookups are best-effort, so a failure skips that name, but it is reported
# instead of being silently swallowed.
for AssociationName in df.AssociationName:
  try:
    Associationplaceid = getPlaceID(AssociationName)
    if Associationplaceid is None:
      # No search result for this name, so there is nothing to fetch details for.
      print('No place found for {!r}'.format(AssociationName))
      continue
    Associationplacedtl = getPlaceDtl(Associationplaceid)
  except Exception as exc:
    print('Place lookup failed for {!r}: {}'.format(AssociationName, exc))
    continue
  pprint.pprint(Associationplacedtl)

{'address_components': [{'long_name': '260',
                         'short_name': '260',
                         'types': ['subpremise']},
                        {'long_name': '81',
                         'short_name': '81',
                         'types': ['street_number']},
                        {'long_name': 'South 9th Street',
                         'short_name': 'S 9th St',
                         'types': ['route']},
                        {'long_name': 'Central Minneapolis',
                         'short_name': 'Central Minneapolis',
                         'types': ['neighborhood', 'political']},
                        {'long_name': 'Minneapolis',
                         'short_name': 'Minneapolis',
                         'types': ['locality', 'political']},
                        {'long_name': 'Hennepin County',
                         'short_name': 'Hennepin County',
                         'types': ['administrative_area_level_2', 'political']},
      

In [24]:
# Loop through the table values to get additional details for a given contributor name.
# Every contributor yields exactly one row. Each row starts from blank (None) values,
# so a failed or partial lookup can never inherit values left over from the
# previous contributor.

def _buildContriRow(contriname):
  """Return the output row for `contriname`; fields that cannot be found stay None."""
  # Key names (including the 'Latitute' spelling) are the destination column
  # names and are kept unchanged.
  row = {'ContributorName': contriname, 'Name': None, 'Website': None,
         'FormattedFullAddress': None, 'FormattedPhoneNumber': None,
         'Latitute': None, 'Longitude': None, 'HouseNum': None, 'Route': None,
         'Address': None, 'Neighborhood': None, 'City': None, 'County': None,
         'State': None, 'Country': None}
  try:
    contriplaceid = getPlaceID(contriname)
    if contriplaceid is None:
      # The place search returned nothing for this name.
      return row
    contriplacedtl = getPlaceDtl(contriplaceid)
  except Exception as exc:
    # Best-effort enrichment: report the failure and keep the blank row.
    print('Place lookup failed for {!r}: {}'.format(contriname, exc))
    return row

  # Optional fields are read with .get() so one missing field (for example a
  # place without a website) does not discard the fields that are present.
  location = contriplacedtl.get('geometry', {}).get('location', {})
  components = contriplacedtl.get('address_components', [])
  housenum = getStreetNum(components)
  route = getRoute(components)
  # Join only the parts that exist; None + str would raise TypeError.
  address = ' '.join(part for part in (housenum, route) if part) or None

  row.update({
    'Name': contriplacedtl.get('name'),
    'Website': contriplacedtl.get('website'),
    'FormattedFullAddress': contriplacedtl.get('formatted_address'),
    'FormattedPhoneNumber': contriplacedtl.get('formatted_phone_number'),
    'Latitute': location.get('lat'),
    'Longitude': location.get('lng'),
    'HouseNum': housenum,
    'Route': route,
    'Address': address,
    'Neighborhood': getNeighborhood(components),
    'City': getCity(components),
    'County': getCounty(components),
    'State': getState(components),
    'Country': getCountry(components),
  })
  return row

# NOTE(review): rows may now contain None values; confirm the downstream
# BigQuery load accepts nulls for these columns.
ListAllRows = [_buildContriRow(contriname) for contriname in df.AssociationName]

In [25]:
# Sanity check: number of rows collected (one is appended per contributor name).
print(len(ListAllRows))

199


In [28]:
# Load the collected rows into a BigQuery table.
# Dataset that holds the destination table.
datasetname = 'Munidata' 
#tablename = 'MuniHenAssocDtlETL' 

# Convert the collected rows to a DataFrame and derive the table schema from it.
ListAllRowsDF = pd.DataFrame(ListAllRows)
tableschema = bq.Schema.from_data(ListAllRowsDF)

# Create the table <datasetname>.MuniHenAssocDtlETL with that schema.
# NOTE(review): behavior when the table already exists is not visible here — confirm before re-running.
table = bq.Table(
    '{}.MuniHenAssocDtlETL'.format(datasetname)).create(schema=tableschema)
table.insert(ListAllRowsDF)  # Starts streaming insert of data

Address,City,ContributorName,Country,County,FormattedFullAddress,FormattedPhoneNumber,HouseNum,Latitute,Longitude,Name,Neighborhood,Route,State,Website
225 South 6th Street,Minneapolis,Winthrop & Weinstine PA Politcal Fund,United States,Hennepin County,"225 South 6th St, Minneapolis, MN 55402, USA",(612) 604-6400,225,44.9762362,-93.2686052,"Winthrop & Weinstine, P.A.",Central Minneapolis,South 6th Street,Minnesota,http://www.winthrop.com/
10956 Audelia Road,Dallas,Fire Fighters Association,United States,Dallas County,"10956 Audelia Rd, Dallas, TX 75243, USA",(214) 941-1354,10956,32.8984102,-96.7176783,Dallas Fire Fighters Association,Lake Highlands,Audelia Road,Texas,http://www.dffa.org/
11720 East 21st Street,Tulsa,Laborers District Council Political Fund #40712,United States,Tulsa County,"11720 E 21st St D, Tulsa, OK 74129, USA",(918) 585-1799,11720,36.133023,-95.845735,Southwest Laborers' District,,East 21st Street,Oklahoma,http://www.swlaborers.org/contact-us
81 South 9th Street,Minneapolis,Minneapolis Downtown Council Poli,United States,Hennepin County,"81 S 9th St #260, Minneapolis, MN 55402, USA",(612) 338-3807,81,44.9744939,-93.273659,Minneapolis Downtown Council,Central Minneapolis,South 9th Street,Minnesota,http://www.mplsdowntown.com/
6700 West Broadway Avenue,Brooklyn Park,JBEW Local 292 Political Education Fund,United States,Hennepin County,"6700 W Broadway Ave, Brooklyn Park, MN 55428, USA",(612) 379-1292,6700,45.07725909999999,-93.3750179,IBEW Local Union 292,,West Broadway Avenue,Minnesota,http://www.ibew292.org/
645 Randolph Avenue,Saint Paul,St Paul Fire Fighters Local 21PAC,United States,Ramsey County,"645 Randolph Ave, St Paul, MN 55102, USA",(651) 224-7811,645,44.9269456,-93.1268674,St Paul Fire Department,West Seventh,Randolph Avenue,Minnesota,https://www.stpaul.gov/departments/fire-paramedics
16083 Southwest Upper Boones Ferry Road,Tigard,Multi Housing PAC,United States,Washington County,"16083 SW Upper Boones Ferry Rd #105, Tigard, OR 97224, USA",(503) 213-1281,16083,45.4039107,-122.7511532,Multifamily NW (formerly Metro Multifamily Housing Association),Metzger,Southwest Upper Boones Ferry Road,Oregon,http://www.multifamilynw.org/
1603 North State Highway CC,Nixa,United Transportation Union PAC,United States,Christian County,"1603 N State Hwy CC, Nixa, MO 65714, USA",(417) 725-5661,1603,37.0754471,-93.2936864,United Transportation Union,,North State Highway CC,Missouri,https://smart-union.org/td/
67 8th Avenue Northeast,Minneapolis,MFT Local 59,United States,Hennepin County,"67 8th Ave NE, Minneapolis, MN 55413, USA",(612) 529-9621,67,44.9948285,-93.2683032,Minneapolis Federation of Teachers and Education Support Professionals,St. Anthony West,8th Avenue Northeast,Minnesota,http://mft59.org/
801 Grand Avenue,Des Moines,Faegre Baker Daniels State-Registered Political,United States,Polk County,"801 Grand Ave #3100, Des Moines, IA 50309, USA",(515) 248-9000,801,41.58750000000001,-93.628533,Faegre Drinker Biddle & Reath LLP,Downtown Des Moines,Grand Avenue,Iowa,http://www.faegredrinker.com/
