In [2]:
"""Hello Analytics Reporting API V4."""

import argparse

from apiclient.discovery import build
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools

import pandas as pd

from config import gkey
import gmaps
gmaps.configure(api_key=gkey)
import requests
import json

# OAuth scope requested during authorization (read-only Analytics access).
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
# Discovery document URL handed to build() as discoveryServiceUrl.
DISCOVERY_URI = ('https://analyticsreporting.googleapis.com/$discovery/rest')
CLIENT_SECRETS_PATH = 'client_secrets.json' # Path to client_secrets.json file.
# Sent as 'viewId' in the report request assembled by get_report().
VIEW_ID = '183991785'


def initialize_analyticsreporting():
  """Creates an authorized Analytics Reporting API V4 service object.

  Stored credentials are read from 'analyticsreporting.dat'. When they are
  missing or flagged invalid, the oauth2client flow is run and the resulting
  credentials are handed to the same Storage object.

  Returns:
    The service object produced by build('analytics', 'v4', ...), using an
    HTTP client authorized with the credentials.
  """
  # An explicit empty list is parsed so the kernel's own command line is
  # never consulted; only the defaults from tools.argparser end up in flags.
  arg_parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      parents=[tools.argparser])
  flags = arg_parser.parse_args([])

  # The flow is created up front (before looking at stored credentials), so
  # it is ready if authentication turns out to be necessary.
  missing_secrets_message = tools.message_if_missing(CLIENT_SECRETS_PATH)
  flow = client.flow_from_clientsecrets(
      CLIENT_SECRETS_PATH,
      scope=SCOPES,
      message=missing_secrets_message)

  # Load cached credentials; fall back to the interactive flow when there are
  # none or they are no longer valid.
  storage = file.Storage('analyticsreporting.dat')
  credentials = storage.get()
  must_authenticate = credentials is None or credentials.invalid
  if must_authenticate:
    credentials = tools.run_flow(flow, storage, flags)

  # Wrap a fresh HTTP client with the credentials and build the service.
  authorized_http = credentials.authorize(http=httplib2.Http())
  return build(
      'analytics',
      'v4',
      http=authorized_http,
      discoveryServiceUrl=DISCOVERY_URI)

def get_report(analytics):
  """Runs a single batchGet query against the Analytics Reporting API V4.

  The request asks for the 'ga:sessions' metric, broken down by
  'ga:latitude' and 'ga:longitude', for view VIEW_ID from 2019-04-01
  through 'today'.

  Args:
    analytics: service object exposing reports().batchGet(body=...).

  Returns:
    Whatever execute() returns for the batchGet call.
  """
  report_request = {
      'viewId': VIEW_ID,
      'dateRanges': [{'startDate': '2019-04-01', 'endDate': 'today'}],
      'metrics': [{'expression': 'ga:sessions'}],
      'dimensions': [{'name': 'ga:latitude'}, {'name': 'ga:longitude'}],
  }
  request_body = {'reportRequests': [report_request]}
  batch_call = analytics.reports().batchGet(body=request_body)
  return batch_call.execute()



def print_response(response):
    """Parses and prints the Analytics Reporting API V4 response.

    Every dimension and metric value of every row is printed, and the same
    values are collected into a DataFrame.

    Args:
        response: dict with an optional 'reports' list. Each report may carry
            'columnHeader' (dimension names and metric header entries) and
            'data' -> 'rows' (each row holding 'dimensions' and 'metrics').

    Returns:
        pandas.DataFrame with one row per report row, across all reports.
        Column names are the dimension/metric names with everything up to the
        last ':' removed (e.g. 'ga:sessions' -> 'sessions'). An empty
        DataFrame is returned when the response has no reports or rows.
    """
    # Collected across ALL reports. Initialised before the loop so that an
    # empty response yields an empty DataFrame instead of an unbound name,
    # and so that earlier reports are not discarded by later ones.
    df_list = []

    for report in response.get('reports', []):
        columnHeader = report.get('columnHeader', {})
        dimensionHeaders = columnHeader.get('dimensions', [])
        metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
        rows = report.get('data', {}).get('rows', [])

        for row in rows:
            dimensions = row.get('dimensions', [])
            dateRangeValues = row.get('metrics', [])
            # One dictionary per row: column name -> value.
            mt_dict = {}

            for header, dimension in zip(dimensionHeaders, dimensions):
                print(header + ': ' + dimension)
                # Strip the prefix before ':' so the key is a plain column name.
                mt_dict[header.split(":")[-1]] = dimension

            for i, values in enumerate(dateRangeValues):
                print('Date range (' + str(i) + ')')
                # Default to an empty list so a metrics entry without
                # 'values' is skipped rather than breaking zip().
                for metricHeader, value in zip(metricHeaders, values.get('values', [])):
                    print(metricHeader.get('name') + ': ' + value)
                    # Keyed by metric name only, so with several date ranges
                    # the last range's value is the one that is kept.
                    mt_dict[metricHeader.get('name').split(":")[-1]] = value

            df_list.append(mt_dict)

    # Build the DataFrame once from the accumulated row dictionaries.
    return pd.DataFrame(df_list)



def main():
    """Authorizes, fetches the report, and returns it as a DataFrame.

    Returns:
        The DataFrame produced by print_response() for the report response.
    """
    service = initialize_analyticsreporting()
    report_response = get_report(service)
    return print_response(report_response)

# Include below if using this in a .py File
# if __name__ == '__main__':
#   main()

In [3]:
# Fetch the report and keep the parsed rows as a DataFrame (main() also prints each row).
df = main()

ga:latitude: -0.0263
ga:longitude: 109.3425
Date range (0)
ga:sessions: 1
ga:latitude: -0.9003
ga:longitude: 119.8780
Date range (0)
ga:sessions: 2
ga:latitude: -16.4090
ga:longitude: -71.5375
Date range (0)
ga:sessions: 1
ga:latitude: -22.9329
ga:longitude: -47.0738
Date range (0)
ga:sessions: 1
ga:latitude: -27.4698
ga:longitude: 153.0251
Date range (0)
ga:sessions: 1
ga:latitude: -3.3186
ga:longitude: 114.5944
Date range (0)
ga:sessions: 2
ga:latitude: -37.5622
ga:longitude: 143.8503
Date range (0)
ga:sessions: 1
ga:latitude: -37.8136
ga:longitude: 144.9631
Date range (0)
ga:sessions: 1
ga:latitude: -5.1477
ga:longitude: 119.4327
Date range (0)
ga:sessions: 4
ga:latitude: -6.2088
ga:longitude: 106.8456
Date range (0)
ga:sessions: 8
ga:latitude: -6.7155
ga:longitude: 146.9999
Date range (0)
ga:sessions: 1
ga:latitude: -6.9175
ga:longitude: 107.6191
Date range (0)
ga:sessions: 1
ga:latitude: -7.2575
ga:longitude: 112.7521
Date range (0)
ga:sessions: 6
ga:latitude: -7.5755
ga:longitude

ga:longitude: -73.7982
Date range (0)
ga:sessions: 5
ga:latitude: 41.0262
ga:longitude: -73.6282
Date range (0)
ga:sessions: 17
ga:latitude: 41.0335
ga:longitude: -74.6364
Date range (0)
ga:sessions: 5
ga:latitude: 41.0340
ga:longitude: -73.7629
Date range (0)
ga:sessions: 149
ga:latitude: 41.0391
ga:longitude: -73.8670
Date range (0)
ga:sessions: 10
ga:latitude: 41.0434
ga:longitude: -73.7974
Date range (0)
ga:sessions: 3
ga:latitude: 41.0465
ga:longitude: -73.9496
Date range (0)
ga:sessions: 2
ga:latitude: 41.0468
ga:longitude: -74.0229
Date range (0)
ga:sessions: 7
ga:latitude: 41.0534
ga:longitude: -73.5387
Date range (0)
ga:sessions: 32
ga:latitude: 41.0551
ga:longitude: -73.8201
Date range (0)
ga:sessions: 10
ga:latitude: 41.0573
ga:longitude: -74.1410
Date range (0)
ga:sessions: 3
ga:latitude: 41.0584
ga:longitude: -74.0985
Date range (0)
ga:sessions: 1
ga:latitude: 41.0590
ga:longitude: -74.0218
Date range (0)
ga:sessions: 1
ga:latitude: 41.0648
ga:longitude: -72.4262
Date rang

In [4]:
# Create new column for joined lat-lng ("lat,lng"). Columns are selected by
# name rather than by position so the result does not depend on column order.
df['latlng'] = df[['latitude', 'longitude']].apply(
    lambda x: ','.join(x.dropna().astype(float).astype(str)), axis=1)
df["zip"] = ""
df["Formatted Address"] = ""
x = 1  # running count of coordinates with no postal-code match

geocode_url = 'https://maps.googleapis.com/maps/api/geocode/json'

# Loop through DF rows to call API for Zip and address matching lat-lng
for index, row in df.iterrows():
    # Let requests build and encode the query string instead of formatting
    # the key and coordinates into the URL by hand.
    query = {
        'latlng': row["latlng"],
        'result_type': 'postal_code',
        'key': gkey,
    }
    # A timeout keeps one stalled request from hanging the whole cell.
    response = requests.get(geocode_url, params=query, timeout=10).json()
    try:
        top_result = response['results'][0]
        df.loc[index, "zip"] = top_result['address_components'][0]["short_name"]
        df.loc[index, "Formatted Address"] = top_result['formatted_address']
    except (KeyError, IndexError):
        # Only "no usable result" is treated as not found; anything else
        # (including a manual kernel interrupt) now propagates.
        print(f" {x} Coordinates not found")
        x += 1

df.head()


 1 Coordinates not found
 2 Coordinates not found
 3 Coordinates not found
 4 Coordinates not found
 5 Coordinates not found
 6 Coordinates not found
 7 Coordinates not found
 8 Coordinates not found
 9 Coordinates not found
 10 Coordinates not found
 11 Coordinates not found
 12 Coordinates not found
 13 Coordinates not found
 14 Coordinates not found
 15 Coordinates not found
 16 Coordinates not found
 17 Coordinates not found
 18 Coordinates not found
 19 Coordinates not found
 20 Coordinates not found
 21 Coordinates not found
 22 Coordinates not found
 23 Coordinates not found
 24 Coordinates not found
 25 Coordinates not found
 26 Coordinates not found
 27 Coordinates not found
 28 Coordinates not found
 29 Coordinates not found
 30 Coordinates not found
 31 Coordinates not found
 32 Coordinates not found
 33 Coordinates not found
 34 Coordinates not found
 35 Coordinates not found


Unnamed: 0,latitude,longitude,sessions,latlng,zip,Formatted Address
0,-0.0263,109.3425,1,"-0.0263,109.3425",78243,"Pontianak, West Kalimantan 78243, Indonesia"
1,-0.9003,119.878,2,"-0.9003,119.878",94111,"Palu City, Central Sulawesi 94111, Indonesia"
2,-16.409,-71.5375,1,"-16.409,-71.5375",4001,"04001, Peru"
3,-22.9329,-47.0738,1,"-22.9329,-47.0738",13061,"Campinas - State of São Paulo, Brazil"
4,-27.4698,153.0251,1,"-27.4698,153.0251",4000,"Petrie Terrace QLD 4000, Australia"


In [5]:
# Keep only the rows whose geocoded address mentions the USA
is_usa = df['Formatted Address'].str.contains("USA")
usa = df[is_usa]

# Narrow to the reporting columns and renumber the rows from 0
report_columns = ["latitude", "longitude", "sessions", "zip", "Formatted Address"]
usa_df = usa[report_columns].reset_index(drop=True)
usa_df

Unnamed: 0,latitude,longitude,sessions,zip,Formatted Address
0,21.3069,-157.8583,1,96813,"Honolulu, HI 96813, USA"
1,25.6104,-80.4295,1,33177,"Miami, FL 33177, USA"
2,25.6660,-80.3578,1,33176,"Miami, FL 33176, USA"
3,25.7492,-80.2635,1,33134,"Miami, FL 33134, USA"
4,25.7617,-80.1918,6,33131,"Miami, FL 33131, USA"
5,25.8651,-80.3245,2,33016,"Hialeah, FL 33016, USA"
6,25.9565,-80.1392,1,33180,"Aventura, FL 33180, USA"
7,25.9861,-80.3036,1,33025,"Miramar, FL 33025, USA"
8,26.0112,-80.1495,10,33020,"Hollywood, FL 33020, USA"
9,26.0765,-80.2521,2,33328,"Fort Lauderdale, FL 33328, USA"
