In [1]:
import argparse
from apiclient.discovery import build
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
import pandas as pd
import numpy as np
# OAuth scope requested when authorizing (the read-only Analytics scope).
SCOPES = [
    'https://www.googleapis.com/auth/analytics.readonly',
]
# Discovery document URL passed to build() when creating the service object.
DISCOVERY_URI = 'https://analyticsreporting.googleapis.com/$discovery/rest'
# Path to the client_secrets.json file.
CLIENT_SECRETS_PATH = 'data/client_secret.json'
# Placeholder: replace with your own View ID before running the requests.
VIEW_ID = 'XXXXXXXX'

In [2]:
def authenticate_ga_api():
  """Create an authorized Analytics Reporting API service object.

  Stored credentials are loaded from 'analyticsreporting.dat'. If none are
  stored, or the stored ones are marked invalid, the oauth2client flow is run
  and handed the same storage object.

  Returns:
    The service object returned by build(), using an HTTP object authorized
    with the credentials.
  """
  # tools.run_flow takes parsed flags; an empty argument list is parsed here
  # instead of the process's own command line.
  arg_parser = argparse.ArgumentParser(
      parents=[tools.argparser],
      formatter_class=argparse.RawDescriptionHelpFormatter)
  run_flags = arg_parser.parse_args([])

  # Flow object, used only if a new authorization turns out to be required.
  oauth_flow = client.flow_from_clientsecrets(
      CLIENT_SECRETS_PATH,
      scope=SCOPES,
      message=tools.message_if_missing(CLIENT_SECRETS_PATH))

  # Look for previously stored credentials before starting the flow.
  credential_store = file.Storage('analyticsreporting.dat')
  credentials = credential_store.get()
  needs_authorization = credentials is None or credentials.invalid
  if needs_authorization:
    credentials = tools.run_flow(oauth_flow, credential_store, run_flags)

  # Authorize an HTTP object and build the service on top of it.
  authorized_http = credentials.authorize(http=httplib2.Http())
  return build(
      'analytics', 'v4',
      http=authorized_http,
      discoveryServiceUrl=DISCOVERY_URI)


In [3]:
# Authorized service object; the report requests in this notebook are sent through it.
ga_auth = authenticate_ga_api()

# 1. Dimensions and Metrics

In [4]:
# Dimension names for this section's request.
DIMS = [
    'ga:pagePath',
]
# Metric expressions for this section's request.
METRICS = [
    'ga:pageviews',
    'ga:uniquePageviews',
    'ga:avgTimeOnPage',
]

In [5]:
# Single report request: one date range, fields taken from DIMS and METRICS.
requests_list = [
    dict(
        viewId=VIEW_ID,
        dateRanges=[dict(startDate='2019-10-01', endDate='2019-11-01')],
        dimensions=[dict(name=dim) for dim in DIMS],
        metrics=[dict(expression=metric) for metric in METRICS],
    )
]

In [6]:
# Send the request list in one batchGet call and keep the response.
data = (
    ga_auth.reports()
    .batchGet(body={'reportRequests': requests_list})
    .execute()
)

In [8]:
# One empty list per requested dimension and metric.
data_dic = {str(field): [] for field in DIMS + METRICS}

In [9]:
# Show the empty per-field container.
data_dic

{'ga:pagePath': [],
 'ga:pageviews': [],
 'ga:uniquePageviews': [],
 'ga:avgTimeOnPage': []}

In [10]:
def data_process(data, dims=None, metrics=None):
    """Flatten a ``batchGet`` response dict into per-field value lists.

    Args:
        data: Response dict. Rows are read from
            ``data['reports'][*]['data']['rows']``; missing keys are treated
            as empty.
        dims: Dimension names, in the order they were requested. Defaults to
            the notebook-level ``DIMS``.
        metrics: Metric expressions, in the order they were requested.
            Defaults to the notebook-level ``METRICS``.

    Returns:
        A dict mapping every dimension and metric name to a list of values.
        Dimension lists receive one entry per row. Metric lists receive one
        entry per item of the row's ``metrics`` list, so when a row carries
        several such items the metric lists are longer than the dimension
        lists and need regrouping by the caller.

    Raises:
        IndexError: If a row holds fewer dimension or metric values than the
            number of names supplied.
    """
    # Fall back to the notebook globals only when the caller did not pass the
    # fields explicitly, so existing ``data_process(data)`` calls keep working
    # while new calls no longer depend on whichever cell last set the globals.
    if dims is None:
        dims = DIMS
    if metrics is None:
        metrics = METRICS
    dims = list(dims)
    metrics = list(metrics)

    columns = {name: [] for name in dims + metrics}
    for report in data.get('reports', []):
        for row in report.get('data', {}).get('rows', []):
            row_dims = row.get('dimensions', [])
            for position, name in enumerate(dims):
                columns[name].append(row_dims[position])
            # Each item of row['metrics'] contributes one value per metric.
            for metric_group in row.get('metrics', []):
                values = metric_group.get('values', [])
                for position, name in enumerate(metrics):
                    columns[name].append(values[position])
    return columns

In [11]:
        
# Build the frame, then keep only the text after the last ':' in each column name.
df = pd.DataFrame(data=data_process(data))
df.columns = [name.rsplit(':', 1)[-1] for name in df.columns]

In [None]:
# Preview the first rows of the frame.
df.head()

# 2. Multiple Date Ranges

In [13]:
# Same fields as the first section, queried here over several date ranges.
DIMS = [
    'ga:pagePath',
]
METRICS = [
    'ga:pageviews',
    'ga:uniquePageviews',
    'ga:avgTimeOnPage',
]

In [14]:
# One report request carrying two date ranges.
# NOTE(review): both ranges contain 2019-09-01 -- confirm the overlap is intended.
requests_list = [
    dict(
        viewId=VIEW_ID,
        dateRanges=[
            dict(startDate='2019-08-01', endDate='2019-09-01'),
            dict(startDate='2019-09-01', endDate='2019-10-01'),
        ],
        dimensions=[dict(name=dim) for dim in DIMS],
        metrics=[dict(expression=metric) for metric in METRICS],
    )
]

In [15]:
# Fetch the report for the two-date-range request.
data = (
    ga_auth.reports()
    .batchGet(body={'reportRequests': requests_list})
    .execute()
)

In [16]:
def metric_value_reshape(value_list, n):
    """Split a flat sequence of metric values into consecutive groups of ``n``.

    param value_list: flat list of metric values
    param n: group size (the number of date ranges)
    return: a list of ``len(value_list) // n`` slices, each of length ``n``;
            leftover values that do not fill a whole group are dropped
    """
    group_count = len(value_list) // n
    return [value_list[k * n:(k + 1) * n] for k in range(group_count)]

In [17]:
# data_process appends one metric value per date range for every row, so the
# flat metric lists must be regrouped by the number of date ranges requested.
# Derive that number from the request rather than hard-coding it, so the two
# cannot drift apart when the request is edited.
n_date_ranges = len(requests_list[0]['dateRanges'])

data_dic = data_process(data)
for key in METRICS:
    data_dic[key] = metric_value_reshape(data_dic[key], n_date_ranges)
df = pd.DataFrame(data=data_dic)
# Keep only the text after the last ':' in each column name.
df.columns = [col.split(':')[-1] for col in df.columns]

In [None]:
# Preview the first rows; each metric cell holds a list of grouped values.
df.head()

# 3. Multiple Dimensions

In [19]:
# Two dimensions, one metric.
DIMS = [
    'ga:userAgeBracket',
    'ga:browser',
]
METRICS = [
    'ga:users',
]

In [20]:
# One report request, one date range, fields taken from DIMS and METRICS.
requests_list = [
    dict(
        viewId=VIEW_ID,
        dateRanges=[dict(startDate='2019-01-01', endDate='2019-02-01')],
        dimensions=[dict(name=dim) for dim in DIMS],
        metrics=[dict(expression=metric) for metric in METRICS],
    )
]

In [21]:
# Fetch the report for the multi-dimension request.
data = (
    ga_auth.reports()
    .batchGet(body={'reportRequests': requests_list})
    .execute()
)

In [22]:
# Flatten the response into per-field lists, then load them into a frame.
data_dic = data_process(data)
df = pd.DataFrame(data=data_dic)
# Keep only the text after the last ':' in each column name.
df.columns = [name.rsplit(':', 1)[-1] for name in df.columns]

In [23]:
# Preview the first rows: one row per (userAgeBracket, browser) pair.
df.head()

Unnamed: 0,userAgeBracket,browser,users
0,18-24,Chrome,147
1,18-24,Edge,13
2,18-24,Internet Explorer,18
3,18-24,Safari,18
4,25-34,Android Webview,31


# 4. Histogram Buckets

In [24]:
# One dimension (bucketed in the request below), one metric.
DIMS = [
    'ga:sessionCount',
]
METRICS = [
    'ga:users',
]

In [25]:
# Report request that attaches histogram bucket boundaries to every dimension
# in DIMS and orders the result by ga:sessionCount in HISTOGRAM_BUCKET order.
requests_list = [
    dict(
        viewId=VIEW_ID,
        dateRanges=[dict(startDate='2019-08-01', endDate='2019-09-01')],
        dimensions=[
            dict(name=dim, histogramBuckets=["1", "10", "50", "100", "200"])
            for dim in DIMS
        ],
        metrics=[dict(expression=metric) for metric in METRICS],
        orderBys=[
            dict(fieldName="ga:sessionCount", orderType="HISTOGRAM_BUCKET"),
        ],
    )
]

In [None]:
# Show the assembled request body before sending it.
requests_list

In [27]:
# Fetch the report for the histogram-bucket request.
data = (
    ga_auth.reports()
    .batchGet(body={'reportRequests': requests_list})
    .execute()
)

In [28]:
# Flatten the response into one value list per dimension and metric.
data_dic = data_process(data)

In [29]:
# Load the per-field lists into a frame and keep only the text after the
# last ':' in each column name.
df = pd.DataFrame(data=data_dic)
df.columns = [name.rsplit(':', 1)[-1] for name in df.columns]

In [30]:
# Preview the first rows: one row per sessionCount bucket.
df.head()

Unnamed: 0,sessionCount,users
0,1-9,20341
1,10-49,6429
2,50-99,3635
3,100-199,4769
4,200+,31560


# 5. Pivots

In [31]:
# Fields used by the pivot in this section.
DIMS = ['ga:userAgeBracket']
# Metric expressions carry the 'ga:' prefix, as in every other section and in
# the pivot request's own "expression" values; the bare 'pageviews' was
# inconsistent with both.
METRICS = ['ga:sessions', 'ga:pageviews']

In [32]:
# Report on ga:browser and ga:date with a sessions metric, plus one pivot on
# ga:userAgeBracket carrying sessions and pageviews.
requests_list = [
    dict(
        viewId=VIEW_ID,
        dateRanges=[dict(endDate="2019-11-09", startDate="2019-11-03")],
        dimensions=[dict(name="ga:browser"), dict(name="ga:date")],
        metrics=[dict(alias="sessions", expression="ga:sessions")],
        pivots=[
            dict(
                dimensions=[dict(name="ga:userAgeBracket")],
                startGroup="0",
                maxGroupCount="6",
                metrics=[
                    dict(alias="sessions", expression="ga:sessions"),
                    dict(alias="pageviews", expression="ga:pageviews"),
                ],
            )
        ],
    )
]

In [33]:
# Fetch the report for the pivot request.
data = (
    ga_auth.reports()
    .batchGet(body={'reportRequests': requests_list})
    .execute()
)

In [None]:
# Show the raw response dict for the pivot request.
data