# Google Analytics API requests

## Hieronder eerst diverse imports en variabelen aanmaken

Het eerste blok hieronder zorgt ervoor dat we de benodigde credentials hebben om een API request naar Analytics te mogen maken.

In [1]:
import argparse
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
from geheim import secrets
# Call the project-local `secrets()` helper (imported from `geheim`) and keep
# its result. This rebinds the name `secrets` from the function to the
# returned object, which is indexed by string key below.
secrets = secrets()
# Connection settings read from the secrets object; they are consumed by
# initialize_analyticsreporting() when it builds the authorized service.
SCOPES = secrets['SCOPES']
DISCOVERY_URI = secrets['DISCOVERY_URI']
KEY_FILE_LOCATION = secrets['KEY_FILE_LOCATION']
SERVICE_ACCOUNT_EMAIL = secrets['SERVICE_ACCOUNT_EMAIL']
# Google Analytics view ID; the request built in main() targets this same view.
VIEW_ID = '149216988'

## Twee belangrijke functies

Het eerste blok hieronder zorgt ervoor dat de response die we terugkrijgen van Analytics wordt omgezet in een Pandas DataFrame.
Het tweede blok zorgt ervoor dat we geen foutmelding krijgen als we in bulk requests naar de Analytics API gaan sturen. 
Deze functie zorgt ervoor dat als we een error krijgen voor te veel requests, we automatisch wat langzamer nog een keer proberen.

In [2]:
import pandas as pd

def _parse_metric_value(value):
  """Parse a metric string to ``int`` when possible, otherwise to ``float``.

  Raises:
    ValueError: If ``value`` is neither an integer nor a float literal.
  """
  try:
    return int(value)
  except ValueError:
    # Not a plain integer: covers decimals as well as exponent notation.
    return float(value)


def response_to_df(response):
  """Convert an Analytics Reporting API v4 response into a pandas DataFrame.

  Every row of every report in ``response`` becomes one DataFrame row.
  Dimension values are stored unchanged under their dimension header; metric
  values are converted to ``int`` or ``float`` and stored under the metric's
  ``name``.

  Args:
    response: The decoded ``batchGet`` response (a dict that may contain a
      ``reports`` list), or ``None``.

  Returns:
    A DataFrame with one column per dimension/metric. An empty DataFrame is
    returned when ``response`` is ``None`` or contains no reports or rows.

  Raises:
    ValueError: If a metric value is not numeric.
  """
  records = []

  for report in (response or {}).get('reports', []):
    column_header = report.get('columnHeader', {})
    dimension_headers = column_header.get('dimensions', [])
    metric_headers = column_header.get('metricHeader', {}).get('metricHeaderEntries', [])

    for row in report.get('data', {}).get('rows', []):
      # Dimension header (key) -> dimension value (value).
      record = dict(zip(dimension_headers, row.get('dimensions', [])))

      # 'metrics' holds one entry per requested date range. Values are keyed
      # by metric name only, so with several date ranges the last one wins.
      for date_range_values in row.get('metrics', []):
        for metric, value in zip(metric_headers, date_range_values.get('values', [])):
          record[metric.get('name')] = _parse_metric_value(value)

      records.append(record)

  # Build the frame once, after all reports have been processed, so that a
  # response without reports still yields a DataFrame rather than None.
  return pd.DataFrame(records)

In [3]:
import random
import time
from apiclient.errors import HttpError

def makeRequestWithExponentialBackoff(analytics, request):
  """Request Google Analytics data, retrying with exponential backoff.

  Calls ``get_report(analytics, request)`` up to five times. When the call
  raises an ``HttpError`` whose reason is in the retryable list, the function
  sleeps for ``2 ** n`` seconds plus a random fraction of a second (``n`` is
  the zero-based attempt number) and tries again. Any other ``HttpError``
  stops the retries immediately.

  Args:
    analytics: The analytics service object.
    request: The request body passed on to ``get_report``.

  Returns:
    The API response from ``get_report``, or ``None`` when the request never
    succeeded (a message is printed in that case).
  """
  retryable_reasons = ['userRateLimitExceeded', 'quotaExceeded',
                       'internalServerError', 'backendError']
  last_error = None

  for n in range(0, 5):
    try:
      return get_report(analytics, request)

    except HttpError as error:
      last_error = error
      # NOTE(review): `resp.reason` may be the HTTP reason phrase rather than
      # the API error reason - confirm these values actually match in practice.
      if error.resp.reason in retryable_reasons:
        time.sleep((2 ** n) + random.random())
      else:
        break

  print ("There has been an error, the request never succeeded.")
  if last_error is not None:
    # Surface the underlying error instead of silently discarding it.
    print (last_error)

## Functies die de daadwerkelijke call naar de Analytics API maken

Het blok hieronder doet het zware werk, namelijk:
- initialiseren, inloggen, e.d.
- de daadwerkelijke API call maken
- (de resultaten weergeven) (niet in gebruik)

In [4]:
def initialize_analyticsreporting():
  """Create an authorized analyticsreporting service object.

  Loads service-account credentials from the P12 key file configured in
  KEY_FILE_LOCATION for SERVICE_ACCOUNT_EMAIL with the SCOPES scopes, and uses
  them to authorize a fresh HTTP client.

  Returns:
    The service object built for 'analytics' v4 via DISCOVERY_URI.
  """
  authorized_http = ServiceAccountCredentials.from_p12_keyfile(
    SERVICE_ACCOUNT_EMAIL, KEY_FILE_LOCATION, scopes=SCOPES
  ).authorize(httplib2.Http())

  # The service is discovered through the configured discovery document URL.
  return build('analytics', 'v4', http=authorized_http, discoveryServiceUrl=DISCOVERY_URI)


def get_report(analytics, request):
  """Run one batchGet query against the Analytics Reporting API V4.

  Args:
    analytics: The analytics service object.
    request: The request body, sent unchanged as the ``body`` of ``batchGet``.

  Returns:
    Whatever ``execute()`` returns for the prepared batchGet call.
  """
  batch_call = analytics.reports().batchGet(body=request)
  return batch_call.execute()


def print_response(response):
  """Print all dimension and metric values of a Reporting API V4 response.

  For each row, every dimension is printed as ``header: value``; then, for
  each date range, a ``Date range (i)`` line is printed followed by one
  ``metric name: value`` line per metric.
  """
  for report in response.get('reports', []):
    header_info = report.get('columnHeader', {})
    dim_names = header_info.get('dimensions', [])
    metric_entries = header_info.get('metricHeader', {}).get('metricHeaderEntries', [])

    for row in report.get('data', {}).get('rows', []):
      dim_values = row.get('dimensions', [])
      metric_sets = row.get('metrics', [])

      # Dimensions first, paired with their headers.
      for name, dim_value in zip(dim_names, dim_values):
        print(name + ': ' + dim_value)

      # Then one group of metric lines per date range.
      for range_index, metric_set in enumerate(metric_sets):
        print('Date range (' + str(range_index) + ')')
        for entry, metric_value in zip(metric_entries, metric_set.get('values')):
          print(entry.get('name') + ': ' + metric_value)

## De request

Dit blok hieronder bevat de opdracht die we naar de Analytics API willen sturen en zet het hele proces in gang door bovenstaande functies aan te roepen
met de opdracht als input.

In [5]:
# Collects one DataFrame per successfully fetched day; main() appends to it
# and the later notebook cells read it.
response_array = []


def _build_contact_events_request(view_id, day):
  """Build the batchGet body for one day of 'contact' events, per hour."""
  return {
    "reportRequests": [
      {
        "viewId": view_id,
        "dateRanges": [
          {
            "startDate": day,
            "endDate": day
          }
        ],
        "samplingLevel": "LARGE",
        "dimensions": [
          {"name": "ga:date"},
          {"name": "ga:hour"},
          {"name": "ga:eventCategory"},
          {"name": "ga:eventLabel"}
        ],
        "dimensionFilterClauses": [
          {
            "filters": [
              {
                "dimensionName": "ga:eventCategory",
                "operator": "EXACT",
                "expressions": ["contact"]
              }
            ]
          }
        ],
        "metrics": [
          {
            "expression": "ga:totalEvents",
            "alias": "analytics.totalEvents"
          },
          {
            "expression": "ga:uniqueEvents",
            "alias": "analytics.uniqueEventsTrue"
          }
        ],
        "metricFilterClauses": [],
        "orderBys": [
          {
            "fieldName": "ga:totalEvents",
            "orderType": "ORDER_TYPE_UNSPECIFIED",
            "sortOrder": "ASCENDING"
          }
        ],
        "segments": [],
        "pivots": [],
        # NOTE(review): this looks like a placeholder left in the source -
        # confirm what page token (if any) should really be sent.
        "pageToken": "{{FILL-YOUR-OWN}}",
        # NOTE(review): only one page of up to 25 rows is requested per day;
        # no follow-up pages are fetched here.
        "pageSize": 25,
        "includeEmptyRows": True,
        "hideTotals": False,
        "hideValueRanges": False
      }
    ]
  }


def main(start_date='2018-12-01', end_date='2018-12-07', view_id='149216988'):
  """Fetch 'contact' event counts for each day in a date range.

  One request is sent per day (start and end date both set to that day). Each
  response is converted with response_to_df() and appended to the module-level
  ``response_array``; a progress line is printed per day.

  Args:
    start_date: First day to fetch, in a form accepted by ``pd.date_range``.
    end_date: Last day to fetch (inclusive).
    view_id: Google Analytics view ID to query; the default is the same ID as
      the module-level VIEW_ID constant.
  """
  analytics = initialize_analyticsreporting()

  days = pd.date_range(start_date, end_date).strftime('%Y-%m-%d')

  for date in days:
    day = str(date)
    request = _build_contact_events_request(view_id, day)

    response = makeRequestWithExponentialBackoff(analytics, request)
    if response is None:
      # The backoff wrapper gives up by returning nothing; skip this day
      # rather than failing while converting a missing response.
      print('Call mislukt: ' + day)
      continue

    response_array.append(response_to_df(response))
    print('Call uitgevoerd: ' + day)


if __name__ == '__main__':
  main()

Call uitgevoerd: 2018-12-01
Call uitgevoerd: 2018-12-02
Call uitgevoerd: 2018-12-03
Call uitgevoerd: 2018-12-04
Call uitgevoerd: 2018-12-05
Call uitgevoerd: 2018-12-06
Call uitgevoerd: 2018-12-07


## Verdere verwerking

Als alle bovenstaande cellen zijn uitgevoerd, hebben we nu een variabele 'response_array' beschikbaar met de ontvangen data als lijst van Pandas DataFrames.
Hieronder kunnen we deze data verder gaan verwerken.

Eerst plakken we alle DataFrames aan elkaar, zodat het één grote DataFrame wordt.

In [6]:
# Stack the per-day DataFrames collected in response_array into one frame,
# renumbering the index from 0 instead of keeping each frame's own index.
df = pd.concat(response_array, ignore_index=True)
# Preview the first rows; as the last expression of the cell this is what the
# notebook renders as output.
df.head()

Unnamed: 0,analytics.totalEvents,analytics.uniqueEventsTrue,ga:date,ga:eventCategory,ga:eventLabel,ga:hour
0,1,1,20181201,contact,contactsensor1,0
1,2,2,20181201,contact,contactsensor2,0
2,2,2,20181201,contact,contactsensor1,9
3,2,2,20181201,contact,contactsensor3,11
4,2,2,20181201,contact,contactsensor3,21
