In [1]:
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import numpy as np
import os
from pandas.io.json import json_normalize

class Reporting:
    """Small wrapper around one Google Analytics Reporting API v4 query.

    On construction the instance stores its configuration and immediately
    builds an authorized service object (see ``initialize_analyticsreporting``),
    which is kept on ``self.analytics``.

    Args:
        gte: Start date of the report, sent as ``dateRanges[0].startDate``.
        lte: End date of the report, sent as ``dateRanges[0].endDate``.
        key_file_location: Optional path to the service-account JSON key file.
            When omitted, the placeholder ``'YOUR KEY_FILE_LOCATION'`` is kept,
            which must be edited before a real call can succeed.
        view_id: Optional Analytics view id. When omitted, the placeholder
            ``"YOUR GA_WEB_VIEW_ID"`` is kept.
    """

    def __init__(self, gte, lte, key_file_location=None, view_id=None):
        self.SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
        # DISCOVERY_URI and SERVICE_ACCOUNT_EMAIL are not read anywhere in this
        # class; they are kept so existing attribute access keeps working.
        self.DISCOVERY_URI = ('https://analyticsreporting.googleapis.com/$discovery/rest')
        self.KEY_FILE_LOCATION = (
            'YOUR KEY_FILE_LOCATION' if key_file_location is None else key_file_location
        )
        self.SERVICE_ACCOUNT_EMAIL = 'YOUR SERVICE_ACCOUNT_EMAIL'
        self.GA_WEB_VIEW_ID = "YOUR GA_WEB_VIEW_ID" if view_id is None else view_id
        self.gte = gte
        self.lte = lte
        # Must come last: building the client reads KEY_FILE_LOCATION and SCOPES.
        self.analytics = self.initialize_analyticsreporting()

    def initialize_analyticsreporting(self):
        """
        Initializes an Analytics Reporting API V4 service object.

        Credentials are loaded from ``self.KEY_FILE_LOCATION`` for
        ``self.SCOPES``.

        Returns:
          An authorized Analytics Reporting API V4 service object.
        """
        credentials = ServiceAccountCredentials.from_json_keyfile_name(
            self.KEY_FILE_LOCATION, self.SCOPES)

        # Build the service object.
        return build('analyticsreporting', 'v4', credentials=credentials)

    def get_report(self, analytics=None):
        """
        Queries the Analytics Reporting API V4.

        Sends a single report request for ``self.GA_WEB_VIEW_ID`` over the
        ``self.gte``..``self.lte`` date range, broken down by ``ga:date`` with
        the metrics ``ga:sessions``, ``ga:transactions`` and
        ``ga:transactionsPerSession`` (in that order), page size 10000.

        Args:
          analytics: An authorized Analytics Reporting API V4 service object.
            Defaults to the one built for this instance (``self.analytics``).
        Returns:
          The Analytics Reporting API V4 response.
        """
        if analytics is None:
            analytics = self.analytics

        report_request = {
            'viewId': self.GA_WEB_VIEW_ID,
            'dateRanges': [{'startDate': self.gte, 'endDate': self.lte}],
            'dimensions': [{'name': "ga:date"}],
            "metrics": [{"expression": "ga:sessions"},
                        {"expression": "ga:transactions"},
                        {"expression": "ga:transactionsPerSession"}],
            "pageSize": 10000,
        }
        return analytics.reports().batchGet(
            body={'reportRequests': [report_request]}
        ).execute()


def ga2df(gte, lte):
    """Fetch the daily Analytics report for a date range as a DataFrame.

    Builds a ``Reporting`` client for the range, runs its report and flattens
    the first report's rows into one record per row.

    Args:
        gte: Start date passed to ``Reporting`` (e.g. ``'2017-10-01'``).
        lte: End date passed to ``Reporting`` (e.g. ``'2017-10-31'``).

    Returns:
        pandas.DataFrame with the columns, in this order:
          * ``date``         - datetime, parsed from the ``%Y%m%d`` dimension value
          * ``session``      - int, first metric value
          * ``transactions`` - int, second metric value
          * ``conversion``   - float rounded to 2 decimals, third metric value
        The metric order follows the request built in ``Reporting.get_report``.
        If the report contains no rows, an empty frame with the same columns
        and dtypes is returned.
    """
    columns = ['date', 'session', 'transactions', 'conversion']

    r = Reporting(gte, lte)
    response = r.get_report(r.analytics)

    # The 'rows' key is absent when the range has no data, so fall back to an
    # empty list instead of raising KeyError.
    report = response['reports'][0]
    rows = report.get('data', {}).get('rows', [])

    records = []
    for row in rows:
        values = row['metrics'][0]['values']
        records.append({
            'date': row['dimensions'][0],
            'session': values[0],
            'transactions': values[1],
            'conversion': values[2],
        })

    # Build the frame once; passing `columns` fixes the column order and keeps
    # the schema even when there are no records.
    df = pd.DataFrame(records, columns=columns)

    # The API returns every value as a string, so convert explicitly.
    df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    df['session'] = df['session'].astype(int)
    df['transactions'] = df['transactions'].astype(int)
    df['conversion'] = df['conversion'].astype(float).round(2)

    return df

In [2]:
# Pull the daily report for 2017-10-01 through 2017-10-31 into a DataFrame.
df = ga2df(gte='2017-10-01', lte='2017-10-31')

In [3]:
# Preview three randomly chosen rows; no random_state is set, so the rows
# shown differ from run to run.
df.sample(n=3)

Unnamed: 0,date,session,transactions,conversion
26,2017-10-27,3207,97,3.02
6,2017-10-07,2389,73,3.06
29,2017-10-30,3515,115,3.27
