# Работаем с GA API

Все стандартно по докам, но нужно будет удалить установленный oauth2client и поставить oauth2client 3 версии. Иначе, пока не будет работать 4 версия API.

QuickStart по гайду здесь https://developers.google.com/analytics/devguides/reporting/core/v4/quickstart/service-py

### Сокращенная рабочая версия установки следом: 

Для установки выполните 

```sudo pip install --upgrade google-api-python-client``` 

Затем 

```sudo pip uninstall oauth2client``` 


Затем установите oauth2client 3 версии, можно скачать по ссылке https://github.com/google/oauth2client/archive/v3.0.0.zip

Чтобы его установить перейдите в директорию распакованного архива с oauth2client и выполните

```sudo python setup.py install ```

Далее можно приступать к работе с проектом.


### Заводим проект для работы с api google analytics
1. Завести проект для работы с api google analytics вот по этой ссылке https://console.developers.google.com/start/api?id=analyticsreporting.googleapis.com&credential=client_key

2. Добавляем в проекте сервисный аккаунт, при его заведении выберите обязательно тип ключа p12. Он сохранится на комп и мы его поместим в папку с этим ноутбуком.

3. У сервисного аккаунта сгенерировался email. Скопируйте его и дайте ему доступ на чтение в нужном аккаунте GA. Ну и пропишите чуть позже в самом коде.


### Код проекта. 
Вносим изменения только в 
KEY_FILE_LOCATION

SERVICE_ACCOUNT_EMAIL 

VIEW_ID - для своего счетчика GA можете найти по ссылке https://ga-dev-tools.appspot.com/account-explorer/

In [1]:
"""Hello Analytics Reporting API V4."""

import argparse

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools


SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
DISCOVERY_URI = ('https://analyticsreporting.googleapis.com/$discovery/rest')
KEY_FILE_LOCATION = 'My Project-e0d133087182.p12'
SERVICE_ACCOUNT_EMAIL = 'ffsetnew@inner-radius-153716.iam.gserviceaccount.com'
VIEW_ID = '39674352'

def initialize_analyticsreporting():
  """Initializes an analyticsreporting service object.

  Returns:
    analytics an authorized analyticsreporting service object.
  """

  credentials = ServiceAccountCredentials.from_p12_keyfile(
    SERVICE_ACCOUNT_EMAIL, KEY_FILE_LOCATION, scopes=SCOPES)

  http = credentials.authorize(httplib2.Http())

  # Build the service object.
  analytics = build('analytics', 'v4', http=http, discoveryServiceUrl=DISCOVERY_URI)
  print type(analytics)


  return analytics





### Формируем отчет
Для примера взял отчет за срок с определенной даты до сегодня, который выводит url страниц и просмотры по ним.

In [2]:
def get_report(analytics):
  # Use the Analytics Service Object to query the Analytics Reporting API V4.
  return analytics.reports().batchGet(
      body={
        'reportRequests': [
        {
          'viewId': VIEW_ID,
          'dateRanges': [{'startDate': '2016-11-24', 'endDate': 'today'}],
          'metrics': [{'expression': 'ga:pageviews'}],
            "dimensions":\
                    
      [
        {"name": "ga:pagepath"}
      ]        
        }]
      }
  ).execute()

### Функция вывода отчета 

In [3]:

def print_response(response):
  """Parses and prints the Analytics Reporting API V4 response"""

  for report in response.get('reports', []):
    columnHeader = report.get('columnHeader', {})
    dimensionHeaders = columnHeader.get('dimensions', [])
    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
    rows = report.get('data', {}).get('rows', [])

    for row in rows:
      dimensions = row.get('dimensions', [])
      dateRangeValues = row.get('metrics', [])

      for header, dimension in zip(dimensionHeaders, dimensions):
        print header + ': ' + dimension
        

      for i, values in enumerate(dateRangeValues):
        print 'Date range (' + str(i) + ')'
        for metricHeader, value in zip(metricHeaders, values.get('values')):
          print metricHeader.get('name') + ': ' + value

#### Выполним запрос и выведем полученные данные

In [4]:
def main():

  analytics = initialize_analyticsreporting()
  response = get_report(analytics)
  print_response(response)

if __name__ == '__main__':
  main()

IOError: [Errno 2] No such file or directory: 'My Project-e0d133087182.p12'

### Модифицируем функцию вывода, чтобы записать все в DataFrame Pandas


In [5]:
import pandas as pd
dff = pd.DataFrame()

def print_response_df(response, dff):

  df = dff

  """Parses and prints the Analytics Reporting API V4 response"""

  for report in response.get('reports', []):
    columnHeader = report.get('columnHeader', {})
    dimensionHeaders = columnHeader.get('dimensions', [])
    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
    rows = report.get('data', {}).get('rows', [])


    for row in rows:
      dimensions = row.get('dimensions', [])

      dateRangeValues = row.get('metrics', [])
      newline_df = {'metrics':int(dateRangeValues[0]['values'][0]), 'dimensions':dimensions[0] }
      df = df.append(newline_df, ignore_index=True) 

  return df


In [6]:
analytics = initialize_analyticsreporting()
response = get_report(analytics)
data = print_response_df(response, dff)

<class 'googleapiclient.discovery.Resource'>


### Примеры запросов 
По ссылке приведены распространенные примеры обращений к api Google Analytics
https://developers.google.com/analytics/devguides/reporting/core/v4/samples?hl=ru

Добавим в наш запрос фильтрацию, допустим у нас есть список определенных страниц и мы хотим вывести данные по этим страницам.
Если данных много, то описанный выше процесс не подойдет, т.к. он выводит лишь 1000 значений и в этом случае нам понадобится указывать в фильтре что мы хотим вывести.

Перечень операций приведен в документации https://developers.google.com/analytics/devguides/reporting/core/v3/reference?hl=ru#filters

Ниже в примере мы будем искать вхождение подстроки URL в ga:pagepath, который задается как фильтр Dimension. 

In [6]:
def get_report(analytics):
  # Use the Analytics Service Object to query the Analytics Reporting API V4.
  return analytics.reports().batchGet(
      body={
        'reportRequests': [
        {
          'viewId': VIEW_ID,
          'dateRanges': [{'startDate': '2017-02-01', 'endDate': 'today'}],
          'metrics': [{'expression': 'ga:pageviews'}],
          'dimensions':[{'name': 'ga:pagepath'}],
          'dimensionFilterClauses': [{
              'filters': [{
                  'dimensionName': 'ga:pagepath',
                  'operator': 'PARTIAL',
                  'expressions': ['zqsLD22FD']
                        }]
                                    }]          
                           
            }]
          }
   ).execute()



# https://www.googleapis.com/analytics/v3/data/ga
# ?ids=ga:12134
# &dimensions=ga:browser
# &metrics=ga:pageviews
# &filters=ga:browser%3D~%5EFirefox
# &start-date=2007-01-01
# &end-date=2007-12-31


#   "dimensionFilter":
#                     {
#                       "dimensionName":"ga:browser",
#                       "expressions":["Safari"],
#                       "operator":"EXACT"
#                     }

In [7]:
analytics = initialize_analyticsreporting()
response = get_report(analytics)
data = print_response_df(response, dff)

<class 'googleapiclient.discovery.Resource'>


In [8]:
data

In [9]:
sum(data.metrics)

AttributeError: 'DataFrame' object has no attribute 'metrics'

In [1]:
urlspart = [
    'yHTW84703',
'o77934',
'yFpe42630',
'yFpG72CAf',
'yESw3A9Ed',
'yEw393a6F',
'yCkKff575',
'yBw397284',
'yB0A404CC',
'yAT084619',
'yIhd1c334',
'yJTlcc883',
'yJN446f9f',
'yO9PBce6B',
'yQhud046f',
'yRa2A5580',
'ySib11Fa3',
'ySsx4e6cD',
'yUija6117',
'yUpU933bb',
'yY2sAf9db',
'yZbo02814',
'yZCe56824',
'yZAy1bb70',
'yZPFFa1Ba',
'za1w4866d',
'zasS9A3b4',
'zbrqA6024',
'zbUw48E5C',
'zch7Dc914',
'zcB639A32',
'zc,Kd52b8',
'zcMX87705',
'zd0q34768',
'zduKf05fb',
'zefAC2f5f',
'ze8YCC0AC',
'zflX271c8',
'zfa8685fA',
'zfzv93e1c',
'zf8C5bC01',
'zfRYbfA27',
'zg2nB2Ba2',
'zh5X39439',
'zh6l7FFF0',
'zioT470F1',
'ziak957bd',
'zi5L44c54',
'ziwF27B16',
'ziN869AEE',
'zj,9D7eaD',
'zjW5c37Ba',
'zkAaA8ddf',
'zkThaBDc2',
'zkOPD3FD4',
'zltrD622e',
'zlUW9E353',
'zmH4f3bd5',
'zno4b39CC',
'zn8EdE0b0',
'zn-U36913',
'zonkCEEd3',
'zoHhe8c4a',
'zoLM9EdbC',
'zp,q739E3',
'zqas35E7b',
'zqsLD22FD',
'zqAdF55DF',
'zqMQ750f3',
'zr5,a4c11',
'zrSIE4944',
'zso44d01E',
'zsPQC55A9',
'zt9Od61bA',
'zw2n87D1F',
'zu1ZBcecB',
'zu1wA28E4',
'zwEp79155',
'zyCO8A1f8',
'zw2n87D1F',
'zxHN471c8',
'zxMf66a59',
'z3W.0B4c5',
'z4ebFa00F',
'z6mwE71A4',
'z9gV866FD',
'z-bY6d813',
'z-8j196cF',
'z.d.D9a28',
'z.e77De3F',
'z.Bxe0DaF',
'zABJ542F7',
'zB6W3C3bE',
'zElz83805',
'zEl4bAAb4',
'zGGed9b4E',
'zH9V2B3ac',
'zKzde33e2',
'zLhB450a8',
'zS4M05bEE',
'zWRS51485',
'zY7Q3A2E0',
'zZL9c7aFB',
'0at87C404',
'0doM5797f',
'0epi55f67',
'0eAk0C7A3',
'0flC45Cb0',
'0ivxF6D84',
'0jj74cD3a',
'0kljD87FB',
'0k5af05b0',
'0kOG040f5',
'0mlvD2c73',
'0mEO25b9d',
'0oay55697',
'0oH7ac7De',
'0pnk3582b',
'0pG-31459',
'0p9b4a51B',
'0qjn21c4B',
'0qx0d9d01',
'0qAh3FcDF',
'0rgI91AfC',
'0rACbC17d',
'0s1X7F248',
'0tKjaF855',
'0vr329FD0',
'0vLMb7686',
'0wbEE1097',
'0w2Y8fC13',
'0wK161e23'
    
]

In [11]:
import pandas as pd
"""Hello Analytics Reporting API V4."""

import argparse

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools


SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
DISCOVERY_URI = ('https://analyticsreporting.googleapis.com/$discovery/rest')
KEY_FILE_LOCATION = 'My Project-e0d133087182.p12'
SERVICE_ACCOUNT_EMAIL = 'ffsetnew@inner-radius-153716.iam.gserviceaccount.com'
VIEW_ID = '39674352'

def initialize_analyticsreporting():
  """Initializes an analyticsreporting service object.

  Returns:
    analytics an authorized analyticsreporting service object.
  """

  credentials = ServiceAccountCredentials.from_p12_keyfile(
    SERVICE_ACCOUNT_EMAIL, KEY_FILE_LOCATION, scopes=SCOPES)

  http = credentials.authorize(httplib2.Http())

  # Build the service object.
  analytics = build('analytics', 'v4', http=http, discoveryServiceUrl=DISCOVERY_URI)
  print type(analytics)


  return analytics




dff = pd.DataFrame()

def print_response_df(response):

  df = dff

  """Parses and prints the Analytics Reporting API V4 response"""

  for report in response.get('reports', []):
    columnHeader = report.get('columnHeader', {})
    dimensionHeaders = columnHeader.get('dimensions', [])
    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
    rows = report.get('data', {}).get('rows', [])


    for row in rows:
      dimensions = row.get('dimensions', [])

      dateRangeValues = row.get('metrics', [])
      newline_df = {'metrics':int(dateRangeValues[0]['values'][0]), 'dimensions':dimensions[0] }
      df = df.append(newline_df, ignore_index=True)      
  return df


def get_report(analytics, expression):
  # Use the Analytics Service Object to query the Analytics Reporting API V4.
  return analytics.reports().batchGet(
      body={
        'reportRequests': [
        {
          'viewId': VIEW_ID,
          'dateRanges': [{'startDate': '2016-11-24', 'endDate': 'today'}],
          'metrics': [{'expression': 'ga:pageviews'}],
          'dimensions':[{'name': 'ga:pagepath'}],
          'dimensionFilterClauses': [{
              'filters': [{
                  'dimensionName': 'ga:pagepath',
                  'operator': 'PARTIAL',
                  'expressions': [expression]
                        }]
                                    }]          
                           
            }]
          }
   ).execute()


p = pd.DataFrame()
analytics = initialize_analyticsreporting()
for expression in urlspart:
    response = get_report(analytics, expression)
    data = print_response_df(response)
    print p
    
    if data.size == 0:
        newline = {'urlpart':expression, 'value':0}
    else:    
        value = sum(data.metrics)
        newline = {'urlpart':expression, 'value':value}
    p = p.append(newline, ignore_index=True)
    
    

<class 'googleapiclient.discovery.Resource'>
Empty DataFrame
Columns: []
Index: []
     urlpart  value
0  yIkwd7081  168.0
     urlpart  value
0  yIkwd7081  168.0
1  yISgd15Ab    8.0
     urlpart  value
0  yIkwd7081  168.0
1  yISgd15Ab    8.0
2  yPo-ffAf6   16.0
     urlpart  value
0  yIkwd7081  168.0
1  yISgd15Ab    8.0
2  yPo-ffAf6   16.0
3  yR7a0fE3d    4.0
     urlpart  value
0  yIkwd7081  168.0
1  yISgd15Ab    8.0
2  yPo-ffAf6   16.0
3  yR7a0fE3d    4.0
4  yZlg751Ad   15.0
     urlpart  value
0  yIkwd7081  168.0
1  yISgd15Ab    8.0
2  yPo-ffAf6   16.0
3  yR7a0fE3d    4.0
4  yZlg751Ad   15.0
5  yZ4J97726    0.0
     urlpart  value
0  yIkwd7081  168.0
1  yISgd15Ab    8.0
2  yPo-ffAf6   16.0
3  yR7a0fE3d    4.0
4  yZlg751Ad   15.0
5  yZ4J97726    0.0
6  yZYS24CCd    4.0
     urlpart  value
0  yIkwd7081  168.0
1  yISgd15Ab    8.0
2  yPo-ffAf6   16.0
3  yR7a0fE3d    4.0
4  yZlg751Ad   15.0
5  yZ4J97726    0.0
6  yZYS24CCd    4.0
7  zavBcF378    1.0
     urlpart  value
0  yIkwd7081  168

In [12]:
p.size

118

In [6]:
p.to_csv('hightech.csv', sep=',', header=True, index=False)

In [2]:
urlspart2 = [
    '0L3k24515',
'0G3EbA4CC',
'0Ajl14F33',
'0ZAP8B33e',
'0xs2b4EfE',
'1e38ACb9f',
'X97e4e',
'02..4c27D'
]


In [7]:
import pandas as pd
"""Hello Analytics Reporting API V4."""

import argparse

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools


SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
DISCOVERY_URI = ('https://analyticsreporting.googleapis.com/$discovery/rest')
KEY_FILE_LOCATION = 'My Project-e0d133087182.p12'
SERVICE_ACCOUNT_EMAIL = 'ffsetnew@inner-radius-153716.iam.gserviceaccount.com'
VIEW_ID = '39674352'

def initialize_analyticsreporting():
  """Initializes an analyticsreporting service object.

  Returns:
    analytics an authorized analyticsreporting service object.
  """

  credentials = ServiceAccountCredentials.from_p12_keyfile(
    SERVICE_ACCOUNT_EMAIL, KEY_FILE_LOCATION, scopes=SCOPES)

  http = credentials.authorize(httplib2.Http())

  # Build the service object.
  analytics = build('analytics', 'v4', http=http, discoveryServiceUrl=DISCOVERY_URI)
  print type(analytics)


  return analytics




dff = pd.DataFrame()

def print_response_df(response):

  df = dff

  """Parses and prints the Analytics Reporting API V4 response"""

  for report in response.get('reports', []):
    columnHeader = report.get('columnHeader', {})
    dimensionHeaders = columnHeader.get('dimensions', [])
    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
    rows = report.get('data', {}).get('rows', [])


    for row in rows:
      dimensions = row.get('dimensions', [])

      dateRangeValues = row.get('metrics', [])
      newline_df = {'metrics':int(dateRangeValues[0]['values'][0]), 'dimensions':dimensions[0] }
      df = df.append(newline_df, ignore_index=True)      
  return df


def get_report(analytics, expression):
  # Use the Analytics Service Object to query the Analytics Reporting API V4.
  return analytics.reports().batchGet(
      body={
        'reportRequests': [
        {
          'viewId': VIEW_ID,
          'dateRanges': [{'startDate': '2017-04-13', 'endDate': '2017-04-25'}],
          'metrics': [{'expression': 'ga:pageviews'}],
          'dimensions':[{'name': 'ga:pagepath'}],
          'dimensionFilterClauses': [{
              'filters': [{
                  'dimensionName': 'ga:pagepath',
                  'operator': 'PARTIAL',
                  'expressions': [expression]
                        }]
                                    }]          
                           
            }]
          }
   ).execute()



analytics = initialize_analyticsreporting()

### Вставим сюда набор url
def readyData(urlspart):
    p = pd.DataFrame()
    for expression in urlspart:
        response = get_report(analytics, expression)
        data = print_response_df(response)
        if data.size == 0:
            newline = {'urlpart':expression, 'value':0}
        else:    
            value = sum(data.metrics)
            newline = {'urlpart':expression, 'value':value}
        p = p.append(newline, ignore_index=True)
    return p
    
    

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/googleapiclient/discovery_cache/__init__.py", line 41, in autodetect
    from . import file_cache
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/googleapiclient/discovery_cache/file_cache.py", line 41, in <module>
    'file_cache is unavailable when using oauth2client >= 4.0.0')
ImportError: file_cache is unavailable when using oauth2client >= 4.0.0


<class 'googleapiclient.discovery.Resource'>


In [8]:
z = readyData(urlspart2)
z.to_csv('griffonmarth.csv', sep=',', header=True, index=False)