### Import libraries

In [1]:
from datetime import date, timedelta
import pandas as pd
from gaapi4py import GAClient

### Define constants

In [2]:
# Placeholders: replace both values before running the notebook.
# VIEW_ID is the Google Analytics view to query; the path points at the
# service-account JSON file that is handed to GAClient.
VIEW_ID = '<YOUR_VIEW_ID>'
PATH_TO_SERVICE_ACCOUNT = 'PATH/TO/SERVICE_ACCOUNT.json'

# Indices of the custom dimensions (CD) holding the session id and the hit id.
# They are kept as strings because they are concatenated onto 'dimension'
# to build the dimension names used in the queries further down.
SESSION_ID_CD_INDEX, HIT_ID_CD_INDEX = '1', '2'

### Instantiate the client

In [3]:
# Build the GA client from the service-account key file configured in the constants cell.
c = GAClient(PATH_TO_SERVICE_ACCOUNT)
# Register the view id on the client. Later requests in this notebook omit 'view_id',
# so presumably this value acts as their default -- confirm against the gaapi4py docs.
c.set_view_id(VIEW_ID)

In [4]:
# Sessions broken down by source/medium and date for January 2019.
# This request spells out the view and the date range itself instead of
# relying on values previously set on the client.
request_body = dict(
    view_id=VIEW_ID,
    start_date='2019-01-01',
    end_date='2019-01-31',
    dimensions={'ga:sourceMedium', 'ga:date'},
    metrics={'ga:sessions'},
)

# The result is indexed with 'info' and 'data' in the cells that follow.
response = c.get_all_data(request_body)

In [5]:
# Metadata returned alongside the report rows (keys are visible in the output below).
# NOTE(review): the sampling-related fields being None presumably means the report
# was not sampled -- confirm against the Reporting API documentation.
response['info']

{'isDataGolden': True,
 'nextPageToken': None,
 'samplesReadCounts': None,
 'samplingSpaceSizes': None}

In [6]:
# response['data'] is a pandas DataFrame that contains the data from GA;
# preview only the first two rows rather than dumping the whole frame.
response['data'].head(2)

Unnamed: 0,date,sourceMedium,sessions
0,20190101,(direct) / (none),1996
1,20190101,(not set) / (not set),1


### Get data daily to avoid sampling

In [7]:
start_date = date(2019, 7, 1)
end_date = date(2019, 7, 14)

# Issue one request per calendar day (both end points included) so that each
# query covers a single day, then stack the daily frames into one.
df_list = []
for day_offset in range((end_date - start_date).days + 1):
    day = start_date + timedelta(days=day_offset)
    c.set_dateranges(day, day)
    response = c.get_all_data(dict(
        dimensions={'ga:sourceMedium', 'ga:deviceCategory'},
        metrics={'ga:sessions'},
    ))
    # The query has no date dimension, so tag every row with the day it was fetched for.
    daily_df = response['data']
    daily_df['date'] = day
    df_list.append(daily_df)

all_data = pd.concat(df_list, ignore_index=True)

### Get per-session data (using session_id custom dimension)

In [8]:
# Pull one day of session-level data. The attributes are requested in two
# reports that both include the session-id custom dimension, and the two
# results are then joined on that column.
# NOTE(review): the split is presumably needed because the Reporting API caps
# the number of dimensions per request -- confirm against the API limits.
one_day = date(2019, 7, 1)
c.set_dateranges(one_day, one_day)

# Custom-dimension names without the 'ga:' prefix; the merge below uses this
# un-prefixed form as the column name in the returned frames.
session_id = 'dimension' + SESSION_ID_CD_INDEX
hit_id = 'dimension' + HIT_ID_CD_INDEX

# Report 1: traffic-source, user-type and device attributes of each session.
response_1 = c.get_all_data({
    'dimensions': {
        'ga:' + session_id,
        'ga:sourceMedium',
        'ga:campaign',
        'ga:keyword',
        'ga:adContent',
        'ga:userType',
        'ga:deviceCategory'
    },
    'metrics': {
        'ga:sessions'
    }
})

# Report 2: navigation paths and engagement metrics of each session.
response_2 = c.get_all_data({
    'dimensions': {
        'ga:' + session_id,
        'ga:landingPagePath',
        'ga:secondPagePath',
        'ga:exitPagePath',
        'ga:pageDepth',
        'ga:daysSinceLastSession',
        'ga:sessionCount'
    },
    'metrics': {
        'ga:hits',
        'ga:totalEvents',
        'ga:bounces',
        'ga:sessionDuration'
    }
})

# Left join keeps every session from report 1. Only the key column is renamed;
# the row index is left as the integer labels produced by the merge, and a new
# frame is built instead of mutating in place so the cell can be re-run safely.
all_data = (
    response_1['data']
    .merge(response_2['data'], on=session_id, how='left')
    .rename(columns={session_id: 'session_id'})
)

In [9]:
# Preview the merged per-session frame: one row per session_id with the columns of both reports.
all_data.head(2)

Unnamed: 0,adContent,sourceMedium,deviceCategory,userType,keyword,campaign,session_id,sessions,sessionCount,pageDepth,secondPagePath,exitPagePath,daysSinceLastSession,landingPagePath,sessionDuration,hits,totalEvents,bounces
0,(not set),(direct) / (none),desktop,New Visitor,(not set),(not set),1013324599.1562008915_1562009225129,1,1,14,/login,/profile,0,/,310.0,24,10,0
1,(not set),(direct) / (none),desktop,New Visitor,(not set),(not set),1015630571.1561985110_1561985307686,1,1,3,/,/,0,/,198.0,3,0,0


### Get hit-level data (using hit_id custom dimension)

In [10]:
# Pull one day of hit-level data. Both reports carry the session-id and hit-id
# custom dimensions, which together form the join key.
# Both names are defined here so the cell does not depend on variables left
# over from the per-session section.
session_id = 'dimension' + SESSION_ID_CD_INDEX
hit_id = 'dimension' + HIT_ID_CD_INDEX

one_day = date(2019, 7, 1)
c.set_dateranges(one_day, one_day)

# Report 1: page information and timestamp for every hit.
hits_response_1 = c.get_all_data({
    'dimensions': {
        'ga:' + session_id,
        'ga:' + hit_id,
        'ga:pagePath',
        'ga:previousPagePath',
        'ga:dateHourMinute'
    },
    'metrics': {
        'ga:hits',
        'ga:totalEvents',
        'ga:pageviews'
    }
})

# Report 2: event category/action/label for hits that carry an event.
hits_response_2 = c.get_all_data({
    'dimensions': {
        'ga:' + session_id,
        'ga:' + hit_id,
        'ga:eventCategory',
        'ga:eventAction',
        'ga:eventLabel'
    },
    'metrics': {
        'ga:totalEvents'
    }
})

# Left join keeps every hit from report 1; hits without a match in report 2
# get empty event columns. 'ga:totalEvents' is requested in both reports, so
# the merge disambiguates it as totalEvents_x / totalEvents_y.
# Only the two key columns are renamed; the integer row index from the merge
# is left untouched and no frame is mutated in place.
all_hits_data = (
    hits_response_1['data']
    .merge(hits_response_2['data'], on=[session_id, hit_id], how='left')
    .rename(columns={session_id: 'session_id', hit_id: 'hit_id'})
)
all_hits_data.head(2)

Unnamed: 0,dateHourMinute,previousPagePath,pagePath,hit_id,session_id,pageviews,hits,totalEvents_x,eventAction,eventCategory,eventLabel,totalEvents_y
0,201907010000,(entrance),/,2019-07-01T02:00:11.257-05:00,980484191.1558123542_1561964715302,1,1,0,,,,
1,201907010000,(entrance),/,2019-07-01T09:00:43.77+02:00,25596428.1561921173_1561964593959,1,1,0,,,,
