In [1]:

### import all the dependencies 
import requests
import csv
import json
import pandas as pd
import requests, json
import re
import configparser
import time 
import logging
import os


In [2]:
## Configuration: credentials, API endpoints and run constants
configFile = "config.cfg"
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed. Check that list so a missing config fails here with a
# clear message instead of a NoSectionError on the lookup below.
if not config.read(configFile):
    raise FileNotFoundError(
        "Could not read configuration file '{}'".format(configFile))

# The API token is read from the [FullStory] section of the config file so it
# never has to be written into the notebook itself.
TOKEN = config.get('FullStory', 'Token')

# Header sent with every request made through the shared session.
HEADERS = {'Authorization': 'Basic {}'.format(TOKEN)}

# Start timestamps (Unix epoch seconds) passed to the export list endpoint.
START_DATE1 = 1536364800  # 2018-09-08 00:00:00 UTC (2018-09-07 17:00:00 at UTC-7)
# NOTE(review): this value decodes to 2018-09-28 19:02:00 UTC. The previous
# annotation said "2018-09-27 17:02:00", which at UTC-7 would be 1538092920.
# Confirm which start time is intended.
START_DATE2 = 1538161320  # 2018-09-28 19:02:00 UTC

# URL templates; '{}' is filled with the start timestamp / the export bundle id.
LIST_REQUEST_URL = 'https://export.fullstory.com/api/v1/export/list?start={}'
GET_REQUEST_URL = 'https://export.fullstory.com/api/v1/export/get?id={}'

# File that receives the run log.
LOG_FILE = 'fs_api_run.log'

In [3]:
## Set up the notebook logger (writes INFO and above to LOG_FILE)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# logging.getLogger() returns the same logger object on every call, so
# re-running this cell would otherwise attach one more FileHandler each time
# and every message would be written to the file repeatedly. Drop any file
# handler already attached for this log file before adding a fresh one.
_log_path = os.path.abspath(LOG_FILE)
for _stale_handler in list(logger.handlers):
    if (isinstance(_stale_handler, logging.FileHandler)
            and getattr(_stale_handler, 'baseFilename', None) == _log_path):
        logger.removeHandler(_stale_handler)
        _stale_handler.close()

# create a file handler
handler = logging.FileHandler(LOG_FILE)
handler.setLevel(logging.INFO)

# create a logging format: timestamp - logger name - level - message
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)

# add the handler to the logger
logger.addHandler(handler)


In [4]:
## Function to get the list of export bundles after a given date
def get_exportData(start_date):
    '''Download every export bundle listed from ``start_date`` onwards.

    Calls the list endpoint (``LIST_REQUEST_URL``) with ``start_date``, then
    fetches each listed bundle by its ``Id`` (``GET_REQUEST_URL``) and joins
    the decoded JSON payloads into a single list.

    Args:
        start_date: value substituted into the ``start`` query parameter of
            the list request (the notebook passes Unix epoch seconds).

    Returns:
        list: the records of all downloaded bundles, in listing order.

    Raises:
        RuntimeError: if the list request does not return HTTP 200.
        requests.HTTPError: if a bundle download returns a 4xx/5xx status.
    '''
    exports_data = []
    with requests.session() as session:
        session.headers.update(HEADERS)

        # Getting the list of data exports from fullstory
        logger.info('Getting the exports list from fullstory')
        exports_list_response = session.get(LIST_REQUEST_URL.format(start_date))
        status_code = exports_list_response.status_code

        # Stop here on failure: without a valid listing there is nothing to
        # download, and continuing would only fail later with an unrelated
        # error about an undefined variable.
        if status_code != 200:
            failure_msg = 'List API call unsuccessful with status code: {}'.format(status_code)
            print(failure_msg)
            # logger.error, not logger.exception: no exception is being
            # handled at this point, so there is no traceback to attach.
            logger.error(failure_msg)
            raise RuntimeError(failure_msg)

        print('List API call has been successful.')
        logger.info('List API call has been successful.')
        exports = exports_list_response.json()["exports"]
        print(exports)

        # Downloading the exports from fullstory
        print("Starting downlaod of the Fullstory data exports")
        logger.info("Starting downlaod of the Fullstory data exports")
        for counter, export in enumerate(exports, start=1):
            print("Starting download for the bundle {}".format(counter))
            logger.info("Starting download for the bundle {}".format(counter))
            bundle_response = session.get(GET_REQUEST_URL.format(str(export['Id'])))
            # Surface a failed download instead of merging an error payload
            # into the collected records.
            bundle_response.raise_for_status()
            exports_data.extend(bundle_response.json())

        # Report the real number of bundles (the listing length).
        summary_msg = "Downloaded {} dumps with {} total records".format(
            len(exports), len(exports_data))
        print(summary_msg)
        logger.info(summary_msg)

    return exports_data

    

In [5]:
# Download the export bundles for both configured start timestamps. Each call
# performs one list request plus one download request per listed bundle and
# returns the combined records as a list.
logger.info ("The Main run start to download all export bundles .")
exports1  = get_exportData(START_DATE1)
exports2 = get_exportData(START_DATE2)


List API call has been successful.
[{'Start': 1536364800, 'Stop': 1536451200, 'Id': 153636480048}, {'Start': 1536451200, 'Stop': 1536537600, 'Id': 153645120048}, {'Start': 1536537600, 'Stop': 1536624000, 'Id': 153653760048}, {'Start': 1536624000, 'Stop': 1536710400, 'Id': 153662400048}, {'Start': 1536710400, 'Stop': 1536796800, 'Id': 153671040048}, {'Start': 1536796800, 'Stop': 1536883200, 'Id': 153679680048}, {'Start': 1536883200, 'Stop': 1536969600, 'Id': 153688320048}, {'Start': 1536969600, 'Stop': 1537056000, 'Id': 153696960048}, {'Start': 1537056000, 'Stop': 1537142400, 'Id': 153705600048}, {'Start': 1537142400, 'Stop': 1537228800, 'Id': 153714240048}, {'Start': 1537228800, 'Stop': 1537315200, 'Id': 153722880048}, {'Start': 1537315200, 'Stop': 1537401600, 'Id': 153731520048}, {'Start': 1537401600, 'Stop': 1537488000, 'Id': 153740160048}, {'Start': 1537488000, 'Stop': 1537574400, 'Id': 153748800048}, {'Start': 1537574400, 'Stop': 1537660800, 'Id': 153757440048}, {'Start': 153766080

In [6]:
# Build one pandas DataFrame per downloaded export list for further processing
print("Dataset from API is very large This cell may take a 1-2 minutes to run")

exports_df1 = pd.DataFrame(exports1)
first_shape_msg = 'Shape of first export dataframe: {}'.format(exports_df1.shape)
# Report the shape on screen and in the run log.
print(first_shape_msg)
logger.info(first_shape_msg)

exports_df2 = pd.DataFrame(exports2)
second_shape_msg = 'Shape of second export dataframe: {}'.format(exports_df2.shape)
print(second_shape_msg)
logger.info(second_shape_msg)

Dataset from API is very large This cell may take a 1-2 minutes to run
Shape of first export dataframe: (527535, 34)
Shape of second export dataframe: (227991, 38)


In [7]:
## Merge the two export DataFrames into one
# join='inner' keeps only the columns present in both frames.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# frame's own row labels are repeated, so labels are duplicated in the
# combined frame and in anything written from it.
combined_exports_df = pd.concat(
    [exports_df1, exports_df2], join='inner', ignore_index=True)
combined_shape_msg = 'Shape of All Combined exports df : {}'.format(combined_exports_df.shape)
# Report the shape on screen and in the run log, like the per-frame shapes.
print(combined_shape_msg)
logger.info(combined_shape_msg)

Shape of All Combined exports df : (755526, 34)


In [8]:
# Display the column labels of the combined frame (the concat above uses
# join='inner', so only columns shared by both source frames remain).
combined_exports_df.columns

Index(['EventModDead', 'EventModError', 'EventModFrustrated', 'EventStart',
       'EventTargetSelectorTok', 'EventTargetText', 'EventType', 'IndvId',
       'PageActiveDuration', 'PageAgent', 'PageBrowser', 'PageDevice',
       'PageDuration', 'PageId', 'PageIp', 'PageLatLong', 'PageNumErrors',
       'PageRefererUrl', 'PageUrl', 'SessionId', 'UserAppKey',
       'UserDisplayName', 'UserEmail', 'UserId', 'user_anonymousId_str',
       'user_firstName_str', 'user_lastName_str', 'user_serviceName_str',
       'user_userHash_str', 'user_userId_str', 'user_username_str'],
      dtype='object')

In [10]:
# Create the data directory if needed. exist_ok=True makes this a no-op when
# the directory is already there, with no separate check-then-create step that
# could race with another process creating it in between.
os.makedirs('../data', exist_ok=True)

In [11]:
## Persist the combined DataFrame as CSV so later runs can skip the API calls
filepath = "../data/Combined_data.csv"

start_msg = "Start writing the export_df to localCSV"
print(start_msg)
logger.info(start_msg)

combined_exports_df.to_csv(filepath)

done_msg = "Data Successfully loaded to local CSV"
print(done_msg)
logger.info(done_msg)

Data Successfully loaded to local CSV
