In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
from PIL import Image, ImageDraw, ImageFont
from pyvis.network import Network

In [None]:
# Read-only Google Analytics OAuth scope requested for the service account.
SCOPES = ["https://www.googleapis.com/auth/analytics.readonly"]
# Service-account key file and the Analytics view the report is run against.
KEY_FILE = "bbe-web-cro-key.json"
VIEW_ID = "236051486"

# Reporting window (relative date strings), start then end.
start_dt, end_dt = "14daysAgo", "yesterday"

In [None]:
# Dimensions and metrics requested from the report, in output-column order.
dimensions = [
    'ga:PagePath',
    'ga:PreviousPagePath',
]
metrics = [
    'ga:Pageviews',
    'ga:exits',
    'ga:entrances',
    'ga:avgTimeOnPage',
]

In [None]:
def initialize_analyticsreporting():
    """Return an 'analyticsreporting' v4 service client.

    Credentials are loaded from the service-account key file KEY_FILE and
    restricted to SCOPES.
    """
    return build(
        'analyticsreporting',
        'v4',
        credentials=ServiceAccountCredentials.from_json_keyfile_name(KEY_FILE, SCOPES),
    )

In [None]:
def get_report(analytics, dimensions, metrics, start_dt='7daysAgo', end_dt='yesterday', nextPageToken=None, view_id=None):
    """Run a single-request ``reports().batchGet`` call and return its response.

    Parameters
    ----------
    analytics : service client exposing ``reports().batchGet(body=...).execute()``.
    dimensions : iterable of dimension names (e.g. ``'ga:PagePath'``).
    metrics : iterable of metric expressions (e.g. ``'ga:Pageviews'``).
    start_dt, end_dt : date strings for the single date range. The defaults
        were previously swapped (start ``'yesterday'``, end ``'7daysAgo'``),
        which described a range that starts after it ends.
    nextPageToken : token of the page to fetch; ``None`` requests the first page.
    view_id : view to query; falls back to the module-level ``VIEW_ID``.

    Returns
    -------
    The decoded response returned by ``execute()``.
    """
    report_request = {
        'viewId': VIEW_ID if view_id is None else view_id,

        'dateRanges': [{'startDate': start_dt, 'endDate': end_dt}],

        'dimensions': [{'name': name} for name in dimensions],
        'metrics': [{'expression': exp} for exp in metrics],

        'pageSize': '10000',

        'samplingLevel': 'LARGE',
    }

    # Only send a page token when continuing a paginated report; the first
    # request carries none instead of an explicit null.
    if nextPageToken is not None:
        report_request['pageToken'] = nextPageToken

    return analytics.reports().batchGet(
        body={'reportRequests': [report_request]}
    ).execute()

In [None]:
def response_to_df(response, dimensions, metrics, nextPageToken = None):
    """Flatten a (possibly paginated) report response into a DataFrame.

    Parameters
    ----------
    response : first page of the report as returned by ``get_report``. When
        ``None``, the first page is fetched here, starting at ``nextPageToken``.
    dimensions, metrics : the names requested for the report; they define the
        output columns (dimensions first, then metrics).
    nextPageToken : only used to fetch the first page when ``response`` is None.

    Returns
    -------
    pandas.DataFrame with one row per report row, a fresh 0..n-1 index and
    column names stripped of their ``'ga:'``-style prefix. Values are kept
    exactly as they appear in the response.

    Notes
    -----
    Follow-up pages are requested with ``get_report`` using the module-level
    ``analytics``, ``start_dt`` and ``end_dt``. Every page, including the last
    one (which carries no ``nextPageToken``), is read exactly once.
    """
    if response is None:
        response = get_report(analytics, dimensions, metrics, start_dt, end_dt, nextPageToken)

    columns = [f"{name}" for name in list(dimensions) + list(metrics)]
    records = []

    while True:
        reports = response.get('reports') or []

        for report in reports:
            for row in report.get('data', {}).get('rows', []):
                dimension_values = row.get('dimensions', [])
                record = [dimension_values[i] for i in range(len(dimensions))]

                # One entry per requested date range; as before, the values of
                # the last range are the ones kept.
                date_range_values = row.get('metrics', [])
                metric_values = date_range_values[-1].get('values', []) if date_range_values else []
                record.extend(metric_values[i] for i in range(len(metrics)))

                records.append(record)

        # A single request is sent, so the continuation token lives on the
        # first report; stop once the response no longer carries one.
        nextPageToken = reports[0].get('nextPageToken') if reports else None
        if not nextPageToken:
            break

        response = get_report(analytics, dimensions, metrics, start_dt, end_dt, nextPageToken)

    # Build the frame once instead of growing it page by page.
    _data = pd.DataFrame(records, columns=columns)
    _data.columns = [col.split(':')[-1] for col in _data.columns]
    _data = _data.reset_index(drop=True)

    return _data

In [None]:
# Create the reporting service client used by the report requests below.
analytics = initialize_analyticsreporting()

In [None]:
# Request the first page (no page token) of the report for the configured window.
response = get_report(analytics, dimensions, metrics, start_dt, end_dt, None)

In [None]:
# Flatten the report into a DataFrame with one column per dimension/metric.
analytics_df = response_to_df(response, dimensions, metrics)

In [None]:
# Keep only the part of each path that precedes the first '?'.
for path_column in ('PagePath', 'PreviousPagePath'):
    analytics_df[path_column] = analytics_df[path_column].map(lambda path: path.partition('?')[0])

In [None]:
# Cast the metric columns to numeric dtypes in a single pass.
analytics_df = analytics_df.astype({
    'Pageviews': 'int64',
    'exits': 'int64',
    'entrances': 'int64',
    'avgTimeOnPage': 'float',
})

In [None]:
# Collapse rows sharing the same (page, previous page) pair: counts are summed,
# avgTimeOnPage is averaged; the result is ordered by Pageviews, highest first.
analytics_df = (
    analytics_df
    .groupby(['PagePath', 'PreviousPagePath'], as_index=False)
    .agg({'Pageviews':'sum', 'exits':'sum', 'entrances':'sum', 'avgTimeOnPage':'mean'})
    .sort_values('Pageviews', ascending=False)
    .reset_index(drop=True)
)

In [None]:
def _page_group(path):
    """Classify a page path as 'application', 'programs', 'blog' or 'default'.

    The checks are substring tests applied in that order; the first match wins.
    """
    if 'application' in path:
        return 'application'
    if '/program' in path:
        return 'programs'
    if '/blog' in path:
        return 'blog'
    return 'default'


# Share of pageviews on each (page, previous page) row that ended in an exit.
analytics_df['exitrate'] = analytics_df['exits'] / analytics_df['Pageviews']

# s_grouping describes the previous (source) page, d_grouping the viewed
# (destination) page. One helper serves both columns, replacing the
# row-by-row .loc loop with a single pass per column.
analytics_df['s_grouping'] = analytics_df['PreviousPagePath'].map(_page_group)
analytics_df['d_grouping'] = analytics_df['PagePath'].map(_page_group)

In [None]:
# Remote Year histogram: distribution of avgTimeOnPage (rows below 200 only)
# Load and size the logo used as a watermark
im = Image.open(r'W:\My Documents\Remote Year\ry-logo.png')
im = im.resize((277,210))

# Define the size of the plot area
fig, ax1 = plt.subplots(figsize=(10, 6), dpi=75)

# Draw the histogram on ax1 explicitly rather than relying on the current axes
analytics_df.loc[analytics_df['avgTimeOnPage'] < 200, 'avgTimeOnPage'].plot(
    kind='hist', density=False, bins=25, color='#26B4B2', ax=ax1)

# y-axis: number of rows per bin, shown as thousands-separated integers.
# Tick font size is set through tick_params so the formatter below stays in
# control of the label text.
ax1.set_ylabel("Count", fontname='Lato', fontsize='12')
ax1.tick_params(axis='y', labelsize=12)
ax1.yaxis.grid(which="major")
ax1.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

# Legend for the plotted series
ax1.legend(loc='upper right', fontsize='12')

# x-axis: the measured value being binned
ax1.set_xlabel("Avg. Time on Page", fontname='Lato', fontsize='12')
ax1.tick_params(axis='x', labelsize=12)

# Set watermark and overall graphic formatting
plt.tight_layout()
plt.subplots_adjust(top=0.93)
plt.suptitle("Avg. Time on Page", fontname='Lato', fontsize='16', x=0.56)
plt.figimage(im, 280, 150, alpha=0.20, zorder=10)
plt.figtext(0.56, 0.910, 'www.RemoteYear.com', ha='center', va='center', fontname='Lato', 
            fontsize='9',color='#686C6D')

plt.show()
#plt.savefig(r'W:\My Documents\Remote Year\Onboarding\age.png')

In [None]:
# Remote Year histogram: distribution of the per-row exit rate
# Load and size the logo used as a watermark
im = Image.open(r'W:\My Documents\Remote Year\ry-logo.png')
im = im.resize((277,210))

# Define the size of the plot area
fig, ax1 = plt.subplots(figsize=(10, 6), dpi=75)

# Draw the histogram on ax1 explicitly rather than relying on the current axes
analytics_df['exitrate'].plot(kind='hist', density=False, bins=25, color='#26B4B2', ax=ax1)

# y-axis: number of rows per bin, shown as thousands-separated integers.
# Tick font size is set through tick_params so the formatter below stays in
# control of the label text.
ax1.set_ylabel("Count", fontname='Lato', fontsize='12')
ax1.tick_params(axis='y', labelsize=12)
ax1.yaxis.grid(which="major")
ax1.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

# Legend for the plotted series
ax1.legend(loc='upper right', fontsize='12')

# x-axis: the measured value being binned
ax1.set_xlabel("Exit Rate", fontname='Lato', fontsize='12')
ax1.tick_params(axis='x', labelsize=12)

# Set watermark and overall graphic formatting
plt.tight_layout()
plt.subplots_adjust(top=0.93)
plt.suptitle("Exit Rate", fontname='Lato', fontsize='16', x=0.56)
plt.figimage(im, 280, 150, alpha=0.20, zorder=10)
plt.figtext(0.56, 0.910, 'www.RemoteYear.com', ha='center', va='center', fontname='Lato', 
            fontsize='9',color='#686C6D')

plt.show()
#plt.savefig(r'W:\My Documents\Remote Year\Onboarding\age.png')

In [None]:
# Options text later handed to ry_net.set_options(): node/edge font stroke,
# arrowheads on the "to" end of edges, straight (non-smooth) edges that inherit
# their colour, and the forceAtlas2Based physics solver with its tuning values.
var_options = """ var options = {
  "nodes": {
    "font": {
      "strokeWidth": 1
    }
  },
  "edges": {
    "arrows": {
      "to": {
        "enabled": true,
        "scaleFactor": 0.5
      }
    },
    "color": {
      "inherit": true
    },
    "font": {
      "strokeWidth": 1
    },
    "smooth": false
  },
  "physics": {
    "forceAtlas2Based": {
      "springLength": 150,
      "gravitationalConstant": -400,
      "springConstant": 0.20,
      "damping": 0.99,
      "avoidOverlap": 0.84
    },
    "minVelocity": 0.75,
    "solver": "forceAtlas2Based",
    "timestep": 0.93
  }
}

"""

In [None]:
# Node colours per page group. Each group has its own base colour for border
# and background; highlight and hover use the same dark colour for every group.
color_dict = {
    group_name: {
        'border': base_color,
        'background': base_color,
        'highlight': {'border': '#192E4E', 'background': '#192E4E'},
        'hover': {'border': '#192E4E', 'background': '#192E4E'},
    }
    for group_name, base_color in (
        ('default', '#98C8ED'),
        ('application', '#26B4B2'),
        ('blog', '#F7C793'),
        ('programs', '#AB6027'),
    )
}

In [None]:
ry_net = Network(height='750px', width='100%', bgcolor='white', font_color='black', notebook=True)

ry_net.set_options(var_options)

# Edge widths are Pageviews scaled by the third-largest Pageviews value
# (analytics_df is sorted by Pageviews, highest first) and capped at 1.
# NOTE(review): the previous positional lookup iloc[2, 4] resolved to the
# 'entrances' column, which can be 0 and is not in Pageviews units; Pageviews
# is assumed to be the intended reference - confirm.
width_reference = analytics_df['Pageviews'].iloc[:3]
max_width_int = max(int(width_reference.iloc[-1]), 1) if len(width_reference) else 1

# Exit rate per page (all previous pages combined), so every node - source or
# destination - reports its own rate. Pages with no pageviews are left out.
page_totals = analytics_df.groupby('PagePath')[['exits', 'Pageviews']].sum()
page_totals = page_totals[page_totals['Pageviews'] > 0]
page_exit_rate = {
    page: float(rate)
    for page, rate in (page_totals['exits'] / page_totals['Pageviews']).items()
}

# Keep every edge touching an application page plus the 500 busiest edges.
is_application = (analytics_df['s_grouping'] == 'application') | (analytics_df['d_grouping'] == 'application')
ry_data_df = analytics_df[is_application | (analytics_df.index < 500)]

for edge in ry_data_df.itertuples(index=False):
    w = min(float(edge.Pageviews) / max_width_int, 1)

    # Destination first, then source; add_node is called for every edge, so
    # a page seen more than once is simply offered again with the same data.
    for page, group in ((edge.PagePath, edge.d_grouping), (edge.PreviousPagePath, edge.s_grouping)):
        node_options = {'title': page, 'color': color_dict.get(group), 'group': group}
        if page in page_exit_rate:
            # Node value is the share of pageviews that did NOT exit.
            node_options['value'] = 1 - page_exit_rate[page]
        ry_net.add_node(page, page, **node_options)

    ry_net.add_edge(edge.PreviousPagePath, edge.PagePath, value=w, width=w)

# add exit rate and top origins to node hover data
for node in ry_net.nodes:
    page = node['id']
    if page in page_exit_rate:
        rate_text = str(round(page_exit_rate[page] * 100, 2)) + '%'
    else:
        # Page only ever appears as a previous page; no rate can be computed.
        rate_text = 'n/a'
    node['title'] += '<br>Exit rate: ' + rate_text + '<br> Top Origins:'

    # analytics_df is already ordered by Pageviews, so the first five matches
    # are the five largest origins of this page.
    top_origins = analytics_df[analytics_df['PagePath'] == page].head(5)
    for origin in top_origins.itertuples(index=False):
        node['title'] += '<br>' + origin.PreviousPagePath + ': ' + '{:,}'.format(origin.Pageviews)

#ry_net.show_buttons(filter_=True)
ry_net.show(r'W:\My Documents\Remote Year\web-cro\ry-traffic.html')