In [1]:
!pip install oauth2client
!pip install google-api-python-client
!pip install ipywidgets
!pip install plotly
!pip install tqdm -U





# Pre-requisite installations if needed

# --------------------------------------------------------------------------------------------------

In [18]:
import argparse
import pandas as pd
import json
import os
import ipywidgets as widgets
from pathlib import Path
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import calendar

import uuid

from tqdm.notebook import tqdm, trange

import plotly
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
from helper_functions import initialize_analyticsreporting, get_report, print_response, VIEW_ID, next_date_interval, progress_bar_counter

pio.renderers.default = "iframe"

analytics = initialize_analyticsreporting()

# --------------------------------------------------------------------------------------------------

#### The cell below will render the widgets needed to select the items in the graph. This cell only needs to be run ONCE (to show the widgets only). After display, you don't have to run this cell. The report/graph will include the end date.

# --------------------------------------------------------------------------------------------------

In [90]:
feature = widgets.Dropdown(
    options=[
        'New/Returning Users',
        'App Launched - OS', 
        'App Launched - SODA', 
        'Manage Dataset - Create Empty Dataset', 
        'Manage Dataset - Rename Existing Dataset', 
        'Manage Dataset - Change PI owner', 
        'Manage Dataset - Add User Permission',
        'Manage Dataset - Add/Edit Subtitle',
        'Manage Dataset - Add/Edit Description', 
        'Manage Dataset - Upload Banner Image', 
        'Manage Dataset - Assign License',
        'Manage Dataset - Upload Local Dataset', 
        'Manage Dataset - Change Dataset Status',
        
        
        'Manage Datasets - Create a new dataset',
        'Manage Datasets - Rename an existing dataset',
        'Manage Datasets - Make PI owner of dataset',
        'Manage Datasets - Add/Edit Permissions',
        'Manage Datasets - Add/Edit Permissions - Add User Permissions',
        'Manage Datasets - Add/Edit Permissions - Add Team Permissions',
        "Manage Datasets - Add/Edit Subtitle",
        "Manage Datasets - Add/Edit Subtitle - Get Subtitle",
        "Manage Datasets - Add/Edit Readme",
        "Manage Datasets - Add/Edit Readme - Get Readme",
        "Manage Datasets - Add/Edit Readme - Parse Readme",  
        "Manage Datasets - Upload a Banner Image",
        "Manage Datasets - Upload a Banner Image - Size",
        "Manage Datasets - Upload a Banner Image - Importing Banner Image",
        "Manage Datasets - Upload a Banner Image - Get Banner Image",
        "Manage Datasets - Add/Edit Tags",
        "Manage Datasets - Add/Edit Tags - Get Tags",
        "Manage Datasets - Assign a License",
        "Manage Datasets - Assign a License - Get License",
        "Manage Datasets - Upload Local Dataset",
        "Manage Datasets - Upload Local Dataset - size",
        "Manage Datasets - Upload Local Dataset - name - size",
        "Manage Datasets - Upload Local Dataset - Number of Folders",
        "Manage Datasets - Upload Local Dataset - name - Number of Folders",
        "Manage Datasets - Upload Local Dataset - Number of Files",
        "Manage Datasets - Upload Local Dataset - name - Number of Files",
        "Manage Datasets - Change Dataset Status",
        "Manage Datasets - Change Dataset Status - Get Dataset Status"
        
        
        
        'Prepare Metadata - Add Airtable account',
        'Prepare Metadata - Add DDD',
        'Prepare Metadata - Create Submission',
        'Prepare Metadata - Create dataset_description',
        'Prepare Metadata - samples',
        'Prepare Metadata - samples - Generate',
        'Prepare Metadata - samples - Generate - Local',
        'Prepare Metadata - samples - Generate - Pennsieve',
        'Prepare Metadata - samples - Existing',
        'Prepare Metadata - samples - Existing - Local',
        'Prepare Metadata - samples - Existing - Pennsieve',
        'Prepare Metadata - submission',
        'Prepare Metadata - submission - Generate',
        'Prepare Metadata - submission - Generate - Local',
        'Prepare Metadata - submission - Generate - Pennsieve',
        'Prepare Metadata - submission - Existing',
        'Prepare Metadata - submission - Existing - Local',
        'Prepare Metadata - submission - Existing - Pennsieve',
        'Prepare Metadata - dataset_description',
        'Prepare Metadata - dataset_description - Generate',
        'Prepare Metadata - dataset_description - Generate - Local',
        'Prepare Metadata - dataset_description - Generate - Pennsieve',
        'Prepare Metadata - dataset_description - Existing',
        'Prepare Metadata - dataset_description - Existing - Local',
        'Prepare Metadata - dataset_description - Existing - Pennsieve',
        'Prepare Metadata - subjects',
        'Prepare Metadata - subjects - Generate',
        'Prepare Metadata - subjects - Generate - Local',
        'Prepare Metadata - subjects - Generate - Pennsieve',
        'Prepare Metadata - subjects - Existing',
        'Prepare Metadata - subjects - Existing - Local',
        'Prepare Metadata - subjects - Existing - Pennsieve',
        'Prepare Metadata - readme',
        'Prepare Metadata - readme - Generate',
        'Prepare Metadata - readme - Generate - Local',
        'Prepare Metadata - readme - Generate - Pennsieve',
        'Prepare Metadata - readme - Existing',
        'Prepare Metadata - readme - Existing - Local',
        'Prepare Metadata - readme - Existing - Pennsieve',
        'Prepare Metadata - changes',
        'Prepare Metadata - changes - Generate',
        'Prepare Metadata - changes - Generate - Local',
        'Prepare Metadata - changes - Generate - Pennsieve',
        'Prepare Metadata - changes - Existing',
        'Prepare Metadata - changes - Existing - Local',
        'Prepare Metadata - changes - Existing - Pennsieve',
        'Prepare Metadata - manifest',
        'Prepare Metadata - manifest - Generate',
        'Prepare Metadata - manifest - Generate - Local',
        'Prepare Metadata - manifest - Generate - Pennsieve',
        'Prepare Metadata - manifest - Existing',
        'Prepare Metadata - manifest - Existing - Local',
        'Prepare Metadata - manifest - Existing - Pennsieve',
        
        
        'Generate Dataset',
        'Generate Dataset - Local',
        'Generate Dataset - Blackfynn',
        'Generate Dataset - Pennsieve',
        
        
        'Manifest Files Created',
        'Manifest Files Created - Blackfynn', 
        'Manifest Files Created - Pennsieve', 
        'Manifest Files Created - Local',
        
        
        'Download Template - manifest.xlsx',
        'Download Template - manifest.xlsx',
        'Download Template - dataset_description.xlsx',
        'Download Template - subjects.xlsx',
        'Download Template - samples.xlsx',
        'Download Template - submission.xlsx',
        
        
        'Disseminate Dataset - Share with Curation Team', 
        'Disseminate Dataset - Share with Consortium',
        'Disseminate Dataset - Pre-publishing Review',
        
        
        'Prepare Datasets - Organize dataset',
        'Prepare Datasets - Organize dataset - Existing',
        'Prepare Datasets - Organize dataset - Existing - Pennsieve',
        'Prepare Datasets - Organize dataset - Existing - Local',
        'Prepare Datasets - Organize dataset - Existing - Saved',
        
        
        'Prepare Datasets - Organize dataset - Step 3',
        'Prepare Datasets - Organize dataset - Step 3 - Import',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - New',
        
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - New',
        
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - New',
        
        
        'Prepare Datasets - Organize dataset - Step 4',
        'Prepare Datasets - Organize dataset - Step 4 - Import',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - New',
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - New',
        
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - New',
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - New',
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - New',
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve',
        
        
        'Prepare Datasets - Organize dataset - Step 7',
        'Prepare Datasets - Organize dataset - Step 7 - Generate',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - New',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Pennsieve',
        
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - New',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Pennsieve',
        
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest - Local',
        
        
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Create a duplicate',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Replace',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Merge',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Skip',
        
        'Disseminate Datasets - Show current dataset permission',
        'Disseminate Datasets - Show current dataset status',
        'Disseminate Datasets - Pre-publishing Review - Integrate ORCID iD',
        'Disseminate Datasets - Pre-publishing Review - Get Excluded Files',
        'Disseminate Datasets - Pre-publishing Review - Get Metadata Files',
        'Disseminate Datasets - Pre-publishing Review - Update excluded files',
        'Disseminate Datasets - Pre-publishing Review - Publish',
        'Disseminate Datasets - Pre-publishing Review - Submit dataset',
        'Disseminate Datasets - Pre-publishing Review - Withdraw dataset',
        'Disseminate Datasets - Pre-publishing Review - Fetch Pre-publishing Checklist Statuses',
        "Disseminate Datasets - Pre-publishing Review - Determine User's Dataset Role",
        "Disseminate Datasets - Pre-publishing Review - Show publishing status"
        
        'Disseminate Datasets - Share with Curation Team',
        "Disseminate Datasets - Share with Curation Team - Remove Consortium's Team Permissions",
        "Disseminate Datasets - Share with Curation Team - Give Consortium Team Permissions",
        "Disseminate Datasets - Share with Curation Team - Change Dataset Status to Work In Progress",
        "Disseminate Datasets - Share with Curation Team - Change Dataset Status to Ready for Curation",
        
        'Disseminate Datasets - Share with Consortium',
        'Disseminate Datasets - Share with Consortium - Removed Team Permissions SPARC Consortium',
        'Disseminate Datasets - Share with Consortium - Add Team Permissions SPARC Consortium',
        'Disseminate Datasets - Share with Consortium - Curated & Awaiting PI Approval',
        'Disseminate Datasets - Share with Consortium - Change Dataset Status to Under Embargo'
        'Disseminate Datasets - Pre-publishing Review',
        ],
    value='New/Returning Users',
    description='Option:',
    disabled=False,
)

start_date = widgets.DatePicker(description='Start Date:', disabled=False)
end_date = widgets.DatePicker(description='End Date:', disabled=False)

update_interval = widgets.Dropdown(options=['Daily', 'Weekly', 'Monthly', "No Separation"], description='Update Interval:', disabled=False)

display(feature, start_date, end_date, update_interval)

Dropdown(description='Option:', options=('New/Returning Users', 'App Launched - OS', 'App Launched - SODA', 'M…

DatePicker(value=None, description='Start Date:')

DatePicker(value=None, description='End Date:')

Dropdown(description='Update Interval:', options=('Daily', 'Weekly', 'Monthly', 'No Separation'), value='Daily…

# --------------------------------------------------------------------------------------------------

#### The cell below is a basic function that uses the widgets in the cell above to create a graph. If the widgets are not showing, run the widget cell. You don't have to run it again after selecting a value. Changing the value of the dropdown will dynamically change the value of the variable in the next cell.

# --------------------------------------------------------------------------------------------------

In [None]:
dt = start_date.value
ds = end_date.value

data, new_user_data, returning_user_data = [], [], []
column_headers = []
file_name = ""
bar_counter = 0

if update_interval.value == "Daily":
    bar_counter = progress_bar_counter(dt, ds, "Daily")
    start = end = dt
    column_headers = ['Day', 'Frequency']
    file_name = "daily"
if update_interval.value == "Weekly":
    bar_counter = progress_bar_counter(dt, ds, "Weekly")    
    start = dt - timedelta(days=dt.weekday())
    end = start + timedelta(days=6)
    column_headers = ['Week', 'Frequency']
    file_name = "weekly"
if update_interval.value == "Monthly":
    bar_counter = progress_bar_counter(dt, ds, "Monthly")
    start = end = dt
    end = end.replace(day = calendar.monthrange(start.year, start.month)[1])
    column_headers = ['Month', 'Frequency']
    file_name = "monthly"
if update_interval.value == "No Separation":
    bar_counter = 1
    start = dt
    end = ds
    column_headers = ['Time Period', 'Frequency']
    file_name = "no_Separation"
    
for i in trange(bar_counter):
    if start <= ds:
        if feature.value == "New/Returning Users":
            query = {
                'reportRequests': [
                {
                    'viewId': VIEW_ID,
                    'dateRanges': [{'startDate': start.strftime('%Y-%m-%d'), 'endDate': end.strftime('%Y-%m-%d')}],
                    'metrics': [{'expression': 'ga:users'}],
                    'dimensions': [{'name': 'ga:userType'}]
                }]
            }
        else:
            query = {
                'reportRequests': [
                {
                    'viewId': VIEW_ID,
                    'dateRanges': [{'startDate': start.strftime('%Y-%m-%d'), 'endDate': end.strftime('%Y-%m-%d')}],
                    'metrics': [{'expression': 'ga:totalEvents'}],
                    'dimensions': [{'name': 'ga:eventAction'}]
                }]
            }
            
        
            
        cell_data, new_user_cell_data, returning_user_cell_data = [], [], []
        
        if update_interval.value == "Daily":
            cell_data_date = start.strftime("%d %b, %Y")     
        if update_interval.value == "Weekly" or update_interval.value == "No Separation":
            cell_data_date = start.strftime("%d %b, %Y") + " - " + end.strftime("%d %b, %Y")    
        if update_interval.value == "Monthly":
            cell_data_date = start.strftime("%b %Y")
        
        response = response_rows = []
        response = get_report(analytics, query)
        if "rows" in response["reports"][0]["data"]:
            response_rows = response["reports"][0]["data"]["rows"]
            
        else:
            response_rows = []
            if feature.value == "New/Returning Users":
                new_user_cell_data = [cell_data_date, 0]
                new_user_data.append(new_user_cell_data)
                returning_user_cell_data = [cell_data_date, 0]
                returning_user_data.append(returning_user_cell_data)
            else:
                cell_data = [cell_data_date, 0]
                data.append(cell_data)
        
        if feature.value == "New/Returning Users":
            if response_rows != []:
                new_user = False
                returning_user = False
                for res in response_rows:
                    if (res["dimensions"][0] == "New Visitor"):
                        new_user_cell_data = [cell_data_date, int(res["metrics"][0]["values"][0])]
                        new_user_data.append(new_user_cell_data)
                        new_user = True
                    if (res["dimensions"][0] == "Returning Visitor"):
                        returning_user_cell_data = [cell_data_date, int(res["metrics"][0]["values"][0])]
                        returning_user_data.append(returning_user_cell_data)
                        returning_user = True
                if new_user == False:
                    new_user_cell_data = [cell_data_date, 0]
                    new_user_data.append(new_user_cell_data)
                if returning_user == False:
                    returning_user_cell_data = [cell_data_date, 0]
                    returning_user_data.append(returning_user_cell_data)
        else:
            if response_rows != []:
                response_present = False
                for res in response_rows:
                    if res["dimensions"][0] == feature.value:
                        cell_data = [cell_data_date, int(res["metrics"][0]["values"][0])]
                        data.append(cell_data)
                        response_present = True
                if response_present == False:
                    cell_data = [cell_data_date, 0]
                    data.append(cell_data)
        
        start, end = next_date_interval(start, end, update_interval.value)
        
folder_path = os.path.join("result_csv", "graph_data")
Path(folder_path).mkdir(parents=True, exist_ok=True)

df = new_df = returning_df = None
if feature.value == "New/Returning Users":
    
    new_df = pd.DataFrame(new_user_data, columns = column_headers)
    returning_df = pd.DataFrame(returning_user_data, columns = column_headers)
    
    new_action_column = new_df.iloc[:, 0]
    new_frequency_column = new_df.iloc[:, 1]
    new_x_markers = pd.Series(new_action_column).array
    new_y_markers = pd.Series(new_frequency_column).array
    new_y_markers = new_y_markers.astype(int)
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x = new_x_markers, y = new_y_markers, mode = 'lines', name = 'New Users'))
    
    ret_action_column = returning_df.iloc[:, 0]
    ret_frequency_column = returning_df.iloc[:, 1]
    ret_x_markers = pd.Series(ret_action_column).array
    ret_y_markers = pd.Series(ret_frequency_column).array
    ret_y_markers = ret_y_markers.astype(int)
    
    fig.add_trace(go.Scatter(x = ret_x_markers, y = ret_y_markers, mode = 'lines', name = 'Returning Users'))
    
    fig.show()
    
else:
    df = pd.DataFrame(data, columns = column_headers)
    result_path = os.path.join(folder_path, file_name + "_graph-" + dt.strftime("%d %b, %Y") + " - " + ds.strftime("%d %b, %Y") + ".csv")
    df.to_csv(result_path, encoding='utf-8', index=False)    
    df.astype({'Frequency': 'int32'}).dtypes
    
    fig = None

    if update_interval.value == "Daily":
        fig = px.line(df, x = "Day", y = "Frequency", render_mode = "auto", labels = {"Day": "Date","Frequency": "Frequency"},
            title = update_interval.value + " Chart for '" + feature.value + "': " + dt.strftime("%d %b, %Y") + " - " + ds.strftime("%d %b, %Y"))
    if update_interval.value == "Weekly":
        fig = px.line(df, x = "Week", y = "Frequency", render_mode = "auto", labels = {"Day": "Week","Frequency": "Frequency"},
            title = update_interval.value + " Chart for '" + feature.value + "': " + dt.strftime("%d %b, %Y") + " - " + ds.strftime("%d %b, %Y"))
    if update_interval.value == "Monthly":
        fig = px.line(df, x = "Month", y = "Frequency", render_mode = "auto", labels = {"Day": "Month","Frequency": "Frequency"},
            title = update_interval.value + " Chart for '" + feature.value + "': " + dt.strftime("%d %b, %Y") + " - " + ds.strftime("%d %b, %Y"))
    if update_interval.value == "No Separation":
        fig = px.scatter(df, x = "Time Period", y = "Frequency", render_mode = "auto", labels = {"Day": "Time Period","Frequency": "Frequency"},
            title = update_interval.value + " Chart for '" + feature.value + "': " + dt.strftime("%d %b, %Y") + " - " + ds.strftime("%d %b, %Y"))
        fig.update_traces(marker={'size': 15})

    fig.show()



# --------------------------------------------------------------------------------------------------

In [11]:
start = start_date.value
end = end_date.value
category_dict = {
    "Manage Dataset": {},
    "App": {},
    "Disseminate Dataset": {},
    "Generate Dataset": {},
    "Prepare Metadata": {},
    "Other": {}
}

ignore_list = ["Establishing Python Connection", 
               "App Launched - OS", 
               "App Launched - SODA",
               "App Restarted",
               "Update Downloaded",
               "Update Requested",
               "Generate Dataset - Size"
              ]

data = []

query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start.strftime('%Y-%m-%d'), 'endDate': end.strftime('%Y-%m-%d')}],
        'metrics': [{'expression': 'ga:totalEvents'}],
        'dimensions': [{'name': 'ga:eventAction'}]
    }]
}

response = get_report(analytics, query)
response_rows = response["reports"][0]["data"]["rows"]

for res in response_rows:
    response_action = res["dimensions"][0]
    response_value = res["metrics"][0]["values"][0]
    
    if(response_action in ignore_list):
        continue
    
    action_found = False
    for key in category_dict:
        if (response_action.find(key) != -1 ):
            if response_action in category_dict[key]:
                category_dict[key][response_action] += response_value
            else:
                category_dict[key][response_action] = response_value
            action_found = True
            
    if action_found == False:
        if response_action.find("Manifest Files Created") != -1:
            if response_action in category_dict["Generate Dataset"]:
                category_dict["Generate Dataset"][response_action] += response_value
            else:
                category_dict["Generate Dataset"][response_action] = response_value
        elif response_action in category_dict["Other"]:
            category_dict["Other"][response_action] += response_value
        else:
            category_dict["Other"][response_action] = response_value
    
for key in category_dict:
    for action_key in category_dict[key]:
        cell_data = [key, action_key, category_dict[key][action_key]]
        data.append(cell_data)
    
df = pd.DataFrame(data, columns = ["Action", "Subaction", "Total"])
result_path = os.path.join("test.csv")
df.to_csv(result_path, encoding='utf-8', index=False)

fig = px.sunburst(df, path=['Action', 'Subaction'], values='Total',
                  color='Subaction', hover_data=['Total'])
fig.show()

#fig.write_image("fig1.png")

In [117]:
# Using the dropdown values here
dt = start_date.value
ds = end_date.value

In [118]:
# Date format in 'YYYY-MM-DD'
# You can also use relative dates for simplicity
# start_date = "50daysAgo"
# end_date = "today"
# end_date = "yesterday"
# start_date = "2021-01-23"
# end_date = "2021-04-23"

# Comment this out to use the  regular format dates above 
start = dt.strftime('%Y-%m-%d')
end = ds.strftime('%Y-%m-%d')

In [30]:
### Create dataset ID to dataset name mapping for all datasets 

In [93]:
idNameMap = {}
# mapping only exists from January xx, 2022 onward
def createDatasetIdToNameMapping(start, end):
    query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
    }]
    }
    
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    
    for res in response_rows:
        # check if the res category is "Dataset ID to Dataset Name Map"
        if res["dimensions"][0] == "Dataset ID to Dataset Name Map":
            # get the action (the datasetId)
            did = res["dimensions"][1]
            
            # get the label (dataset name)
            dname = res["dimensions"][2]
        
            # assign the action to the label (dataset name) in the idNameMap 
            idNameMap[did] = dname
        
    
 
createDatasetIdToNameMapping(start, end)
print(idNameMap)
    

{'N:dataset:023cc33f-78c3-4ef4-92d9-e1f54b596593': 'sf', 'N:dataset:622c8302-ce0f-4de6-9b64-53c892aeb979': 'enteric studies', 'N:dataset:ada590fe-3556-4fa4-8476-0f085a00d781': 'cheerios', 'N:dataset:adb7e48c-9999-4cd7-8259-37d8a62c424d': 'Canine Epilepsy Dataset'}


### Get a list of all datasets for which the actions below have been done on.

In [24]:
all_actions = [
        #'New/Returning Users',
        #'App Launched - OS', 
        #'App Launched - SODA', 
        'Manage Dataset - Create Empty Dataset', 
        'Manage Dataset - Rename Existing Dataset', 
        'Manage Dataset - Change PI owner', 
        'Manage Dataset - Add User Permission',
        'Manage Dataset - Add/Edit Subtitle',
        'Manage Dataset - Add/Edit Description', 
        'Manage Dataset - Upload Banner Image', 
        'Manage Dataset - Assign License',
        'Manage Dataset - Upload Local Dataset', 
        'Manage Dataset - Change Dataset Status',
        
        
        'Manage Datasets - Create a new dataset',
        'Manage Datasets - Rename an existing dataset',
        'Manage Datasets - Make PI owner of dataset',
        'Manage Datasets - Add/Edit Permissions',
        'Manage Datasets - Add/Edit Permissions - Add User Permissions',
        'Manage Datasets - Add/Edit Permissions - Add Team Permissions',
        "Manage Datasets - Add/Edit Subtitle",
        "Manage Datasets - Add/Edit Subtitle - Get Subtitle",
        "Manage Datasets - Add/Edit Readme",
        "Manage Datasets - Add/Edit Readme - Get Readme",
        "Manage Datasets - Add/Edit Readme - Parse Readme",  
        "Manage Datasets - Upload a Banner Image",
        "Manage Datasets - Upload a Banner Image - Size",
        "Manage Datasets - Upload a Banner Image - Importing Banner Image",
        "Manage Datasets - Upload a Banner Image - Get Banner Image",
        "Manage Datasets - Add/Edit Tags",
        "Manage Datasets - Add/Edit Tags - Get Tags",
        "Manage Datasets - Assign a License",
        "Manage Datasets - Assign a License - Get License",
        "Manage Datasets - Upload Local Dataset",
        "Manage Datasets - Upload Local Dataset - size",
        "Manage Datasets - Upload Local Dataset - name - size",
        "Manage Datasets - Upload Local Dataset - Number of Folders",
        "Manage Datasets - Upload Local Dataset - name - Number of Folders",
        "Manage Datasets - Upload Local Dataset - Number of Files",
        "Manage Datasets - Upload Local Dataset - name - Number of Files",
        "Manage Datasets - Change Dataset Status",
        "Manage Datasets - Change Dataset Status - Get Dataset Status"
        
        
        
        'Prepare Metadata - Add Airtable account',
        'Prepare Metadata - Add DDD',
        'Prepare Metadata - Create Submission',
        'Prepare Metadata - Create dataset_description',
        'Prepare Metadata - samples',
        'Prepare Metadata - samples - Generate',
        'Prepare Metadata - samples - Generate - Local',
        'Prepare Metadata - samples - Generate - Pennsieve',
        'Prepare Metadata - samples - Existing',
        'Prepare Metadata - samples - Existing - Local',
        'Prepare Metadata - samples - Existing - Pennsieve',
        'Prepare Metadata - submission',
        'Prepare Metadata - submission - Generate',
        'Prepare Metadata - submission - Generate - Local',
        'Prepare Metadata - submission - Generate - Pennsieve',
        'Prepare Metadata - submission - Existing',
        'Prepare Metadata - submission - Existing - Local',
        'Prepare Metadata - submission - Existing - Pennsieve',
        'Prepare Metadata - dataset_description',
        'Prepare Metadata - dataset_description - Generate',
        'Prepare Metadata - dataset_description - Generate - Local',
        'Prepare Metadata - dataset_description - Generate - Pennsieve',
        'Prepare Metadata - dataset_description - Existing',
        'Prepare Metadata - dataset_description - Existing - Local',
        'Prepare Metadata - dataset_description - Existing - Pennsieve',
        'Prepare Metadata - subjects',
        'Prepare Metadata - subjects - Generate',
        'Prepare Metadata - subjects - Generate - Local',
        'Prepare Metadata - subjects - Generate - Pennsieve',
        'Prepare Metadata - subjects - Existing',
        'Prepare Metadata - subjects - Existing - Local',
        'Prepare Metadata - subjects - Existing - Pennsieve',
        'Prepare Metadata - readme',
        'Prepare Metadata - readme - Generate',
        'Prepare Metadata - readme - Generate - Local',
        'Prepare Metadata - readme - Generate - Pennsieve',
        'Prepare Metadata - readme - Existing',
        'Prepare Metadata - readme - Existing - Local',
        'Prepare Metadata - readme - Existing - Pennsieve',
        'Prepare Metadata - changes',
        'Prepare Metadata - changes - Generate',
        'Prepare Metadata - changes - Generate - Local',
        'Prepare Metadata - changes - Generate - Pennsieve',
        'Prepare Metadata - changes - Existing',
        'Prepare Metadata - changes - Existing - Local',
        'Prepare Metadata - changes - Existing - Pennsieve',
        'Prepare Metadata - manifest',
        'Prepare Metadata - manifest - Generate',
        'Prepare Metadata - manifest - Generate - Local',
        'Prepare Metadata - manifest - Generate - Pennsieve',
        'Prepare Metadata - manifest - Existing',
        'Prepare Metadata - manifest - Existing - Local',
        'Prepare Metadata - manifest - Existing - Pennsieve',
        
        
        'Generate Dataset',
        'Generate Dataset - Local',
        'Generate Dataset - Blackfynn',
        'Generate Dataset - Pennsieve',
        
        
        'Manifest Files Created',
        'Manifest Files Created - Blackfynn', 
        'Manifest Files Created - Pennsieve', 
        'Manifest Files Created - Local',
        
        
        'Download Template - manifest.xlsx',
        'Download Template - manifest.xlsx',
        'Download Template - dataset_description.xlsx',
        'Download Template - subjects.xlsx',
        'Download Template - samples.xlsx',
        'Download Template - submission.xlsx',
        
        
        'Disseminate Dataset - Share with Curation Team', 
        'Disseminate Dataset - Share with Consortium',
        'Disseminate Dataset - Pre-publishing Review',
        
        
        'Prepare Datasets - Organize dataset',
        'Prepare Datasets - Organize dataset - Existing',
        'Prepare Datasets - Organize dataset - Existing - Pennsieve',
        'Prepare Datasets - Organize dataset - Existing - Local',
        'Prepare Datasets - Organize dataset - Existing - Saved',
        
        
        'Prepare Datasets - Organize dataset - Step 3',
        'Prepare Datasets - Organize dataset - Step 3 - Import',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - New',
        
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - New',
        
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - New',
        
        
        'Prepare Datasets - Organize dataset - Step 4',
        'Prepare Datasets - Organize dataset - Step 4 - Import',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - New',
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - New',
        
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - New',
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - New',
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - New',
        
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve',
        
        
        'Prepare Datasets - Organize dataset - Step 7',
        'Prepare Datasets - Organize dataset - Step 7 - Generate',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - New',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Pennsieve',
        
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - New',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Pennsieve',
        
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest - Local',
        
        
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Create a duplicate',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Replace',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Merge',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Skip',
        
        'Disseminate Datasets - Show current dataset permission',
        'Disseminate Datasets - Show current dataset status',
        'Disseminate Datasets - Pre-publishing Review - Integrate ORCID iD',
        'Disseminate Datasets - Pre-publishing Review - Get Excluded Files',
        'Disseminate Datasets - Pre-publishing Review - Get Metadata Files',
        'Disseminate Datasets - Pre-publishing Review - Update excluded files',
        'Disseminate Datasets - Pre-publishing Review - Publish',
        'Disseminate Datasets - Pre-publishing Review - Submit dataset',
        'Disseminate Datasets - Pre-publishing Review - Withdraw dataset',
        'Disseminate Datasets - Pre-publishing Review - Fetch Pre-publishing Checklist Statuses',
        "Disseminate Datasets - Pre-publishing Review - Determine User's Dataset Role",
        "Disseminate Datasets - Pre-publishing Review - Show publishing status"
        
        'Disseminate Datasets - Share with Curation Team',
        "Disseminate Datasets - Share with Curation Team - Remove Consortium's Team Permissions",
        "Disseminate Datasets - Share with Curation Team - Give Consortium Team Permissions",
        "Disseminate Datasets - Share with Curation Team - Change Dataset Status to Work In Progress",
        "Disseminate Datasets - Share with Curation Team - Change Dataset Status to Ready for Curation",
        
        'Disseminate Datasets - Share with Consortium',
        'Disseminate Datasets - Share with Consortium - Removed Team Permissions SPARC Consortium',
        'Disseminate Datasets - Share with Consortium - Add Team Permissions SPARC Consortium',
        'Disseminate Datasets - Share with Consortium - Curated & Awaiting PI Approval',
        'Disseminate Datasets - Share with Consortium - Change Dataset Status to Under Embargo'
        'Disseminate Datasets - Pre-publishing Review',
               
               
               
               
               
              ]

# start_date = "2021-05-01"
# end_date = "2021-05-30"

In [17]:
dataset_list = []
def datasets_and_actions(start, end, category, action):
    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:totalEvents'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
        }]
    }
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    data = []

    for res in response_rows:
        if res["dimensions"][0] == category:
            if res["dimensions"][1] == action:
                cell_data = [res["dimensions"][2], res["metrics"][0]["values"][0]]
                val = res["dimensions"][2]
                
                if val not in dataset_list:
                    dataset_list.append(val)
#                 data.append(cell_data)

        
    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)

#     df = pd.DataFrame(data, columns = ['Dataset_name', 'Values']
#     result_path = os.path.join(folder_path, action + "-" + start_date + "_" + end_date + ".csv")
#     df.to_csv(result_path, encoding='utf-8', index=False)
    return

for i in trange(len(all_actions)):
    action = all_actions[i]
    datasets_and_actions(start, end, "Success", action)    

filtered_dataset_list = list(filter(lambda item: len(item) > 4, dataset_list))
print(len(filtered_dataset_list), "|||||", filtered_dataset_list)

  0%|          | 0/242 [00:00<?, ?it/s]

33 ||||| ['darwin-20.6.0', 'win32-10.0.19042', 'win32-10.0.19043', 'win32-10.0.22000', '5.0.1', '5.1.0', 'onemoretest', 'Test ds 2', 'testingFORSODA', 'testingHERE', 'testingtesttest', 'enteric studies', 'cheerios', 'half-squat', 'Number of folders local dataset', 'Number of files local dataset', '(not set)', 'Generate', 'Existing', 'DatasetTemplate', 'Guided mode', 'test soda 3', 'testing', 'testing321', 'Step 3', 'Import', 'Step 4', 'Local', 'Step 7', 'Dataset', 'Pennsieve', 'Saved', 'Number of Files']


### Get all unique datasets that have had actions performed on them update 2022

In [86]:
def is_valid_uuid(value):
    try:
        uuid.UUID(value)
        return True
    except ValueError:
        return False
    

dataset_list = []

def datasets_and_actions_update(start, end):
    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:totalEvents'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
        }]
    }
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    data = []

    for res in response_rows:
       #  print(res)
        if res["dimensions"][0] == "Success":
            lb = res["dimensions"][2]
                
            # check if the label is a UUID
            is_valid = is_valid_uuid(lb[10:])
            
                
            # if a UUID convert to dataset name 
            if is_valid:
                print(lb)
                print(idNameMap[lb])
                lb = idNameMap[lb]
                
            # check if the Action indicates a Local dataset generated in the Organize Dataset step
            if res["dimensions"][1] == "Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local" and res["dimensions"][2] != "Local":
                if lb not in dataset_list:
                    dataset_list.append(lb)
                
            # check if the label is a dataset name stored in the mapping
            if lb not in idNameMap.values():
                # any dataset that has been worked on will have been selected; creating a dataset id to name mapping
                # if the current label is not a value in that mapping then it is not a dataset name.
                # One side effect is that any 'saved' datasets in the Organize datasets section will not be
                # considered a touched dataset until they have been finalized by either being generated 
                # locally or on Pennsieve.
                # IMP: When a local dataset is created in Organize section we log its name; therefore 
                #      to track those datasets there is a check for ... - Generate - Local to grab the name out of the label
                #      and include it in the dataset list. This is the only kind of 'touched dataset' that will not
                #      have a value in the idNameMap as of January 5th, 2022
                # TODO:When a user creates a dataset in Organize datasets (either a new dataset or a saved one is finalized)
                #      create a datasetID to dataset name log.
                continue
                
            if lb not in dataset_list:
                dataset_list.append(lb)

        
    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)


    return


datasets_and_actions_update(start, end)    

print(len(dataset_list), "|||||", dataset_list)

N:dataset:622c8302-ce0f-4de6-9b64-53c892aeb979


KeyError: 'N:dataset:622c8302-ce0f-4de6-9b64-53c892aeb979'

### Get a report of all the events that occurred with a status of either "success" or "error" within a given time frame

In [None]:
query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'metrics': [{'expression': 'ga:totalEvents'}],
        'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}]
    }]
}

response = get_report(analytics, query)
response_rows = response["reports"][0]["data"]["rows"]
both_data, success_data, error_data = [], [], []

for res in response_rows:
    both_cell_data = [res["dimensions"][0], res["dimensions"][1], res["metrics"][0]["values"][0]]
    both_data.append(both_cell_data)
    if res["dimensions"][0] == "Success":
        success_cell_data = [res["dimensions"][1], res["metrics"][0]["values"][0]]
        success_data.append(success_cell_data)
    if res["dimensions"][0] == "Error":
        error_cell_data = [res["dimensions"][1], res["metrics"][0]["values"][0]]
        error_data.append(error_cell_data)

folder_path = os.path.join("result_csv", "status_count")
Path(folder_path).mkdir(parents=True, exist_ok=True)
        
df = pd.DataFrame(both_data, columns = ['Status', 'Action', 'Values'])
result_path = os.path.join(folder_path, "Both-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

df = pd.DataFrame(success_data, columns = ['Action', 'Values'])
result_path = os.path.join(folder_path, "Success-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

df = pd.DataFrame(error_data, columns = ['Action', 'Values'])
result_path = os.path.join(folder_path, "Error-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

### Get a report of all app launches within a given time frame

In [70]:
query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'metrics': [{'expression': 'ga:totalEvents'}],
        'dimensions': [{'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
    }]
}

response = get_report(analytics, query)
response_rows = response["reports"][0]["data"]["rows"]
app_launch_os, app_launch_soda = [], []

for res in response_rows:
    if res["dimensions"][0] == "App Launched - OS":
        app_launch_os_cell_data = [res["dimensions"][1], res["metrics"][0]["values"][0]]
        app_launch_os.append(app_launch_os_cell_data)
    if res["dimensions"][0] == "App Launched - SODA":
        app_launch_soda_cell_data = [res["dimensions"][1], res["metrics"][0]["values"][0]]
        app_launch_soda.append(app_launch_soda_cell_data)

folder_path = os.path.join("result_csv", "app_launched")
Path(folder_path).mkdir(parents=True, exist_ok=True)
        
df = pd.DataFrame(app_launch_os, columns = ['OS', 'Values'])
result_path = os.path.join(folder_path, "os-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

df = pd.DataFrame(app_launch_soda, columns = ['SODA Version', 'Values'])
result_path = os.path.join(folder_path, "soda_version-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

### Get a report of all unique users within a given time frame

In [83]:
query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'metrics': [{'expression': 'ga:users'}],
        'dimensions': [{'name': 'ga:userType'}]
    }]
}

response = get_report(analytics, query)
response_rows = response["reports"][0]["data"]["rows"]
data = []

for res in response_rows:
    cell_data = [res["dimensions"][0], res["metrics"][0]["values"][0]]
    data.append(cell_data)
    
folder_path = os.path.join("result_csv", "users")
Path(folder_path).mkdir(parents=True, exist_ok=True)
        
df = pd.DataFrame(data, columns = ['Type', 'Values'])
result_path = os.path.join(folder_path, "users-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

### Get a report of all new users within a given time frame

In [81]:
query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'metrics': [{'expression': 'ga:users'}, {'expression': 'ga:newUsers'}],
        'dimensions': [{'name': 'ga:userType',}]
    }]
}

response = get_report(analytics, query)
response_rows = response["reports"][0]["data"]["rows"]
data = []

for res in response_rows:
    if res["dimensions"][0] == "New Visitor":
        cell_data = ["New Users", res["metrics"][0]["values"][1]]
        data.append(cell_data)
    
folder_path = os.path.join("result_csv", "new_users")
Path(folder_path).mkdir(parents=True, exist_ok=True)
        
df = pd.DataFrame(data, columns = ['Type', 'Values'])
result_path = os.path.join(folder_path, "users-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

### Get a report of all dataset names for a specific action for a given time frame

In [None]:
def number_of_actions(start, end, category, action):
    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:totalEvents'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
        }]
    }
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    data = []

    for res in response_rows:
        if res["dimensions"][0] == category:
            if res["dimensions"][1] == action:
                cell_data = [res["dimensions"][2], res["metrics"][0]["values"][0]]
                data.append(cell_data)
        
    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)

    df = pd.DataFrame(data, columns = ['Dataset_name', 'Values'])
    result_path = os.path.join(folder_path, action + "-" + start + "_" + end + ".csv")
    df.to_csv(result_path, encoding='utf-8', index=False)
    return

## useful for getting the names of datasets where an action is applicable
## all responses go to the custom folder
# number_of_actions(start_date, end_date, <type>, <action_name>)
number_of_actions(start, end, "Success", "Manage Dataset - Create Empty Dataset")

### Get a report of all actions done on a specific dataset for a given time frame

In [None]:
def dataset_actions(start, end, dataset_name):
    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:totalEvents'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
        }]
    }
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    data = []

    for res in response_rows:
        if res["dimensions"][2].find(dataset_name) != -1 or res["dimensions"][1].find(dataset_name) != -1:
            cell_data = [res["dimensions"][0], res["dimensions"][1], res["metrics"][0]["values"][0]]
            data.append(cell_data)
        
    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)

    df = pd.DataFrame(data, columns = ['Status', 'Action', 'Values'])
    result_path = os.path.join(folder_path, dataset_name + "(actions)-" + start + "_" + end + ".csv")
    df.to_csv(result_path, encoding='utf-8', index=False)
    return

## useful for getting all the actions for a specific dataset
## all responses go to the custom folder
# dataset_actions(start_date, end_date, <Dataset_name>)
dataset_actions(start, end, "test-ps-SODA")

### Get the number of files and the size of all datasets that was uploaded through SODA for a given time frame

In [9]:
def dataset_statistics(start, end):
    data = []
            
    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:uniqueEvents'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
        }]
    }
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    
    for res in response_rows:
#         print(res)
        if res["dimensions"][1] == "Upload Local Dataset - Number of Files":
            cell_data = [res["dimensions"][0], res["dimensions"][2], 0]
            data.append(cell_data)
        if res["dimensions"][1] == "Upload Local Dataset - size":
            cell_data = [res["dimensions"][0], 0, res["dimensions"][2]]
            data.append(cell_data)
        if res["dimensions"][1] == "Generate Dataset - Number of Files":
            cell_data = [res["dimensions"][0], res["dimensions"][2], 0]
            data.append(cell_data)
        if res["dimensions"][1] == "Generate Dataset - Size":
            print(res)
            print(res["dimensions"])
            cell_data = [res["dimensions"][0], 0, res["dimensions"][2]]
            data.append(cell_data)
        
    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)

    df = pd.DataFrame(data, columns = ['Status', 'Number of Files', 'Size in (bytes)'])
    result_path = os.path.join(folder_path, "dataset_statistics-" + start + "_" + end + ".csv")
    df.to_csv(result_path, encoding='utf-8', index=False)
    return

## useful for getting all details for upload to Pennsieve for a specific time period
## all responses go to the custom folder
# num_of_files_folders_in_dataset(start_date, end_date)
dataset_statistics(start, end)

{'dimensions': ['Error', 'Generate Dataset - Size', '1'], 'metrics': [{'values': ['2']}]}
['Error', 'Generate Dataset - Size', '1']
{'dimensions': ['Error', 'Generate Dataset - Size', '102290'], 'metrics': [{'values': ['2']}]}
['Error', 'Generate Dataset - Size', '102290']
{'dimensions': ['Error', 'Generate Dataset - Size', '262446205018'], 'metrics': [{'values': ['1']}]}
['Error', 'Generate Dataset - Size', '262446205018']
{'dimensions': ['Success', 'Generate Dataset - Size', 'U01 Test'], 'metrics': [{'values': ['1']}]}
['Success', 'Generate Dataset - Size', 'U01 Test']
{'dimensions': ['Success', 'Generate Dataset - Size', 'U01 Test2'], 'metrics': [{'values': ['1']}]}
['Success', 'Generate Dataset - Size', 'U01 Test2']


### Get the number of files and the size of a specific dataset that was uploaded through SODA

In [None]:
def num_of_files_folders_in_dataset(start_date, end, dataset_name):
    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:totalEvents'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
        }]
    }
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    data = []
    
    for res in response_rows:
        if res["dimensions"][1].find("Upload Local Dataset") != -1:
            if res["dimensions"][1].find(dataset_name) != -1:
                if res["dimensions"][1].find('Number of Files') != -1:
                    cell_data = [res["dimensions"][0], res["dimensions"][2], 0]
                    data.append(cell_data)
                if res["dimensions"][1].find('- size') != -1:
                    cell_data = [res["dimensions"][0], 0, res["dimensions"][2]]
                    data.append(cell_data)
            
    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:eventValue'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'}]
        }]
    }
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    res["metrics"][0]["values"][0]
    for res in response_rows:
        if res["dimensions"][1].find("Generate Dataset") != -1:
            if res["dimensions"][2].find(dataset_name) != -1:
                if res["dimensions"][1] == 'Generate Dataset - Number of Files':
                    cell_data = [res["dimensions"][0], res["metrics"][0]["values"][0], 0]
                    data.append(cell_data)
                if res["dimensions"][1] == 'Generate Dataset - Size':
                    cell_data = [res["dimensions"][0], 0, res["metrics"][0]["values"][0]]
                    data.append(cell_data)
                
                    
    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)

    df = pd.DataFrame(data, columns = ['Status', 'Number of Files', 'Size in (bytes)'])
    result_path = os.path.join(folder_path, dataset_name + "(details)-" + start + "_" + end + ".csv")
    df.to_csv(result_path, encoding='utf-8', index=False)
    return

## useful for getting all the number of files and size for a specific dataset
## all responses go to the custom folder
# num_of_files_folders_in_dataset(start_date, end_date, <Dataset_name>)
num_of_files_folders_in_dataset(start, end, "test-ps-SODA")

### Get the number of files and the size of all datasets that was uploaded through SODA for a given time frame 2022 Update

In [133]:
def dataset_statistics_update(start, end):
            
    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:uniqueEvents'}, {'expression': 'ga:eventValue'}, {'expression': 'ga:totalEvents'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, 
                           {'name': 'ga:eventLabel'}],

        }]
    }
    data = []
    response = get_report(analytics, query)
    response_rows = response["reports"][0]["data"]["rows"]
    
    for res in response_rows:
        
        
        if res["dimensions"][0] == "Error":
            continue

        if res["dimensions"][1] == "Manage Datasets - Upload Local Dataset - Number of Files" and res["dimensions"][2] == "Number of files local dataset":
            value = int(res["metrics"][0]["values"][1])
            cell_data = [res["dimensions"][0], value , res["dimensions"][2]]
            data.append(cell_data)
        if res["dimensions"][1] == "Manage Datasets - Upload Local Dataset - size" and res["dimensions"][2] == "Size":
            value =  int(res["metrics"][0]["values"][1])
            cell_data = [res["dimensions"][0], res["dimensions"][2] , value]
            data.append(cell_data)
        if res["dimensions"][1] == "Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Number of Files" and res["dimensions"][2] == "Number of Files":
            value =  int(res["metrics"][0]["values"][1])
            cell_data = [res["dimensions"][0], value , res["dimensions"][2]]
            data.append(cell_data)
        if res["dimensions"][1] == "Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Size" and res["dimensions"][2] == "Size":
            value = int(res["metrics"][0]["values"][1])
            cell_data = [res["dimensions"][0], res["dimensions"][2], value]
            data.append(cell_data)
            
        # get number of banner image files uploaded
        if res["dimensions"][1] == "Manage Datasets - Upload a Banner Image":
            value = res["metrics"][0]["values"][2]
            cell_data = [res["dimensions"][0], value, res["dimensions"][2]]
        # aggregate size of uploaded banner image files uploaded through SODA
        if res["dimensions"][1] == "Manage Dataset - Upload Banner Image - Size":
            value = int(res["metrics"][0]["values"][1])
            cell_data = [res["dimensions"][0], res["dimensions"][2], value]
            data.append(cell_data)
            
        # count amount of metadata files created -- equivalent to the amount of times a generate action was emitted
        if res["dimensions"][1] == "Prepare Metadata - Generate":
            totalEvents = int(res["metrics"][0]["values"][2])
            cell_data = [res["dimensions"][0], totalEvents, res["dimensions"][2]]
            data.append(cell_data)
        # aggregate size of metadata files created through SODA
        if res["dimensions"][1] == "Prepare Metadata - Generate" and res["dimensions"][2] == "Size of Total Metadata Files Generated":
            value = int(res["metrics"][0]["values"][1])
            cell_data = [res["dimensions"][0], res["dimensions"][2], value]
            data.append(cell_data)
           
    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)

    df = pd.DataFrame(data, columns = ['Status', 'Number of Files', 'Size in (bytes)'])
    result_path = os.path.join(folder_path, "dataset_statistics-update" + start + "_" + end + ".csv")
    df.to_csv(result_path, encoding='utf-8', index=False)
    return

## useful for getting all details for upload to Pennsieve for a specific time period
## all responses go to the custom folder
# num_of_files_folders_in_dataset(start_date, end_date)
dataset_statistics_update(start, end)

{'dimensions': ['Success', 'Manage Datasets - Upload a Banner Image', 'N:dataset:777cb422-2e49-4b6b-ad07-0c0bab277156'], 'metrics': [{'values': ['1', '0', '2']}]}
{'dimensions': ['Success', 'Manage Datasets - Upload a Banner Image', 'N:dataset:adb7e48c-9999-4cd7-8259-37d8a62c424d'], 'metrics': [{'values': ['1', '0', '1']}]}
