In [139]:
# Parameter cell: this value is overwritten by papermill when the notebook runs on a schedule.
# True selects the local-development branches further down (profile-based Snowflake engine
# and service-account credentials read from a local file).
is_local_development = True

In [140]:
!python -m pip install gitlabdata --upgrade



In [141]:
import configparser

# import sys
import pandas as pd
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
import json, os
from pyprojroot import here
from os import environ as env

In [142]:
from gitlabdata.orchestration_utils import (
    data_science_engine_factory,
    query_dataframe,
    snowflake_engine_factory,
    snowflake_stage_load_copy_remove,
    get_env_from_profile,
    dataframe_uploader,
    write_to_gsheets,
    read_from_gsheets,
    query_executor,
    query_from_file,
)

## Create Snowflake engine

In [143]:
# The engine is created differently depending on where the notebook runs:
#  - scheduled / remote run: built from the process environment, which must also
#    provide the load and prod database names
#  - local development: built from the "sales_analytics_local" profile target

if not is_local_development:
    snowflake_engine = snowflake_engine_factory(env, "SALES_ANALYTICS")

    raw_db_name = env["SNOWFLAKE_LOAD_DATABASE"]
    prod_db_name = env["SNOWFLAKE_PROD_DATABASE"]
else:
    snowflake_engine = data_science_engine_factory(
        profile_target="sales_analytics_local"
    )

# Display the engine as the cell output
snowflake_engine

Engine(snowflake://nfiguera%40gitlab.com:***@gitlab/RAW/?authenticator=externalbrowser&role=NFIGUERA&warehouse=DEV_XS)

## Credentials for Gsheet manipulation

Remember to grant the following two service accounts access to the target Google Sheets:

- Data Team runner: data-team-sheets-sa@gitlab-analysis.iam.gserviceaccount.com
- Sales Strategy service account: service-revenue-strat-analytic@revenue-strategy-anal-411d5a72.iam.gserviceaccount.com

In [144]:
# Local runs only: read the Google service-account key file and expose its content
# through the GSHEETS_SERVICE_ACCOUNT_CREDENTIALS environment variable.
if is_local_development:
    credentials_path = here("credentials/rsa_gcloud_service_account.json")
    # alternative key file:
    # credentials_path = here("credentials/gsheet_service_file.json")

    with open(credentials_path) as credentials_file:
        raw_credentials = credentials_file.read()

    # Drop the newlines so the whole key is stored as a single line
    service_account_credentials = raw_credentials.replace("\n", "")

    # Set the credentials as an environment variable
    os.environ["GSHEETS_SERVICE_ACCOUNT_CREDENTIALS"] = service_account_credentials

# SAL Heatmap daily ETL update process

This notebook is run daily to update the SAL Heatmap source file. 

The process is responsible for updating the following datasets:

1. QTD SAL Extracts
2. Quotas
3. Regional Hist Coverages

In [145]:
# ID of the Google Sheet used as the SAL Heatmap source file
SOURCE_GSHEET_ID = "1Ufh2DKwTq25IHzZeaCo0bFyQcFNvCtC3bVnR3oOZz-g"

## Execute Snowflake queries

In [146]:
# QTD metrics: run the query stored in the SQL file and stamp every row
# with today's date as the extraction date
qtd_metrics = query_from_file(
    snowflake_engine, "sh_qtd_metrics_for_sal_heatmap.sql"
).assign(last_extracted_etl=date.today())

In [147]:
# Sanity check: total QTD booked net ARR across the extract
qtd_metrics["qtd_booked_net_arr"].sum()

21551678.799999997

In [148]:
# Historical coverages: run the query stored in the SQL file and stamp every row
# with today's date as the extraction date
hist_coverage = query_from_file(
    snowflake_engine, "sh_hist_qtr_coverages_for_sal_heatmap.sql"
).assign(last_extracted_etl=date.today())

In [149]:
# Quotas: run the query stored in the SQL file and stamp every row
# with today's date as the extraction date
fy24_quota = query_from_file(
    snowflake_engine, "sh_xactly_fy24_quotas.sql"
).assign(last_extracted_etl=date.today())

In [150]:
# Sanity check: sum of the fy24 column across the quota extract
fy24_quota["fy24"].sum()

277513293.53999996

In [151]:
# Employee IDs: run the query stored in the SQL file and stamp every row
# with today's date as the extraction date
empl_list = query_from_file(
    snowflake_engine, "sh_empid_list.sql"
).assign(last_extracted_etl=date.today())

## QTD Metrics Extracts

### Write to SAL Heatmap source gSheet file

In [152]:
# Upload the QTD extract to the "jupyter_qtd_sal" sheet of the SAL Heatmap source file.
# Column names are upper-cased before the upload.
qtd_metrics = qtd_metrics.rename(columns=str.upper)

sheet_id = SOURCE_GSHEET_ID
sheet_name = "jupyter_qtd_sal"
write_to_gsheets(sheet_id, sheet_name, qtd_metrics)

ERROR:root:<HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/1Ufh2DKwTq25IHzZeaCo0bFyQcFNvCtC3bVnR3oOZz-g:batchUpdate?fields=replies%2FaddSheet&alt=json returned "Invalid requests[0].addSheet: A sheet with the name "jupyter_qtd_sal" already exists. Please enter another name.">


## Historical QTR Coverages Data Extract

In [153]:
# Upload the historical coverages to the "jupyter_hist_qtr_aggregated" sheet of the
# SAL Heatmap source file. Column names are upper-cased before the upload.
hist_coverage = hist_coverage.rename(columns=str.upper)

sheet_id = SOURCE_GSHEET_ID
sheet_name = "jupyter_hist_qtr_aggregated"
write_to_gsheets(sheet_id, sheet_name, hist_coverage)

ERROR:root:<HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/1Ufh2DKwTq25IHzZeaCo0bFyQcFNvCtC3bVnR3oOZz-g:batchUpdate?fields=replies%2FaddSheet&alt=json returned "Invalid requests[0].addSheet: A sheet with the name "jupyter_hist_qtr_aggregated" already exists. Please enter another name.">


## FY24 Quota

In [154]:
# Upload the quota extract to the "jupyter_fy_quota" sheet.
# This dataset goes to its own spreadsheet, not to SOURCE_GSHEET_ID, and its
# column names are written as returned by the query (no upper-casing).
sheet_name = "jupyter_fy_quota"
sheet_id = "1Pa3bh2wp1mPCtln0_3mH-r627IEJA5yofAFAir_kvBo"
write_to_gsheets(sheet_id, sheet_name, fy24_quota)

ERROR:root:<HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/1Pa3bh2wp1mPCtln0_3mH-r627IEJA5yofAFAir_kvBo:batchUpdate?fields=replies%2FaddSheet&alt=json returned "Invalid requests[0].addSheet: A sheet with the name "jupyter_fy_quota" already exists. Please enter another name.">


## Employee List 

In [155]:
# Upload the employee list to the "jupyter_empl_id" sheet of the SAL Heatmap source file.
# Column names are written as returned by the query (no upper-casing).
sheet_name = "jupyter_empl_id"
sheet_id = SOURCE_GSHEET_ID
write_to_gsheets(sheet_id, sheet_name, empl_list)

ERROR:root:<HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/1Ufh2DKwTq25IHzZeaCo0bFyQcFNvCtC3bVnR3oOZz-g:batchUpdate?fields=replies%2FaddSheet&alt=json returned "Invalid requests[0].addSheet: A sheet with the name "jupyter_empl_id" already exists. Please enter another name.">
