In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so that edits to the project source are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import datetime

import pandas as pd
import tentaclio

from phoenix.common import artifacts, run_params, utils
from phoenix.tag.topic import single_feature_match_topic_config as sfm_topic_config
from phoenix.tag.labelling import generate_label_sheet 
from phoenix.tag.labelling import utils as labelling_utils
from phoenix.tag import normalise

In [None]:
# Project-level notebook initialisation from phoenix.common.utils.
# NOTE(review): presumably configures display options and logging handlers —
# confirm against the helpers' implementations.
utils.setup_notebook_output()
utils.setup_notebook_logging()

In [None]:
# Parameters
# Default values for this run.
# NOTE(review): presumably overridden by a notebook runner (e.g. papermill) — confirm.
# See phoenix/common/run_datetime.py for the expected format of RUN_DATETIME.
RUN_DATETIME = None
TENANT_ID = None

# See phoenix/common/artifacts/registry_environment.py for the expected format of this key.
ARTIFACTS_ENVIRONMENT_KEY = "local"

# Filters selecting the batch (year, month, object type) to process.
YEAR_FILTER = 2021
# Month number without zero padding (e.g. 8 for August).
MONTH_FILTER = 8
# Object type of the batch; the account object type and worksheet name are derived from it.
OBJECT_TYPE = "facebook_posts"


In [None]:
# OUTPUT
# Destination in Google Sheets: the spreadsheet name is derived from the tenant
# id and the worksheet name from the account object type.
SPREADSHEET_NAME = f"{TENANT_ID}_manual_data_labelling"
ACCOUNT_OBJECT_TYPE = labelling_utils.get_account_object_type(OBJECT_TYPE)
WORKSHEET_NAME = f"{ACCOUNT_OBJECT_TYPE}_to_label"

# Build the run parameters from the environment key, tenant and run datetime.
cur_run_params = run_params.general.create(
    ARTIFACTS_ENVIRONMENT_KEY,
    TENANT_ID,
    RUN_DATETIME,
)

# Values handed to the artifact URL registry. The year and month filters are
# coerced to int (presumably because injected parameters may arrive as strings).
url_config = dict(
    YEAR_FILTER=int(YEAR_FILTER),
    MONTH_FILTER=int(MONTH_FILTER),
    OBJECT_TYPE=OBJECT_TYPE,
)

# Input folder URL of the objects for tagging, and the tenant's Google Drive folder id.
TAGGING_RUNS_URL_FEATURES_FOR_TAGGING_FOLDER = cur_run_params.art_url_reg.get_url(
    "tagging_runs-objects_for_tagging",
    url_config,
)
TENANT_FOLDER_ID = cur_run_params.tenant_config.google_drive_folder_id

In [None]:
# Display params.
print(
TAGGING_RUNS_URL_FEATURES_FOR_TAGGING_FOLDER,
cur_run_params.run_dt.dt,
cur_run_params.tenant_config,
YEAR_FILTER,
MONTH_FILTER,
ACCOUNT_OBJECT_TYPE,
WORKSHEET_NAME,
sep='\n',
)

In [None]:
# Client handle reused by the Google Sheets read and write calls below.
# NOTE(review): presumably authenticates using ambient credentials — confirm
# against artifacts.google_sheets.get_client.
google_client = artifacts.google_sheets.get_client()

In [None]:
# Fetch the current contents of the labelling worksheet from the tenant's
# Drive folder. Later cells treat a zero-length result as "nothing labelled yet".
labeled_objects_df = artifacts.google_sheets.get(
    google_client, TENANT_FOLDER_ID, SPREADSHEET_NAME, WORKSHEET_NAME
)

In [None]:
# Load the batch's objects for tagging from the artifact folder URL.
# NOTE(review): presumably reads every file under the folder and merges them
# into one DataFrame — confirm against phoenix.tag.normalise.merge.
df = normalise.merge(TAGGING_RUNS_URL_FEATURES_FOR_TAGGING_FOLDER)

In [None]:
# Inspect the merged objects (rendered by the notebook as the cell output).
df

In [None]:
# Compare the freshly loaded objects against the sheet contents on the
# "object_user_url" column.
# NOTE(review): presumably keeps only rows of df that are not already in
# labeled_objects_df — confirm against labelling_utils.filter_out_duplicates.
appendable_data_df = labelling_utils.filter_out_duplicates(
    labeled_objects_df,
    df,
    "object_user_url",
)

In [None]:
# Inspect the rows that remain after duplicate filtering.
appendable_data_df

In [None]:
# Build the account labelling rows from the filtered objects.
# with_user_notes is True only when the existing worksheet has zero rows.
account_labelling_df = generate_label_sheet.create_account_labelling_dataframe(
    appendable_data_df,
    with_user_notes=not len(labeled_objects_df),
)
# Replace missing values with empty strings.
account_labelling_df = account_labelling_df.fillna("")

In [None]:
# Stack the newly generated rows beneath those already in the worksheet and
# replace any missing values with empty strings.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0. With its defaults, concat matches
# append's defaults: original row indexes are kept and columns are not sorted.
labelling_df_to_push = pd.concat(
    [labeled_objects_df, account_labelling_df]
).fillna("")

In [None]:
# Write the combined rows (existing plus newly generated) back to the same
# spreadsheet and worksheet they were read from.
# NOTE(review): presumably overwrites the worksheet contents — confirm
# against artifacts.google_sheets.persist.
artifacts.google_sheets.persist(
    google_client, TENANT_FOLDER_ID, SPREADSHEET_NAME, WORKSHEET_NAME, labelling_df_to_push
)