# Generate and plot the `class_cooccurrence` graph for `object_type`s

Can be configured to use `topic` instead of `class`.
Can also be configured to make the graph for all `object_type`s.


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from typing import Dict

import datetime
import pprint

import pandas as pd
from IPython.display import display

from phoenix.common import artifacts, utils
from phoenix.tag.graphing import graph_utilities
from phoenix.tag.graphing import run_params as graphing_run_params
from phoenix.tag.graphing import phoenix_graphistry
from phoenix.tag.graphing import class_cooccurences_count

In [None]:
# Notebook setup: delegate output and logging configuration to the shared
# helpers in `phoenix.common.utils`.
utils.setup_notebook_output()
utils.setup_notebook_logging()

In [None]:
# Parameters
# See phoenix/common/run_datetime.py for the expected format of this parameter
RUN_DATETIME = None
TENANT_ID = None
# See phoenix/common/artifacts/registry_environment.py for the expected format of this parameter
ARTIFACTS_ENVIRONMENT_KEY = "local"

# Filters (passed to the run params as `year_filter` and `month_filter`)
YEAR_FILTER = 2022
MONTH_FILTER = 1

# Object type to graph; also used to form the `final-{OBJECT_TYPE}_classes` input dataset key
OBJECT_TYPE = "facebook_posts"


# Output URL overrides (if `None` then will form URLs as per URL mappers and config)
EDGES_URL = None
NODES_URL = None
GRAPHISTRY_REDIRECT_HTML_URL = None

In [None]:
# Constants

# Graph type identifier handed to the run params
GRAPH_TYPE = "class_cooccurrence"

# Artifact keys of the datasets this notebook takes as input
INPUT_DATASETS_ARTIFACT_KEYS = [f"final-{OBJECT_TYPE}_classes", "final-accounts"]



In [None]:
# Build the run parameters for this graphing run from the notebook parameters
# and constants defined above.
run_params = graphing_run_params.create(
    # Environment, tenant and run identification
    artifacts_environment_key=ARTIFACTS_ENVIRONMENT_KEY,
    tenant_id=TENANT_ID,
    run_datetime_str=RUN_DATETIME,
    # What is graphed
    object_type=OBJECT_TYPE,
    graph_type=GRAPH_TYPE,
    # Filters
    year_filter=YEAR_FILTER,
    month_filter=MONTH_FILTER,
    # Inputs
    input_datasets_artifact_keys=INPUT_DATASETS_ARTIFACT_KEYS,
    # Output URL overrides (`None` leaves URL formation to the run params)
    edges_url=EDGES_URL,
    nodes_url=NODES_URL,
    graphistry_redirect_html_url=GRAPHISTRY_REDIRECT_HTML_URL,
)

In [None]:
# Pretty-print the resolved run parameters so they are visible in the notebook output.
pprint.pprint(run_params)

In [None]:
# Fetch each input dataset from its URL and keep the dataframes under the same
# keys used by `run_params.urls.input_datasets`.
input_datasets: Dict[str, pd.DataFrame] = {
    dataset_key: artifacts.dataframes.get(dataset_url).dataframe
    for dataset_key, dataset_url in run_params.urls.input_datasets.items()
}

In [None]:
# Key of the classes dataset for the configured object type. It is defined here
# because no other cell defines `object_type_dataset_key`; it must stay equal to
# the matching entry in `INPUT_DATASETS_ARTIFACT_KEYS`.
# NOTE(review): assumes `input_datasets` is keyed by artifact key — confirm
# against `graphing_run_params`.
object_type_dataset_key = f"final-{OBJECT_TYPE}_classes"

# Compute the co-occurrence edges and nodes from the classes dataset, using the
# `object_id` and `class` columns.
edges, nodes = class_cooccurences_count.process(
    objects_df=input_datasets[object_type_dataset_key],
    object_id_col="object_id",
    class_col="class",
)

In [None]:
# Inspect the results. `DataFrame.info()` prints its summary itself and returns
# None, so it is called directly; wrapping it in `display` would additionally
# render a stray `None` in the cell output.
edges.info()
display(edges.head())
nodes.info()
display(nodes.head())

In [None]:
# Plot configuration for the `class` column of the configured object type.
plot_config = class_cooccurences_count.get_plot_config(
    class_col="class",
    object_type=OBJECT_TYPE,
)

In [None]:
# Persist the edges and nodes dataframes at the URLs held in the run params.
# The return values are assigned to `_` so the notebook does not echo them.
_ = artifacts.dataframes.persist(run_params.urls.edges, edges)
_ = artifacts.dataframes.persist(run_params.urls.nodes, nodes)

In [None]:
# Plot the graph through the phoenix graphistry wrapper, using the tenant id as
# the graph name prefix. The return value is treated as the URL of the graph.
graph_url = phoenix_graphistry.plot(
    edges=edges,
    nodes=nodes,
    config=plot_config,
    graph_name_prefix=run_params.general.tenant_config.id
)
# Echo the URL in the cell output.
graph_url

In [None]:
# Form the redirect HTML for the graph URL (presumably a page that redirects to
# `graph_url` — see `phoenix_graphistry.form_redirect_html`).
redirect_html = phoenix_graphistry.form_redirect_html(graph_url)
# Echo the HTML in the cell output.
redirect_html

In [None]:
# Save the redirect HTML string at the URL held in the run params.
# The return value is assigned to `_` so the notebook does not echo it.
_ = graph_utilities.save_str_as_html(redirect_html, run_params.urls.graphistry_redirect_html)