In [None]:
!pip install -qU openai tiktoken langchain_openai pydantic_settings

In [None]:
import os

# OpenAI connection settings, exported as environment variables.
# The "..." values are placeholders: fill them in locally and never commit
# real credentials.
os.environ.update(
    {
        "OPENAI_API_KEY": "...",
        "OPENAI_API_BASE": "...",
    }
)

In [None]:
from experiments.exp2025_03_27_create_graphs_by_keys.keys2graph.pipeline import Pipeline
import json
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Keys handed to `pipe.generate_graphs_by_keys(keys=...)` further down.
# Order is preserved exactly; one entry per line keeps diffs readable.
key_list = [
    "topic",
    "sub_topic",
    "bot_goal",
    "success_criteria",
    "context_info",
    "language",
    "formality_level",
    "emotional_tone",
    "lexical_diversity",
    "use_of_jargon",
    "max_dialog_depth",
    "max_branching_factor",
    "mandatory_nodes",
    "optional_nodes",
    "start_node",
    "user_intents",
    "intent_hierarchy",
    "required_slots",
    "follow_up_questions",
    "fallback_strategy",
    "confirmation_needed",
    "max_dialog_length",
    "alternate_paths",
    "escalation_policy",
    "user_feedback_collection",
    "user_persona",
    "dynamic_content",
]

In [None]:
# Remove keys for the experiment.
# The list is filtered in place instead of calling list.remove(), so this cell
# is idempotent: re-running it no longer raises ValueError once the key has
# already been removed. Slice assignment keeps the same list object.
excluded_keys = {"mandatory_nodes"}
key_list[:] = [key for key in key_list if key not in excluded_keys]

In [None]:
# Experiment directory and the files derived from it.
# NOTE(review): "/content/..." is an absolute path (looks like a Colab layout);
# adjust ROOT when running elsewhere.
ROOT = "/content/experiments/exp2025_03_27_create_graphs_by_keys"
# File with the original dialog graphs.
src_graphs = "/".join((ROOT, "test_original_graphs", "graphs_level_1.json"))
# File the similarity metrics are read from for visualisation.
similar_json = "/".join((ROOT, "triplet_similarity.json"))

In [None]:
# Create the Pipeline instance that the following cells call into
# (loading graphs, generation, similarity calculation).
pipe = Pipeline()

In [None]:
# Select the file with the original dialog graphs (`src_graphs`) and pass it
# to the pipeline's loader; the returned value is kept in `all_data`.
all_data = pipe.load_dialog_graphs(src_graphs)

In [None]:
# Load previously annotated data (run this only if the file is ready) and
# attach it to the pipeline.
# NOTE(review): this writes a private attribute of Pipeline directly; confirm
# there is no public setter for it.
with open("/content/annotated_data.json", encoding="utf-8") as annotation_file:
    annotations = json.load(annotation_file)
pipe._original_graphs_annotation = annotations

In [None]:
# Select the original dialog graphs to test: "all" or a list of ids.
# The graphs file is `src_graphs`; the previously used name `source_file` is
# not defined in any visible cell and raised NameError on a fresh kernel.
test_graphs = pipe.add_graphs_to_test("all", src_graphs)

In [None]:
# Generate graphs by keys and save the data to `gen_out`.
gen_out = "generated_data.json"
generation_kwargs = {
    # `keys` may also be a subset such as ["topic", "sub_topic"], or "all".
    "keys": key_list,
    "model_name": "o1-mini",
    # Set 0 for "o1-mini".
    "temperature": 0.0,
    "output_file": gen_out,
}
pipe.generate_graphs_by_keys(**generation_kwargs)

In [None]:
# Calculate the similarity metrics.
# NOTE(review): presumably the results are written to `similar_json`, which the
# visualisation cell reads back; confirm against the Pipeline implementation.
pipe.calculate_graphs_similarity(similar_json)

In [None]:
# Metrics visualisation: summarise and plot `similarity_avg` over all graph pairs.
with open(similar_json, encoding="utf-8") as f:
    data = json.load(f)

# A missing "pairs" key is treated as "no pairs".
similarity_values = [pair["similarity_avg"] for pair in data.get("pairs", [])]

# Guard the statistics: np.mean / np.var of an empty list yield NaN plus a warning.
mean_val = np.mean(similarity_values) if similarity_values else 0.0
var_val  = np.var(similarity_values)  if similarity_values else 0.0

print(f"Average similarity_avg : {mean_val:.4f}")
# np.var computes the (population) variance, so label it as such.
print(f"Variance               : {var_val:.6f}")

# Explicit figure/axes interface instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(8, 4))
# Roughly one bin per two values, clamped to the range 5..10.
bins = max(5, min(10, len(similarity_values)//2 or 5))
ax.hist(similarity_values,
        bins=bins,
        edgecolor="black",        # bar outline
        linewidth=1.2,
        alpha=0.85)
ax.set_title("Distribution of similarity_avg across graph pairs")
ax.set_xlabel("similarity_avg")
ax.set_ylabel("Frequency")
fig.tight_layout()
plt.show()
