In [1]:
import ast

import json

import pandas as pd

# Input CSV describing the ontology and the JSON file generated from it.
raw_path = "./data/schema/ontology.csv"
output_path = "./data/schema/ontology.json"

# Palette for domains; the schema-building cell picks a color from this list
# by each domain's position.
colors = [
    "#ff6961",
    "#ffb480",
    "#f8f38d",
    "#42d6a4",
    "#08cad1",
    "#59adf6",
    "#5BC236",
    "#c780e8",
    "#f66d9b",
    "#9561e2",
]

# Maps CSV column headers to the column names used in this notebook. The keys
# also select which CSV columns are kept when the file is loaded.
col_map = {
    "domain": "domain",
    "slot_name": "slot",
    "possible_slot_values": "value",
    "categorical_values": "categorical_values",
    "extractive_values": "extractive_values",
    "abstractive_values": "abstractive_values",
    "time_values": "time_values",
    "Group ID": "group_id",
    "Must have": "must_have",
    "Global": "global",
    "Important slot(have chance to be improved)": "important_slot",
    "Parking Lot": "Parking Lot",
    "Driveway": "Driveway",
    "Highway": "Highway",
    "Roadway": "Roadway",
    "Intersection": "Intersection",
}
def _normalize_value(value):
    """Return ``value`` unchanged if ``value.isupper()``, otherwise title-cased."""
    return value if value.isupper() else value.title()


# Keep only the mapped columns (in `col_map` order) and rename them.
df = pd.read_csv(raw_path)[list(col_map)].rename(columns=col_map)
# Each "value" cell is parsed with ast.literal_eval and every entry of the
# resulting sequence has its casing normalized.
df["value"] = df["value"].apply(
    lambda raw: [_normalize_value(entry) for entry in ast.literal_eval(raw)]
)
# One row per (domain, slot) pair; drop_duplicates keeps the first occurrence.
df = df.drop_duplicates(subset=["domain", "slot"])
df

Unnamed: 0,domain,slot,value,categorical_values,extractive_values,abstractive_values,time_values,group_id,must_have,global,important_slot,Parking Lot,Driveway,Highway,Roadway,Intersection
0,Adjuster,Explain Coverages,[],0,1,0,0,,,,,,,,,
1,Adjuster,Permission to Record,"[Yes, No]",1,0,0,0,,1.0,,,,,,,
2,Adjuster,Set up Inspection,"[Photo Claim, Field Assignment]",1,0,0,0,,,,,,,,,
3,Adjuster,Set up Rental,"[Yes, No]",1,0,0,0,,,,,,,,,
4,ContactInfo,First Name,[],0,1,0,0,1.0,1.0,,1.0,,,,,
5,ContactInfo,Last Name,[],0,1,0,0,1.0,1.0,,1.0,,,,,
6,ContactInfo,Home Address,[],0,1,0,0,1.0,,,,,,,,
7,ContactInfo,Phone Number,[],0,1,0,0,1.0,,,,,,,,
8,ContactInfo,Email Address,[],0,1,0,0,,,,,,,,,
9,ContactInfo,Policy Number,[],0,1,0,0,,,,,,,,,


In [2]:
# Build the nested domain -> slot -> value schema. IDs are positional strings:
# "<domain_idx>", "<domain_idx>-<slot_idx>", "<domain_idx>-<slot_idx>-<value_idx>".
schema = []
for domain_idx, (domain, domain_rows) in enumerate(df.groupby("domain")):
    slots = []
    for slot_idx, (_, row) in enumerate(domain_rows.iterrows()):
        values = [
            {
                "id": f"{domain_idx}-{slot_idx}-{value_idx}",
                "val": value,
            }
            for value_idx, value in enumerate(row["value"])
        ]
        slots.append({
            "id": f"{domain_idx}-{slot_idx}",
            "val": row["slot"],
            "categorical_value": bool(row["categorical_values"]),
            "abstractive_value": bool(row["abstractive_values"]),
            "extractive_value": bool(row["extractive_values"]),
            "time_value": bool(row["time_values"]),
            "values": values,
        })
    schema.append({
        "id": f"{domain_idx}",
        "val": domain,
        # Wrap around the palette so having more domains than colors reuses
        # colors instead of raising IndexError.
        "color": colors[domain_idx % len(colors)],
        "slots": slots,
    })


In [3]:
# Referent labels, in the order that determines their integer ids below.
referents = [
    "Global",
    "Caller",
    "Other Driver",
    "Caller's Passenger",
    "Other Driver's Passenger",
    "Witness",
]

# Every referent shares one color. The previous literal "#f9800" had five hex
# digits, which is not a valid hex color; "#ff9800" is presumed to be the
# intended value -- TODO confirm with whoever consumes the JSON.
referent_color = "#ff9800"

# One entry per referent: integer id (list position), label, and color.
referent_schema = [
    {
        "id": ref_id,
        "val": ref,
        "color": referent_color,
    }
    for ref_id, ref in enumerate(referents)
]

# Bundle both schemas under a new name instead of rebinding `schema`, so that
# re-running this cell does not nest the previous result inside itself.
ontology = {
    "schema": schema,
    "referent_schema": referent_schema,
}

# Write the combined ontology as indented JSON.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(ontology, f, indent=4)