## Setup

Requires a HubSpot private app whose API access token is exported as the `HUBSPOT_ACCESS_TOKEN` environment variable. The app needs the following scopes:

- `crm.schemas.custom.read`
- `crm.objects.custom.read`
- `crm.objects.custom.write`
- `crm.objects.companies.read`
- `crm.schemas.contacts.read`
- `crm.objects.contacts.read`
- `crm.schemas.companies.read`
- `sales-email-read`

In [None]:
import os

from hubspot import HubSpot
import pandas as pd


# Token is read from the environment; a missing variable raises KeyError here.
ACCESS_TOKEN = os.environ["HUBSPOT_ACCESS_TOKEN"]

# Object types requested as associations and checked against the vendor object.
ASSOCIATION_TYPES = [
    "calls",
    "emails",
    "meetings",
    "notes",
    "tasks",
]

# API client shared by the request cells in this notebook.
hubspot = HubSpot(access_token=ACCESS_TOKEN)

In [None]:
def write_json_records(df: pd.DataFrame, file_path: str) -> None:
    """Write *df* to ``data/<file_path>`` as an indented JSON array of records.

    Args:
        df: DataFrame to serialize; each row becomes one JSON object.
        file_path: File name (or relative path) below the ``data`` directory.
    """
    target = f"data/{file_path}"
    # ``to_json`` does not create missing directories, so make sure the output
    # directory exists instead of failing on a fresh checkout without ``data/``.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    df.to_json(target, orient="records", indent=2)

In [None]:
# fetch the property definitions of the companies object (archived=False)
company_props = hubspot.crm.properties.core_api.get_all(
    object_type="companies",
    archived=False,
)

In [None]:
# flatten the "results" records of the response into a DataFrame and save them
company_props_df = pd.json_normalize(
    company_props.to_dict(),
    record_path="results",
)
write_json_records(company_props_df, "company_props.json")

In [None]:
# fetch all companies with the selected properties and associations,
# converting each API object to a plain dict
companies = [
    company.to_dict()
    for company in hubspot.crm.companies.get_all(
        properties=["company_type", "domain", "name"],
        associations=ASSOCIATION_TYPES,
    )
]

In [None]:
# flatten the company dicts into a DataFrame, storing the domain as a categorical
companies_df = pd.json_normalize(companies).astype(
    {"properties.domain": "category"}
)
write_json_records(companies_df, "companies.json")

In [None]:
# dump the distinct company types to a text file, separated by the OS line ending
companies_df["properties.company_type"].unique().tofile(
    "data/company_types.txt",
    sep=os.linesep,
)

In [None]:
# keep only the companies whose company_type is exactly "Vendor"
vendor_companies_df = companies_df.loc[
    companies_df["properties.company_type"].eq("Vendor")
]
write_json_records(vendor_companies_df, "companies_vendors.json")

In [None]:
# fetch the property definitions of the vendors object (archived=False)
vendor_props = hubspot.crm.properties.core_api.get_all(
    object_type="vendors",
    archived=False,
)

In [None]:
# flatten the "results" records of the response into a DataFrame and save them
vendor_props_df = pd.json_normalize(
    vendor_props.to_dict(),
    record_path="results",
)
write_json_records(vendor_props_df, "vendor_props.json")

In [None]:
# fetch all vendor objects with the selected properties,
# converting each API object to a plain dict
vendors = [
    vendor.to_dict()
    for vendor in hubspot.crm.objects.get_all(
        "vendors",
        properties=["domain", "vendor_name"],
    )
]

In [None]:
# flatten the vendor dicts into a DataFrame, storing the domain as a categorical
vendors_df = pd.json_normalize(vendors).astype(
    {"properties.domain": "category"}
)
write_json_records(vendors_df, "vendors.json")

# drop custom object vendors that have no domain property;
# these were used for testing the vendor object import
has_domain = vendors_df["properties.domain"].notna()
vendors_df = vendors_df.loc[has_domain]
write_json_records(vendors_df, "vendors_with_domains.json")

In [None]:
# confirming associations exist for the vendor custom object:
# look up the association types from "vendors" to each type in ASSOCIATION_TYPES
# and outer-merge every response into one DataFrame
vendor_associations_df = pd.DataFrame(columns=["name", "id"])
for association_type in ASSOCIATION_TYPES:
    vendor_associations = hubspot.crm.associations.schema.types_api.get_all(
        "vendors",
        association_type,
    )
    df = pd.json_normalize(
        vendor_associations.to_dict(),
        record_path="results",
    )
    vendor_associations_df = vendor_associations_df.merge(right=df, how="outer")
write_json_records(vendor_associations_df, "vendor_associations.json")

In [None]:
# confirming reverse associations exist for the vendor custom object:
# each type in ASSOCIATION_TYPES is passed first and "vendors" second
# seed frame supplies the "name"/"id" columns for the outer merges below
# (presumably the API results carry matching columns; verify against a response)
reverse_vendor_associations_df = pd.DataFrame(columns=["name", "id"])
for association_type in ASSOCIATION_TYPES:
    reverse_vendor_associations = hubspot.crm.associations.schema.types_api.get_all(association_type, "vendors")
    df = pd.json_normalize(reverse_vendor_associations.to_dict(), "results")
    reverse_vendor_associations_df = reverse_vendor_associations_df.merge(df, how="outer")
# plain string literal: the file name has no placeholders, so no f-string is needed
write_json_records(reverse_vendor_associations_df, "vendor_associations_reverse.json")