In [None]:
import json

import pandas as pd

**Read the raw JSON data**

For each type (`tickets`, `companies`, `customers`, `statuses`, `users`), load the raw JSON and build
an index for easier lookup later. Companies are indexed by `name`; every other type is indexed by `id`.

In [None]:
# Load the Desk ticket export. JSON text is UTF-8, so decode it explicitly
# rather than relying on the platform's default encoding (which is not UTF-8
# on every OS and would garble or reject non-ASCII ticket content).
with open("data/desk/tickets.json", encoding="utf-8") as fp:
    desk_tickets_json = json.load(fp)

print("tickets:", len(desk_tickets_json))

# Ticket id -> full ticket object (a later duplicate id overwrites an earlier one).
desk_tickets_json_index = {ticket["id"]: ticket for ticket in desk_tickets_json}

# Flatten every ticket's threads into one list, tagging each thread with the
# id of the ticket it belongs to. Keys already present on the thread win over
# the injected "ticketId" because the thread is unpacked last.
desk_threads_json = [
    {"ticketId": ticket["id"], **thread}
    for ticket in desk_tickets_json
    for thread in ticket["threads"]
]

In [None]:
# Load the Desk company export. Decode explicitly as UTF-8 (the JSON
# interchange encoding) instead of using the platform default encoding.
with open("data/desk/companies.json", encoding="utf-8") as fp:
    desk_companies_json = json.load(fp)

print("companies:", len(desk_companies_json))

# Company name -> company object. Companies with an empty/missing-value name
# are skipped because they cannot be looked up by name.
desk_companies_json_index = {
    company["name"]: company
    for company in desk_companies_json
    if company["name"]
}

In [None]:
# The customer export is split across two files; concatenate them in order.
desk_customers_json = []

for customers_path in ["data/desk/customers-1.json", "data/desk/customers-2.json"]:
    # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
    # using the platform default encoding.
    with open(customers_path, encoding="utf-8") as fp:
        desk_customers_json.extend(json.load(fp))

print("customers:", len(desk_customers_json))

# Customer id -> customer object.
desk_customers_json_index = {customer["id"]: customer for customer in desk_customers_json}

In [None]:
# Load the Desk ticket statuses. Decode explicitly as UTF-8 (the JSON
# interchange encoding) instead of using the platform default encoding.
with open("data/desk/statuses.json", encoding="utf-8") as fp:
    desk_statuses_json = json.load(fp)

# Status id -> status name (only the name is kept, not the whole object).
desk_statuses_json_index = {status["id"]: status["name"] for status in desk_statuses_json}

In [None]:
# Load the Desk users. Decode explicitly as UTF-8 (the JSON interchange
# encoding) instead of using the platform default encoding.
with open("data/desk/users.json", encoding="utf-8") as fp:
    desk_users_json = json.load(fp)

print("users:", len(desk_users_json))

# User id -> user object.
desk_users_json_index = {user["id"]: user for user in desk_users_json}

**Helpers for creating Hubspot formatted data**

Using the raw JSON data from above, convert each Desk ticket to Hubspot field names and formats.

In [None]:
def get_ticket_data(ticket):
    """Build the Hubspot ticket fields for one Desk ticket.

    Args:
        ticket: Desk ticket mapping; must provide "id", "ticketStatusID"
            and "subject".

    Returns:
        A dict with "ticket_id", "ticket_status", "ticket_name" and
        "ticket_pipeline". The status is the name found in
        ``desk_statuses_json_index`` for the ticket's status id, or "CLOSED"
        when that id is not in the index. The name is the subject followed by
        the ticket id in parentheses.

    Raises:
        KeyError: if the ticket lacks one of the required keys.
    """
    ticket_id = ticket["id"]
    status_name = desk_statuses_json_index.get(ticket["ticketStatusID"], "CLOSED")
    display_name = f"{ticket['subject']} ({ticket['id']})"

    return {
        "ticket_id": ticket_id,
        "ticket_status": status_name,
        "ticket_name": display_name,
        "ticket_pipeline": "Support Pipeline",
    }

In [None]:
def get_contact_data(ticket):
    """Build the Hubspot contact fields for the customer attached to a ticket.

    Args:
        ticket: Desk ticket mapping; its optional "customerID" is looked up in
            ``desk_customers_json_index``.

    Returns:
        A dict with "contact_first_name", "contact_last_name",
        "contact_email" and "contact_job_title", or an empty dict when the
        ticket has no (truthy) customer id or the id is not in the index.

    Raises:
        KeyError: if the matched customer lacks one of the copied fields.
    """
    customer_id = ticket.get("customerID")
    if not customer_id or customer_id not in desk_customers_json_index:
        return {}

    customer = desk_customers_json_index[customer_id]
    # (Hubspot column, Desk customer key) pairs, copied in this order.
    field_map = (
        ("contact_first_name", "firstName"),
        ("contact_last_name", "lastName"),
        ("contact_email", "email"),
        ("contact_job_title", "jobTitle"),
    )
    return {hubspot_key: customer[desk_key] for hubspot_key, desk_key in field_map}

In [None]:
def get_company_data(ticket):
    """Build the Hubspot company fields for the customer attached to a ticket.

    The ticket's customer is found through ``desk_customers_json_index``; the
    customer's "company" value is then used as the key into
    ``desk_companies_json_index``.

    Args:
        ticket: Desk ticket mapping with an optional "customerID".

    Returns:
        A dict with "company_name" and "company_domain_name" (taken from the
        company's "website"), or an empty dict when the customer or the
        company cannot be resolved.

    Raises:
        KeyError: if the matched customer has no "company" key, or the matched
            company lacks "name" or "website".
    """
    customer_id = ticket.get("customerID")
    if not customer_id or customer_id not in desk_customers_json_index:
        return {}

    company_key = desk_customers_json_index[customer_id]["company"]
    company = desk_companies_json_index.get(company_key)
    if not company:
        return {}

    return {
        "company_name": company["name"],
        "company_domain_name": company["website"],
    }

In [None]:
def get_user_data(ticket):
    """Build the Hubspot owner field for the user a ticket is assigned to.

    Args:
        ticket: Desk ticket mapping; its optional "assignedToID" is looked up
            in ``desk_users_json_index``.

    Returns:
        ``{"ticket_owner": <user email>}``, or an empty dict when the ticket
        has no (truthy) assignee id or the id is not in the index.

    Raises:
        KeyError: if the matched user has no "email" key.
    """
    assignee_id = ticket.get("assignedToID")
    if not assignee_id or assignee_id not in desk_users_json_index:
        return {}

    owner = desk_users_json_index[assignee_id]
    return {"ticket_owner": owner["email"]}

**Tickets conversion**

Generate new Hubspot ticket records

In [None]:
# One Hubspot record per Desk ticket: the ticket, contact, company and owner
# fields are merged into a single flat dict. The contact/company/owner parts
# are empty dicts when they cannot be resolved, so those records simply lack
# the corresponding keys.
hubspot_tickets_records = [
    {
        **get_ticket_data(desk_ticket),
        **get_contact_data(desk_ticket),
        **get_company_data(desk_ticket),
        **get_user_data(desk_ticket),
    }
    for desk_ticket in desk_tickets_json
]

print(len(hubspot_tickets_records))

# Union of every key seen across all records, in descending (reverse
# alphabetical) order so the column order is deterministic.
ticket_columns = sorted(
    {key for record in hubspot_tickets_records for key in record},
    reverse=True,
)
print(ticket_columns)

**Convert Hubspot ticket data to CSV**

Using `pandas`

In [None]:
# "ticket_id" is left out of the CSV columns; the records themselves keep it
# (the thread conversion below indexes the records by "ticket_id").
#
# Filter into a fresh list rather than calling `ticket_columns.remove(...)`:
# `.remove` mutates the list in place, so running this cell a second time, or
# running it when there are no ticket records at all, raised ValueError.
# Rebinding keeps the cell idempotent while leaving `ticket_columns` in the
# same final state as before.
ticket_columns = [column for column in ticket_columns if column != "ticket_id"]

tickets_df = pd.DataFrame.from_records(hubspot_tickets_records, columns=ticket_columns)
tickets_df.info()

In [None]:
# Write the ticket records as CSV, without the DataFrame's row index column.
tickets_csv_path = "data/hubspot/tickets.csv"
tickets_df.to_csv(tickets_csv_path, index=False)

**Threads conversion**

Generate new Hubspot ticket thread (notes) records

In [None]:
# Desk ticket id -> converted Hubspot ticket record, so each thread can find
# the record of the ticket it belongs to.
hubspot_tickets_records_index = dict(
    (record["ticket_id"], record) for record in hubspot_tickets_records
)

In [None]:
def _flatten_note_body(body):
    """Return ``body`` as a string with all carriage returns and line feeds removed.

    NOTE(review): line breaks are dropped, not replaced by a space, so text on
    adjacent lines is joined directly - confirm this is the intended format.
    NOTE(review): a ``None`` body becomes the literal text "None" via ``str``
    - confirm that is acceptable.
    """
    text = str(body)
    text = text.replace("\r\n", "\n")
    text = text.replace("\r", "\n")
    return text.replace("\n", "")


# One Hubspot note per Desk thread. Each note repeats the status, name and
# pipeline of the ticket the thread belongs to.
hubspot_note_records = []

for desk_thread in desk_threads_json:
    parent_ticket = hubspot_tickets_records_index[desk_thread["ticketId"]]

    hubspot_note_records.append(
        {
            "note_activity_date": desk_thread["createdAt"],
            "ticket_status": parent_ticket["ticket_status"],
            "ticket_name": parent_ticket["ticket_name"],
            "ticket_pipeline": parent_ticket["ticket_pipeline"],
            "note_body": _flatten_note_body(desk_thread["body"]),
        }
    )

print(len(hubspot_note_records))

# Union of every key seen across all notes, in descending (reverse
# alphabetical) order so the column order is deterministic.
note_columns = sorted(
    {key for note in hubspot_note_records for key in note},
    reverse=True,
)

print(note_columns)

**Convert Hubspot notes data to CSV**

Using `pandas`

In [None]:
# Tabulate the note records using the column order computed above, then print
# a summary of the resulting frame.
notes_df = pd.DataFrame.from_records(
    hubspot_note_records,
    columns=note_columns,
)
notes_df.info()

In [None]:
# Write the note records as CSV, without the DataFrame's row index column.
notes_csv_path = "data/hubspot/notes.csv"
notes_df.to_csv(notes_csv_path, index=False)