In [0]:
pip install hubspot

In [0]:
pip install hubspot-api-client

In [0]:
import pandas as pd

In [0]:
# Read the semicolon-separated export (header row present, Latin-1 encoded) into a Spark DataFrame.
export_companies = spark.read.csv(
    'dbfs:/mnt/alphaplanexports/AdressenExport230712.csv',
    sep=';',
    header=True,
    encoding='latin1',
)

In [0]:
# companies_df = pd.read_csv('/dbfs/mnt/alphaplanexports/AdressenExport230706.CSV', sep=';',  encoding='latin1')

In [0]:
# Convert the Spark DataFrame into a pandas DataFrame; all later cells work on the pandas copy.
companies_df = export_companies.toPandas()

In [0]:
# Remove the 'SuchName' column so it is not part of the uploaded records.
companies_df = companies_df.drop(columns=['SuchName'])

In [0]:
# Show (total rows, rows that have a non-null 'Name1').
len(companies_df), len(companies_df.dropna(subset=['Name1']))

In [0]:
def convert_scientific_notation(phone_value):
    """
    Expand a phone value written in scientific notation into plain digits.

    A value whose string form contains ``E+`` or ``e+`` (for example
    ``'4,9123E+11'``) is parsed as a float -- a decimal comma is accepted --
    and its integer part is returned as a string. Any other value is
    returned unchanged, so ordinary phone numbers and missing values
    survive a column-wide ``apply``.

    Digits that are not present in the scientific-notation text cannot be
    recovered; the result is only as precise as the input mantissa.

    Args:
        phone_value: Raw cell value; it is inspected through ``str()``.

    Returns:
        str: The expanded integer digits when the input is in scientific
        notation. Otherwise ``phone_value`` itself, untouched.

    Raises:
        ValueError: If the value contains ``E+``/``e+`` but the text is not
            a parseable number.
    """
    phone_string = str(phone_value)
    if ('E+' not in phone_string) and ('e+' not in phone_string):
        # Not scientific notation: hand the value back instead of dropping it.
        return phone_value

    # Exports may use a decimal comma, which float() does not understand.
    phone_float = float(phone_string.replace(',', '.'))
    try:
        # int() truncates the fractional part and never renders as '1e+16',
        # unlike str(float) for large magnitudes.
        return str(int(phone_float))
    except OverflowError:
        # The exponent was too large for a finite float (parsed as inf).
        return str(phone_float)

In [0]:
# Run the scientific-notation conversion over both phone-like columns.
for phone_column in ('Telefon', 'Telefax'):
    companies_df[phone_column] = companies_df[phone_column].apply(convert_scientific_notation)

In [0]:
# Keep only the rows that have a value in 'Name1'.
has_name = companies_df['Name1'].notna()
companies_df = companies_df.loc[has_name]

In [0]:
# Export column name -> property name used in the uploaded records.
# Columns that are not listed here keep their original names.
mapping = {
    'AdressenID': 'k_alphaplan_addressenid_hubspot',
    'AdressNummer': 'k_alphaplan_adressnummer_hubspot',
    'Name1': 'name',
    'Ort': 'city',
    'Postleitzahl': 'zip',
    'Strasse': 'address',
    'Telefon': 'phone',
    'LaenderKuerzel': 'laenderkuerzel',
    'Postfach': 'postfach',
    'PostfachPostleitzahl': 'postfach_postleitzahl',
    'PostfachOrt': 'postfach_ort',
    'Telefax': 'telefax',
    'Email': 'email',
    'Information': 'information',
    'Internet': 'website',
    'LieferantenKundenNummer': 'lieferanten_kunden_nummer',
    'PreisStufe1': 'preisstufe_1',
    'PreisStufe2': 'preisstufe_2',
    'SVPrioritaet': 'prioritaet',
}
companies_df = companies_df.rename(columns=mapping)

In [0]:
# One dict per row, keyed by the (renamed) column names.
companies_records = companies_df.to_dict(orient='records')

In [0]:
# Report how many records are queued for upload.
n_records = len(companies_records)
print('--------companies records to be uploaded :',n_records)

In [0]:
def split_list_into_chunks(lst, chunk_size):
    """
    Break a list into consecutive slices of at most ``chunk_size`` items.

    Args:
        lst (list): The list to partition.
        chunk_size (int): Maximum number of items per slice.

    Returns:
        list: The slices in their original order; the last one may be
        shorter than ``chunk_size``.
    """
    chunks = []
    for start in range(0, len(lst), chunk_size):
        stop = start + chunk_size
        chunks.append(lst[start:stop])
    return chunks

In [0]:
# 100 records per batch -- presumably the per-request limit of the batch endpoint; TODO confirm.
BATCH_SIZE = 100
companies_batches = split_list_into_chunks(companies_records, BATCH_SIZE)

In [0]:
def get_hubspot_upload_format(batch):
    """
    Wrap every record of a batch in a ``{"properties": record}`` dict.

    Args:
        batch: Iterable of records (the records themselves are not copied).

    Returns:
        list: One wrapper dict per record, in the same order as ``batch``.
    """
    return [{"properties": record} for record in batch]

In [0]:
import hubspot
from pprint import pprint
from hubspot.crm.companies import BatchInputSimplePublicObjectInputForCreate, ApiException

# Fetch the access token from the "key-vault-secrets" secret scope so it is not hard-coded in the notebook.
api_upload_key = dbutils.secrets.get(scope="key-vault-secrets",key="upload-api-accesstoken")

# HubSpot client authenticated with that token; passed to upload_batch_companies below.
client = hubspot.Client.create(access_token=api_upload_key)

def upload_batch_companies(upload_list, client):
    """
    Create one batch of companies through the HubSpot batch API.

    Args:
        upload_list (list): Entries of the form ``{"properties": {...}}``,
            as produced by ``get_hubspot_upload_format``.
        client: HubSpot client whose ``crm.companies.batch_api`` is used.

    Returns:
        int: Number of entries in the API response's ``results``. Returns 0
        when the call raised ``ApiException``, so callers can always add the
        return value to a running total.
    """
    batch_input = BatchInputSimplePublicObjectInputForCreate(inputs=upload_list)
    try:
        api_response = client.crm.companies.batch_api.create(
            batch_input_simple_public_object_input_for_create=batch_input
        )
    except ApiException as e:
        # Best effort: report the failure and carry on with the next batch.
        print("Exception when calling batch_api->create: %s\n" % e)
        return 0
    return len(api_response.results)

In [0]:
# test_batch = companies_batches[0][:30]
# upload_list = get_hubspot_upload_format(test_batch)
# n_uploads = upload_batch_companies(upload_list, client)
# print('--------companies records uploaded successfully:',n_uploads)

In [0]:
# Upload the batches one after another and keep a running total of created records.
n_uploads = 0
counter = 1
failed_batches = []  # 1-based numbers of the batches that yielded no created records
for b in companies_batches:
    upload_list = get_hubspot_upload_format(b)
    n_uploads_inc = upload_batch_companies(upload_list, client)
    print('-----uploaded batch :', counter, ',number of records :', n_uploads_inc)
    if n_uploads_inc:
        n_uploads += n_uploads_inc
    else:
        # A falsy count (None or 0) means nothing from this batch was created,
        # e.g. because the API call failed; adding None would raise TypeError.
        failed_batches.append(counter)
    counter += 1

if failed_batches:
    print('-----batches without uploaded records :', failed_batches)

In [0]:
# Final tally accumulated by the upload loop.
print('--------companies records uploaded successfully:',n_uploads)