In [0]:
pip install hubspot

In [0]:
pip install hubspot-api-client

## AdressNummer vs. HSContactID

In [0]:
import hubspot
from pprint import pprint
from hubspot.crm.contacts import ApiException
import pandas as pd

# Read the HubSpot access token from the secret scope so it never appears in the notebook.
api_key = dbutils.secrets.get(scope="key-vault-secrets",key="api-accesstoken")
# HubSpot client shared by the lookup functions defined in the following cells.
client = hubspot.Client.create(access_token=api_key)

def get_adressnummer_vs_contactid(hubspot_client=None):
    """Build a lookup table from AlphaPlan address numbers to HubSpot contact IDs.

    Pages through all non-archived HubSpot contacts (100 per request) and
    collects, for every contact, its ``k_alphaplan_adressnummer_hubspot``
    property together with the contact's HubSpot ID.

    Args:
        hubspot_client: Optional client object exposing
            ``crm.contacts.basic_api.get_page``. When omitted, the
            notebook-level ``client`` is used.

    Returns:
        pandas.DataFrame: One row per contact with the columns
        ``AdressNummer`` (missing where the property is not set) and
        ``hs_contact_id``. Both columns exist even when HubSpot returns no
        contacts, so downstream merges on ``AdressNummer`` keep working.
    """
    property_name = 'k_alphaplan_adressnummer_hubspot'
    active_client = client if hubspot_client is None else hubspot_client
    contacts_api = active_client.crm.contacts.basic_api

    records = []
    next_after = None
    while True:
        page = contacts_api.get_page(
            limit=100,
            archived=False,
            after=next_after,
            properties=[property_name],
        )
        results = page.results or []
        for contact in results:
            properties = contact.properties or {}
            records.append({
                'AdressNummer': properties.get(property_name),
                'hs_contact_id': contact.id,
            })

        # The last page carries no paging cursor (paging is None or has no
        # "next"); an empty page also ends the iteration.
        next_page = getattr(page.paging, 'next', None)
        if not results or next_page is None:
            break
        next_after = next_page.after

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(records, columns=['AdressNummer', 'hs_contact_id'])

In [0]:
# Fetch the contact lookup from HubSpot once; the merge cells below reuse this frame.
df_adressnummer_vs_contactid = get_adressnummer_vs_contactid()
# Last expression of the cell: renders a preview of the lookup table.
df_adressnummer_vs_contactid

Unnamed: 0,AdressNummer,hs_contact_id
0,11259,63201
1,14122,63202
2,IN0027,63203
3,16020,63204
4,10105,63205
...,...,...
9480,IN7621,75056
9481,IN7644,75101
9482,IN7631,75151
9483,16222,75201


## AdressenID vs. HSCompanyID

In [0]:
def get_adressenid_vs_hscompanyid(hubspot_client=None):
    """Build a lookup table from AlphaPlan address IDs to HubSpot company IDs.

    Pages through all non-archived HubSpot companies (100 per request) and
    collects, for every company, its ``k_alphaplan_addressenid_hubspot``
    property together with the company's HubSpot ID.

    Args:
        hubspot_client: Optional client object exposing
            ``crm.companies.basic_api.get_page``. When omitted, the
            notebook-level ``client`` is used.

    Returns:
        pandas.DataFrame: Columns ``AdressenID`` (integer) and
        ``hs_company_id``. Companies whose AlphaPlan ID is missing or blank
        are left out, because they cannot be cast to an integer and cannot be
        matched on ``AdressenID`` anyway. Both columns exist even when no
        company is returned.

    Raises:
        ValueError: If a non-blank AlphaPlan ID is not a valid integer.
    """
    property_name = 'k_alphaplan_addressenid_hubspot'
    active_client = client if hubspot_client is None else hubspot_client
    companies_api = active_client.crm.companies.basic_api

    records = []
    next_after = None
    while True:
        page = companies_api.get_page(
            limit=100,
            archived=False,
            after=next_after,
            properties=[property_name],
        )
        results = page.results or []
        for company in results:
            properties = company.properties or {}
            records.append({
                'AdressenID': properties.get(property_name),
                'hs_company_id': company.id,
            })

        # The last page carries no paging cursor (paging is None or has no
        # "next"); an empty page also ends the iteration.
        next_page = getattr(page.paging, 'next', None)
        if not results or next_page is None:
            break
        next_after = next_page.after

    # Explicit columns keep the schema stable for an empty result.
    AdressenID_vs_HSCompanyID = pd.DataFrame(records, columns=['AdressenID', 'hs_company_id'])

    # Drop companies without an AlphaPlan ID before the integer cast; a single
    # missing value would otherwise make astype('int') fail for the whole frame.
    ids = AdressenID_vs_HSCompanyID['AdressenID']
    has_id = ids.notna() & (ids.astype(str).str.strip() != '')
    AdressenID_vs_HSCompanyID = AdressenID_vs_HSCompanyID[has_id].reset_index(drop=True)
    AdressenID_vs_HSCompanyID['AdressenID'] = AdressenID_vs_HSCompanyID['AdressenID'].astype('int')
    return AdressenID_vs_HSCompanyID

In [0]:
# Fetch the company lookup from HubSpot once; the AdressenID merge below reuses this frame.
AdressenID_vs_HSCompanyID = get_adressenid_vs_hscompanyid()

In [0]:
# Preview the AdressenID -> hs_company_id lookup.
AdressenID_vs_HSCompanyID

Unnamed: 0,AdressenID,hs_company_id
0,1901,8047972080
1,1387,8047972081
2,70,8047972082
3,2862,8047972083
4,2561,8047972084
...,...,...
8426,11588,8049607105
8427,11610,8049607106
8428,11631,8049607107
8429,11590,8049607108


## AdressenID vs. AdressNummer

In [0]:
def get_adressenid_vs_adressnummer(
    csv_path='/dbfs/mnt/alphaplanexports/Adressnummern230710.CSV',
    sep=';',
    encoding='latin1',
):
    """Load the AdressenID / AdressNummer mapping from a CSV export.

    Args:
        csv_path (str): Location of the export file. Defaults to the export
            this notebook was written against.
        sep (str): Field separator of the export.
        encoding (str): Text encoding of the export.

    Returns:
        pandas.DataFrame: The export's rows that have an ``AdressNummer``;
        rows where it is missing are dropped. The original row index is kept.

    Raises:
        KeyError: If the file has no ``AdressNummer`` column.
    """
    export = pd.read_csv(csv_path, sep=sep, encoding=encoding)
    # Rows without an address number cannot be joined to HubSpot contacts.
    return export.dropna(subset=['AdressNummer'])

In [0]:
# Load the AdressenID -> AdressNummer mapping from the CSV export.
AdressenID_vs_AdressNummer = get_adressenid_vs_adressnummer()
# Last expression of the cell: renders a preview of the mapping.
AdressenID_vs_AdressNummer

Unnamed: 0,AdressenID,AdressNummer
1,2,70001
2,3,70003
3,4,70004
4,5,70006
5,6,70007
...,...,...
8453,11621,16449
8454,11622,16450
8455,11623,16451
8456,11624,16452


## AdressNummer vs. HSCompanyID

In [0]:
# Join the CSV mapping with the HubSpot company lookup on AdressenID and keep
# only the address number and the company ID. The inner join drops rows that
# have no counterpart on the other side.
AdressNummer_vs_HSCompanyID = pd.merge(
    AdressenID_vs_AdressNummer,
    AdressenID_vs_HSCompanyID,
    how='inner',
    on='AdressenID',
).loc[:, ['AdressNummer', 'hs_company_id']]

In [0]:
# Preview the AdressNummer -> hs_company_id lookup.
AdressNummer_vs_HSCompanyID

Unnamed: 0,AdressNummer,hs_company_id
0,70001,8049506788
1,70003,8049582797
2,70004,8047996377
3,70006,8049583070
4,70007,8049539273
...,...,...
8423,16440,8049607101
8424,16441,8049606891
8425,16442,8049606901
8426,16444,8049607100


## HSContactID vs. HSCompanyID

In [0]:
# Pair every HubSpot contact with the HubSpot company that shares its
# AdressNummer. The inner join drops contacts and companies without a match.
HSContactID_vs_HSCompanyID = pd.merge(
    df_adressnummer_vs_contactid,
    AdressNummer_vs_HSCompanyID,
    how='inner',
    on='AdressNummer',
).loc[:, ['hs_contact_id', 'hs_company_id']]

In [0]:
# Preview the contact -> company pairs.
HSContactID_vs_HSCompanyID

Unnamed: 0,hs_contact_id,hs_company_id
0,63204,8049606872
1,74086,8049606872
2,74279,8049606872
3,63207,8049538500
4,68072,8049538500
...,...,...
8586,75006,8049540066
8587,75052,8049607099
8588,75055,8049540074
8589,75056,8049540061


In [0]:
# Shape the contact/company pairs into the record layout that is uploaded
# further down: fromObjectId, toObjectId, category, definitionId.
# NOTE(review): definitionId 1 is presumably HubSpot's contact-to-company
# association type - confirm against the CRM Associations API reference.
associations_df = (
    HSContactID_vs_HSCompanyID
    .rename(columns={'hs_contact_id': 'fromObjectId', 'hs_company_id': 'toObjectId'})
    .assign(category='HUBSPOT_DEFINED', definitionId=1)
)

In [0]:
# Preview the association records before they are uploaded.
associations_df

Unnamed: 0,fromObjectId,toObjectId,category,definitionId
0,63204,8049606872,HUBSPOT_DEFINED,1
1,74086,8049606872,HUBSPOT_DEFINED,1
2,74279,8049606872,HUBSPOT_DEFINED,1
3,63207,8049538500,HUBSPOT_DEFINED,1
4,68072,8049538500,HUBSPOT_DEFINED,1
...,...,...,...,...
8586,75006,8049540066,HUBSPOT_DEFINED,1
8587,75052,8049607099,HUBSPOT_DEFINED,1
8588,75055,8049540074,HUBSPOT_DEFINED,1
8589,75056,8049540061,HUBSPOT_DEFINED,1


In [0]:
# Convert the frame into a list of dicts (one per row) so it can be chunked and JSON-encoded.
associations_records = associations_df.to_dict(orient='records')

In [0]:
def split_list_into_chunks(lst, chunk_size):
    """
    Split a list into consecutive chunks of a specified size.

    The last chunk is shorter when the list length is not a multiple of
    ``chunk_size``; an empty list yields an empty result.

    Args:
        lst (list): The list to split.
        chunk_size (int): The maximum size of each chunk; must be positive.

    Returns:
        list: A list of lists, where each inner list represents a chunk.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop every
    # element, so reject it the same way a zero step is rejected.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

In [0]:
import requests
import json


# Token sent as the Bearer credential by associate_batch; read from the secret scope.
UPLOAD_API_TOKEN = dbutils.secrets.get(scope="key-vault-secrets",key="upload-api-accesstoken")

# NOTE(review): not referenced anywhere in the visible part of this notebook - confirm whether it is still needed.
blob_storage_key = dbutils.secrets.get(scope="key-vault-secrets",key="blob-storage-key")

# HubSpot endpoint that each batch of association records is sent to.
url = 'https://api.hubapi.com/crm-associations/v1/associations/create-batch'


def associate_batch(batch, url):
    """Send one batch of association records to HubSpot.

    The batch is JSON-encoded and sent with an HTTP PUT, authorised with the
    notebook-level ``UPLOAD_API_TOKEN``. The outcome is printed and also
    returned so callers can react to failures.

    Args:
        batch (list): JSON-serialisable association records.
        url (str): Endpoint the batch is sent to.

    Returns:
        bool: True when HubSpot answered with status 204, False otherwise.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when no response arrives within the timeout.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + UPLOAD_API_TOKEN
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.put(url, headers=headers, data=json.dumps(batch), timeout=30)

    succeeded = response.status_code == 204
    if succeeded:
        print("Associations were successfully created in Hubspot.")
    else:
        print("An error occurred while creating associations in Hubspot. Status code: ", response.status_code, response.content)
    return succeeded

In [0]:
# Upload the association records in chunks of 100 and log progress per batch.
batches_to_associate = split_list_into_chunks(associations_records, 100)
print('---------number of batches:', len(batches_to_associate))

counter = 1
for counter, batch in enumerate(batches_to_associate, start=1):
    associate_batch(batch, url)
    print('---------processed batch:', counter)
# Leave counter at the number of the next batch, as the cell did before.
counter = len(batches_to_associate) + 1