In [3]:
from salesforce_bulk import SalesforceBulk, CsvDictsAdapter
from simple_salesforce import Salesforce
import pandas as pd
import os

ss_client = Salesforce(
    username=os.getenv("SF_USERNAME"),
    password=os.getenv("SF_PASSWORD"),
    security_token=os.getenv("SF_TOKEN")
)

sb_client = SalesforceBulk(
    username=os.getenv("SF_USERNAME"),
    password=os.getenv("SF_PASSWORD"),
    security_token=os.getenv("SF_TOKEN")
)

# Validate Schema

In [4]:
df = pd.read_csv("../data/property.csv").head(100000).drop('Unnamed: 0', axis=1)

In [5]:
schema = [x['name'] for x in ss_client.Property__c.describe()['fields']]
schema

['Id',
 'OwnerId',
 'IsDeleted',
 'Name',
 'CreatedDate',
 'CreatedById',
 'LastModifiedDate',
 'LastModifiedById',
 'SystemModstamp',
 'LastViewedDate',
 'LastReferencedDate',
 'Price__c',
 'Agent_ID__c',
 'Street_Address__c',
 'Current_Status__c',
 'Unique_Street_Adddress__c']

In [6]:
df.columns

Index(['Name', 'Price__c', 'Agent_ID__c', 'Current_Status__c',
       'Street_Address__c', 'Unique_Street_Adddress__c'],
      dtype='object')

In [7]:
assert set(df.columns).difference(schema) == set()

# Upload

In [9]:
def upload(sb_client, object_name, records, chunk_size=10000):
    job_id = sb_client.create_insert_job(object_name, contentType='CSV')
    batch_ids = []

    for index in range(0, len(records), chunk_size):
        record_batch = records[index:index+chunk_size]
        csv_iter = CsvDictsAdapter(iter(record_batch))
        batch_id = sb_client.post_batch(job_id, csv_iter)
        sb_client.wait_for_batch(job_id, batch_id)
        batch_ids.append(batch_id)


    return batch_ids

In [10]:
records = df.to_dict('records')

batch_ids = upload(sb_client, 'Property__c', records)

KeyboardInterrupt: 

In [25]:
records = df.to_dict('records')
csv_iter = CsvDictsAdapter(iter(records))

In [18]:
csv_iter.next()

b'"Name","Price__c","Agent_ID__c","Current_Status__c","Street_Address__c","Unique_Street_Adddress__c"\r\n"34537 Jason Drive Suite 281, North Ashleyborough, NE 57689",678335,"a05Hp000014GIXoIAO","Listed","34537 Jason Drive Suite 281, North Ashleyborough, NE 57689","34537 Jason Drive Suite 281, North Ashleyborough, NE 57689"\r\n'

In [26]:
job = sb_client.create_insert_job("Property__c", contentType='CSV')

In [14]:
batch = sb_client.post_batch(job, csv_iter)
sb_client.wait_for_batch(job, batch)

BulkBatchFailed: Batch 751Hp00001fx5hhIAA of job 750Hp00001CR1pFIAT failed: InvalidBatch : Field name not found : 58794 Sarah Point, Moorebury, ND 30749

In [None]:
# sb_client.close_job(job)

In [None]:
sb_client.is_batch_done(batch)

In [12]:
sb_client.get_batch_results(batch)

NameError: name 'batch' is not defined

# Check counts

In [19]:
ss_client.query("Select count(Name) from Property__c")

OrderedDict([('totalSize', 1),
             ('done', True),
             ('records',
              [OrderedDict([('attributes',
                             OrderedDict([('type', 'AggregateResult')])),
                            ('expr0', 10007)])])])