In [11]:
from salesforce_bulk import SalesforceBulk
from simple_salesforce import Salesforce
from faker import Faker
import pandas as pd
import random
import os

def _salesforce_credentials():
    """Read the Salesforce login settings from the environment.

    Returns:
        dict: ``username``, ``password`` and ``security_token`` keyword
        arguments, taken from the ``SF_USERNAME``, ``SF_PASSWORD`` and
        ``SF_TOKEN`` environment variables.

    Raises:
        RuntimeError: if any of the three variables is not set.
    """
    env_names = {
        "username": "SF_USERNAME",
        "password": "SF_PASSWORD",
        "security_token": "SF_TOKEN",
    }
    credentials = {kwarg: os.getenv(env) for kwarg, env in env_names.items()}
    # Only an unset variable (None) is rejected; an empty string is passed
    # through unchanged, exactly as os.getenv returned it.
    missing = [env_names[kwarg] for kwarg, value in credentials.items() if value is None]
    if missing:
        raise RuntimeError(
            "Missing Salesforce environment variable(s): " + ", ".join(missing)
        )
    return credentials


# Both clients log in with the same username / password / security token.
ss_client = Salesforce(**_salesforce_credentials())
sb_client = SalesforceBulk(**_salesforce_credentials())

In [5]:
# Fetch the describe() metadata for the Property__c object; later cells read
# its 'fields' entry.
property_spec = ss_client.Property__c.describe()

In [6]:
# Peek at the first field descriptor to see which attributes describe() reports.
property_spec['fields'][0]

OrderedDict([('aggregatable', True),
             ('aiPredictionField', False),
             ('autoNumber', False),
             ('byteLength', 18),
             ('calculated', False),
             ('calculatedFormula', None),
             ('cascadeDelete', False),
             ('caseSensitive', False),
             ('compoundFieldName', None),
             ('controllerName', None),
             ('createable', False),
             ('custom', False),
             ('defaultValue', None),
             ('defaultValueFormula', None),
             ('defaultedOnCreate', True),
             ('dependentPicklist', False),
             ('deprecatedAndHidden', False),
             ('digits', 0),
             ('displayLocationInDecimal', False),
             ('encrypted', False),
             ('externalId', False),
             ('extraTypeInfo', None),
             ('filterable', True),
             ('filteredLookupInfo', None),
             ('formulaTreatNullNumberAsZero', False),
             ('g

In [7]:
# List the 'name' of every field descriptor, in the order describe() returned them.
[field['name'] for field in property_spec['fields']]

['Id',
 'OwnerId',
 'IsDeleted',
 'Name',
 'CreatedDate',
 'CreatedById',
 'LastModifiedDate',
 'LastModifiedById',
 'SystemModstamp',
 'LastViewedDate',
 'LastReferencedDate',
 'Price__c',
 'Agent_ID__c',
 'Street_Address__c',
 'Current_Status__c',
 'Unique_Street_Adddress__c']

In [47]:
# query() returns only the first batch of a SOQL result; query_all() follows
# the pagination so every Agent__c Id is collected.
agents = [record['Id'] for record in ss_client.query_all("Select id from Agent__c")['records']]
agents

['a05Hp000014GDkCIAW',
 'a05Hp000014GIXiIAO',
 'a05Hp000014GIXjIAO',
 'a05Hp000014GIXnIAO',
 'a05Hp000014GIXoIAO',
 'a05Hp000014GIXsIAO',
 'a05Hp000014GIXtIAO',
 'a05Hp000014GIbBIAW']

In [63]:
# Shared Faker instance; generate_data() draws its addresses from it.
fake = Faker()

def generate_data(rows, agent_ids=None,
                  statuses=('Listed', 'Offered', 'Pending Sale', 'Sold')):
    """Build fake property records as a list of dicts.

    Args:
        rows: number of records to generate.
        agent_ids: sequence of agent Ids to pick from at random. When omitted,
            the notebook-level ``agents`` list is used.
        statuses: sequence of values ``Current_Status__c`` is picked from.

    Returns:
        A list of ``rows`` dicts, each with the keys ``Name``, ``Price__c``,
        ``Agent_ID__c`` and ``Current_Status__c``.
    """
    if agent_ids is None:
        # Backward-compatible default: fall back to the global list of Ids.
        agent_ids = agents
    return [
        {
            # fake.unique is used so no address repeats; the newline inside
            # the generated address is flattened to keep the value on one line.
            'Name': fake.unique.address().replace('\n', ', '),
            'Price__c': random.randint(50000, 1000000),
            'Agent_ID__c': random.choice(agent_ids),
            'Current_Status__c': random.choice(statuses),
        }
        for _ in range(rows)
    ]

# Generate 100000 synthetic property rows and load them into a DataFrame.
df = pd.DataFrame(data=generate_data(100000))
df.shape

(100000, 4)

In [64]:
# Show the first generated row as a sanity check.
df.iloc[0]

Name                 4319 Contreras Park Apt. 112, Mayfurt, GU 03362
Price__c                                                      348467
Agent_ID__c                                       a05Hp000014GIXiIAO
Current_Status__c                                            Offered
Name: 0, dtype: object

In [65]:
# Field names the DataFrame columns are checked against before export.
columns = [
    'Name',
    'Price__c',
    'Agent_ID__c',
    'Street_Address__c',
    'Current_Status__c',
    'Unique_Street_Adddress__c',
]

In [66]:
# Both address fields carry the same value as the record Name.
for address_column in ('Street_Address__c', 'Unique_Street_Adddress__c'):
    df[address_column] = df['Name']

In [67]:
# Require an exact match in both directions: a one-sided difference only
# catches unexpected columns and passes when an expected column is missing.
assert set(df.columns) == set(columns), (
    "column mismatch; unexpected: {}, missing: {}".format(
        sorted(set(df.columns) - set(columns)),
        sorted(set(columns) - set(df.columns)),
    )
)

In [68]:
# Count distinct street addresses; compare with the row count to spot duplicates.
df['Street_Address__c'].nunique()

100000

In [69]:
# index=False keeps the pandas row index out of the file, so the CSV holds
# only the DataFrame's named columns (no unnamed leading index column).
df.to_csv("../data/property.csv", index=False)

In [70]:
# Spot-check the agent Id assigned to the row labelled 0.
df.loc[0, 'Agent_ID__c']

'a05Hp000014GIXiIAO'