# Bid Winner
**TODO:** add an explanation of how to get messy data into the Salesforce ecosystem.

All data contained in this repository, as well as any information queried or conveyed through its use, is publicly accessible and readily available on the internet. No proprietary, confidential, or restricted data is included or utilized.

This concept was created in my free time and is not used at my employer.

## Imports
and some configuration

In [42]:
import sys
sys.path.insert(0, '../scripts')
from govspend_search import GoveSpendSearch
from rfp_parser import RFPParse
from flis_search import FlisSearch
from parts_base import PartsBase
from simple_salesforce import Salesforce
import pandas as pd
import os
from dotenv import load_dotenv
import io
load_dotenv()

# Identifier passed to govspend.search() below.
search_id = "679aba32c9c299c7b31ade4f"

# Local FEDLOG locations. These are machine-specific, so they can be overridden with
# PATH_TO_DLL / PATH_TO_FEDLOG environment variables (e.g. in the .env file read by
# load_dotenv() above); the defaults are the original hard-coded paths.
PATH_TO_DLL = os.getenv(
    "PATH_TO_DLL",
    "C:\\Users\\jackmchugh\\Downloads\\PublogDVD\\TOOLS\\MS12\\DecompDl64.dll",
)
PATH_TO_FEDLOG = os.getenv(
    "PATH_TO_FEDLOG",
    "C:\\Users\\jackmchugh\\Downloads\\PublogDVD",
)

# Output locations for the CSV files written later in the notebook.
output_dir = "../datasets/demo_output/"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, "target_list.csv")
output_manufacturer_file = os.path.join(output_dir, "manufacturer_information.csv")


## Helper Classes/Methods

In [9]:
# One client/helper object per data source used in the bid-processing loop.
pb = PartsBase()
govspend = GoveSpendSearch()
flis = FlisSearch(
    path_to_dll=PATH_TO_DLL,
    path_to_fedlog=PATH_TO_FEDLOG,
)
rfp_parser = RFPParse()

In [27]:

# Salesforce credentials are read from environment variables; a variable that is
# not set comes back as None.
sf_username, sf_password, sf_token, sf_domain = (
    os.getenv(env_name)
    for env_name in ('sf_username', 'sf_password', 'sf_token', 'sf_domain')
)

# Shared Salesforce client used by every bulk call later in the notebook.
sf = Salesforce(
    username=sf_username,
    password=sf_password,
    security_token=sf_token,
    domain=sf_domain,
)

In [28]:
def get_bulk2_results(result, bulk_object=None):
    """Collect the successful and failed records of Bulk API 2.0 ingest jobs.

    Args:
        result: Iterable of job summaries, each a mapping with a ``'job_id'`` key
            (the list returned by a ``sf.bulk2.<Object>.insert(...)`` call).
        bulk_object: Object exposing ``get_successful_records(job_id)`` and
            ``get_failed_records(job_id)``, each returning CSV text. Defaults to
            ``sf.bulk2.Product2`` on the notebook-level ``sf`` client.

    Returns:
        A ``(combined_success, combined_failed)`` tuple of DataFrames covering
        every job in ``result``, each row tagged with its ``job_id``. Both
        frames are empty when ``result`` contains no jobs.
    """
    if bulk_object is None:
        bulk_object = sf.bulk2.Product2

    success_frames = []
    failed_frames = []

    for job in result:
        job_id = job['job_id']

        # The records come back as CSV strings, so parse them into DataFrames.
        failed = pd.read_csv(io.StringIO(bulk_object.get_failed_records(job_id)))
        success = pd.read_csv(io.StringIO(bulk_object.get_successful_records(job_id)))

        failed['job_id'] = job_id
        success['job_id'] = job_id

        failed_frames.append(failed)
        success_frames.append(success)

    # Concatenate once, after every job has been read (the return must sit outside
    # the loop, otherwise only the first job is ever reported).
    combined_success = (
        pd.concat(success_frames, ignore_index=True) if success_frames else pd.DataFrame()
    )
    combined_failed = (
        pd.concat(failed_frames, ignore_index=True) if failed_frames else pd.DataFrame()
    )
    return combined_success, combined_failed

## Process Data

### Run GovSpend Search

In [10]:
# Accumulators filled by the bid-processing loop: one dict per bid, and one
# manufacturer DataFrame per bid that has manufacturer data.
combined_result_list, manufacturer_result_list = [], []

# Run the GovSpend search identified by search_id.
govspend_result = govspend.search(search_id)

### Process Bids

In [11]:
# Start from empty accumulators so re-running this cell does not append a second
# copy of every bid to the lists.
combined_result_list = []
manufacturer_result_list = []
count = 0

# For each bid: parse the description, look the NIIN up in FLIS, and flatten
# everything into one record per bid.
for count, bid in enumerate(govspend_result['result'], start=1):
    parse_result = rfp_parser.parse_rfp(bid['description'])
    niin = parse_result["niin"]
    flis_result = flis.get_part_number_and_description(niin)

    # Reference part number: prefer the parsed value, then the first FLIS part
    # number, then a placeholder. `or ""` guards against a None value.
    ref_pn = (
        (parse_result.get("part_number") or "").strip()
        or (flis_result.get("part_number") or "").split(",")[0].strip()
        or "NO PART NUMBER"
    )

    #num_pb_results = pb.get_live_qty(ref_pn) I no longer have access to the PartsBase API, so this line is commented out.
    num_pb_results = 0  # Placeholder for PartsBase results

    result = {
        "bid_number": bid["bidNumber"],
        "ref_pn": ref_pn,
        "parse_desc": parse_result["description"],
        "qty_requested": parse_result["quantity"],
        "num_qty_on_pb": num_pb_results,
        "approved_source": parse_result["approved_source"],
        "lead_time": parse_result["lead_time"],
        "incumbent": parse_result["incumbent"],
        "multiple_award": parse_result["multiple_award"],
        "approved_source_codes": parse_result["approved_source_codes"],
        "date_create": bid["postedDate"],
        "date_due": bid["dueDate"],
        "psc_code": bid["PSCCode"],
        "set_aside": bid["setAside"],
        "NSN": parse_result["nsn"],
        "flis_pn": flis_result["part_number"],
        "parse_pn": parse_result["part_number"],
        # NOTE(review): this column previously copied the parsed description.
        # Assumes the FLIS lookup exposes a "description" key - confirm; if it
        # does not, the parsed description is used as before.
        "flis_desc": flis_result.get("description", parse_result["description"]),
    }

    print(result)
    print("Number of BIDs processed", count)
    combined_result_list.append(result)

    # Keep manufacturer data only for NIINs that have any.
    manufacturer_data = flis.get_cage_codes_and_pricing(niin)
    if not manufacturer_data.empty:
        manufacturer_result_list.append(manufacturer_data)

{'bid_number': 'SPE4A725T591A', 'ref_pn': 'C5B205333', 'parse_desc': 'RELEASE, MECHANISM', 'qty_requested': 8, 'num_qty_on_pb': 0, 'approved_source': '', 'lead_time': '473 DAYS ADO for Line 0001, 180 DAYS ADO for Line 0002', 'incumbent': '', 'multiple_award': 'no', 'approved_source_codes': [], 'date_create': '2025-07-02T12:00:00Z', 'date_due': '2025-07-10T12:00:00Z', 'psc_code': '16 - AEROSPACE CRAFT COMPONENTS AND ACCESSORIES', 'set_aside': 'SBA - Total Small Business Set-Aside (FAR 19.5)', 'NSN': '1680012305966', 'flis_pn': 'C5B205333', 'parse_pn': '', 'flis_desc': 'RELEASE, MECHANISM'}
Number of BIDs processed 1
{'bid_number': 'SPE4A725T591G', 'ref_pn': '9R3886', 'parse_desc': 'NRP, RESTRICTOR CHEC', 'qty_requested': 3, 'num_qty_on_pb': 0, 'approved_source': '1WAC1 9R3886; 99240 9R3886', 'lead_time': '114 days', 'incumbent': '', 'multiple_award': 'no', 'approved_source_codes': ['1WAC1 9R3886', '99240 9R3886'], 'date_create': '2025-07-02T12:00:00Z', 'date_due': '2025-07-10T12:00:00Z'

### Save the Results

In [46]:
# Write one row per processed bid to the target-list CSV.
df = pd.DataFrame(data=combined_result_list)
df.to_csv(output_file, index=False)
print(f"Data saved to {output_file}")

# Manufacturer data is optional: only write the file when at least one bid had any.
if len(manufacturer_result_list) > 0:
    df_manufacturer = pd.concat(objs=manufacturer_result_list, ignore_index=True)
    df_manufacturer.to_csv(output_manufacturer_file, index=False)
    print(f"Manufacturer data saved to {output_manufacturer_file}")

Data saved to ../datasets/demo_output/target_list.csv
Manufacturer data saved to ../datasets/demo_output/manufacturer_information.csv


## Bring Into Salesforce

We will be adding these bids to Salesforce as a customer quote for Acme Inc.

In [47]:
# Report how many bids ended up in the DataFrame, then preview the first row.
bid_total = len(df)
print(f"Total number of bids processed: {bid_total}")
df.head(1)

Total number of bids processed: 3


Unnamed: 0,bid_number,ref_pn,parse_desc,qty_requested,num_qty_on_pb,approved_source,lead_time,incumbent,multiple_award,approved_source_codes,date_create,date_due,psc_code,set_aside,NSN,flis_pn,parse_pn,flis_desc
0,SPE4A725T591A,C5B205333,"RELEASE, MECHANISM",8,0,,"473 DAYS ADO for Line 0001, 180 DAYS ADO for L...",,no,[],2025-07-02T12:00:00Z,2025-07-10T12:00:00Z,16 - AEROSPACE CRAFT COMPONENTS AND ACCESSORIES,SBA - Total Small Business Set-Aside (FAR 19.5),1680012305966,C5B205333,,"RELEASE, MECHANISM"


### Wrangle the Data

We will need to create a DataFrame that can be used to upload into the customer quote line. Let's start by renaming some of the columns.

In [60]:
# Join every bid column into one pipe-delimited string per row. Any existing
# 'combined_string' column is excluded from the inputs, so re-running this cell
# yields the same value instead of nesting the previous one inside it.
bid_columns = df.drop(columns='combined_string', errors='ignore')
df['combined_string'] = bid_columns.astype(str).agg(' | '.join, axis=1)

# Rename the bid columns for the quote-line upload.
# NOTE(review): the `inscor__...__c` names look like Salesforce custom-field API
# names while the others are plain labels - confirm which columns are uploaded.
df_quote_lines = df.rename(columns={
    "bid_number": "inscor__Customer_Reference_Line__c",
    "ref_pn": "Name",
    "parse_desc": "inscor__Keyword__c",
    "qty_requested": "inscor__Quantity_Requested__c",
    "num_qty_on_pb": "Number of Quantity on PartsBase",
    "approved_source": "Approved Source",
    "lead_time": "inscor__Lead_Time__c",
    "incumbent": "Incumbent",
    "multiple_award": "Multiple Award",
    "approved_source_codes": "Approved Source Codes",
    "date_create": "Date Created",
    "date_due": "inscor__Core_Due__c",
    "psc_code": "PSC Code",
    "set_aside": "Set Aside",
    "NSN": "inscor__NSN1__c",
    "flis_pn": "FLIS Part Number",
    "parse_pn": "Parsed Part Number",
    "flis_desc": "FLIS Description"
})

# Independent copy holding only the columns needed to create Product2 records.
df_parts = df_quote_lines[['Name', 'inscor__Keyword__c', 'inscor__NSN1__c']].copy()

# Truncate the keyword to fit the Salesforce field.
# NOTE(review): the original note gives the maximum length as 15 characters, but
# the slice keeps 14 - confirm the intended limit before changing it.
KEYWORD_MAX_LEN = 14
df_parts['inscor__Keyword__c'] = df_parts['inscor__Keyword__c'].astype(str).str[:KEYWORD_MAX_LEN]
df_quote_lines.head()

Unnamed: 0,inscor__Customer_Reference_Line__c,Name,inscor__Keyword__c,inscor__Quantity_Requested__c,Number of Quantity on PartsBase,Approved Source,inscor__Lead_Time__c,Incumbent,Multiple Award,Approved Source Codes,Date Created,inscor__Core_Due__c,PSC Code,Set Aside,inscor__NSN1__c,FLIS Part Number,Parsed Part Number,FLIS Description,combined_string
0,SPE4A725T591A,C5B205333,"RELEASE, MECHANISM",8,0,,"473 DAYS ADO for Line 0001, 180 DAYS ADO for L...",,no,[],2025-07-02T12:00:00Z,2025-07-10T12:00:00Z,16 - AEROSPACE CRAFT COMPONENTS AND ACCESSORIES,SBA - Total Small Business Set-Aside (FAR 19.5),1680012305966,C5B205333,,"RELEASE, MECHANISM","SPE4A725T591A | C5B205333 | RELEASE, MECHANISM..."
1,SPE4A725T591G,9R3886,"NRP, RESTRICTOR CHEC",3,0,1WAC1 9R3886; 99240 9R3886,114 days,,no,"[1WAC1 9R3886, 99240 9R3886]",2025-07-02T12:00:00Z,2025-07-10T12:00:00Z,16 - AEROSPACE CRAFT COMPONENTS AND ACCESSORIES,SBA - Total Small Business Set-Aside (FAR 19.5),1680016225767,9R3886,,"NRP, RESTRICTOR CHEC","SPE4A725T591G | 9R3886 | NRP, RESTRICTOR CHEC ..."
2,SPE4A725T590U,5030T92P03,"SEAL ASSEMBLY,CARBO",18,0,75370 5030T92P03; 99207 5030T92P03,163 DAYS ADO,,no,"[75370, 99207]",2025-07-02T12:00:00Z,2025-07-10T12:00:00Z,28 - ENGINES AND TURBINES AND COMPONENT,SBA - Total Small Business Set-Aside (FAR 19.5),2840011506660,"5030T92P03,201100,10158655",,"SEAL ASSEMBLY,CARBO","SPE4A725T590U | 5030T92P03 | SEAL ASSEMBLY,CAR..."


Error handling could be improved in this example, but our merging strategy involves enabling a trigger to prevent duplicate Product2 names, then pulling down the full list of Product2 IDs to merge against. You could narrow the pull by filtering on LastModifiedDate or CreatedDate, although the full download works even with two million rows. However, it’s not recommended to pull down that many rows on every run, especially in a production environment. Please form a caching strategy if you have to run this frequently (for example, delta pulling with CDC).


In [44]:
# SOQL query for the Id and Name of every existing Product2 record.
sql_query = """
SELECT Id, Name
FROM Product2
"""

# Issue the query through the Product2 bulk handle so the call matches the object
# being queried (it previously went through sf.bulk2.Account).
result = sf.bulk2.Product2.download(
    sql_query,
    path='../datasets',
    max_records=2000000,
)

# download() returns one entry per CSV file it wrote; read all of them rather than
# only the first, so no records are dropped when the result spans several files.
downloaded_files = [chunk['file'] for chunk in result]
file_path = downloaded_files[0] if downloaded_files else None

if downloaded_files:
    sf_parts = pd.concat(
        [pd.read_csv(path) for path in downloaded_files],
        ignore_index=True,
    )
else:
    # No files came back: keep the expected columns so later cells still work.
    sf_parts = pd.DataFrame(columns=['Id', 'Name'])

In [61]:

# Keep only the parts whose Name is not already present in Salesforce.
is_new_part = ~df_parts['Name'].isin(sf_parts['Name'])

# Several bids can reference the same part number; keep the first occurrence so the
# batch does not try to insert the same Product2 name twice.
df_insert = df_parts[is_new_part].drop_duplicates(subset='Name')
df_insert.head()


Unnamed: 0,Name,inscor__Keyword__c,inscor__NSN1__c
0,C5B205333,"RELEASE, MECHA",1680012305966
2,5030T92P03,"SEAL ASSEMBLY,",2840011506660


In [None]:
# Convert the new parts into the list-of-dicts form passed to the bulk insert.
records = df_insert.to_dict(orient='records')

if records:
    result = sf.bulk2.Product2.insert(records=records, concurrency=100)
    print(result)

    #check the results
    success, fail = get_bulk2_results(result)
    print(f"Total records inserted: {len(success)}")
    display(success.head(3))
    print(f"Total records failed: {len(fail)}")
    display(fail.head(3))

    #Yes you could (should) certainly use the rest API for this amount of data, but I already put BulkAPI in the presentation title
    #There is no backing out now.

    #add success results to the DataFrame
    # Select both columns with a list (a bare tuple key raises KeyError) and rename
    # sf__Id to Id so the new rows line up with the Id/Name columns of sf_parts.
    new_parts = success[['sf__Id', 'Name']].rename(columns={'sf__Id': 'Id'})
    sf_parts = pd.concat([sf_parts, new_parts], ignore_index=True)
else:
    # Nothing new to create: skip the bulk job entirely.
    print("No new Product2 records to insert.")

[{'numberRecordsFailed': 0, 'numberRecordsProcessed': 2, 'numberRecordsTotal': 2, 'job_id': '750ep000003F8Q9AAK'}]
Total records inserted: 2


Unnamed: 0,sf__Id,sf__Created,inscor__Keyword__c,inscor__NSN1__c,Name,job_id
0,01tep000002OHWSAA4,True,"RELEASE, MECHA",1680012305966,C5B205333,750ep000003F8Q9AAK
1,01tep000002OHWTAA4,True,"SEAL ASSEMBLY,",2840011506660,5030T92P03,750ep000003F8Q9AAK


Total records failed: 0


Unnamed: 0,sf__Id,sf__Error,inscor__Keyword__c,inscor__NSN1__c,Name,job_id


In [None]:
#drop dupes then merge, then remove uneeded columns