Load the lead and opportunity records and match their IDs to build a dataset containing:

- lead id
- opportunity id
- lead date created
- opportunity date created
- opportunity close date
- opportunity stage (`StageName`, e.g. Closed Won, Closed Lost)

The result is stored in the S3 bucket `sfc-export` as `leads_to_opportunities.csv`.

In [1]:
import pandas as pd
import numpy as np
import sys

# Add the shared scripts directory to the import path (relative to the
# notebook's working directory); s3_support is presumably located there.
sys.path.insert(1, '../../../scripts/')
# NOTE(review): the wildcard import hides where list_files, get_file_url and
# save_dataframe_to_file come from (presumably s3_support) - consider
# importing them by name.
from s3_support import *

### Looking at opportunity IDs

In [2]:
# List files in the sfc-export bucket for the search key "opp" (presumably a
# name filter - confirm in s3_support). The return value is bound to `_` so
# that only the helper's printed listing is shown.
_ = list_files("sfc-export", search_key="opp")

Opportunity.csv (8MB)
OpportunityContactRole.csv (1MB)
OpportunityHistory.csv (14MB)
OpportunityLineItem.csv (2MB)
OpportunityLineItemSchedule.csv (1MB)
--------------------------------------------------
Matched files: 5 files (0.0GB)
Bucket sfc-export contains 98 files (3.5GB)


In [3]:
# Resolve the location of the Opportunity export, then load it into a DataFrame.
opps_url = get_file_url("sfc-export", "Opportunity.csv")
opps = pd.read_csv(
    opps_url,
    encoding="ISO-8859-1",  # the export is decoded as Latin-1
    low_memory=False,       # parse in one pass so column dtypes are inferred consistently
)
opps.head(3)

Unnamed: 0,Id,IsDeleted,AccountId,RecordTypeId,IsPrivate,Name,Description,StageName,StageSortOrder,Amount,...,Auction_Size_of_Event__c,Auction_Beta_Tester__c,Auction_Goal_Amount__c,Auction_Competitor_Used__c,Auction_Comments__c,n2de__DE_Status__c,n2de__DE_Territory__c,n2de__Time_to_action_mins__c,Interested_in_Auctions__c,Parent_Account__c
0,0063100000XynhfAAB,0,0013100001Xns2hAAB,,0,One Lord One Body Ministries-,,Closed Won,4,199.0,...,,,,,,,,,,
1,0063100000XyoGSAAZ,0,0013100001XnsVMAAZ,,0,J.L. Mann Spirit Week,,Closed Lost,5,199.0,...,,,,,,,,,,
2,0063100000XypDpAAJ,0,0013100001XntERAAZ,,0,"Cat Coalition, Inc.",,Closed Won,4,199.0,...,,,,,,,,,,


In [4]:
# Number of opportunity records loaded.
len(opps)

7972

In [5]:
# Preview every opportunity column whose name contains "Stage" (case-sensitive).
opps.filter(like='Stage').head(2)

Unnamed: 0,StageName,StageSortOrder,LastStageChangeDate,readymade_inv__Inventory_Qualify_Stage__c,Partner_Stage__c
0,Closed Won,4,2015-07-22 13:49:58,75.0,
1,Closed Lost,5,2015-08-03 15:40:33,75.0,


In [6]:
# Summary of the CloseDate column (count, distinct values, most frequent value).
opps['CloseDate'].describe()

count                    7972
unique                   1564
top       2019-12-31 00:00:00
freq                      235
Name: CloseDate, dtype: object

### Looking at lead IDs

In [7]:
# Resolve the location of the Lead export, then load it into a DataFrame.
leads_url = get_file_url("sfc-export", "Lead.csv")
leads = pd.read_csv(
    leads_url,
    encoding="ISO-8859-1",  # the export is decoded as Latin-1
    low_memory=False,       # parse in one pass so column dtypes are inferred consistently
)
# Preview every lead column whose name contains "Id" (case-sensitive) to find
# the field that links a lead to its opportunity.
leads.filter(like='Id').head(3)

Unnamed: 0,Id,MasterRecordId,RecordTypeId,OwnerId,ConvertedAccountId,ConvertedContactId,ConvertedOpportunityId,CreatedById,LastModifiedById,JigsawContactId,IndividualId,bizible2__BizibleId__c,mkto2__Acquisition_Program_Id__c
0,00Q31000012IRiDEAW,,,005i0000001iPy1AAE,0013100001XoZgFAAV,0033100002ksvBvAAI,0063100000XzU2aAAF,005i0000001hjDIAAY,00531000006kRT2AAM,,000000000000000AAA,,
1,00Q31000012ITYBEA4,,,005i0000001iPsDAAU,0013100001Xnv0SAAR,0033100002krg0AAAQ,0063100000XyrV3AAJ,005i0000001hjDIAAY,005i0000001iPsDAAU,,000000000000000AAA,,
2,00Q31000012IUAMEA4,,,005i0000001iPy1AAE,0013100001dzrPUAAY,0033100002tE9R3AAK,0063100000afCpAAAU,005i0000001hjDIAAY,005i0000001iPy1AAE,,000000000000000AAA,,


### Mapping lead IDs to opportunity IDs with created dates

In [8]:
# Keep only the lead id, the id of the opportunity it was converted into and
# the lead creation timestamp, under the names used in the output dataset.
leads_and_opps = leads[['Id', 'ConvertedOpportunityId', 'CreatedDate']].rename(
    columns={
        'Id': 'lead_id',
        'ConvertedOpportunityId': 'opportunity_id',
        'CreatedDate': 'lead_date',
    }
)

In [9]:
# Preview the lead -> opportunity id mapping before the opportunity fields are joined in.
leads_and_opps.head(3)

Unnamed: 0,lead_id,opportunity_id,lead_date
0,00Q31000012IRiDEAW,0063100000XzU2aAAF,2015-07-19 20:36:11
1,00Q31000012ITYBEA4,0063100000XyrV3AAJ,2015-07-20 15:57:41
2,00Q31000012IUAMEA4,0063100000afCpAAAU,2015-07-20 18:02:44


In [10]:
# Attach each opportunity's creation date, close date and stage to its lead.
# merge() defaults to an inner join, so leads whose opportunity_id has no
# matching opportunity Id are dropped.
leads_and_opps = (
    leads_and_opps
    .merge(
        opps[['Id', 'CreatedDate', 'CloseDate', 'StageName']],
        left_on='opportunity_id',
        right_on="Id",
    )
    # Copy the opportunity fields to their output names...
    .assign(
        opportunity_date=lambda merged: merged['CreatedDate'],
        opportunity_closedate=lambda merged: merged['CloseDate'],
        opportunity_stage=lambda merged: merged['StageName'],
    )
    # ...then discard the raw Salesforce columns, including the duplicate join key.
    .drop(['Id', 'CreatedDate', 'CloseDate', 'StageName'], axis=1)
)

In [11]:
# Preview the joined lead/opportunity table.
leads_and_opps.head(3)

Unnamed: 0,lead_id,opportunity_id,lead_date,opportunity_date,opportunity_closedate,opportunity_stage
0,00Q31000012IRiDEAW,0063100000XzU2aAAF,2015-07-19 20:36:11,2015-07-28 18:32:52,2015-07-28 00:00:00,Closed Won
1,00Q31000012ITYBEA4,0063100000XyrV3AAJ,2015-07-20 15:57:41,2015-07-20 20:27:09,2015-07-31 00:00:00,Closed Lost
2,00Q31000012IUAMEA4,0063100000afCpAAAU,2015-07-20 18:02:44,2016-01-07 15:15:24,2015-09-01 00:00:00,Closed Lost


In [12]:
# Row count after the join; the merge above uses the default inner join, so
# only leads with a matching opportunity Id are counted here.
len(leads_and_opps)

7014

In [14]:
# Persist the joined table as leads_to_opportunities.csv in the sfc-export
# bucket. NOTE(review): the write format and overwrite behaviour depend on the
# s3_support helper - confirm there.
save_dataframe_to_file("sfc-export", 'leads_to_opportunities.csv', leads_and_opps)

uploading to S3
Done
