# Combine Hunter.io, gSheet, and Manually Emailed Contacts

### Dependencies and Setup

In [1]:
# import dependencies
import pandas as pd

### Dataset Combination

In [2]:
# Read the hunter.io contacts and reshape them into the shared contact schema:
# drop the 'Unnamed: 0' column, rename two headers, add the columns this
# source does not provide (as empty strings), then fix the column order.
hunter_io = (
    pd.read_csv('data_files/hubspot_contact_upload_08112019.csv')
    .drop(columns=['Unnamed: 0'])
    .rename(columns={'title': 'jobTitle', 'profileUrl': 'linkedInUrl'})
    .assign(
        personalEmail='',
        industry='',
        freeLinkedInMail='',
        reachedOut='',
        message='',
    )
    .loc[:, [
        'firstName', 'lastName', 'jobTitle', 'companyName',
        'industry', 'linkedInUrl', 'location',
        'freeLinkedInMail',
        'email', 'personalEmail',
        'reachedOut', 'message',
    ]]
)
hunter_io.head()

Unnamed: 0,firstName,lastName,jobTitle,companyName,industry,linkedInUrl,location,freeLinkedInMail,email,personalEmail,reachedOut,message
0,Microshare,Data,Director Data Science,Microshare.io,,https://www.linkedin.com/sales/people/ACwAACLv...,Greater Philadelphia Area,,mdata@microshare.io,,,
1,Ujjwal,S.,Director Data Science,Capgemini,,https://www.linkedin.com/sales/people/ACwAAA2W...,Greater New York City Area,,s-ujjwal@capgemini.com,,,
2,Juliette,Tabet,Director Data Science,Netflix,,https://www.linkedin.com/sales/people/ACwAAAGy...,San Francisco Bay Area,,jtabet@netflix.com,,,
3,Thomas,Wiecki,Director Data Science,Quantopian,,https://www.linkedin.com/sales/people/ACwAAAlP...,"Cologne Area, Germany",,twiecki@quantopian.com,,,
4,Palak,Mazumdar,Director Data Science,IBM,,https://www.linkedin.com/sales/people/ACwAABXw...,"Bengaluru Area, India",,palakmazumdar@bluemix.net,,,


In [3]:
# Read the gSheet contacts (exported as CSV) and preview the first rows.
gsheet_contacts = pd.read_csv(
    filepath_or_buffer='data_files/Potential_Leads_CogniTech - HubSpot.csv',
)
gsheet_contacts.head(n=5)

Unnamed: 0,Full name,Job title,Company name,Industry,LinkedIn point of contact,LinkedIn URL,State/Region,Free LinkedIn InMail,Email,Work email,Reached Out,Message
0,Julie Wang,"Director, Data Science",Confluent,Software,,https://www.linkedin.com/in/juliewanglinkedin,SF Bay Area,Yes,,,Messaged on LinkedIn,No response
1,Christine Nicolas,"Director, Strategy & Analytics",Gainsight,Software,,https://www.linkedin.com/in/christinenicolas,SF Bay Area,Yes,,,Messaged on LinkedIn,No response
2,Tina Wisner,"VP, Marketing Analytics",Feld Direct,Marketing and Advertising,,https://www.linkedin.com/in/tinawisner,SF Bay Area,Yes,,,Messaged on LinkedIn,No response
3,Ivan Hom,Head of Data Science,NetApp,Tech,,https://www.linkedin.com/in/ivan-hom-phd-0873501,SF Bay Area,Yes,,,Messaged on LinkedIn,No response
4,Robin Chiang,Director of Product Experience Analytics,Facebook,Internet,,https://www.linkedin.com/in/robinrchiang,SF Bay Area,Yes,,,Messaged on LinkedIn,No response


In [4]:
# create firstName and lastName columns from 'Full name'
def first_name_last_name(full_name, first_last='first'):
    """Return the first- or last-name part of a full name.

    The name is split on whitespace. The first token is the first name and
    everything after it is the last name, so multi-word surnames
    ('Ludwig van Beethoven' -> 'van Beethoven') are kept whole.

    Parameters
    ----------
    full_name : str
        Full name to split. Non-string values (e.g. the NaN pandas uses for
        an empty cell) and blank strings yield ''.
    first_last : str, default 'first'
        'first' selects the first name; any other value selects the last name.

    Returns
    -------
    str
        The requested part, or '' when the name does not contain it
        (for example the last name of a single-word name).
    """
    if not isinstance(full_name, str):
        return ''
    # split() with no separator ignores leading and repeated whitespace;
    # maxsplit=1 keeps everything after the first token together.
    parts = full_name.split(maxsplit=1)
    if first_last == 'first':
        return parts[0] if parts else ''
    # The remainder can still carry trailing whitespace, so strip it.
    return parts[1].strip() if len(parts) > 1 else ''

# Derive both name columns directly from the 'Full name' Series. Only that one
# column is read, so a row-wise DataFrame.apply(axis=1) is unnecessary;
# Series.map calls the helper once per value and always returns a Series.
gsheet_contacts['firstName'] = gsheet_contacts['Full name'].map(
    lambda name: first_name_last_name(name, 'first')
)
gsheet_contacts['lastName'] = gsheet_contacts['Full name'].map(
    lambda name: first_name_last_name(name, 'last')
)
gsheet_contacts.head()

Unnamed: 0,Full name,Job title,Company name,Industry,LinkedIn point of contact,LinkedIn URL,State/Region,Free LinkedIn InMail,Email,Work email,Reached Out,Message,firstName,lastName
0,Julie Wang,"Director, Data Science",Confluent,Software,,https://www.linkedin.com/in/juliewanglinkedin,SF Bay Area,Yes,,,Messaged on LinkedIn,No response,Julie,Wang
1,Christine Nicolas,"Director, Strategy & Analytics",Gainsight,Software,,https://www.linkedin.com/in/christinenicolas,SF Bay Area,Yes,,,Messaged on LinkedIn,No response,Christine,Nicolas
2,Tina Wisner,"VP, Marketing Analytics",Feld Direct,Marketing and Advertising,,https://www.linkedin.com/in/tinawisner,SF Bay Area,Yes,,,Messaged on LinkedIn,No response,Tina,Wisner
3,Ivan Hom,Head of Data Science,NetApp,Tech,,https://www.linkedin.com/in/ivan-hom-phd-0873501,SF Bay Area,Yes,,,Messaged on LinkedIn,No response,Ivan,Hom
4,Robin Chiang,Director of Product Experience Analytics,Facebook,Internet,,https://www.linkedin.com/in/robinrchiang,SF Bay Area,Yes,,,Messaged on LinkedIn,No response,Robin,Chiang


In [5]:
# Map the gSheet headers onto the shared contact schema, then keep only the
# schema columns, in the same order used for the hunter.io frame.
gsheet_contacts = (
    gsheet_contacts
    .rename(columns={
        'Job title': 'jobTitle',
        'Company name': 'companyName',
        'Industry': 'industry',
        'LinkedIn URL': 'linkedInUrl',
        'State/Region': 'location',
        'Free LinkedIn InMail': 'freeLinkedInMail',
        'Email': 'personalEmail',
        'Work email': 'email',
        'Reached Out': 'reachedOut',
        'Message': 'message',
    })
    .loc[:, [
        'firstName', 'lastName', 'jobTitle', 'companyName',
        'industry', 'linkedInUrl', 'location',
        'freeLinkedInMail',
        'email', 'personalEmail',
        'reachedOut', 'message',
    ]]
)
gsheet_contacts.head()

Unnamed: 0,firstName,lastName,jobTitle,companyName,industry,linkedInUrl,location,freeLinkedInMail,email,personalEmail,reachedOut,message
0,Julie,Wang,"Director, Data Science",Confluent,Software,https://www.linkedin.com/in/juliewanglinkedin,SF Bay Area,Yes,,,Messaged on LinkedIn,No response
1,Christine,Nicolas,"Director, Strategy & Analytics",Gainsight,Software,https://www.linkedin.com/in/christinenicolas,SF Bay Area,Yes,,,Messaged on LinkedIn,No response
2,Tina,Wisner,"VP, Marketing Analytics",Feld Direct,Marketing and Advertising,https://www.linkedin.com/in/tinawisner,SF Bay Area,Yes,,,Messaged on LinkedIn,No response
3,Ivan,Hom,Head of Data Science,NetApp,Tech,https://www.linkedin.com/in/ivan-hom-phd-0873501,SF Bay Area,Yes,,,Messaged on LinkedIn,No response
4,Robin,Chiang,Director of Product Experience Analytics,Facebook,Internet,https://www.linkedin.com/in/robinrchiang,SF Bay Area,Yes,,,Messaged on LinkedIn,No response


### Combine

In [6]:
# Stack the two sources into one frame. ignore_index=True gives the result a
# fresh 0..n-1 index; without it each source keeps its own row labels, so
# labels repeat and label-based lookups on the result become ambiguous.
combined_data = pd.concat([hunter_io, gsheet_contacts], ignore_index=True)
len(combined_data)

676

In [7]:
# Sanity check: total number of rows across the two source frames.
gsheet_contacts.shape[0] + hunter_io.shape[0]

676

In [8]:
# Preview the first five rows of the combined frame.
combined_data.head(n=5)

Unnamed: 0,firstName,lastName,jobTitle,companyName,industry,linkedInUrl,location,freeLinkedInMail,email,personalEmail,reachedOut,message
0,Microshare,Data,Director Data Science,Microshare.io,,https://www.linkedin.com/sales/people/ACwAACLv...,Greater Philadelphia Area,,mdata@microshare.io,,,
1,Ujjwal,S.,Director Data Science,Capgemini,,https://www.linkedin.com/sales/people/ACwAAA2W...,Greater New York City Area,,s-ujjwal@capgemini.com,,,
2,Juliette,Tabet,Director Data Science,Netflix,,https://www.linkedin.com/sales/people/ACwAAAGy...,San Francisco Bay Area,,jtabet@netflix.com,,,
3,Thomas,Wiecki,Director Data Science,Quantopian,,https://www.linkedin.com/sales/people/ACwAAAlP...,"Cologne Area, Germany",,twiecki@quantopian.com,,,
4,Palak,Mazumdar,Director Data Science,IBM,,https://www.linkedin.com/sales/people/ACwAABXw...,"Bengaluru Area, India",,palakmazumdar@bluemix.net,,,


### Manually Reached Contacts

In [18]:
# Read the manually reached-out contacts and drop the 'Unnamed: 0' column.
manual_contacts = (
    pd.read_csv('data_files/manual_reached_contacts.csv')
    .drop(columns=['Unnamed: 0'])
)
manual_contacts.head()

Unnamed: 0,firstName,lastName,jobTitle,companyName,industry,linkedInUrl,location,freeLinkedInMail,email,personalEmail,reachedOut,message
0,Bilal,Zuberi,,Lux Capital,,,,,bz@luxcapital.com,,,
1,Hemant,Taneja,,General Catalyst,,,,,htaneja@generalcatalyst.com,,,
2,Anoop,Muraleedharan,,Autodesk Inc.,,,,,anoop.muraleedharan@autodesk.com,,,
3,Alden,Timme,,Oracle,,,,,alden.timme@oracle.com,,,
4,Sarah,Gustafson,,Gusto,,,,,sarah.gustafson@gusto.com,,,


In [19]:
# Append the manually reached contacts to the master frame. ignore_index=True
# renumbers the rows 0..n-1 so the appended rows do not reuse row labels that
# already exist in combined_data.
combined_data_final = pd.concat([combined_data, manual_contacts], ignore_index=True)

In [20]:
# Row count of the final master frame.
combined_data_final.shape[0]

716

In [21]:
# Sanity check: total number of rows across the two frames that were stacked.
combined_data.shape[0] + manual_contacts.shape[0]

716

In [22]:
# Write the master contact list to CSV. The row index is written too (pandas'
# default), which shows up as an unnamed first column in the file.
# NOTE(review): index=False would omit it, but any reader that drops
# 'Unnamed: 0' after loading this file would then need updating - confirm first.
combined_data_final.to_csv(
    path_or_buf='data_files/hubspot_contact_upload_08182019_master.csv',
)