In [4]:
import os, time, json
import regex as re
import pandas as pd
from dotenv import load_dotenv
from industry_grouping import startup_ind_main
from read_companies import batch_read_companies
from read_deals import get_deals_by_pipeline, list_deal_associations
from read_contacts import batch_read_contacts
from bs4 import BeautifulSoup
import requests
load_dotenv()

# Root of the shared dataset folder. Overridable through the SPUTNIK_DBDIR
# environment variable (which may come from the .env file loaded by
# load_dotenv() above) so the notebook is not tied to one user's machine;
# the default is the original hard-coded location.
DBDIR = os.environ.get(
    "SPUTNIK_DBDIR",
    "C:/Users/galon/Sputnik ATX Team Dropbox/Programming Datasets",
)
RAW_DIR = os.path.join(DBDIR, "data", "raw")
CLEAN_DIR = os.path.join(DBDIR, "data", "clean")
IND_DIR = os.path.join(CLEAN_DIR, "industry_mapping")

# Shared parent of the crunchbase CSVs referenced below.
_CRUNCHBASE_DIR = os.path.join(CLEAN_DIR, 'scraped_data', 'crunchbase')

VC_INDUSTRY_COLS = os.path.join(RAW_DIR, "hs", "vc_industry_columns.csv")
STARTPATH = os.path.join(_CRUNCHBASE_DIR, 'cb_startups_main.csv')
INVPATH = os.path.join(_CRUNCHBASE_DIR, 'cb_vc_main_merged.csv')
MAPPING_OUTPATH = os.path.join(IND_DIR, "cb_starts_mapped.csv")
START_HERF_PATH = os.path.join(_CRUNCHBASE_DIR, 'cb_inv_overview_scraped.csv')
VC_PATH = os.path.join(RAW_DIR, 'hs', 'vc_list_export.csv')
# propose 397 each for first citizens
# pd.set_option('display.max_colwidth', None)
# pd.set_option('display.max_columns', 1000)
# pd.set_option('display.max_rows', 50)

In [5]:
# df = startup_ind_main()
# def company_investments(df, id_, arg_list):

#     print(len(df[df['id']==id_]))
#     df = df[df['pf_inds'].str.contains('|'.join(arg_list))]
#     return df[df['id']==id_]

# def prelim(df):
        
#     num_specific = df.loc[df['pf_inds'].apply(lambda x: len(re.findall(';',str(x)))) == 1, :]
#     ind_specific = df.loc[df['pf_inds'] == ';Mental Health', :]
#     return ind_specific

# company_investments(df, 9060201392, ['Fintech', 'Employment', 'Human Resources'])

In [21]:
def get_associations(dict_list):
    """Attach associated company/contact ids to each deal; return one row per (deal, contact).

    Parameters
    ----------
    dict_list : list of dict
        Deal records. Each must contain 'hs_object_id'. A 'createdate' key,
        when present, is left out of the result. The input dicts are not
        modified, so the function can safely be re-run on the same list.

    Returns
    -------
    pandas.DataFrame
        The deal fields with 'hs_object_id' renamed to 'deal_id', plus:
        - 'associated_company_ids': id of the first associated company, or
          None when the deal has no associated company;
        - 'associated_contact_ids': one associated contact id per row
          (exploded); missing (NaN) for deals without contacts.
        The index produced by ``explode`` is kept, so rows originating from
        the same deal share an index label.
    """
    records = []
    for deal in dict_list:
        deal_id = deal['hs_object_id']
        company_results = list_deal_associations(deal_id, 'company').results
        contact_results = list_deal_associations(deal_id, 'contact').results

        # Work on a copy (minus 'createdate') instead of mutating the caller's dict.
        record = {key: value for key, value in deal.items() if key != 'createdate'}
        # Only the first associated company is kept; a deal without one gets None
        # rather than raising IndexError.
        record['associated_company_ids'] = company_results[0].id if company_results else None
        record['associated_contact_ids'] = [result.id for result in (contact_results or [])]
        records.append(record)

    if not records:
        # explode() would raise KeyError on a frame with no columns.
        return pd.DataFrame(columns=['deal_id', 'associated_company_ids', 'associated_contact_ids'])

    return (
        pd.DataFrame(records)
        .rename(columns={'hs_object_id': 'deal_id'})
        .explode('associated_contact_ids')
    )
    

In [34]:
def merge_associations(df):
    """Look up the companies and contacts referenced by ``df`` and left-merge their details onto it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'associated_company_ids' and 'associated_contact_ids'
        columns; missing values in either column are skipped for the lookup.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus 'associated_company_name' (joined on
        'associated_company_ids') and 'associated_first', 'associated_last',
        'associated_email' (joined on 'associated_contact_ids'). Rows whose
        id is missing or not returned by the lookup get NaN in those columns.
        The number of rows equals ``len(df)``.
    """
    company_columns = ['associated_company_name', 'associated_company_ids']
    contact_columns = ['associated_first', 'associated_last', 'associated_email', 'associated_contact_ids']

    def _lookup_inputs(column):
        # Each id is requested once, in order of first appearance; df repeats
        # ids (one row per deal/contact pair).
        unique_ids = dict.fromkeys(str(id_) for id_ in df[column].dropna())
        return [{'id': id_} for id_ in unique_ids]

    company_inputs = _lookup_inputs('associated_company_ids')
    contact_inputs = _lookup_inputs('associated_contact_ids')

    company_list = []
    if company_inputs:  # skip the API call when there is nothing to look up
        for company in batch_read_companies(['name', 'id'], [], None, company_inputs).results:
            company_list.append({
                'associated_company_name': company.properties['name'],
                'associated_company_ids': company.id,
            })

    contact_list = []
    if contact_inputs:
        for contact in batch_read_contacts(['firstname', 'lastname', 'email', 'id'], [], None, contact_inputs).results:
            contact_list.append({
                'associated_first': contact.properties['firstname'],
                'associated_last': contact.properties['lastname'],
                'associated_email': contact.properties['email'],
                'associated_contact_ids': contact.id,
            })

    # Explicit columns keep the merge keys present even when a lookup returned
    # nothing; de-duplicating on the key guarantees the left merges cannot
    # multiply rows of df.
    company_lookup = pd.DataFrame(company_list, columns=company_columns).drop_duplicates('associated_company_ids')
    contact_lookup = pd.DataFrame(contact_list, columns=contact_columns).drop_duplicates('associated_contact_ids')

    return (
        df.merge(company_lookup, on='associated_company_ids', how='left')
        .merge(contact_lookup, on='associated_contact_ids', how='left')
    )

In [35]:
# Fetch the deals of the 'default' pipeline, attach their associated ids, then
# merge in the company/contact details; the final frame is the cell's output.
default_deals = get_deals_by_pipeline([], 'default')
deal_links = get_associations(default_deals)
merge_associations(deal_links)

[{'company_name': 'Lubbock Angel Network', 'associated_company_ids': '9980109572'}, {'company_name': 'Sweater', 'associated_company_ids': '9059960082'}, {'company_name': 'Parade Ventures', 'associated_company_ids': '9060068770'}, {'company_name': 'Firebrand Ventures', 'associated_company_ids': '9060056426'}, {'company_name': 'HandsOnAngel', 'associated_company_ids': '9906829446'}, {'company_name': 'Urban Innovation Fund', 'associated_company_ids': '9059979276'}, {'company_name': 'Amplify.LA', 'associated_company_ids': '9060175963'}, {'company_name': 'Detroit Venture Partners', 'associated_company_ids': '9060175770'}, {'company_name': 'Camp One Ventures', 'associated_company_ids': '9059184979'}, {'company_name': 'Blue Leaf Ventures', 'associated_company_ids': '9059996281'}, {'company_name': 'Backend Capital', 'associated_company_ids': '9059185173'}, {'company_name': 'Sound Ventures', 'associated_company_ids': '9060165512'}, {'company_name': 'Alamo Angels', 'associated_company_ids': '905

Unnamed: 0,associated_startup,dealname,dealstage,deal_id,pipeline,reason,associated_company_ids,associated_contact_ids,company_name,first,last,email
0,Savimbo;Fila Manila,Cosmic Venture Partners - New Deal,appointmentscheduled,10755825286,default,,9060201771,,Cosmic Venture Partners,,,
1,Bright Pay Health,Pivotal Ventures - New Deal,appointmentscheduled,10756489840,default,,9060154962,,Pivotal Ventures,,,
2,Shmoody;SaveDay,Pear VC - Re-engage,30695286,10756491690,default,,9060154211,336278,Pear VC,Keith,Bender,keith@pear.vc
3,,CEAS Investments - Re-engage,30695286,10756492198,default,,9060092831,339249,CEAS Investments,Ronnie,Martinez,ronniej@ceasinvestments.com
4,SaveDay,SixThirty - Re-engage,40609340,10756492831,default,,9060056407,,SixThirty,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
78,SaveDay,Lightbank - New Deal,appointmentscheduled,11130527076,default,,9060144477,,Lightbank,,,
79,SaveDay,Loyal VC - New Deal,appointmentscheduled,11130527652,default,,9060193606,,Loyal VC,,,
80,SaveDay,MGV Capital - New Deal,appointmentscheduled,11130528559,default,,9059185081,,MGV Capital,,,
81,SaveDay,Mithril Capital Management - New Deal,appointmentscheduled,11130670337,default,,9060028051,,Mithril Capital Management,,,
