In [147]:
'''
투자자 분석
1. 초기 투자를 잘 하는 작은 규모의 투자자를 찾아보자
2. Top10, 25 들이랑 같이 들어가는 투자자들을 찾아보자
'''

'\n\xed\x88\xac\xec\x9e\x90\xec\x9e\x90 \xeb\xb6\x84\xec\x84\x9d\n1. \xec\xb4\x88\xea\xb8\xb0 \xed\x88\xac\xec\x9e\x90\xeb\xa5\xbc \xec\x9e\x98 \xed\x95\x98\xeb\x8a\x94 \xec\x9e\x91\xec\x9d\x80 \xea\xb7\x9c\xeb\xaa\xa8\xec\x9d\x98 \xed\x88\xac\xec\x9e\x90\xec\x9e\x90\xeb\xa5\xbc \xec\xb0\xbe\xec\x95\x84\xeb\xb3\xb4\xec\x9e\x90\n2. Top10, 25 \xeb\x93\xa4\xec\x9d\xb4\xeb\x9e\x91 \xea\xb0\x99\xec\x9d\xb4 \xeb\x93\xa4\xec\x96\xb4\xea\xb0\x80\xeb\x8a\x94 \xed\x88\xac\xec\x9e\x90\xec\x9e\x90\xeb\x93\xa4\xec\x9d\x84 \xec\xb0\xbe\xec\x95\x84\xeb\xb3\xb4\xec\x9e\x90\n'

In [148]:
import pandas as pd
import re
import gensim
import pickle
import numpy as np
import operator
from datetime import datetime
from matplotlib import pyplot as plt
%matplotlib inline

In [149]:
# Load the CSV export tables into module-level DataFrames used by later cells.
data_folder = '../data/csv_export/'
util_folder = '../util/'

# The two funding-date columns are forced to plain strings instead of letting
# pandas infer a dtype for them.
df_organizations = pd.read_csv(
    data_folder + 'organizations.csv',
    dtype={'first_funding_on': str, 'last_funding_on': str},
)
df_description = pd.read_csv(data_folder + 'organization_descriptions.csv')
df_funding_rounds = pd.read_csv(data_folder + 'funding_rounds.csv')
df_funds = pd.read_csv(data_folder + 'funds.csv')
df_investments = pd.read_csv(data_folder + 'investments.csv')
df_acq = pd.read_csv(data_folder + 'acquisitions.csv')
df_people = pd.read_csv(data_folder + 'people.csv')
df_degrees = pd.read_csv(data_folder + 'degrees.csv')
df_investors = pd.read_csv(data_folder + 'investors.csv')

In [180]:
# Find the companies that did well, then count seed investments per investor.
def get_investor_scores(criteria = ['B', 'C', 'acquisition', 'ipo'], founded_after_this = '2007-01-01', asia_only = False):
    """Count each investor's seed investments, overall and in "good" companies.

    Only companies whose ``founded_on`` is on or after ``founded_after_this``
    are considered. A company is "good" when it satisfies at least one entry
    of ``criteria``:

    * ``'acquisition'`` -- it is the acquiree of a row in ``df_acq`` whose
      ``price_usd`` is greater than 1,000,000;
    * ``'ipo'`` -- it appears in ``ipos.csv`` (read from ``data_folder``);
    * any other entry (e.g. ``'A'``, ``'B'``, ``'C'``) -- it has a funding
      round whose ``funding_round_code`` equals that entry.

    The function reads the module-level frames ``df_organizations``,
    ``df_funding_rounds``, ``df_investments`` and ``df_acq`` and does not
    modify any of them. Progress counts are printed along the way.

    Parameters
    ----------
    criteria : list of str
        Success criteria, see above. The default list is never mutated.
    founded_after_this : str
        Earliest founding date (inclusive), parseable by ``pd.Timestamp``.
    asia_only : bool
        When true, keep only companies whose ``country_code`` is listed for
        ``CC == 'AS'`` in ``df_country_code``.

    Returns
    -------
    (investor_score_good, investor_score_all) : tuple of dict
        Both map an investor id to a count of seed-round investments:
        the first counts only seed rounds of good companies, the second
        counts seed rounds of every considered company.
    """
    # Parse founding dates into a local Series so the shared frame is left
    # untouched; values that cannot be parsed become NaT and are excluded.
    founded_on = pd.to_datetime(df_organizations['founded_on'], errors='coerce')
    companies = df_organizations[founded_on >= pd.Timestamp(founded_after_this)]

    # Restrict to Asian companies only.
    if asia_only:
        print('before {}'.format(len(companies)))
        # NOTE(review): df_country_code is not defined in the visible cells and
        # must be loaded before calling with asia_only=True. Presumably 'CC' is
        # a continent code and 'a-3' a 3-letter country code -- confirm.
        asia = df_country_code[df_country_code['CC'] == 'AS']['a-3'].values
        companies = companies[companies['country_code'].isin(asia)]
        print('after {}'.format(len(companies)))
    companies = companies['uuid']
    print('There are {} companies founded after {}'.format(len(companies), founded_after_this))

    # Every criterion that is not an exit event is treated as a round code, so
    # the codes that are printed are exactly the codes that are filtered on.
    funding_criteria = [c for c in criteria if c not in ('acquisition', 'ipo')]
    fundings = df_funding_rounds[df_funding_rounds['company_uuid'].isin(companies)]
    good_companies = set(
        fundings.loc[fundings['funding_round_code'].isin(funding_criteria), 'company_uuid']
    )
    print('{} companies with fundings {}'.format(len(good_companies), funding_criteria))

    # Companies acquired for more than 1M USD.
    if 'acquisition' in criteria:
        acquired = df_acq[(df_acq['price_usd'] > 1000000) & df_acq['acquiree_uuid'].isin(companies)]
        print('{} acquired for 1M+'.format(acquired.shape[0]))
        good_companies.update(acquired['acquiree_uuid'])

    # Companies that went public.
    if 'ipo' in criteria:
        df_ipos = pd.read_csv(data_folder + 'ipos.csv')
        df_ipos = df_ipos[df_ipos['company_uuid'].isin(companies)]
        print('{} IPOs'.format(df_ipos.shape[0]))
        good_companies.update(df_ipos['company_uuid'])

    print('In total, {} good companies'.format(len(good_companies)))

    # Seed rounds of all considered companies, and the subset that belongs to
    # good companies. Sets give constant-time membership tests below.
    seed_rounds = fundings[fundings['funding_round_type'] == 'seed']
    all_seed = set(seed_rounds['funding_round_uuid'])
    good_seed = set(
        seed_rounds.loc[seed_rounds['company_uuid'].isin(list(good_companies)), 'funding_round_uuid']
    )

    print('# good companies: {} with {}\n# good / all seed investments: {} / {}'.format(len(good_companies), criteria, len(good_seed), len(all_seed)))

    # Tally investments per investor. Columns are taken by position (first =
    # funding round id, second = investor id), as the original row[1][0] /
    # row[1][1] lookups did -- presumably 'funding_round_uuid' and
    # 'investor_uuid'; confirm against investments.csv.
    investor_score_good = {}
    investor_score_all = {}
    round_ids = df_investments.iloc[:, 0]
    investor_ids = df_investments.iloc[:, 1]
    for funding_round_uuid, investor_uuid in zip(round_ids, investor_ids):
        if funding_round_uuid in all_seed:
            investor_score_all[investor_uuid] = investor_score_all.get(investor_uuid, 0) + 1
        if funding_round_uuid in good_seed:
            investor_score_good[investor_uuid] = investor_score_good.get(investor_uuid, 0) + 1
    return investor_score_good, investor_score_all

In [181]:
def check(investor_score_good, investor_score_all, MIN_SEED = 10, TOP_N = 20):
    """Print the investors with the highest share of "good" seed investments.

    For every investor with at least ``MIN_SEED`` entries in
    ``investor_score_all`` the score is ``good / all``. Investors are printed
    best-first as ``score=good/all<TAB>name``; the name is looked up in the
    module-level ``df_investors`` frame (``uuid`` -> ``investor_name``).

    Parameters
    ----------
    investor_score_good : dict
        Investor id -> number of seed investments in good companies.
    investor_score_all : dict
        Investor id -> number of all seed investments.
    MIN_SEED : int
        Minimum number of seed investments required to be ranked.
    TOP_N : int
        How many investors to print.

    Returns
    -------
    None
    """
    investor_score = {}
    for investor, all_count in investor_score_all.items():
        # all_count > 0 guards the division when MIN_SEED is zero or negative.
        if all_count >= MIN_SEED and all_count > 0:
            investor_score[investor] = investor_score_good.get(investor, 0) / float(all_count)

    # Ascending stable sort followed by a reversal: highest score first.
    investor_score = sorted(investor_score.items(), key=operator.itemgetter(1))
    investor_score.reverse()
    # .format must be applied to the string, not to the result of print().
    print('{} investors with minimum of {} seed investment'.format(len(investor_score), MIN_SEED))
    for investor, score in investor_score[:TOP_N]:
        # An investor with no good investments is absent from the "good" dict.
        good_count = investor_score_good.get(investor, 0)
        names = df_investors.loc[df_investors['uuid'] == investor, 'investor_name'].values
        # Fall back to the raw id when the investor is not in df_investors.
        name = names[0] if len(names) else investor
        print('{:.3f}={}/{}\t{}'.format(score, good_count, investor_score_all[investor], name))

In [182]:
# Score seed investors twice with the same success criteria: once over all
# companies and once restricted to Asian companies.
criteria = ['B', 'C', 'acquisition', 'ipo']
founded_after_this = '2007-01-01'
global_investor_score_good, global_investor_score_all = get_investor_scores(
    criteria=criteria,
    founded_after_this=founded_after_this,
    asia_only=False,
)
asia_investor_score_good, asia_investor_score_all = get_investor_scores(
    criteria=criteria,
    founded_after_this=founded_after_this,
    asia_only=True,
)

There are 228472 companies founded after 2007-01-01
5558 companies with fundings ['B', 'C']
1301 acquired for 1M+
1423 IPOs
In total, 7801 good companies
# good companies: 7801 with ['B', 'C', 'acquisition', 'ipo']
# good / all seed investments: 2353 / 43526
before 228472
after 28493
There are 28493 companies founded after 2007-01-01
1110 companies with fundings ['B', 'C']
176 acquired for 1M+
141 IPOs
In total, 1386 good companies
# good companies: 1386 with ['B', 'C', 'acquisition', 'ipo']
# good / all seed investments: 266 / 5307


In [183]:
# Global ranking of investors with at least 5 seed investments.
check(global_investor_score_good, global_investor_score_all, MIN_SEED=5)

2267 investors with minimum of 5 seed investment
1.000=5/5	YL Ventures
0.875=7/8	Beringea
0.833=5/6	Kinnevik AB
0.800=4/5	Rockshield Capital
0.800=4/5	Travis Kalanick
0.800=4/5	Gil Elbaz
0.800=4/5	Traveon Rogers
0.769=10/13	Andrew Mitchell
0.667=4/6	Kite Ventures
0.667=8/12	Romulus Capital
0.667=6/9	Chamath Palihapitiya
0.667=8/12	Bullish
0.667=4/6	Darian Shirazi
0.625=5/8	MHS Capital
0.600=6/10	Genacast Ventures
0.600=3/5	Opus Capital
0.600=3/5	Loic Le Meur
0.600=3/5	Thomas Madsen-Mygdal
0.600=3/5	Marek Fodor
0.600=3/5	ProVenture Management


In [184]:
# Global ranking of investors with at least 10 seed investments.
check(global_investor_score_good, global_investor_score_all, MIN_SEED=10)

1085 investors with minimum of 10 seed investment
0.769=10/13	Andrew Mitchell
0.667=8/12	Bullish
0.667=8/12	Romulus Capital
0.600=6/10	Genacast Ventures
0.600=6/10	Simon Murdoch
0.500=6/12	Aol Ventures
0.500=5/10	Hadi Partovi
0.500=5/10	Gordon Crawford
0.500=5/10	Daher Capital
0.476=10/21	Ashton Kutcher
0.467=7/15	Grace Beauty Capital
0.458=11/24	IA Ventures
0.452=14/31	Webb Investment Network
0.444=12/27	Kevin Colleran
0.444=8/18	Polaris Partners
0.438=7/16	Accelerator Ventures
0.429=6/14	Anthemis Group
0.429=15/35	Redpoint
0.424=14/33	Lee Linden
0.417=5/12	Merus Capital


In [185]:
# Asia-only ranking of investors with at least 3 seed investments.
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED=3)

443 investors with minimum of 3 seed investment
0.750=3/4	Mitsubishi UFJ Capital
0.667=2/3	Accion Venture Lab
0.667=2/3	Kinnevik AB
0.667=2/3	Mirae Asset Venture Investment
0.667=2/3	HV Holtzbrinck Ventures
0.667=2/3	Alex Zubillaga
0.667=2/3	Startup Factory
0.500=2/4	Hummingbird Ventures
0.500=2/4	Trendlines Group
0.500=2/4	Founder Collective
0.500=2/4	B Dash Ventures
0.500=3/6	Fastlane Ventures
0.500=2/4	Genesis Partners
0.455=5/11	Sinovation Ventures
0.400=2/5	Mizuho Capital
0.400=2/5	Revo Capital
0.400=2/5	Moshe Lichtman
0.400=2/5	DCM Ventures
0.400=2/5	Eilon Tirosh
0.400=2/5	DSG Consumer Partners


In [186]:
# Asia-only ranking of investors with at least 5 seed investments.
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED=5)

234 investors with minimum of 5 seed investment
0.500=3/6	Fastlane Ventures
0.455=5/11	Sinovation Ventures
0.400=2/5	Moshe Lichtman
0.400=2/5	Mizuho Capital
0.400=2/5	DCM Ventures
0.400=2/5	Revo Capital
0.400=2/5	Eilon Tirosh
0.400=2/5	DSG Consumer Partners
0.375=3/8	Entree Capital
0.375=3/8	PLUS Ventures
0.333=2/6	Rocket Internet
0.333=2/6	Magma Venture Partners
0.333=2/6	Serguei Netessine
0.308=4/13	Microsoft Accelerator
0.300=3/10	ZhenFund
0.286=2/7	Toivo Annus
0.286=2/7	GREE Ventures
0.278=5/18	Sequoia Capital
0.273=3/11	Rebright Partners
0.267=4/15	SAIF Partners


In [187]:
# Asia-only ranking of investors with at least 10 seed investments.
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED=10)

89 investors with minimum of 10 seed investment
0.455=5/11	Sinovation Ventures
0.308=4/13	Microsoft Accelerator
0.300=3/10	ZhenFund
0.278=5/18	Sequoia Capital
0.273=3/11	Rebright Partners
0.267=4/15	SAIF Partners
0.250=3/12	Fabrice Grinda
0.250=5/20	Accel Partners
0.226=7/31	Incubate Fund
0.214=3/14	Capstone Partners Korea
0.200=2/10	Global Brain Corporation
0.200=2/10	Legend Star
0.200=2/10	OurCrowd-GCai
0.192=5/26	Bon Angels Venture Partners
0.182=2/11	Binny Bansal
0.182=2/11	Explore. Dream. Discover.
0.143=2/14	Jerusalem Venture Partners (JVP)
0.133=2/15	Golden Gate Ventures
0.133=2/15	lool ventures
0.125=6/48	CyberAgent Ventures
