In [182]:
'''
투자자 분석
1. 초기 투자를 잘 하는 작은 규모의 투자자를 찾아보자
2. Top10, 25 들이랑 같이 들어가는 투자자들을 찾아보자
'''

'\n\xed\x88\xac\xec\x9e\x90\xec\x9e\x90 \xeb\xb6\x84\xec\x84\x9d\n1. \xec\xb4\x88\xea\xb8\xb0 \xed\x88\xac\xec\x9e\x90\xeb\xa5\xbc \xec\x9e\x98 \xed\x95\x98\xeb\x8a\x94 \xec\x9e\x91\xec\x9d\x80 \xea\xb7\x9c\xeb\xaa\xa8\xec\x9d\x98 \xed\x88\xac\xec\x9e\x90\xec\x9e\x90\xeb\xa5\xbc \xec\xb0\xbe\xec\x95\x84\xeb\xb3\xb4\xec\x9e\x90\n2. Top10, 25 \xeb\x93\xa4\xec\x9d\xb4\xeb\x9e\x91 \xea\xb0\x99\xec\x9d\xb4 \xeb\x93\xa4\xec\x96\xb4\xea\xb0\x80\xeb\x8a\x94 \xed\x88\xac\xec\x9e\x90\xec\x9e\x90\xeb\x93\xa4\xec\x9d\x84 \xec\xb0\xbe\xec\x95\x84\xeb\xb3\xb4\xec\x9e\x90\n'

In [183]:
import pandas as pd
import re
import gensim
import pickle
import numpy as np
import operator
from datetime import datetime
from matplotlib import pyplot as plt
import networkx as nx
from copy import copy
%matplotlib inline

In [184]:
# load data
data_folder = '../data/csv_export/'
util_folder = '../util/'
df_organizations = pd.read_csv(data_folder + 'organizations.csv'.format(data_folder), dtype={'first_funding_on': str, 'last_funding_on':str})
df_description = pd.read_csv(data_folder + 'organization_descriptions.csv')
df_funding_rounds = pd.read_csv(data_folder + 'funding_rounds.csv')
df_funds = pd.read_csv(data_folder + 'funds.csv')
df_investments = pd.read_csv(data_folder + 'investments.csv')
df_acq = pd.read_csv(data_folder + 'acquisitions.csv')
df_people = pd.read_csv(data_folder + 'people.csv')
df_degrees = pd.read_csv(data_folder + 'degrees.csv')
df_investors = pd.read_csv(data_folder + 'investors.csv')
df_country_code = pd.read_csv(util_folder + 'country_code.csv', delimiter = '\t')

In [242]:
'''
1. 초기 투자를 잘 하는 투자자를 찾아보자
'''

# 잘된 회사를 찾기
def get_investor_scores(criteria = ['B', 'C', 'acquisition', 'ipo'], founded_after_this = '2007-01-01', asia_only = False, words = None, expand = False):    
    if words is not None:
        # load dataset
        if 'word2company' not in globals():
            print('loading word2company')
            global word2company
            word2company = pickle.load(open(util_folder + 'word2company.pickle'))
        if 'model' not in globals():
            print('loading word2vec model')
            global model
            model = gensim.models.word2vec.Word2Vec.load(util_folder + 'word2vec')

        # expand word set if necessary
        # train 된 word2vec 을 사용하여 관련도가 높은 단어들을 포함한다
        final_words = []
        if type(words) == str:
            words = [words]
        for word in words:
            if ' ' in word:
                word = word.replace(' ', '_')
            if expand:
                _words = model.most_similar(word)
                _words = [str(_word) for _word, sim in _words]
                final_words.extend(_words)
            final_words.append(word)
        final_words = list(set(final_words))    

        # choose companies that have relevant words
        companies = []
        for word in final_words:
            _companies = word2company.get(word, [])
            companies.extend(_companies)
        companies = list(set(companies))    
    
    # get all companies founded after the time
    df_organizations['founded_on'] = pd.to_datetime(df_organizations['founded_on'], errors='ignore')
    if words is None:
        companies = df_organizations[df_organizations['founded_on'] >= founded_after_this]
    else:
        df_companies = df_organizations[df_organizations['uuid'].isin(companies)]
        companies = df_companies[df_companies['founded_on'] >= founded_after_this]

    # 아시아 회사에 대해서만
    if asia_only:
        print('before {}'.format(len(companies)))
        asia = df_country_code[df_country_code['CC'] == 'AS']['a-3'].values
        companies = companies[companies['country_code'].isin(asia)]
        print('after {}'.format(len(companies)))
    companies = companies['uuid']
    print('There are {} companies founded after {}'.format(len(companies), founded_after_this))

    # get good companies with correct funding criteria        
    fundings = df_funding_rounds[df_funding_rounds['company_uuid'].isin(companies)]
    good_companies_1 = fundings[fundings['funding_round_code'].isin(criteria)]['company_uuid'].values
    good_companies_1 = list(set(good_companies_1))
    print('{} companies with late stage fundings'.format(len(good_companies_1)))
    
    # Get list of acquired companies
    if 'acquisition' in criteria:        
        df_acq = pd.read_csv(data_folder + 'acquisitions.csv')
        df_acq = df_acq[df_acq['price_usd'] > 1000000]
        df_acq = df_acq[df_acq['acquiree_uuid'].isin(companies)]
        print('{} acquired for 1M+'.format(df_acq.shape[0], len(companies)))
        good_companies_2 = df_acq['acquiree_uuid'].values
        good_companies_2 = list(set(good_companies_2))
    
    # Get list of IPO companies
    if 'ipo' in criteria:
        df_ipos = pd.read_csv(data_folder + 'ipos.csv')
        df_ipos = df_ipos[df_ipos['company_uuid'].isin(companies)]
        print('{} IPOs'.format(df_ipos.shape[0]))
        good_companies_3 = df_ipos['company_uuid'].values
        good_companies_3 = list(set(good_companies_3))
    
    # combine all good companies
    good_companies = good_companies_1
    if 'acquisition' in criteria:
        good_companies.extend(good_companies_2)
    if 'acquisition' in criteria:
        good_companies.extend(good_companies_3)
    good_companies = list(set(good_companies))
    print('In total, {} good companies'.format(len(good_companies)))
    
#     print fundings.head()
    # earyl fundings
#     print fundings[fundings['funding_round_type'] == 'seed'].head()
    early_funding = pd.concat([fundings[fundings['funding_round_type'] == 'seed'],
                              fundings[fundings['funding_round_code'] == 'A']], axis = 0).reset_index()

    # good fundings
    all_seed = early_funding[early_funding['company_uuid'].isin(companies)]['funding_round_uuid'].values
#     all_seed = all_seed[all_seed['funding_round_type'] == 'seed']['funding_round_uuid']
    all_seed = list(set(all_seed))

    good_seed = early_funding[early_funding['company_uuid'].isin(good_companies)]['funding_round_uuid'].values
#     good_seed1 = good_seed[good_seed['funding_round_type'] == 'seed']['funding_round_uuid']
#     good_seed1 = list(set(good_seed1))
#     good_seed2 = good_seed[good_seed['funding_round_code'] == 'A']['funding_round_uuid']
#     good_seed2 = list(set(good_seed2))
#     good_seed1.extend(good_seed2)
    good_seed = list(set(good_seed))

    print('# good companies: {} with {}\n# good / all early investments: {} / {}'.format(len(good_companies), criteria, len(good_seed), len(all_seed)))

    # good investors
    investor_score_good = {}
    investor_score_all = {}
    investor_score = {}

    good_investment_count = df_investments[df_investments['funding_round_uuid'].isin(good_seed)].groupby('investor_uuid')['funding_round_uuid'].count()
    all_investment_count = df_investments[df_investments['funding_round_uuid'].isin(all_seed)].groupby('investor_uuid')['funding_round_uuid'].count()
    return good_investment_count.to_dict(), all_investment_count.to_dict()

In [243]:
'''
초기 투자 잘 하는 투자자들 중 어떤 규모의 투자자들을 보고 싶은가?
'''
    
def check(investor_score_good, investor_score_all, MIN_SEED = 10):
    investor_score = {}
    for investor, all_count in investor_score_all.items():
        if all_count >= MIN_SEED:
            investor_score[investor]= investor_score_good.get(investor, 0) / float(all_count)

    investor_score = sorted(investor_score.items(), key=operator.itemgetter(1))
    investor_score.reverse()
    print('{} investors with minimum of {} seed investment').format(len(investor_score), MIN_SEED)
    for investor, score in investor_score[:20]:
        try:
            print('{:.3f}={}/{}\t{}'.format(score, investor_score_good[investor], investor_score_all[investor], df_investors[df_investors['uuid'] == investor]['investor_name'].values[0]))
        except Exception as e:
            print('no investor name')

In [244]:
criteria = ['B', 'C', 'D', 'E', 'F', 'acquisition', 'ipo']
founded_after_this = '2007-01-01'
global_investor_score_good, global_investor_score_all = get_investor_scores(criteria, founded_after_this, False)
asia_investor_score_good, asia_investor_score_all = get_investor_scores(criteria, founded_after_this, True)

There are 228472 companies founded after 2007-01-01
5628 companies with late stage fundings
1301 acquired for 1M+
1423 IPOs
In total, 7867 good companies
# good companies: 7867 with ['B', 'C', 'D', 'E', 'F', 'acquisition', 'ipo']
# good / all early investments: 7519 / 57263
before 228472
after 28493
There are 28493 companies founded after 2007-01-01
1127 companies with late stage fundings
176 acquired for 1M+
141 IPOs
In total, 1402 good companies
# good companies: 1402 with ['B', 'C', 'D', 'E', 'F', 'acquisition', 'ipo']
# good / all early investments: 1182 / 7756


In [245]:
# global 투자자, 최소 5개의 seed 투자
check(global_investor_score_good, global_investor_score_all, MIN_SEED = 5)

3372 investors with minimum of 5 seed investment
1.000=6/6	Highway 12 Ventures
1.000=5/5	Dustin Moskovitz
1.000=5/5	Bill Gates
1.000=6/6	UTEC- University of Tokyo Edge Capital
1.000=5/5	Bill Campbell
1.000=7/7	Vision Ridge Capital Partners
1.000=10/10	Windcrest Partners
1.000=7/7	Chrysalix Venture Capital
1.000=5/5	Angus Davis
0.933=14/15	Advanced Technology Ventures
0.909=20/22	Sigma Partners
0.870=20/23	Flagship Pioneering
0.857=6/7	K2VC
0.857=6/7	Lead Edge Capital
0.846=11/13	Kite Ventures
0.833=5/6	Spring Ventures
0.833=5/6	Arboretum Ventures
0.833=5/6	Blackstone
0.833=5/6	Intersouth Partners
0.833=5/6	Chrysalis Ventures


In [246]:
# global 투자자, 최소 10개의 seed 투자
check(global_investor_score_good, global_investor_score_all, MIN_SEED = 10)

1717 investors with minimum of 10 seed investment
1.000=10/10	Windcrest Partners
0.933=14/15	Advanced Technology Ventures
0.909=20/22	Sigma Partners
0.870=20/23	Flagship Pioneering
0.846=11/13	Kite Ventures
0.818=9/11	Cedar Fund
0.800=12/15	Gemini Israel Ventures
0.800=8/10	New Leaf Venture Partners
0.789=15/19	Andrew Mitchell
0.769=30/39	83North
0.750=9/12	MentorTech Ventures
0.746=44/59	Benchmark
0.739=17/23	Morgenthaler Ventures
0.733=11/15	Jeremy Stoppelman
0.727=8/11	Kinnevik AB
0.722=13/18	Genacast Ventures
0.714=25/35	Third Rock Ventures
0.702=33/47	North Bridge Venture Partners & Growth Equity
0.700=7/10	Allegis Capital
0.700=7/10	Globespan Capital Partners


In [247]:
# global 투자자, 최소 50개의 seed 투자
check(global_investor_score_good, global_investor_score_all, MIN_SEED = 50)

267 investors with minimum of 50 seed investment
0.746=44/59	Benchmark
0.639=39/61	IA Ventures
0.610=89/146	Kleiner Perkins Caufield & Byers
0.607=156/257	Sequoia Capital
0.606=40/66	Venrock
0.602=71/118	Redpoint
0.586=34/58	Thrive Capital
0.569=70/123	Battery Ventures
0.534=39/73	Union Square Ventures
0.531=43/81	Polaris Partners
0.522=35/67	Highland Capital Partners
0.519=27/52	Bain Capital Ventures
0.515=53/103	Matrix Partners
0.511=72/141	Lightspeed Venture Partners
0.508=31/61	Oreilly AlphaTech Ventures
0.508=65/128	Atlas Venture
0.506=43/85	DCM Ventures
0.504=65/129	Bessemer Venture Partners
0.500=48/96	Harrison Metal
0.500=29/58	Mayfield Fund


In [248]:
# global 투자자, 최소 200개의 seed 투자
check(global_investor_score_good, global_investor_score_all, MIN_SEED = 200)

26 investors with minimum of 200 seed investment
0.607=156/257	Sequoia Capital
0.470=125/266	Accel Partners
0.455=142/312	First Round
0.453=115/254	GV
0.439=100/228	Founder Collective
0.411=113/275	New Enterprise Associates
0.403=106/263	Andreessen Horowitz
0.394=97/246	Lerer Hippeau Ventures
0.357=189/529	SV Angel
0.319=67/210	True Ventures
0.238=55/231	Great Oaks Venture Capital
0.146=45/309	High-Tech Gruenderfonds
0.134=33/247	FundersClub
0.130=63/483	Techstars
0.097=110/1135	Y Combinator
0.093=118/1263	500 Startups
0.083=30/361	Kima Ventures
0.081=23/284	Plug and Play
0.050=17/338	Northstar Ventures
0.047=10/213	Seedcamp


In [249]:
# asia 투자자, 최소 3개의 seed 투자
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED = 3)

759 investors with minimum of 3 seed investment
1.000=3/3	World Innovation Lab (WiL)
1.000=3/3	Indo-US Venture Partners
1.000=3/3	Square Peg Capital
1.000=4/4	Elevar Equity
1.000=3/3	Wolfson Group
1.000=3/3	Meridian Capital China
1.000=3/3	GS Home Shopping
0.909=10/11	83North
0.900=9/10	Redpoint
0.857=6/7	K2VC
0.833=5/6	China Growth Capital
0.800=4/5	Gemini Israel Ventures
0.800=4/5	BRM Capital
0.800=4/5	Trustbridge Partners
0.800=4/5	Ventech
0.778=7/9	Northern Light Venture Capital
0.769=10/13	Legend Capital
0.750=3/4	WuXi Healthcare Ventures
0.750=3/4	F-Prime Capital Partners
0.750=3/4	Cedar Fund


In [250]:
# asia 투자자, 최소 5개의 seed 투자
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED = 5)

431 investors with minimum of 5 seed investment
0.909=10/11	83North
0.900=9/10	Redpoint
0.857=6/7	K2VC
0.833=5/6	China Growth Capital
0.800=4/5	Gemini Israel Ventures
0.800=4/5	BRM Capital
0.800=4/5	Trustbridge Partners
0.800=4/5	Ventech
0.778=7/9	Northern Light Venture Capital
0.769=10/13	Legend Capital
0.750=6/8	Canaan Partners
0.714=5/7	Founder Collective
0.714=5/7	Ceyuan Ventures
0.700=7/10	Norwest Venture Partners - NVP
0.692=9/13	BlueRun Ventures
0.692=9/13	Battery Ventures
0.684=13/19	GSR Ventures
0.667=10/15	GGV Capital
0.667=6/9	e.ventures
0.667=4/6	Rhodium


In [251]:
# asia 투자자, 최소 10개의 seed 투자
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED = 10)

186 investors with minimum of 10 seed investment
0.909=10/11	83North
0.900=9/10	Redpoint
0.769=10/13	Legend Capital
0.700=7/10	Norwest Venture Partners - NVP
0.692=9/13	BlueRun Ventures
0.692=9/13	Battery Ventures
0.684=13/19	GSR Ventures
0.667=10/15	GGV Capital
0.645=20/31	Qiming Venture Partners
0.636=7/11	Bertelsmann Asia Investment Fund
0.615=8/13	Mitsubishi UFJ Capital
0.600=6/10	Genesis Partners
0.600=6/10	Infinity Venture Partners
0.600=12/20	Bessemer Venture Partners
0.600=6/10	Giza Venture Capital
0.583=7/12	Mayfield Fund
0.583=14/24	Source Code Capital
0.580=69/119	Sequoia Capital
0.571=8/14	Kleiner Perkins Caufield & Byers
0.562=9/16	Stonebridge Capital


In [252]:
# asia 투자자, 최소 20개의 seed 투자
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED = 20)

64 investors with minimum of 20 seed investment
0.645=20/31	Qiming Venture Partners
0.600=12/20	Bessemer Venture Partners
0.583=14/24	Source Code Capital
0.580=69/119	Sequoia Capital
0.556=30/54	IDG Capital Partners
0.545=12/22	Matrix Partners
0.500=13/26	DCM Ventures
0.486=18/37	Gobi Partners
0.478=11/23	Magma Venture Partners
0.455=10/22	Qualcomm Ventures
0.444=28/63	Accel Partners
0.429=9/21	Morningside Group
0.421=16/38	Sinovation Ventures
0.418=23/55	Matrix Partners China
0.417=10/24	GREE Ventures
0.409=9/22	Helion Venture Partners
0.400=14/35	Vertex Ventures
0.389=21/54	SAIF Partners
0.361=13/36	Kalaari Capital
0.359=14/39	IDG Ventures India


In [253]:
word = 'iot'
specific_global_investor_score_good, specific_global_investor_score_all = get_investor_scores(criteria, founded_after_this, False, word, False)
check(specific_global_investor_score_good, specific_global_investor_score_all, MIN_SEED = 2)

There are 798 companies founded after 2007-01-01
34 companies with late stage fundings
3 acquired for 1M+
3 IPOs
In total, 40 good companies
# good companies: 40 with ['B', 'C', 'D', 'E', 'F', 'acquisition', 'ipo']
# good / all early investments: 43 / 410
93 investors with minimum of 2 seed investment
1.000=2/2	Rock Health
1.000=3/3	Lifeline Ventures
1.000=2/2	Kleiner Perkins Caufield & Byers
1.000=2/2	True Ventures
1.000=2/2	Finnish Industry Investment Ltd (Suomen Teollisuussijoitus)
1.000=2/2	Kepha Partners
1.000=2/2	Disruptive
1.000=2/2	Sigma Partners
0.500=1/2	Crosslink Capital
0.500=1/2	Alliance of Angels
0.500=1/2	Startupbootcamp
0.333=1/3	Earlybird Venture Capital
0.333=1/3	Y Combinator
0.333=1/3	CrunchFund
0.250=1/4	Bain Capital Ventures
no investor name
no investor name
no investor name
no investor name
no investor name


In [254]:
word = 'deep_learning'
specific_global_investor_score_good, specific_global_investor_score_all = get_investor_scores(criteria, founded_after_this, False, word, False)
check(specific_global_investor_score_good, specific_global_investor_score_all, MIN_SEED = 2)

There are 203 companies founded after 2007-01-01
7 companies with late stage fundings
1 acquired for 1M+
0 IPOs
In total, 8 good companies
# good companies: 8 with ['B', 'C', 'D', 'E', 'F', 'acquisition', 'ipo']
# good / all early investments: 10 / 147
56 investors with minimum of 2 seed investment
1.000=2/2	Fuel Capital
0.667=2/3	Draper Fisher Jurvetson (DFJ)
0.667=2/3	AME Cloud Ventures
0.600=3/5	Allen & Company
0.500=1/2	Plug and Play
0.500=1/2	Vertex Ventures
0.500=1/2	Salesforce Ventures
0.500=1/2	Lux Capital
0.500=1/2	SV Angel
0.200=1/5	Data Collective
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name


In [255]:
word = 'drone'
specific_global_investor_score_good, specific_global_investor_score_all = get_investor_scores(criteria, founded_after_this, False, word, False)
check(specific_global_investor_score_good, specific_global_investor_score_all, MIN_SEED = 2)

There are 244 companies founded after 2007-01-01
9 companies with late stage fundings
0 acquired for 1M+
3 IPOs
In total, 12 good companies
# good companies: 12 with ['B', 'C', 'D', 'E', 'F', 'acquisition', 'ipo']
# good / all early investments: 16 / 141
57 investors with minimum of 2 seed investment
1.000=2/2	SierraMaya360
1.000=2/2	SK Ventures
1.000=2/2	Data Collective
1.000=2/2	GV
1.000=2/2	VTF Capital
1.000=3/3	AngelPad
1.000=2/2	ff Venture Capital
1.000=2/2	SoftTech VC
0.500=1/2	Menlo Ventures
0.500=1/2	Qualcomm Ventures
0.333=1/3	ZhenFund
0.333=1/3	Bee Partners
0.333=1/3	GGV Capital
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name


In [256]:
'''
2. 유명한 투자자들과 인맥이 있을 것 같은 작은 투자자들을 살펴보자
'''

# get investors with high centrality
f = open(util_folder + 'investor_centrality_degree.pickle')
investors_high_central = pickle.load(f)
f.close()

In [257]:
# 유명한 회사들 찾기 1 (network centrality)
top25 = [uuid for uuid, value in investors_high_central[:25]]
top25_central = []
for i, uuid in enumerate(top25):
    print df_investors[df_investors['uuid'] == (uuid)]['investor_name'].values[0], investors_high_central[i][1]
    top25_central.append(uuid)

500 Startups 0.0645051194539
New Enterprise Associates 0.0460750853242
SV Angel 0.0447886584405
Intel Capital 0.0431346810186
Y Combinator 0.0369650826989
Accel Partners 0.035914938304
Kleiner Perkins Caufield & Byers 0.034287214492
Sequoia Capital 0.0339459175637
GV 0.0312943029667
First Round 0.0301916513521
Goldman Sachs 0.0284589131006
Greylock Partners 0.028170123392
Bessemer Venture Partners 0.0279075872933
Andreessen Horowitz 0.0277238120242
Draper Fisher Jurvetson (DFJ) 0.0273825150958
Index Ventures 0.0267786820688
Great Oaks Venture Capital 0.0244683644001
Lerer Hippeau Ventures 0.024284589131
CrunchFund 0.0241795746915
Khosla Ventures 0.0237857705434
General Catalyst 0.0236545024941
Felicis Ventures 0.0235494880546
RRE Ventures 0.0227881333683
BoxGroup 0.0219480178525
Menlo Ventures 0.0213179312155


In [258]:
# 유명한 회사들 찾기 2. funding frequency
groups = df_investments.groupby('investor_uuid')['funding_round_uuid'].count().reset_index().sort_values(by = 'funding_round_uuid', ascending = False)#['investor_uuid'].apply(list)
count = 0
top25_funding = []
for index, row in groups.iterrows():
    uuid = row[0]
    fund_count = row[1]
    print df_investors[df_investors['uuid'] == (uuid)]['investor_name'].values[0], fund_count
    top25_funding.append(uuid)
    if count == 25: break
    count += 1

500 Startups 1518
Sequoia Capital 1432
Y Combinator 1344
New Enterprise Associates 1328
Intel Capital 1179
Accel Partners 1078
NYSERDA 1004
Kleiner Perkins Caufield & Byers 968
Wayra 952
SOSV 950
Draper Fisher Jurvetson (DFJ) 782
Start-Up Chile 745
SV Angel 738
Bessemer Venture Partners 712
Techstars 691
Right Side Capital Management 661
Greylock Partners 592
First Round 580
Index Ventures 573
Goldman Sachs 571
Lightspeed Venture Partners 558
Battery Ventures 556
Brand Capital 553
High-Tech Gruenderfonds 551
Plug and Play 543
Venrock 538


In [259]:
# 유명한 회사들과 공동 투자를 많이 한 회사를 찾아라
# 그 중 최소한 MIN 번 이상의 투자를 한 회사들
MIN = 3

# get all companies founded after the time
companies = df_organizations['uuid']
print('There are {} companies'.format(len(companies)))

# get famous investors
top_investors = top25_central
top_investors.extend(top25_funding)
top_investors = list(set(top_investors))

co_funding_count = {}
funding_count = {}
for investor in top_investors:
    co_funding_count[investor] = {}

groups = df_investments.groupby('funding_round_uuid')['investor_uuid'].apply(list)
# 같은 회사에 투자를 한 경우, 두 투자 회사 사이에 링크를 준다
for investors in groups.values:
    for investor1 in investors:
        for investor2 in investors:
            if investor1 != investor2:
                if investor1 in top_investors and investor2 not in top_investors:
                    co_funding_count[investor1][investor2] = co_funding_count[investor1].get(investor2, 0) + 1
                    funding_count[investor2] = funding_count.get(investor2, 0) + 1
                if investor2 in top_investors and investor1 not in top_investors:
                    co_funding_count[investor2][investor1] = co_funding_count[investor2].get(investor1, 0) + 1
                    funding_count[investor1] = funding_count.get(investor1, 0) + 1

There are 520408 companies


In [260]:
co_funding_prop = {}
print('TOP VC List')
for investor1 in top_investors:
    print('{}'.format(df_investors[df_investors['uuid'] == (investor1)]['investor_name'].values[0]))
    
print('\n\n')
for investor1 in top_investors:
    co_funding_propop[investor1] = {}
    for investor2, count in funding_count.items():
        if count >= MIN:
            if investor2 in co_funding_count[investor1]:
                co_funding_prop[investor1][investor2] = co_funding_count[investor1][investor2] / float(count)
    # sort
    score = co_funding_prop[investor1]
    score = sorted(score.items(), key=operator.itemgetter(1))
    score.reverse()
    print('TOP VC {}'.format(df_investors[df_investors['uuid'] == (investor1)]['investor_name'].values[0]))
    for key, value in score[:10]:
        name = df_investors[df_investors['uuid'] == (key)]['investor_name'].values[0]
        print('{}% {}: {}'.format(value * 100, funding_count[key], name))
    print('\n\n')

TOP VC List
Battery Ventures
Bessemer Venture Partners
New Enterprise Associates
Khosla Ventures
SV Angel
Techstars
First Round
500 Startups
Lerer Hippeau Ventures
High-Tech Gruenderfonds
Wayra
CrunchFund
Greylock Partners
Right Side Capital Management
Accel Partners
Intel Capital
Venrock
Andreessen Horowitz
GV
Index Ventures
General Catalyst
Sequoia Capital
NYSERDA
Great Oaks Venture Capital
Y Combinator
Menlo Ventures
BoxGroup
Plug and Play
Start-Up Chile
SOSV
Draper Fisher Jurvetson (DFJ)
Goldman Sachs
Kleiner Perkins Caufield & Byers
Lightspeed Venture Partners
RRE Ventures
Felicis Ventures
Brand Capital





NameError: name 'co_funding_propop' is not defined