In [98]:
'''
투자자 분석
1. 초기 투자를 잘 하는 작은 규모의 투자자를 찾아보자
2. Top10, 25 들이랑 같이 들어가는 투자자들을 찾아보자
'''

'\n\xed\x88\xac\xec\x9e\x90\xec\x9e\x90 \xeb\xb6\x84\xec\x84\x9d\n1. \xec\xb4\x88\xea\xb8\xb0 \xed\x88\xac\xec\x9e\x90\xeb\xa5\xbc \xec\x9e\x98 \xed\x95\x98\xeb\x8a\x94 \xec\x9e\x91\xec\x9d\x80 \xea\xb7\x9c\xeb\xaa\xa8\xec\x9d\x98 \xed\x88\xac\xec\x9e\x90\xec\x9e\x90\xeb\xa5\xbc \xec\xb0\xbe\xec\x95\x84\xeb\xb3\xb4\xec\x9e\x90\n2. Top10, 25 \xeb\x93\xa4\xec\x9d\xb4\xeb\x9e\x91 \xea\xb0\x99\xec\x9d\xb4 \xeb\x93\xa4\xec\x96\xb4\xea\xb0\x80\xeb\x8a\x94 \xed\x88\xac\xec\x9e\x90\xec\x9e\x90\xeb\x93\xa4\xec\x9d\x84 \xec\xb0\xbe\xec\x95\x84\xeb\xb3\xb4\xec\x9e\x90\n'

In [99]:
import pandas as pd
import re
import gensim
import pickle
import numpy as np
import operator
from datetime import datetime
from matplotlib import pyplot as plt
import networkx as nx
from copy import copy
%matplotlib inline

In [100]:
# load data
data_folder = '../data/csv_export/'
util_folder = '../util/'
df_organizations = pd.read_csv(data_folder + 'organizations.csv'.format(data_folder), dtype={'first_funding_on': str, 'last_funding_on':str})
df_description = pd.read_csv(data_folder + 'organization_descriptions.csv')
df_funding_rounds = pd.read_csv(data_folder + 'funding_rounds.csv')
df_funds = pd.read_csv(data_folder + 'funds.csv')
df_investments = pd.read_csv(data_folder + 'investments.csv')
df_acq = pd.read_csv(data_folder + 'acquisitions.csv')
df_people = pd.read_csv(data_folder + 'people.csv')
df_degrees = pd.read_csv(data_folder + 'degrees.csv')
df_investors = pd.read_csv(data_folder + 'investors.csv')
df_country_code = pd.read_csv(util_folder + 'country_code.csv', delimiter = '\t')

In [116]:
'''
1. 초기 투자를 잘 하는 투자자를 찾아보자
'''

# 잘된 회사를 찾기
def get_investor_scores(investment_criteria = ['A', 'B', 'C', 'D'], success_criteria = ['acquisition', 'ipo'], founded_after_this = '2007-01-01', founded_before_this = '2020-01-01',  asia_only = False, words = None, expand = False):    
    if words is not None:
        # load dataset
        if 'word2company' not in globals():
            print('loading word2company')
            global word2company
            word2company = pickle.load(open(util_folder + 'word2company.pickle'))
        if 'model' not in globals():
            print('loading word2vec model')
            global model
            model = gensim.models.word2vec.Word2Vec.load(util_folder + 'word2vec')

        # expand word set if necessary
        # train 된 word2vec 을 사용하여 관련도가 높은 단어들을 포함한다
        final_words = []
        if type(words) == str:
            words = [words]
        for word in words:
            if ' ' in word:
                word = word.replace(' ', '_')
            if expand:
                _words = model.most_similar(word)
                _words = [str(_word) for _word, sim in _words]
                final_words.extend(_words)
            final_words.append(word)
        final_words = list(set(final_words))    

        # choose companies that have relevant words
        companies = []
        for word in final_words:
            _companies = word2company.get(word, [])
            companies.extend(_companies)
        companies = list(set(companies))    
    
    # get all companies founded after the time
    df_organizations['founded_on'] = pd.to_datetime(df_organizations['founded_on'], errors='ignore')
    df = df_organizations.copy()
    df = df[df['founded_on'] >= founded_after_this]
    df = df[df['founded_on'] <= founded_before_this]
    
    print(df.shape)
    # filter by words if necessary
    if words is not None:
        df = df[df['uuid'].isin(companies)]
#         companies = df_companies[df_companies['founded_on'] >= founded_after_this]
    companies = df        
    
    print(companies.shape)
    # 아시아 회사에 대해서만
    if asia_only:
        print('before {}'.format(len(companies)))
        asia = df_country_code[df_country_code['CC'] == 'AS']['a-3'].values
        companies = companies[companies['country_code'].isin(asia)]
        print('after {}'.format(len(companies)))
    companies = companies['uuid']
    print('There are {} companies founded from {} to {}'.format(len(companies), founded_after_this, founded_before_this))

    # get good companies with correct funding criteria        
    fundings = df_funding_rounds[df_funding_rounds['company_uuid'].isin(companies)]
    good_companies_1 = fundings[fundings['funding_round_code'].isin(success_criteria)]['company_uuid'].values
    good_companies_1 = list(set(good_companies_1))
    print('{} companies with late stage fundings'.format(len(good_companies_1)))
    
    # Get list of acquired companies
    if 'acquisition' in success_criteria:        
        df_acq = pd.read_csv(data_folder + 'acquisitions.csv')
        df_acq = df_acq[df_acq['price_usd'] > 1000000]
        df_acq = df_acq[df_acq['acquiree_uuid'].isin(companies)]
        print('{} acquired for 1M+'.format(df_acq.shape[0], len(companies)))
        good_companies_2 = df_acq['acquiree_uuid'].values
        good_companies_2 = list(set(good_companies_2))
    
    # Get list of IPO companies
    if 'ipo' in success_criteria:
        df_ipos = pd.read_csv(data_folder + 'ipos.csv')
        df_ipos = df_ipos[df_ipos['company_uuid'].isin(companies)]
        print('{} IPOs'.format(df_ipos.shape[0]))
        good_companies_3 = df_ipos['company_uuid'].values
        good_companies_3 = list(set(good_companies_3))
    
    # combine all good companies
    good_companies = good_companies_1
    if 'acquisition' in success_criteria:
        good_companies.extend(good_companies_2)
    if 'ipo' in success_criteria:
        good_companies.extend(good_companies_3)
    good_companies = list(set(good_companies))
    print('In total, {} good companies'.format(len(good_companies)))
    
    early_funding = pd.concat([fundings[fundings['funding_round_type'] == 'seed'],
                              fundings[fundings['funding_round_code'].isin(investment_criteria)]], axis = 0).reset_index()

    # good fundings
    all_seed = early_funding[early_funding['company_uuid'].isin(companies)]['funding_round_uuid'].values
    all_seed = list(set(all_seed))
    good_seed = early_funding[early_funding['company_uuid'].isin(good_companies)]['funding_round_uuid'].values
    good_seed = list(set(good_seed))
    print('# good companies: {} with {}\n# good / all early investments: {} / {}'.format(len(good_companies), success_criteria, len(good_seed), len(all_seed)))

    # good investors
    investor_score_good = {}
    investor_score_all = {}
    investor_score = {}

    good_investment_count = df_investments[df_investments['funding_round_uuid'].isin(good_seed)].groupby('investor_uuid')['funding_round_uuid'].count()
    all_investment_count = df_investments[df_investments['funding_round_uuid'].isin(all_seed)].groupby('investor_uuid')['funding_round_uuid'].count()
    return good_investment_count.to_dict(), all_investment_count.to_dict()

In [102]:
'''
초기 투자 잘 하는 투자자들 중 어떤 규모의 투자자들을 보고 싶은가?
'''
    
def check(investor_score_good, investor_score_all, MIN_SEED = 10):
    investor_score = {}
    for investor, all_count in investor_score_all.items():
        if all_count >= MIN_SEED:
            investor_score[investor]= investor_score_good.get(investor, 0) / float(all_count)

    investor_score = sorted(investor_score.items(), key=operator.itemgetter(1))
    investor_score.reverse()
    print('{} investors with minimum of {} seed investment').format(len(investor_score), MIN_SEED)
    for investor, score in investor_score[:20]:
        try:
            print('{:.3f}={}/{}\t{}'.format(score, investor_score_good[investor], investor_score_all[investor], df_investors[df_investors['uuid'] == investor]['investor_name'].values[0]))
        except Exception as e:
            print('no investor name')

In [103]:
# criteria = ['B', 'C', 'D', 'E', 'F', 'acquisition', 'ipo']
investment_criteria = ['A', 'B', 'C', 'D']
success_criteria = ['acquisition', 'ipo']
founded_after_this = '2007-01-01'
founded_before_this = '2020-01-01'
global_investor_score_good, global_investor_score_all = get_investor_scores(investment_criteria, success_criteria, founded_after_this, founded_before_this, False)
asia_investor_score_good, asia_investor_score_all = get_investor_scores(investment_criteria, success_criteria, founded_after_this, founded_before_this, True)

(228466, 34)
(228466, 34)
There are 228466 companies founded from 2007-01-01 to 2020-01-01
0 companies with late stage fundings
1301 acquired for 1M+
1423 IPOs
In total, 2649 good companies
# good companies: 2649 with ['acquisition', 'ipo']
# good / all early investments: 1777 / 66237
(228466, 34)
(228466, 34)
before 228466
after 28493
There are 28493 companies founded from 2007-01-01 to 2020-01-01
0 companies with late stage fundings
176 acquired for 1M+
141 IPOs
In total, 315 good companies
# good companies: 315 with ['acquisition', 'ipo']
# good / all early investments: 183 / 9470


In [104]:
# global 투자자, 최소 5개의 seed 투자
check(global_investor_score_good, global_investor_score_all, MIN_SEED = 5)

4179 investors with minimum of 5 seed investment
1.000=7/7	Adage Capital Management
0.900=9/10	Jennison Associates
0.889=16/18	Aisling Capital
0.833=5/6	Sabby Capital
0.833=5/6	Yahoo
0.800=4/5	BioGeneration Ventures
0.800=4/5	Janus Capital Group
0.800=4/5	Greener Capital
0.800=4/5	Rockshield Capital
0.727=8/11	HealthCare Ventures
0.714=5/7	Sprout Group
0.696=16/23	Rock Springs Capital
0.667=4/6	ProVenture Management
0.667=8/12	MedImmune Ventures
0.632=12/19	Alta Partners
0.625=5/8	Remeditex Ventures
0.625=5/8	HBM BioVentures
0.600=3/5	Mesa Verde Venture Partners
0.600=3/5	Salman Partners
0.600=3/5	Skyline Ventures


In [105]:
# global 투자자, 최소 10개의 seed 투자
check(global_investor_score_good, global_investor_score_all, MIN_SEED = 10)

2228 investors with minimum of 10 seed investment
0.900=9/10	Jennison Associates
0.889=16/18	Aisling Capital
0.727=8/11	HealthCare Ventures
0.696=16/23	Rock Springs Capital
0.667=8/12	MedImmune Ventures
0.632=12/19	Alta Partners
0.600=6/10	Brookside Capital
0.571=8/14	Casdin Capital
0.571=8/14	Perceptive Advisors
0.545=6/11	EcoR1 Capital
0.545=6/11	Team Europe
0.542=13/24	Vivo Capital
0.541=20/37	RA Capital Management
0.526=10/19	Amgen Ventures
0.519=14/27	Delphi Ventures
0.500=5/10	Partner Fund Management
0.486=17/35	MPM Capital
0.478=11/23	Redmile Group
0.476=20/42	New Leaf Venture Partners
0.458=11/24	Clarus Ventures


In [106]:
# global 투자자, 최소 50개의 seed 투자
check(global_investor_score_good, global_investor_score_all, MIN_SEED = 50)

377 investors with minimum of 50 seed investment
0.410=25/61	Third Rock Ventures
0.370=20/54	Domain Associates
0.313=21/67	Versant Ventures
0.308=16/52	Flagship Pioneering
0.303=20/66	Morgenthaler Ventures
0.301=28/93	OrbiMed
0.283=15/53	Novartis Venture Fund
0.232=16/69	Osage University Partners
0.218=17/78	F-Prime Capital Partners
0.208=15/72	IVP (Institutional Venture Partners)
0.186=30/161	Venrock
0.175=29/166	Benchmark
0.173=9/52	SR One
0.172=29/169	Atlas Venture
0.171=33/193	Norwest Venture Partners - NVP
0.170=9/53	Sapphire Ventures
0.169=11/65	Ronald Conway
0.165=13/79	ARCH Venture Partners
0.164=10/61	SV Health Investors
0.159=14/88	Avalon Ventures


In [107]:
# global 투자자, 최소 200개의 seed 투자
check(global_investor_score_good, global_investor_score_all, MIN_SEED = 200)

42 investors with minimum of 200 seed investment
0.149=41/275	Greylock Partners
0.142=55/387	Kleiner Perkins Caufield & Byers
0.130=38/293	Bessemer Venture Partners
0.122=64/523	Accel Partners
0.097=23/237	Founder Collective
0.096=23/239	Redpoint
0.090=19/212	Matrix Partners
0.088=55/622	Sequoia Capital
0.086=31/359	Index Ventures
0.085=26/307	Lightspeed Venture Partners
0.083=23/277	Intel Capital
0.083=34/411	GV
0.082=43/527	New Enterprise Associates
0.081=47/580	SV Angel
0.079=26/331	Khosla Ventures
0.077=19/246	Felicis Ventures
0.068=17/251	Greycroft Partners
0.066=29/438	Andreessen Horowitz
0.062=27/433	First Round
0.061=14/231	RRE Ventures


In [108]:
# asia 투자자, 최소 3개의 seed 투자
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED = 3)

1020 investors with minimum of 3 seed investment
0.750=3/4	Proxima Ventures Ltd
0.667=2/3	econtext Asia and Beenos Asia
0.667=2/3	Nvidia
0.500=2/4	iNetworks 360
0.500=2/4	Vaizra Investments
0.500=2/4	CITIC Securities
0.500=3/6	Jerusalem Global Ventures
0.429=3/7	Kinnevik AB
0.429=3/7	Founder Collective
0.400=2/5	Benchmark
0.400=6/15	Giza Venture Capital
0.333=1/3	Rafi Gidron
0.333=1/3	China Minsheng Bank
0.333=1/3	Spark Capital
0.333=1/3	Rebate Networks
0.333=1/3	New China Capital Management
0.333=1/3	Heliant Ventures
0.333=1/3	Emerge
0.333=1/3	Orchid Asia Group Management
0.333=1/3	Arkin Holdings


In [109]:
# asia 투자자, 최소 5개의 seed 투자
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED = 5)

591 investors with minimum of 5 seed investment
0.500=3/6	Jerusalem Global Ventures
0.429=3/7	Kinnevik AB
0.429=3/7	Founder Collective
0.400=2/5	Benchmark
0.400=6/15	Giza Venture Capital
0.286=2/7	HV Holtzbrinck Ventures
0.273=3/11	Rhodium
0.250=2/8	Moshe Lichtman
0.227=5/22	83North
0.222=2/9	Glilot Capital Partners
0.222=4/18	Mitsubishi UFJ Capital
0.214=3/14	AddVenture
0.200=1/5	Index Ventures
0.200=1/5	CICC
0.200=1/5	Sovereign’s Capital
0.200=1/5	Oryzn Capital
0.200=1/5	Bigcolors
0.200=1/5	SoftBank Capital
0.200=2/10	Israel Cleantech Ventures (ICV)
0.200=1/5	CapitalG


In [110]:
# asia 투자자, 최소 10개의 seed 투자
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED = 10)

277 investors with minimum of 10 seed investment
0.400=6/15	Giza Venture Capital
0.273=3/11	Rhodium
0.227=5/22	83North
0.222=4/18	Mitsubishi UFJ Capital
0.214=3/14	AddVenture
0.200=3/15	Nokia Growth Partners (NGP)
0.200=2/10	Israel Cleantech Ventures (ICV)
0.200=2/10	Rocket Internet
0.188=3/16	Alibaba
0.167=2/12	Ventech
0.154=2/13	Horizons Ventures
0.154=2/13	PLUS Ventures
0.150=3/20	BlueRun Ventures
0.150=3/20	ITOCHU Technology Ventures
0.145=9/62	GGV Capital
0.143=2/14	Infinity Venture Partners
0.133=2/15	Zero2IPO Ventures
0.133=2/15	New Enterprise Associates
0.128=5/39	Legend Capital
0.125=2/16	Entree Capital


In [111]:
# asia 투자자, 최소 20개의 seed 투자
check(asia_investor_score_good, asia_investor_score_all, MIN_SEED = 20)

108 investors with minimum of 20 seed investment
0.227=5/22	83North
0.150=3/20	BlueRun Ventures
0.150=3/20	ITOCHU Technology Ventures
0.145=9/62	GGV Capital
0.128=5/39	Legend Capital
0.118=12/102	Accel Partners
0.103=3/29	SMBC Venture Capital
0.100=2/20	Baidu
0.098=5/51	Bessemer Venture Partners
0.097=6/62	IDG Ventures India
0.095=2/21	National Research Foundation
0.091=2/22	Kae Capital
0.091=2/22	Warburg Pincus
0.087=2/23	GMO VenturePartners
0.083=2/24	Mumbai Angels
0.083=3/36	Helion Venture Partners
0.083=3/36	OurCrowd-GCai
0.081=3/37	Magma Venture Partners
0.075=3/40	Incubate Fund
0.071=3/42	Intel Capital


In [119]:
word = 'iot'
# investment_criteria = ['A', 'B', 'C', 'D'], success_criteria = ['acquisition', 'ipo'], founded_after_this = '2007-01-01', founded_before_this = '2020-01-01',  asia_only = False, words = None, expand = False
specific_global_investor_score_good, specific_global_investor_score_all = get_investor_scores(investment_criteria, success_criteria, founded_after_this, '2020-01-01', False, word, False)
check(specific_global_investor_score_good, specific_global_investor_score_all, MIN_SEED = 2)

(228466, 34)
(798, 34)
There are 798 companies founded from 2007-01-01 to 2020-01-01
0 companies with late stage fundings
3 acquired for 1M+
3 IPOs
In total, 6 good companies
# good companies: 6 with ['acquisition', 'ipo']
# good / all early investments: 0 / 463
140 investors with minimum of 2 seed investment
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name


In [120]:
word = 'deep_learning'
specific_global_investor_score_good, specific_global_investor_score_all = get_investor_scores(investment_criteria, success_criteria, founded_after_this, '2020-01-01', False, word, False)
check(specific_global_investor_score_good, specific_global_investor_score_all, MIN_SEED = 2)

(228466, 34)
(203, 34)
There are 203 companies founded from 2007-01-01 to 2020-01-01
0 companies with late stage fundings
1 acquired for 1M+
0 IPOs
In total, 1 good companies
# good companies: 1 with ['acquisition', 'ipo']
# good / all early investments: 3 / 161
63 investors with minimum of 2 seed investment
1.000=2/2	Fuel Capital
0.667=2/3	Draper Fisher Jurvetson (DFJ)
0.667=2/3	AME Cloud Ventures
0.600=3/5	Allen & Company
0.500=1/2	Lux Capital
0.500=1/2	SV Angel
0.200=1/5	Data Collective
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name


In [121]:
word = 'drone'
specific_global_investor_score_good, specific_global_investor_score_all = get_investor_scores(investment_criteria, success_criteria, founded_after_this, '2020-01-01', False, word, False)
check(specific_global_investor_score_good, specific_global_investor_score_all, MIN_SEED = 2)

(228466, 34)
(244, 34)
There are 244 companies founded from 2007-01-01 to 2020-01-01
0 companies with late stage fundings
0 acquired for 1M+
3 IPOs
In total, 3 good companies
# good companies: 3 with ['acquisition', 'ipo']
# good / all early investments: 1 / 153
78 investors with minimum of 2 seed investment
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name
no investor name


In [None]:
'''
2. 유명한 투자자들과 인맥이 있을 것 같은 작은 투자자들을 살펴보자
'''

# get investors with high centrality
f = open(util_folder + 'investor_centrality_degree.pickle')
investors_high_central = pickle.load(f)
f.close()

In [None]:
# 유명한 회사들 찾기 1 (network centrality)
top25 = [uuid for uuid, value in investors_high_central[:25]]
top25_central = []
for i, uuid in enumerate(top25):
    print df_investors[df_investors['uuid'] == (uuid)]['investor_name'].values[0], investors_high_central[i][1]
    top25_central.append(uuid)

In [None]:
# 유명한 회사들 찾기 2. funding frequency
groups = df_investments.groupby('investor_uuid')['funding_round_uuid'].count().reset_index().sort_values(by = 'funding_round_uuid', ascending = False)#['investor_uuid'].apply(list)
count = 0
top25_funding = []
for index, row in groups.iterrows():
    uuid = row[0]
    fund_count = row[1]
    print df_investors[df_investors['uuid'] == (uuid)]['investor_name'].values[0], fund_count
    top25_funding.append(uuid)
    if count == 25: break
    count += 1

In [190]:
# 유명한 회사들과 공동 투자를 많이 한 회사를 찾아라
# 그 중 최소한 MIN 번 이상의 투자를 한 회사들
MIN = 10
early_criteria = []
                  
# get all companies founded after the time
companies = df_organizations['uuid']
print('There are {} companies'.format(len(companies)))

# get famous investors
top_investors = top25_central
top_investors.extend(top25_funding)
top_investors = list(set(top_investors))

co_funding_count = {}
funding_count = {}
for investor in top_investors:
    co_funding_count[investor] = {}

# get funding_round_uuid from top_investors from df_investments
funding_round_from_top_vc = df_investments[df_investments['investor_uuid'].isin(top_investors)]['funding_round_uuid'].values

# get company_uuid from funding_round_uuid from df_funding_rounds
companies_funded_by_top_vc = df_funding_rounds[df_funding_rounds['funding_round_uuid'].isin(funding_round_from_top_vc)]['company_uuid'].values

# get seed, A funding_round from company_uuid from funding_rounds
early_all_funding_rounds = pd.concat([df_funding_rounds[df_funding_rounds['funding_round_type'] == 'seed'],
                          df_funding_rounds[df_funding_rounds['funding_round_code'].isin(early_criteria)]], axis = 0).reset_index()['funding_round_uuid'].values
df = df_funding_rounds[df_funding_rounds['company_uuid'].isin(companies_funded_by_top_vc)]
early_good_funding_rounds = pd.concat([df[df['funding_round_type'] == 'seed'],
                          df[df['funding_round_code'].isin(early_criteria)]], axis = 0).reset_index()['funding_round_uuid'].values

# get counts from early_funding_rounds from df_investments
count = pd.concat([df_investments[df_investments['funding_round_uuid'].isin(early_all_funding_rounds)].groupby('investor_uuid')['funding_round_uuid'].count().rename('all_early'),
                   df_investments[df_investments['funding_round_uuid'].isin(early_good_funding_rounds)].groupby('investor_uuid')['funding_round_uuid'].count().rename('good_early')],
                  axis = 1).reset_index()
count = count.fillna(0)
count = count[count['all_early'] > MIN]
count = count.rename(columns = {'index': 'investor_uuid'})
count = count[~count['investor_uuid'].isin(top_investors)]
count['score'] = count['good_early'] / count['all_early']

count = count.merge(df_investors, left_on = 'investor_uuid', right_on = 'uuid')
count = count.sort_values(['score', 'all_early'], ascending = False)
count = count[['investor_name', 'all_early', 'good_early', 'score']]
print count.head(50)

There are 520408 companies
           investor_name  all_early  good_early     score
889      Chinaccelerator         76        76.0  1.000000
378               FOOD-X         42        42.0  1.000000
363             Wefunder         23        23.0  1.000000
406      Walter Winshall         23        23.0  1.000000
68             HAX Boost         22        22.0  1.000000
373              URBAN-X         19        19.0  1.000000
715           Ullas Naik         15        15.0  1.000000
969           Kal Vepuri         15        15.0  1.000000
307           Kevin Hale         14        14.0  1.000000
556       Maven Ventures         14        14.0  1.000000
100          Adrian Aoun         11        11.0  1.000000
659       Lunch Van Fund         11        11.0  1.000000
605            Indie Bio         87        86.0  0.988506
343        Paul Buchheit         81        80.0  0.987654
116           Start Fund         53        52.0  0.981132
807          Max Levchin         24        23

In [None]:
co_funding_prop = {}
print('TOP VC List')
for investor1 in top_investors:
    print('{}'.format(df_investors[df_investors['uuid'] == (investor1)]['investor_name'].values[0]))
    
print('\n\n')
for investor1 in top_investors:
    co_funding_prop[investor1] = {}
    for investor2, count in funding_count.items():
        if count >= MIN:
            if investor2 in co_funding_count[investor1]:
                co_funding_prop[investor1][investor2] = co_funding_count[investor1][investor2] / float(count)
    # sort
    score = co_funding_prop[investor1]
    score = sorted(score.items(), key=operator.itemgetter(1))
    score.reverse()
    print('TOP VC {}'.format(df_investors[df_investors['uuid'] == (investor1)]['investor_name'].values[0]))
    for key, value in score[:10]:
        name = df_investors[df_investors['uuid'] == (key)]['investor_name'].values[0]
        print('{}% {}: {}'.format(value * 100, funding_count[key], name))
    print('\n\n')