In [54]:
import pandas as pd
import numpy as np

## import and process deals.csv

In [55]:
deals = pd.read_csv('investmentsUK\deals.csv')    #import data
deals = deals.drop(columns=['Beauhurst company URL','Beauhurst deal URL','Amount raised (converted to GBP)'])    #drop useless information
deals = deals.rename(columns={'Verified investment amount (converted to GBP)':'Investment amount'})    #rename column
deals = deals.dropna()    #clean missing data rows
deals['Deal date'] = pd.to_datetime(deals['Deal date'],format='%Y-%m-%d')
deals = deals.sort_values(by=['Company name'])    #sort the data by company name
deals.head()

Unnamed: 0,Company name,Deal date,Investment amount
3866,10 Digital Media,2015-11-16,3398454.0
6303,1066 Airsoft,2016-05-31,23668.0
1395,14M Genomics,2014-12-16,5000000.0
6787,1855,2017-06-30,20000.0
8443,1st Impression,2013-11-22,75005.0


## import and process liquidity.csv

In [56]:
liquidity = pd.read_csv('investmentsUK\liquidity.csv')    #import data
liquidity = liquidity.drop(columns=['Beauhurst URL','Companies House Number'])    #drop useless information
liquidity = liquidity.rename(columns={"Date of the company's exit or death":"Exit date","Current Stage of Evolution":"Current stage"})    #rename column
# liquidity['Exit date'] = pd.to_datetime(liquidity['Exit date'],format='%d/%m/%Y')
# liquidity['Exit date'] = pd.to_datetime(liquidity['Exit date'],format='%Y-%m-%d')
liquidity = liquidity.sort_values(by=['Company name'])    #sort the data by company name
liquidity.head()
# liquidity.info()

Unnamed: 0,Company name,Incorporation date,Current stage,Exit date,Exit price
641,10 Digital Media,,Dead,27/11/2018,0.0
1020,10 Minutes With,19/03/2013,Dead,29/06/2017,0.0
3197,1066 Airsoft,13/05/2014,Dead,27/02/2019,0.0
1076,14M Genomics,09/01/2014,Dead,21/03/2016,0.0
3358,1855,27/02/2013,Dead,20/02/2020,0.0


## left merge deals.csv and liquidity.csv to get startups.csv

In [57]:
startups = pd.merge(deals, liquidity, on='Company name')    #left join deals.csv with liquidity.csv which have been processed
startups.loc[(startups['Current stage'] == "Dead") ,'Exit price'] = 0    #put zero in the 'Exit price' column if the startup is dead
startups = startups.dropna(subset=['Exit price'])    #clean data rows whose values in "Exit price" are missing
startups['Exit date'] = pd.to_datetime(startups['Exit date'],format='%d/%m/%Y')
startups['Exit date'] = pd.to_datetime(startups['Exit date'],format='%Y-%m-%d')
# startups['Incorporation date'] = pd.to_datetime(startups['Incorporation date'],format='%d/%m/%Y')
# startups['Incorporation date'] = pd.to_datetime(startups['Incorporation date'],format='%Y-%m-%d')
startups.head()
# startups.info()

Unnamed: 0,Company name,Deal date,Investment amount,Incorporation date,Current stage,Exit date,Exit price
0,10 Digital Media,2015-11-16,3398454.0,,Dead,2018-11-27,0.0
1,1066 Airsoft,2016-05-31,23668.0,13/05/2014,Dead,2019-02-27,0.0
2,14M Genomics,2014-12-16,5000000.0,09/01/2014,Dead,2016-03-21,0.0
3,1855,2017-06-30,20000.0,27/02/2013,Dead,2020-02-20,0.0
4,1st Impression,2013-11-22,75005.0,26/08/2009,Dead,2019-12-16,0.0


In [58]:
count_value=startups['Company name'].value_counts() #select those startups who have raised more than one round funding
print(count_value) 
# Length: 2833
count_value1=count_value[count_value>1]
print(count_value1) 
# Length: 1405
rounds_company=list(count_value1.index)
startups=startups[startups['Company name'].isin(rounds_company)]
startups = startups.sort_values(by=['Company name','Deal date']) 
startups.head()
# [4154 rows x 7 columns]

Houseology            13
NetThings             12
Streetlife            11
Microtest Matrices    11
TyresOnTheDrive        9
                      ..
Ellumia                1
Exclusive              1
That's Vapore          1
BioStraw               1
Cavalry                1
Name: Company name, Length: 2833, dtype: int64
Houseology            13
NetThings             12
Streetlife            11
Microtest Matrices    11
TyresOnTheDrive        9
                      ..
Onlicar                2
Microcosm              2
Trinity ICT            2
Shazam                 2
Slappie                2
Name: Company name, Length: 1405, dtype: int64


Unnamed: 0,Company name,Deal date,Investment amount,Incorporation date,Current stage,Exit date,Exit price
7,201 Software,2016-10-31,29999.0,04/09/2015,Dead,2019-07-23,0.0
6,201 Software,2017-03-15,29999.0,04/09/2015,Dead,2019-07-23,0.0
9,2BE Technology,2014-02-14,10000.0,18/12/2013,Dead,2019-10-01,0.0
8,2BE Technology,2015-04-02,80000.0,18/12/2013,Dead,2019-10-01,0.0
10,2BE Technology,2016-02-19,20000.0,18/12/2013,Dead,2019-10-01,0.0


In [59]:
startups.to_csv(r'startups.csv')    #export startups.csv

## import and process shares.csv to get investors.csv

In [60]:
shares = pd.read_csv('investmentsUK\shares.csv')    #import data
shares = shares.loc[:,['Company name','filing_date', 'first_name', 'last_name', 'number_of_shares','percent_total']]    #select and retain useful information
shares = shares.dropna(subset=['number_of_shares','last_name'])    
shares = shares[~shares['number_of_shares'].isin([0])]    #clean data rows whose values in "number_of_shares" or "last_name" are missing
shares['first_name'] = shares['first_name'].fillna('')    #replace Nan to "" in "first_name" column
shares['Investor name'] = shares['first_name'].str.cat(shares['last_name'], sep='_')    #splice two columns into "Investor name"
shares = shares.drop(columns=['first_name','last_name'])    #drop useless information
shares = shares.drop_duplicates(subset=None, keep='first', inplace=False)    #eliminate duplicate data rows
# shares['total_shares']=shares.number_of_shares/shares.percent_total*100
# shares.round({'total_shares': 2})
# shares = shares.dropna(subset=['total_shares']) 
shares['filing_date'] = pd.to_datetime(shares['filing_date'],format='%d/%m/%Y')
shares['filing_date'] = pd.to_datetime(shares['filing_date'],format='%Y-%m-%d')
shares = shares.sort_values(by=['Company name','filing_date','Investor name', 'number_of_shares'])    
#set Investor name as index and sort the data first by 'Company name' then by 'number_of_shares'
investors = shares.drop_duplicates(subset=['Company name','Investor name','number_of_shares'], keep='first', inplace=False)
#eliminate duplicate data rows whose number_of_shares make no change [179719 rows x 6 columns] --> [87229 rows x 6 columns]
investors.head()

Unnamed: 0,Company name,filing_date,number_of_shares,percent_total,Investor name
33100,10 Digital Media,2015-06-18,100.0,100.0,_SPORTLOBSTER SA
33103,10 Digital Media,2018-06-18,100.0,100.0,_CONSTELLATION SPORTS LIMITED
52887,10 Minutes With,2013-03-19,1000000.0,100.0,MANFREDI_DI CINTIO
52888,10 Minutes With,2014-03-19,96167.0,3.73,ANDREA_AUTERI
52899,10 Minutes With,2014-03-19,30000.0,1.164,ANTONIO_BELLONI


In [61]:
# investors.to_csv(r'investors.csv')    #export investors.csv

## Inner merge startups.csv and investors.csv
#### merge important features together on 'Computer name'

In [62]:
df = pd.merge(startups,investors,on='Company name',how='inner')
df = df[['Company name','Deal date','Investment amount','filing_date','Investor name','number_of_shares','percent_total','Current stage','Incorporation date','Exit date','Exit price']]
df = df.sort_values(by=['Company name','Investor name','Deal date','filing_date']) 
df=df.loc[(df['Deal date']<df['filing_date'] )]
# drop deals whose deal date cannot match with filing date [151030 rows x 12 columns] --> [117438 rows x 12 columns]
df.head()

Unnamed: 0,Company name,Deal date,Investment amount,filing_date,Investor name,number_of_shares,percent_total,Current stage,Incorporation date,Exit date,Exit price
1,201 Software,2016-10-31,29999.0,2017-09-03,ANDREW_MORSON,406.0,2.056,Dead,04/09/2015,2019-07-23,0.0
14,201 Software,2017-03-15,29999.0,2017-09-03,ANDREW_MORSON,406.0,2.056,Dead,04/09/2015,2019-07-23,0.0
2,201 Software,2016-10-31,29999.0,2017-09-03,CHRISTOPHER JOHN_LARGE,1524.0,7.716,Dead,04/09/2015,2019-07-23,0.0
15,201 Software,2017-03-15,29999.0,2017-09-03,CHRISTOPHER JOHN_LARGE,1524.0,7.716,Dead,04/09/2015,2019-07-23,0.0
3,201 Software,2016-10-31,29999.0,2017-09-03,DARREN_FRIEND,406.0,2.056,Dead,04/09/2015,2019-07-23,0.0


In [63]:
df=df.drop_duplicates(subset=['filing_date','Investor name','Company name'], keep='last') 
#match 'filing_date' with the closest 'Deal date' [117438 rows x 12 columns] --> [38108 rows x 12 columns]
df.head()

Unnamed: 0,Company name,Deal date,Investment amount,filing_date,Investor name,number_of_shares,percent_total,Current stage,Incorporation date,Exit date,Exit price
14,201 Software,2017-03-15,29999.0,2017-09-03,ANDREW_MORSON,406.0,2.056,Dead,04/09/2015,2019-07-23,0.0
15,201 Software,2017-03-15,29999.0,2017-09-03,CHRISTOPHER JOHN_LARGE,1524.0,7.716,Dead,04/09/2015,2019-07-23,0.0
16,201 Software,2017-03-15,29999.0,2017-09-03,DARREN_FRIEND,406.0,2.056,Dead,04/09/2015,2019-07-23,0.0
17,201 Software,2017-03-15,29999.0,2017-09-03,DAVID_EDWARDS,482.0,2.44,Dead,04/09/2015,2019-07-23,0.0
18,201 Software,2017-03-15,29999.0,2017-09-03,JAKE_WORRALL,1498.0,7.584,Dead,04/09/2015,2019-07-23,0.0


In [66]:
df.to_csv(r'df.csv')    #export df.csv

## Split Always-Follow & Never-Follow Investors

In [67]:
set_n = df[df.duplicated(subset=['Company name','Investor name'], keep=False)==False] #Never-Follow df' Deals
set_n = set_n.sort_values(by=['Company name','Investor name','Deal date','filing_date']) 
# set_n.head()
# [25614 rows x 12 columns]
set_a = df[df.duplicated(subset=['Company name','Investor name'], keep=False)==True] #Always-Follow df' Deals
set_a = set_a.sort_values(by=['Company name','Investor name','Deal date','filing_date']) 
set_a.head()
# [12494 rows x 12 columns]

Unnamed: 0,Company name,Deal date,Investment amount,filing_date,Investor name,number_of_shares,percent_total,Current stage,Incorporation date,Exit date,Exit price
215,3-Cs,2013-05-01,333882.0,2013-06-29,A._BOUMAN,11250.0,0.554,Dead,29/06/2000,2018-10-18,0.0
306,3-Cs,2014-04-29,117250.0,2014-06-29,A._BOUMAN,21250.0,0.866,Dead,29/06/2000,2018-10-18,0.0
216,3-Cs,2013-05-01,333882.0,2013-06-29,BARBARA_WEST,70000.0,3.444,Dead,29/06/2000,2018-10-18,0.0
365,3-Cs,2014-12-10,196000.0,2015-06-29,BARBARA_WEST,90000.0,3.158,Dead,29/06/2000,2018-10-18,0.0
224,3-Cs,2013-05-01,333882.0,2013-06-29,DAVID B._FRANSEN,102500.0,5.044,Dead,29/06/2000,2018-10-18,0.0


In [68]:
# set_n.to_csv(r'set_n.csv')    
# set_a.to_csv(r'set_a.csv')    

## Calculate how many shares have been issued each round 

In [69]:
shar = shares.drop_duplicates(subset=['filing_date','Company name'], keep='last') 
shar['total_shares'] = shar['number_of_shares']*100/shar['percent_total'] 
#Calculate the cumulative number of shares issued by the startup firms in each funding round
shar.round({'total_shares': 2})
shar = shar.loc[:,['Company name','filing_date','total_shares']]
length_column = shar.shape[0]
shar['round_shares'] = ''
for i in range(0,length_column):
    if shar['Company name'].iloc[i] == shar['Company name'].iloc[i-1]:
        shar['round_shares'].iloc[i] = shar['total_shares'].iloc[i]-shar['total_shares'].iloc[i-1]
    else:
        shar['round_shares'].iloc[i] = shar['total_shares'].iloc[i]
shar = shar[~shar['round_shares'].isin([0])]  
#Calculate the number of shares issued separately by the startup in each funding round
shar.head()
#[14982 rows x 4 columns]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shar['total_shares'] = shar['number_of_shares']*100/shar['percent_total']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
  shar['round_shares'].iloc[i] = shar['total_shares'].iloc[i]-shar['total_shares'].iloc[i-1]


Unnamed: 0,Company name,filing_date,total_shares,round_shares
33100,10 Digital Media,2015-06-18,100.0,100.0
52887,10 Minutes With,2013-03-19,1000000.0,1000000.0
52902,10 Minutes With,2014-03-19,2578034.0,1578033.838973
52938,10 Minutes With,2015-03-19,3044543.0,466508.887294
52976,10 Minutes With,2016-03-19,3246753.0,202210.520486


In [70]:
length_column = shar.shape[0]
shar['rounds'] = ''
for i in range(0,length_column):
    if shar['Company name'].iloc[i] == shar['Company name'].iloc[i-1]:
        shar['rounds'].iloc[i] = shar['rounds'].iloc[i-1]+1
    else:
        shar['rounds'].iloc[i] = 1
shar.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,Company name,filing_date,total_shares,round_shares,rounds
33100,10 Digital Media,2015-06-18,100.0,100.0,1
52887,10 Minutes With,2013-03-19,1000000.0,1000000.0,1
52902,10 Minutes With,2014-03-19,2578034.0,1578033.838973,2
52938,10 Minutes With,2015-03-19,3044543.0,466508.887294,3
52976,10 Minutes With,2016-03-19,3246753.0,202210.520486,4


In [71]:
shar.to_csv(r'shar.csv')

## Calculate the final value of each startup

In [72]:
exit_value = shar.drop(columns='round_shares')
exit_value = exit_value.drop_duplicates(subset='Company name', keep='last')
exit_value = exit_value.rename(columns={'total_shares':'exit_shares'})
#Calculate the cumulative number of shares issued by the company at the time of investor exit
exit_value = exit_value.drop(columns='filing_date')
shar = shar.drop(columns='rounds')
shar = pd.merge(shar, exit_value, on='Company name',how='left')
# shar.info() [14982 rows x 5 columns]
shar.head()

Unnamed: 0,Company name,filing_date,total_shares,round_shares,exit_shares,rounds
0,10 Digital Media,2015-06-18,100.0,100.0,100.0,1
1,10 Minutes With,2013-03-19,1000000.0,1000000.0,3246753.0,4
2,10 Minutes With,2014-03-19,2578034.0,1578033.838973,3246753.0,4
3,10 Minutes With,2015-03-19,3044543.0,466508.887294,3246753.0,4
4,10 Minutes With,2016-03-19,3246753.0,202210.520486,3246753.0,4


In [73]:
shar.to_csv(r'shar.csv')

## Left merge shar.csv and set_n.csv 
#### Calculate TVPI of Never-Follow Investors / set_nn.csv

In [74]:
set_nn = pd.merge(set_n, shar, on=['Company name','filing_date'],how='left') 
set_nn = set_nn[~set_nn['round_shares'].isin([0])]    #Clear NaN 37023 --> 35288
set_nn['total_value'] = set_nn['Exit price']/set_nn['exit_shares']*set_nn['number_of_shares']
set_nn['paid_in'] = set_nn['Investment amount']/set_nn['round_shares']*set_nn['number_of_shares']
set_nn = set_nn.loc[(set_nn['paid_in']>0)]    #drop the data which has dumped shares during funding
set_nn['TVPI'] = set_nn['total_value']/set_nn['paid_in']
set_nn = set_nn.sort_values(by='TVPI',ascending=False)
set_nn.head()
# set_nn.info()

Unnamed: 0,Company name,Deal date,Investment amount,filing_date,Investor name,number_of_shares,percent_total,Current stage,Incorporation date,Exit date,Exit price,total_shares,round_shares,exit_shares,rounds,total_value,paid_in,TVPI
18712,Shazam,2014-03-10,6007666.0,2017-07-27,CLAUS_NAHMZOW,4173546.0,0.134,Exited,22/05/2000,2017-12-11,300000000.0,3109453000.0,3109452736.318408,1.0,3,1252064000000000.0,8063.563762,155274248084.95053
18711,Shazam,2014-03-10,6007666.0,2017-07-27,CHRIS_BORTON,35000000.0,1.126,Exited,22/05/2000,2017-12-11,300000000.0,3109453000.0,3109452736.318408,1.0,3,1.05e+16,67622.288496,155274248084.95053
18820,Shazam,2014-03-10,6007666.0,2017-07-27,_RAULT-WANG 2011 FAMILY TRUST,28000000.0,0.901,Exited,22/05/2000,2017-12-11,300000000.0,3109453000.0,3109452736.318408,1.0,3,8400000000000000.0,54097.830797,155274248084.95053
18821,Shazam,2014-03-10,6007666.0,2017-07-27,_SANFORD LUM REVOCABLE TRUST,15664569.0,0.504,Exited,22/05/2000,2017-12-11,300000000.0,3109453000.0,3109452736.318408,1.0,3,4699371000000000.0,30264.971545,155274248084.95053
18822,Shazam,2014-03-10,6007666.0,2017-07-27,_SERCOTEL S A DE C V,335655778.0,10.796,Exited,22/05/2000,2017-12-11,300000000.0,3109453000.0,3109452736.318408,1.0,3,1.006967e+17,648508.91015,155274248084.95053


In [75]:
set_nn.to_csv(r'set_nn.csv')

## Left merge shar.csv and set_a.csv 
#### Calculate TVPI of Always-Follow Investors / set_aa.csv

In [76]:
set_aa = pd.merge(set_a, shar, on=['Company name','filing_date'],how='left') 
length_column = set_aa.shape[0]
set_aa['add_shares'] = ''
for i in range(0,length_column):
    if set_aa['Investor name'].iloc[i] == set_aa['Investor name'].iloc[i-1]:
        set_aa['add_shares'].iloc[i] = set_aa['number_of_shares'].iloc[i]-set_aa['number_of_shares'].iloc[i-1]
    else:
        set_aa['add_shares'].iloc[i] = set_aa['number_of_shares'].iloc[i]
#calculate the number of increased shares purchased by investors per funding round
set_aa.head()
# set_aa.info()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,Company name,Deal date,Investment amount,filing_date,Investor name,number_of_shares,percent_total,Current stage,Incorporation date,Exit date,Exit price,total_shares,round_shares,exit_shares,rounds,add_shares
0,3-Cs,2013-05-01,333882.0,2013-06-29,A._BOUMAN,11250.0,0.554,Dead,29/06/2000,2018-10-18,0.0,2032520.0,2032520.325203,2849003.0,3,11250.0
1,3-Cs,2014-04-29,117250.0,2014-06-29,A._BOUMAN,21250.0,0.866,Dead,29/06/2000,2018-10-18,0.0,2453988.0,421467.404858,2849003.0,3,10000.0
2,3-Cs,2013-05-01,333882.0,2013-06-29,BARBARA_WEST,70000.0,3.444,Dead,29/06/2000,2018-10-18,0.0,2032520.0,2032520.325203,2849003.0,3,70000.0
3,3-Cs,2014-12-10,196000.0,2015-06-29,BARBARA_WEST,90000.0,3.158,Dead,29/06/2000,2018-10-18,0.0,2849003.0,395015.118941,2849003.0,3,20000.0
4,3-Cs,2013-05-01,333882.0,2013-06-29,DAVID B._FRANSEN,102500.0,5.044,Dead,29/06/2000,2018-10-18,0.0,2032520.0,2032520.325203,2849003.0,3,102500.0


In [80]:
set_aa['each_paid_in'] = set_aa['Investment amount']/set_aa['round_shares']*set_aa['add_shares']
length_column = set_aa.shape[0]
set_aa['paid_in'] = ''
for i in range(0,length_column):
    if set_aa['Investor name'].iloc[i] == set_aa['Investor name'].iloc[i-1]:
        set_aa['paid_in'].iloc[i] = set_aa['each_paid_in'].iloc[i]+set_aa['paid_in'].iloc[i-1]
    else:
        set_aa['paid_in'].iloc[i] = set_aa['each_paid_in'].iloc[i]
#calculate the cumulative paid-in of investors
set_aa.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,Company name,Deal date,Investment amount,filing_date,Investor name,number_of_shares,percent_total,Current stage,Incorporation date,Exit date,Exit price,total_shares,round_shares,exit_shares,rounds,add_shares,each_paid_in,paid_in
1,3-Cs,2014-04-29,117250.0,2014-06-29,A._BOUMAN,21250.0,0.866,Dead,29/06/2000,2018-10-18,0.0,2453988.0,421467.404858,2849003.0,3,10000.0,2781.947041,2781.947041
3,3-Cs,2014-12-10,196000.0,2015-06-29,BARBARA_WEST,90000.0,3.158,Dead,29/06/2000,2018-10-18,0.0,2849003.0,395015.118941,2849003.0,3,20000.0,9923.670796,9923.670796
5,3-Cs,2014-12-10,196000.0,2015-06-29,DAVID B._FRANSEN,152500.0,5.351,Dead,29/06/2000,2018-10-18,0.0,2849003.0,395015.118941,2849003.0,3,50000.0,24809.176991,24809.176991
8,3-Cs,2014-12-10,196000.0,2015-06-29,GERRIT_BOUMAN,977480.0,34.3,Dead,29/06/2000,2018-10-18,0.0,2849003.0,395015.118941,2849003.0,3,261000.0,129503.903894,129503.903894
10,3-Cs,2014-04-29,117250.0,2014-06-29,K._CLARK,47600.0,1.939,Dead,29/06/2000,2018-10-18,0.0,2453988.0,421467.404858,2849003.0,3,20000.0,5563.894083,5563.894083


In [77]:
set_aa.to_csv(r'set_aa_copy.csv')  

In [81]:
set_aa = set_aa.drop_duplicates(subset=['Company name','Investor name'], keep='last') 
set_aa = set_aa.drop(columns='each_paid_in')
set_aa = set_aa.loc[(set_aa['paid_in']>0)]    ##drop the data which has dumped shares during funding 6589-->5834
set_aa['total_value'] = set_aa['Exit price']/set_aa['exit_shares']*set_aa['number_of_shares']
set_aa['TVPI'] = set_aa['total_value']/set_aa['paid_in']
# set_aa['paid_in'] = pd.to_numeric(set_aa['paid_in'], errors='coerce')
# set_aa['total_value'] = pd.to_numeric(set_aa['total_value'], errors='coerce')
# set_aa['TVPI'] = pd.to_numeric(set_aa['TVPI'], errors='coerce')
# set_aa.round(2)
set_aa = set_aa.sort_values(by='TVPI',ascending=False)
set_aa.head()

Unnamed: 0,Company name,Deal date,Investment amount,filing_date,Investor name,number_of_shares,percent_total,Current stage,Incorporation date,Exit date,Exit price,total_shares,round_shares,exit_shares,rounds,add_shares,paid_in,total_value,TVPI
7166,Open Energi,2011-07-21,4000003.0,2018-09-01,ALEX_ROSSI,166124.0,0.0,Exited,09/09/1999,2011-07-28,4000000.0,55440050000.0,54589247105.965996,55439420000.0,4,1.0,7.3e-05,11.98598,163576.32669
7204,Open Energi,2011-07-21,4000003.0,2018-09-01,SIMON_AHMED,130010.0,0.0,Exited,09/09/1999,2011-07-28,4000000.0,55440050000.0,54589247105.965996,55439420000.0,4,1.0,7.3e-05,9.380328,128016.170048
1922,Chiltern,2014-06-13,204299.0,2020-05-23,_COVANCE INC,10.0,100.0,Exited,31/12/1982,2017-07-31,909221093.0,10.0,2.0,10.0,4,2.0,204299.0,909221100.0,4450.443189
6887,NewVoiceMedia,2018-03-27,116512.0,2018-06-28,MARK_DUNN,107000.0,0.053,Exited,23/07/1998,2018-09-20,263749447.1,203086700.0,13878037.656986,203952700.0,3,6000.0,50.372539,138371.2,2746.957613
1928,City Pantry,2018-01-13,42336.0,2018-03-13,HENRY ASHTON_CROSBY,143409.0,4.657,Exited,13/03/2013,2019-07-12,16000000.0,3078815.0,849602.828572,5081639.0,3,10688.0,532.586701,451536.2,847.817264


In [82]:
set_aa.to_csv(r'set_aa.csv')  

## Drop exceptional data

In [83]:
# set_never = set_nn[~set_nn['Company name'].isin(['Shazam','Chiltern','Cobalt Light Systems'])]
# set_always = set_aa[~set_aa['Company name'].isin(['Shazam','Chiltern'])]

In [84]:
set_never = set_nn.loc[(set_nn['TVPI']<300)]
set_always = set_aa.loc[(set_aa['TVPI']<300)]

In [85]:
set_never.to_csv(r'set_never.csv')  

In [86]:
set_always.to_csv(r'set_always.csv')  