# SETUP

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import geopy
import plotly.express as px
import ast
import json
from pd_replicator import replicator
from datetime import datetime
import gc

  from scipy.stats import gaussian_kde


In [3]:
pd.set_option('display.float_format', lambda x: '%.2f' % x)
pd.set_option('mode.use_inf_as_na', True)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)
sns.set(rc={'figure.figsize':(10,6)})
# Graphics in SVG format are more sharp and legible
%config InlineBackend.figure_format = 'svg'

  pd.set_option('mode.use_inf_as_na', True)


In [4]:
import warnings

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)


In [1]:
print("This is a change we are making")

This is a change we are making


# FUNCTION AND CONSTANT DEFINITIONS

<span style="color:blue"> **REMEMBER TO CHANGE THIS FOR EACH REPORT** </span>

This_mth is usually the last month for which you want the data for


In [5]:
this_mth = '2025-07'
this_mth_minus_12 = '2024-08'
this_mth_minus_24 = '2023-08'
current_year = 2025

In [6]:
pctiles = [0.05,0.1,0.25,0.5,0.75,0.9,0.95] 


**Define functions being used**

In [7]:
def to_1D(series):
 return pd.Series([x for _list in series for x in _list])

In [8]:
def percentile(n):
    def percentile_(x):
        return np.nanpercentile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

# IMPORT DATA

<span style="color:orange"> **SID TO DO LATER: CHANGE THE CSV NAMES FOR IMPORTING LATER** </span>


### Importing Raw Data

These are raw data files downloaded as csv from Snowflake

In [9]:
#List of filenames and corresponding variable names

filenames = ['assets_sf.csv', 'deals_sf.csv', 'investors_sf.csv','assets_all_automated.csv']
variable_names = ['assets', 'deals','investors','assets_automated']

# #Loop through filenames and read each one into a variable
for filename, varname in zip(filenames, variable_names):
   globals()[varname] = pd.read_csv(filename)


  globals()[varname] = pd.read_csv(filename)


### Importing Mapping files

These are created locally in the data folder

In [10]:
# List of filenames and corresponding variable names
filenames = ['country_names.csv', 'sector_mapping.csv', 'owner_pct_mapping.csv','deal_pct_mapping.csv']
variable_names = ['country_names', 'sector_mapping','owner_pct_mapping','deal_pct_mapping']

# Loop through filenames and read each one into a variable
for filename, varname in zip(filenames, variable_names):
    globals()[varname] = pd.read_csv(filename)

### Importing Override Files

These are manual overrides files for the investor ranking table where we think EV is wrongly calculated

In [11]:
# List of filenames and corresponding variable names
filenames = ['investor_ranking_override.csv','investor_ranking_override_us.csv']
variable_names = ['investor_ranking_override','investor_ranking_override_us']

# Loop through filenames and read each one into a variable
for filename, varname in zip(filenames, variable_names):
    globals()[varname] = pd.read_csv(filename)

# DATA PREPERATION

## Assets Data Prep

In [12]:
assets['ownership'].value_counts()

ownership
listed            46513
private           14419
regular           13414
ventureCapital     9891
minority           6647
subsidiary         3126
other               572
bankrupt            322
government          109
Name: count, dtype: int64

### Coverting to List from Str

In [13]:

assets['owner_ids'] = assets['owner_ids'].fillna('[]')
assets['owner_ids']=  assets['owner_ids'].apply(eval)  


assets['owner_names'] = assets['owner_names'].fillna('[]')
assets['owner_names'] =  assets['owner_names'].apply(eval)  


assets['owner_shares'] = assets['owner_shares'].fillna('[]')
assets['owner_shares'] =  assets['owner_shares'].apply(eval)  

### Converting to USD

In [14]:
USD_TO_EUR = assets[assets['currency'] == 'USD']['currency_to_eur'].value_counts(dropna=False).idxmax()
USD_TO_EUR

np.float64(0.855513)

In [15]:
assets['revenue_fte_ratio_usd'] = assets['revenue_fte_ratio_eur'] / USD_TO_EUR
assets['revenue_usd'] = assets['revenue_eur'] / USD_TO_EUR
assets['revenue_with_ai_generated_usd'] = assets['revenue_with_ai_generated_eur'] / USD_TO_EUR
assets['ebitda_usd'] = assets['ebitda_eur'] / USD_TO_EUR

### Mapping to regions

In [16]:
#we rename "region" column to "country_code" so it does not interfere with "region" column in "country_names" dataset
assets = assets.rename(columns={"region":"country_code"})
#then we merge "country_names" dataset with "deals" dataset so we have regions (US, Europe) colum appended
assets = pd.merge(assets, country_names, left_on = 'country_code', right_on = 'country_code', how="left")




In [17]:
assets_automated['asset_id'] = assets_automated['id']
assets_automated = assets_automated.rename(columns={"region":"country_code"})
assets_automated = pd.merge(assets_automated, country_names, left_on = 'country_code', right_on = 'country_code', how="left")


In [18]:
region_mapping = {
    "West": [
        "California", "Oregon", "Washington", "Hawaii", "Alaska",  
        "Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah", "Wyoming"  
    ],
    "Midwest": [
        "Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin", 
        "Iowa", "Kansas", "Minnesota", "Missouri", "Nebraska", "North Dakota", "South Dakota"  
    ],
    "South": [
        "Delaware", "District of Columbia", "Florida", "Georgia", "Maryland", "North Carolina", "South Carolina", "Virginia", "West Virginia", 
        "Alabama", "Kentucky", "Mississippi", "Tennessee", 
        "Arkansas", "Louisiana", "Oklahoma", "Texas"  
    ],
    "Northeast": [
        "Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island", "Vermont",  
        "New Jersey", "New York", "Pennsylvania"  
    ]
}

def map_us_region(state):
    for region, states in region_mapping.items():
        if state in states:
            return region
    return "Other"  # Fallback in case a state isn't found

In [19]:
#Note this was changed from headquarters_region_map before

assets['us_sub_region_map'] = assets['headquarters_region'].apply(map_us_region)

In [20]:
# This is old mapping for the US. Keeping it here if we need it for later.

'''
region_mapping = {
    "West Coast": ["California", "Oregon", "Washington", "Hawaii"],
    "Mountain": ["Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah", "Wyoming", "Alaska"],
    "Midwest": ["Iowa", "Kansas", "Missouri", "Nebraska", "North Dakota", "South Dakota", "Indiana"],
    "Great Lakes": ["Illinois", "Michigan", "Minnesota", "Ohio", "Wisconsin"],
    "New England": ["Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island", "Vermont"],
    "Mid-Atlantic": ["Delaware", "District of Columbia", "Maryland", "New Jersey", "New York", "Pennsylvania", "Virginia"],
    "South": ["Arkansas", "Kentucky", "Louisiana", "Oklahoma", "Tennessee", "Texas", "West Virginia"],
    "Southeast": ["Alabama", "Florida", "Georgia", "Mississippi", "North Carolina", "Puerto Rico", "South Carolina"]
}

   '''

'\nregion_mapping = {\n    "West Coast": ["California", "Oregon", "Washington", "Hawaii"],\n    "Mountain": ["Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah", "Wyoming", "Alaska"],\n    "Midwest": ["Iowa", "Kansas", "Missouri", "Nebraska", "North Dakota", "South Dakota", "Indiana"],\n    "Great Lakes": ["Illinois", "Michigan", "Minnesota", "Ohio", "Wisconsin"],\n    "New England": ["Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island", "Vermont"],\n    "Mid-Atlantic": ["Delaware", "District of Columbia", "Maryland", "New Jersey", "New York", "Pennsylvania", "Virginia"],\n    "South": ["Arkansas", "Kentucky", "Louisiana", "Oklahoma", "Tennessee", "Texas", "West Virginia"],\n    "Southeast": ["Alabama", "Florida", "Georgia", "Mississippi", "North Carolina", "Puerto Rico", "South Carolina"]\n}\n\n   '

### Extracting Yearly Financial Metrics

Revenue, EBITDA etc.

In [21]:
# Metrics and their corresponding type arrays (e.g. dicslosed or verified or estimate)

metrics_with_type = {
    'revenue': 'revenue_type_array',
    'ebitda': 'ebitda_type_array',
     'ebit': 'ebit_type_array',
    'eps': 'eps_type_array',
    'fte': 'fte_type_array',
    'net_income': 'net_income_type_array'
}

# All metrics to parse
metrics = [
    'revenue', 
    'ebitda',
    'capex', 
   # 'capital', 
   # 'cash', 
    'debt',
    'net_debt', 
    'gross_margin', 
   # 'inventories', 
   # 'payables',
   # 'receivables', 
    'free_cash_flow', 
    'net_income', 
    'eps', 
    'fte'
]

# Helper function to parse JSON safely
def safe_json_loads(x):
    if isinstance(x, str):
        try:
            return json.loads(x)
        except json.JSONDecodeError:
            return []
    return x

# Apply JSON parsing
assets['years_array'] = assets['years_array'].fillna('[]').apply(safe_json_loads)


for metric in metrics:
    assets[f'{metric}_array'] = assets[f'{metric}_array'].fillna('[]').apply(safe_json_loads)

for metric, type_field in metrics_with_type.items():
    assets[type_field] = assets[type_field].fillna('[]').apply(safe_json_loads)

# Define years to extract
years = [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]

# Build per-year columns with filtering on type
for metric in metrics:
    for year in years:
        col_name = f"{metric}_{year}"
        if metric in metrics_with_type:
            type_field = metrics_with_type[metric]
            assets[col_name] = assets.apply(
                lambda row: float(row[f"{metric}_array"][row['years_array'].index(str(year))])
                if (
                    str(year) in row['years_array']
                    and row[f"{metric}_array"][row['years_array'].index(str(year))] not in [None, "", "null"]
                    and row[type_field][row['years_array'].index(str(year))] in ['disclosed', 'verifiedSource']
                )
                else np.nan,
                axis=1
            )
        else:
            assets[col_name] = assets.apply(
                lambda row: float(row[f"{metric}_array"][row['years_array'].index(str(year))])
                if str(year) in row['years_array'] and row[f"{metric}_array"][row['years_array'].index(str(year))] not in [None, "", "null"]
                else np.nan,
                axis=1
            )

### Converting Financials to EUR

In [22]:
year_cols = [
    col for col in assets.columns 
    if any(str(y) in col for y in years) and not col.startswith("fte_")
]

for col in year_cols:
    assets[col] = assets[col] * assets["currency_to_eur"]


In [23]:
year_cols

['revenue_2013',
 'revenue_2014',
 'revenue_2015',
 'revenue_2016',
 'revenue_2017',
 'revenue_2018',
 'revenue_2019',
 'revenue_2020',
 'revenue_2021',
 'revenue_2022',
 'revenue_2023',
 'revenue_2024',
 'ebitda_2013',
 'ebitda_2014',
 'ebitda_2015',
 'ebitda_2016',
 'ebitda_2017',
 'ebitda_2018',
 'ebitda_2019',
 'ebitda_2020',
 'ebitda_2021',
 'ebitda_2022',
 'ebitda_2023',
 'ebitda_2024',
 'capex_2013',
 'capex_2014',
 'capex_2015',
 'capex_2016',
 'capex_2017',
 'capex_2018',
 'capex_2019',
 'capex_2020',
 'capex_2021',
 'capex_2022',
 'capex_2023',
 'capex_2024',
 'debt_2013',
 'debt_2014',
 'debt_2015',
 'debt_2016',
 'debt_2017',
 'debt_2018',
 'debt_2019',
 'debt_2020',
 'debt_2021',
 'debt_2022',
 'debt_2023',
 'debt_2024',
 'net_debt_2013',
 'net_debt_2014',
 'net_debt_2015',
 'net_debt_2016',
 'net_debt_2017',
 'net_debt_2018',
 'net_debt_2019',
 'net_debt_2020',
 'net_debt_2021',
 'net_debt_2022',
 'net_debt_2023',
 'net_debt_2024',
 'gross_margin_2013',
 'gross_margin_201

### Adding yearly Net Debt / EBITDA, Capex / Sales

In [24]:

for year in years:
    year_str = str(year)

    # Only calculate net debt / ebitda where ebitda is positive
    ebitda_col = f'ebitda_{year_str}'
    net_debt_col = f'net_debt_{year_str}'
    nd_by_ebitda_col = f'net_debt_by_ebitda_{year_str}'

    mask = assets[ebitda_col] > 0
    assets.loc[mask, nd_by_ebitda_col] = assets.loc[mask, net_debt_col] / assets.loc[mask, ebitda_col]

    # Calculate capex to sales (revenue) ratio
    capex_col = f'capex_{year_str}'
    revenue_col = f'revenue_{year_str}'
    capex_to_sales_col = f'capex_to_sales_{year_str}'

    assets[capex_to_sales_col] = assets[capex_col] / assets[revenue_col]



#for year in years:
 #       assets.loc[assets['ebitda_' + str(year)] > 0,'net_debt_by_ebitda_' + str(year)] = assets['net_debt_' + str(year)] / assets['ebitda_' + str(year)]
  #      assets.loc[:,'capex_to_sales_' + str(year)] = assets['capex_' + str(year)] / assets['revenue_' + str(year)]

Last reported value

In [25]:
assets['capex_to_sales']=assets['capex_eur']/assets['revenue_eur']

### Adding YoY numbers

In [26]:
metric_for_growth = 'revenue'

# Generate year-over-year pairs from 2013 to 2025
year_pairs = [(y1, y0) for y0, y1 in zip(range(2013, 2024), range(2014, 2025))]

# Calculate change columns
for y1, y0 in year_pairs:
    col_name = f"{metric_for_growth}_chg_{y1}_{y0}"
    assets[col_name] = assets[f"{metric_for_growth}_{y1}"] / assets[f"{metric_for_growth}_{y0}"]

### Adding CAGRs

In [27]:
metrics_for_CAGR = ['revenue', 'ebitda','fte']

for metric in metrics_for_CAGR:
    assets[f'{metric}_chg_2022_2017'] = (assets[f'{metric}_2022'] / assets[f'{metric}_2017'])**(1/5) - 1
    assets[f'{metric}_chg_2023_2018'] = (assets[f'{metric}_2023'] / assets[f'{metric}_2018'])**(1/5) - 1
    assets[f'{metric}_chg_2022_2019'] = (assets[f'{metric}_2022'] / assets[f'{metric}_2019'])**(1/3) - 1
    assets[f'{metric}_chg_2023_2020'] = (assets[f'{metric}_2023'] / assets[f'{metric}_2020'])**(1/3) - 1
    assets[f'{metric}_chg_2023_2019'] = (assets[f'{metric}_2023'] / assets[f'{metric}_2019'])**(1/4) - 1
    assets[f'{metric}_chg_2023_2021'] = (assets[f'{metric}_2023'] / assets[f'{metric}_2021'])**(1/2) - 1
    assets[f'{metric}_chg_2024_2019'] = (assets[f'{metric}_2024'] / assets[f'{metric}_2019'])**(1/5) - 1

### Adding EBITDA margins

In [28]:
ebitda_years = list(range(2013, 2025))  # 2013 to 2024 inclusive

for year in ebitda_years:
    revenue_col = f'revenue_{year}'
    ebitda_col = f'ebitda_{year}'
    margin_col = f'EBITDA_Margin_{year}'

    assets[margin_col] = assets.apply(
        lambda row: row[ebitda_col] / row[revenue_col]
        if pd.notnull(row[ebitda_col]) and pd.notnull(row[revenue_col]) and row[revenue_col] != 0
        else np.nan,
        axis=1
    )

### Adding revenue, EBITDA and FTE for NaN datapoints

<span style="color:orange"> **LATER: SID TO CHECK THIS CODE AND SEE HOW MUCH IT IMPACTS** </span>


Check the code once and we are not using EBITDA code for now

In [29]:
# Step 1: Parse average FTE from fte_range
def parse_avg_fte(rng):
    try:
        low, high = map(float, rng.split('-'))
        return (low + high) / 2
    except:
        return np.nan

# Fill missing FTE using fte_range
assets['fte_range_avg'] = assets['fte_range'].apply(parse_avg_fte)
assets['estimated_fte'] = assets['fte']
assets.loc[assets['estimated_fte'].isna(), 'estimated_fte'] = assets['fte_range_avg']

# Step 2: Compute sub-sector level median revenue per FTE
sub_sector_median_revenue_per_fte = (
    assets
    .drop(columns=['subsector'])  # Exclude the grouping column from the apply input
    .groupby(assets['subsector'], group_keys=False)
    .apply(lambda df: (df['revenue'] / df['estimated_fte'])[df['estimated_fte'] > 0].median())
    .to_dict()
)



# Step 3: Define logic to estimate revenue
def estimate_revenue(row):
    if pd.notnull(row['revenue']):
        return row['revenue']
    elif pd.notnull(row['revenue_with_ai_generated']):
        return row['revenue_with_ai_generated']
    elif pd.notnull(row['estimated_fte']) and row['subsector'] in sub_sector_median_revenue_per_fte:
        return row['estimated_fte'] * sub_sector_median_revenue_per_fte[row['subsector']]
    else:
        return np.nan

# Step 4: Apply estimation
assets['estimated_revenues'] = assets.apply(estimate_revenue, axis=1)
assets['estimated_revenues_calc_eur'] = assets['estimated_revenues'] * assets['currency_to_eur']


# EBITDA addition
assets['estimated_ebitda'] = assets['ebitda'].combine_first(assets['ebitda_with_ai_generated'])


len(assets[assets['estimated_revenues'].isnull()])/len(assets)


0.030574763453422162

### Adding Growth metrics

In [30]:
# Define the growth metrics and target period list
growth_metrics = [
    'revenue_growth', 'ebitda_growth', 'ebit_growth', 'fte_growth', 'gross_margin_growth'
]
period = ['oneYear', 'twoYears', 'threeYears', 'threeMonths', 'sixMonths']

# Helper function to extract growth value by period
def extract_growth_value(row, metric, p):
    period_array = row['growth_period_array']
    values_array = row[f'{metric}_array']
    if isinstance(period_array, list) and isinstance(values_array, list):
        try:
            index = period_array.index(p)
            val = values_array[index]
            return float(val) if val not in [None, "", "null"] else np.nan
        except ValueError:
            return np.nan
    return np.nan

# Apply safe_json_loads to all growth arrays including the period array
for metric in growth_metrics + ['growth_period']:
    assets[f'{metric}_array'] = assets[f'{metric}_array'].fillna('[]').apply(safe_json_loads)

# Create new columns for each metric and period
for metric in growth_metrics:
    for p in period:
        col_name = f"{metric}_{p.lower()}"
        assets[col_name] = assets.apply(lambda row: extract_growth_value(row, metric, p), axis=1)

### Creating bands

Note: this is EUR, as we go ahead would need more USD as well

**Revenue Bands**

In [31]:
def revenue_range(revenue):
    # First check revenue ranges
    if revenue < 50:
        return "1_small_lt_50m_eur"
    elif 50 <= revenue < 250:
        return "2_medium_50_250m_eur"
    elif 250 <= revenue < 1000:
        return "3_large_250_1000m_eur"
    elif revenue >= 1000:
        return "4_mega_large_gt_1bn_eur"
    else:
        return "5_unknown"

# Apply the function to the DataFrame
assets['revenue_range'] = assets['revenue_eur'].apply(revenue_range)

**EBITDA Bands**

In [32]:
def ebitda_range(ebitda):
    # First check revenue ranges
    if ebitda < 10:
        return "1_small_lt_10m_eur"
    elif 10 <= ebitda < 50:
        return "2_medium_10_50m_eur"
    elif 50 <= ebitda < 200:
        return "3_large_50_200m_eur"
    elif ebitda >= 200:
        return "4_mega_large_gt_200m_eur"
    else:
        return "5_unknown"

# Apply the function to the DataFrame
assets['ebitda_range'] = assets['ebitda_eur'].apply(ebitda_range)

**Add-on Bands**

In [33]:
assets['add_on_band'] = pd.cut(assets['add_on_deal_count_l5y'], bins = [-1,0,2,5,1000])


### Creating new other columns

In [34]:
assets["cnt"] = 1

In [35]:
assets['count_owners']=assets['owner_ids'].apply(lambda x: len(x))

In [36]:
def calculate_company_age(year_founded):
    if pd.isna(year_founded):
        return np.nan
    else:
        return current_year - year_founded

# Applying the function to the 'year_founded' column to create 'company_age' column
assets['company_age'] = assets['year_founded'].apply(calculate_company_age)

In [37]:
assets['ebitda_eur_pos'] = assets[assets['ebitda_eur']>0]['ebitda_eur']

### Creating filters assets datasets

In [38]:
assets_EU = assets[assets['region']=='Europe']
assets_EU_PE = assets_EU[assets_EU["ownership"].isin(["regular", "minority"])]

In [39]:
assets_NA = assets[assets["region"]=="North America"]
assets_NA_PE = assets_NA[assets_NA["ownership"].isin(["regular", "minority"])]

In [40]:
assets_EU_PE_majority = assets_EU_PE[assets_EU_PE['ownership']=='regular']
assets_EU_PE_ex_fin = assets_EU_PE[assets_EU_PE['sector'] != 'financial']

### Coverage Test

In [41]:
assets.groupby(['subsector'])[['revenue_chg_2023_2022','revenue_chg_2022_2021']].count().sort_values(by='revenue_chg_2023_2022')

Unnamed: 0_level_0,revenue_chg_2023_2022,revenue_chg_2022_2021
subsector,Unnamed: 1_level_1,Unnamed: 2_level_1
business,0,0
other,57,53
education,371,385
infrastructure,393,410
rawMaterials,665,681
agriculture,690,711
telecom,708,729
insurance,750,767
medtech,975,1016
biotechnology,1027,1063


In [42]:
#to_1D(assets['fte_array']).value_counts(dropna = False).sort_index() / len(assets['years_array'])

fte_columns = [col for col in assets.columns if col.startswith('fte_') and col[4:].isdigit()]
fte_fill_rates = assets[fte_columns].notnull().mean().sort_index()
fte_fill_rates


fte_2013   0.01
fte_2014   0.02
fte_2015   0.05
fte_2016   0.08
fte_2017   0.13
fte_2018   0.19
fte_2019   0.25
fte_2020   0.60
fte_2021   0.64
fte_2022   0.65
fte_2023   0.65
fte_2024   0.55
dtype: float64

In [43]:
cols = [c for c in assets if c.startswith('revenue_') and c[8:].isdigit()]
fill_rates_rev = assets[assets['ownership'] != 'listed'][cols].notna().mean().sort_index()
fill_rates_rev


revenue_2013   0.04
revenue_2014   0.06
revenue_2015   0.11
revenue_2016   0.16
revenue_2017   0.26
revenue_2018   0.36
revenue_2019   0.45
revenue_2020   0.49
revenue_2021   0.51
revenue_2022   0.51
revenue_2023   0.47
revenue_2024   0.23
dtype: float64

In [44]:
#to_1D(assets['ebitda_years']).value_counts(dropna = False).sort_index() / len(assets['ebitda_years'])

cols = [c for c in assets_EU_PE if c.startswith('ebitda_') and c[8:].isdigit()]
fill_rates_EU_PE_EBITDA = assets_EU_PE[assets_EU_PE['ownership'] != 'listed'][cols].notna().mean().sort_index()
fill_rates_EU_PE_EBITDA
#to_1D(assets_EU_PE[assets_EU_PE['ownership']!='listed']['revenue_years']).value_counts(dropna = False).sort_index() / len(assets_EU_PE['revenue_years'])

ebitda_2013   0.04
ebitda_2014   0.07
ebitda_2015   0.13
ebitda_2016   0.20
ebitda_2017   0.34
ebitda_2018   0.47
ebitda_2019   0.57
ebitda_2020   0.62
ebitda_2021   0.66
ebitda_2022   0.67
ebitda_2023   0.63
ebitda_2024   0.30
dtype: float64

In [45]:
#to_1D(assets['ebitda_pct_revenue_years']).value_counts(dropna = False).sort_index() / len(assets['ebitda_pct_revenue_years'])

cols = [c for c in assets if c.startswith('EBITDA_Margin_') and c[8:].isdigit()]
fill_rates_EBITDA_margin = assets[assets['ownership'] != 'listed'][cols].notna().mean().sort_index()
fill_rates_EBITDA_margin
#to_1D(assets_EU_PE[assets_EU_PE['ownership']!='listed']['revenue_years']).value_counts(dropna = False).sort_index() / len(assets_EU_PE['revenue_years'])

Series([], dtype: float64)

**EBITDA and Revenue Coverage**

**Not much AI generated revenue and EBITDA in the UK & Europe**

In [46]:
assets[assets['sub_region_2']=='UK']['revenue_is_ai_generated'].value_counts()

revenue_is_ai_generated
False    6931
True      168
Name: count, dtype: int64

In [47]:
assets[assets['sub_region_2']=='UK']['ebitda_is_ai_generated'].value_counts()

ebitda_is_ai_generated
False    6932
True      167
Name: count, dtype: int64

In [48]:
assets[assets['region']=='Europe']['revenue_is_ai_generated'].value_counts()

revenue_is_ai_generated
False    36538
True      1727
Name: count, dtype: int64

In [49]:
assets[assets['region']=='Europe']['ebitda_is_ai_generated'].value_counts()

ebitda_is_ai_generated
False    36545
True      1720
Name: count, dtype: int64

**Even though EBITDA and Revenue numbers are still missing in Europe**

In [50]:
assets[assets['region']=='Europe']['ebitda_eur'].isna().value_counts()

ebitda_eur
False    30744
True      7521
Name: count, dtype: int64

In [51]:
assets[assets['region']=='Europe']['revenue_eur'].isna().value_counts()

revenue_eur
False    33075
True      5190
Name: count, dtype: int64

## Investors Data Prep

**Create a alias investor id database**

Sometimes the investor id is missing even if the buyer name is present.. so you could use this mapping to lookup investor id and create a richer database. Also work with Basil to fix these.

In [52]:

df = investors[['investor_id', 'investor_name', 'aliases']].copy()

# Split by NaNs
alias_missing = df[df['aliases'].isna()].copy()
alias_present = df[df['aliases'].notna()].copy()

# Fix NaNs: use name as alias
alias_missing['aliases'] = alias_missing['investor_name']

# Convert stringified lists to real lists
alias_present['aliases'] = alias_present['aliases'].apply(ast.literal_eval)

# Append `name` to each list
alias_present['aliases'] = alias_present.apply(
    lambda row: row['aliases'] + [row['investor_name']], axis=1
)

# Explode the list
alias_present = alias_present.explode('aliases')

# Combine the two subsets
investors_alias = pd.concat([alias_missing, alias_present], ignore_index=True)

# Final cleanup and column order
investors_alias = investors_alias[['investor_id', 'aliases', 'investor_name']]
investors_alias


Unnamed: 0,investor_id,aliases,investor_name
0,2,5square,5square
1,3,AAC Capital,AAC Capital
2,5,Active Capital Company,Active Capital Company
3,6,Advent Life Sciences,Advent Life Sciences
4,7,Alpha Private Equity,Alpha Private Equity
...,...,...,...
23673,21419,浙能股权投资基金管理有限公司,Zhejiang Energy Fund (浙能基金)
23674,21419,浙江省能源集团,Zhejiang Energy Fund (浙能基金)
23675,21419,Zhejiang Energy Fund (浙能基金),Zhejiang Energy Fund (浙能基金)
23676,21441,Kotak Alternative Asset Managers,Kotak Alternate Asset Managers


In [53]:
investors_alias[investors_alias['investor_id']==8]

Unnamed: 0,investor_id,aliases,investor_name
11656,8,Apax Global Alpha,Apax Partners
11657,8,Apax Global Buyout,Apax Partners
11658,8,Apax Digital Growth,Apax Partners
11659,8,Apax Mid-Market Israel,Apax Partners
11660,8,Apax Global Impact,Apax Partners
11661,8,Apax Partners,Apax Partners


In [54]:
# Step 1: Build alias → investor_id mapping
alias_to_id = investors_alias.set_index('aliases')['investor_id'].to_dict()

In [55]:
# Step 1: Build investor_id → name mapping
alias_to_name = investors.set_index('investor_id')['investor_name'].to_dict()

In [56]:
investors.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15952 entries, 0 to 15951
Data columns (total 18 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   investor_id                       15952 non-null  int64  
 1   investor_name                     15952 non-null  object 
 2   investor_hq_city                  10992 non-null  object 
 3   investor_country_code             15186 non-null  object 
 4   asset_id                          15952 non-null  object 
 5   advisor_id                        15952 non-null  object 
 6   aliases                           4302 non-null   object 
 7   assets_total                      15952 non-null  int64  
 8   dry_powder_max_eur                1669 non-null   float64
 9   dry_powder_min_eur                1669 non-null   float64
 10  flagship_fund_id                  1380 non-null   float64
 11  fte                               6603 non-null   float64
 12  fund

### Mapping region

In [57]:

# Already solved in snowflake investors = investors.rename(columns={"operational_hq_country_code": "investor_country_code"})
# Already solved in snowflake investors = investors.rename(columns={"operational_hq_city": "investor_hq_city"})
investors = pd.merge(investors, country_names, left_on = 'investor_country_code', right_on = 'country_code', how='left')
investors.drop(columns=['country_code'], inplace=True)
investors = investors.rename(columns={"country_name": "investor_country_name"})
investors = investors.rename(columns={"region": "investor_region"})
investors = investors.rename(columns={"sub_region": "investor_sub_region"})

### Converting to List

In [58]:
investors['asset_id'].fillna('[]', inplace= True)  
investors['asset_id'] = investors['asset_id'].apply(eval) 

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  investors['asset_id'].fillna('[]', inplace= True)


### Other data prep

In [59]:
investors['funds_raised_last_five_years'] = (
    investors['funds_raised_last_five_years'].replace(0, np.nan))

In [60]:
investors['funds_raised_last_five_years_eur'] = (
    investors['funds_raised_last_five_years_eur']
    .replace(0, '-')
    .replace(np.nan, '-')
)

In [61]:
investors['count_assets'] = investors['asset_id'].apply(lambda x: len(x))

### Converting fundraising to USD

In [62]:
investors['funds_raised_last_five_years_usd'] = (
    pd.to_numeric(investors['funds_raised_last_five_years_eur'], errors='coerce') / USD_TO_EUR
)


In [63]:
investors['funds_raised_last_five_years_usd'].fillna("-",inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  investors['funds_raised_last_five_years_usd'].fillna("-",inplace = True)
  investors['funds_raised_last_five_years_usd'].fillna("-",inplace = True)


## Deals Data Prep

### Evaluate Arrays

**Filling NAN arrays with blanks so it is possible to then eval them**

In [64]:
columns_to_fill = [
    'buyer_linked_ids',
    'buyer_names',
    'buyer_leading_parties',
    'buyer_share_values',
    'buyer_share_pcts',
    'buyer_types',

    'seller_linked_ids',
    'seller_names',
    'seller_leading_parties',
    'seller_share_values',
    'seller_share_pcts',
    'seller_types'
]

for column in columns_to_fill:
    deals[column] = deals[column].fillna('[]')

**Preserving str versions of arrays for analysis before convering them to a list**

In [65]:
#Preserving the str names to be able to query later

deals['buyer_names_str'] = deals['buyer_names']
deals['seller_names_str'] = deals['seller_names']

deals['buyer_types_str'] = deals['buyer_types']
deals['seller_types_str'] = deals['seller_types']

deals['buyer_share_values_str'] = deals['buyer_share_values']
deals['seller_share_values_str'] = deals['seller_share_values']


**Converting to list types**

In [66]:
columns_to_eval = {
    'buyer_linked_ids': 'buyer_linked_ids',
    'buyer_names': 'buyer_names',
    'buyer_leading_parties': 'buyer_leading_parties',
    'buyer_share_values': 'buyer_share_values',
    'buyer_share_pcts': 'buyer_share_pcts',
    'buyer_types':'buyer_types',

    'seller_linked_ids': 'seller_linked_ids',
    'seller_names': 'seller_names',
    'seller_leading_parties': 'seller_leading_parties',
    'seller_share_values': 'seller_share_values',
    'seller_share_pcts': 'seller_share_pcts',
    'seller_types':'seller_types'
}

for input_column, output_column in columns_to_eval.items():
    deals[output_column] = deals[input_column].apply(eval)

### Creating announcement date column


**Creating the announcement date column**


There are some missing months

In [67]:
deals['announcement_date_month'].value_counts(dropna = False).sort_index()

announcement_date_month
1.00     43611
2.00     31339
3.00     35503
4.00     34097
5.00     33379
6.00     36915
7.00     35661
8.00     29328
9.00     33120
10.00    34240
11.00    32247
12.00    35789
NaN      10177
Name: count, dtype: int64

Also feel the automated deals are randamoly skewed towards january

In [68]:
deals[['type','announcement_date_month']].value_counts(dropna = False).sort_index()

type       announcement_date_month
automated  1.00                       25484
           2.00                       16650
           3.00                       18967
           4.00                       18411
           5.00                       17689
           6.00                       18716
           7.00                       17472
           8.00                       16133
           9.00                       17662
           10.00                      18072
           11.00                      16892
           12.00                      18115
           NaN                          553
curated    1.00                       18127
           2.00                       14689
           3.00                       16536
           4.00                       15686
           5.00                       15690
           6.00                       18199
           7.00                       18189
           8.00                       13195
           9.00                       154

In [69]:
# Ensure columns are nullable integers
deals['announcement_date_year'] = deals['announcement_date_year'].astype('Int64')
deals['announcement_date_month'] = deals['announcement_date_month'].astype('Int64')

In [70]:
deals.loc[deals['announcement_date_month'] == 0, 'announcement_date_month'] = pd.NA

In [71]:
#assigning random numbers to missing month based on the month frequency of only curated deals

month_distribution = deals[deals['type']=='curated']['announcement_date_month'].value_counts(normalize=True)

# Function to randomly assign month based on the distribution
def assign_random_month():
    return np.random.choice(month_distribution.index, p=month_distribution.values)

# Assign random month to missing 'announcement_date_month' based on the distribution
deals['announcement_date_month'] = deals['announcement_date_month'].apply(
    lambda x: x if pd.notna(x) else assign_random_month()
).astype('Int64')

In [72]:
deals['announcement_date_month'].value_counts(dropna = False)

announcement_date_month
1     44534
6     37883
12    36702
7     36591
3     36397
10    35111
4     34917
5     34217
9     33907
11    33040
2     32076
8     30031
Name: count, dtype: Int64

In [73]:
deals['announcement_date'] = pd.NA

deals['announcement_date'] = (
    deals['announcement_date_year'].astype(str) + '-' +
    deals['announcement_date_month'].astype(str).str.zfill(2)
)

### Adding quarters to deals

In [74]:
deals = deals[deals['announcement_date_year'].notna()]

In [75]:
deals['announcement_date_quarter'] = pd.to_datetime(
    deals['announcement_date_year'].astype(int).astype(str) + '-' + 
    deals['announcement_date_month'].astype('Int64').astype(str), 
    format='%Y-%m', errors='coerce'
).dt.to_period('Q')

### Changing publication date

In [76]:
deals['publication_date'] = pd.to_datetime(deals['publication_date'], utc=True)

In [77]:
deals['publication_date_year_month'] = deals['publication_date'].dt.strftime('%Y-%m')

### Mapping regions and sectors

**Adding region info**

In [78]:
#we rename "region" column to "country_code" so it does not interfere with "region" column in "country_names" dataset
deals = deals.rename(columns={"region":"country_code"})

#then we merge "country_names" dataset with "deals" dataset so we have regions (US, Europe) colum appended
deals = pd.merge(deals, country_names, left_on = 'country_code', right_on = 'country_code', how="left")

<span style="color:orange"> **REQUEST ENGINEERING: US STATE INFO NOT PRESENT IN DEALS** </span>


In [79]:
#Note this was changed from headquarters_region_map before
#deals['us_sub_region_map'] = assets['headquarters_region'].apply(map_region)

**Fill missing sectors and lookup**

We are overwriting sectors based on subsectors. There were some mismatches in sectors and subsectors and also a few were nan.

In [80]:
# Load sector mapping
sector_map = pd.read_csv('sector_mapping.csv')

# Clean whitespace
deals['subsector'] = deals['subsector'].astype(str).str.strip()
sector_map['subsector'] = sector_map['subsector'].astype(str).str.strip()
sector_map['sector'] = sector_map['sector'].astype(str).str.strip()

# Create mapping dictionary: subsector → sector
subsector_to_sector = dict(zip(sector_map['subsector'], sector_map['sector']))

# Overwrite all sector values based on mapped subsector
deals['sector'] = deals['subsector'].map(subsector_to_sector)

In [81]:
deals[['sector','subsector']].value_counts(dropna = False).sort_index()

sector           subsector           
consumer         consumerGoods            11196
                 food                     16694
                 leisure                   8756
                 retail                   10141
financial        assetManagement           6151
                 banking                  10916
                 insurance                 7544
industrials      agriculture               3370
                 automotive                6463
                 construction              7123
                 manufacturing            33990
materialsEnergy  chemicals                 3536
                 energy                   13144
                 rawMaterials              2491
other            infrastructure            2116
                 other                      835
scienceHealth    biotechnology            14845
                 healthcareServices       16123
                 medtech                  12193
                 pharmaceuticals           6184
se

### Creating seperate asset and advisor id datasets 

<span style="color:orange"> **LATER SID TO CHECK: WHETHER WE NEED TO SEPERATE ASSET AND INVESTOR IDS BUYER AND SELLER IDS** </span>

**Why? Because seperate linked id's are not present in Snowflake**


In [82]:
import ast

def extract_ids(row, id_col: str, type_col: str, match_type: str):
    # Get IDs — use only if it's a list
    ids = row[id_col] if isinstance(row[id_col], list) else []

    # Get types — handle NaN, float, str, etc.
    raw_types = row[type_col]
    if isinstance(raw_types, list):
        types = raw_types
    elif isinstance(raw_types, str):
        try:
            types = ast.literal_eval(raw_types)
            if not isinstance(types, list):
                types = []
        except:
            types = []
    else:
        types = []

    # Extract matching IDs
    return [
        id_.strip()
        for t, id_ in zip(types, ids)
        if isinstance(t, str) and t.strip().lower() == match_type
           and isinstance(id_, str) and id_.strip()
    ] or pd.NA

In [83]:
# Buyer investor IDs
deals['buyer_investor_ids'] = deals.apply(
    lambda row: extract_ids(row, 'buyer_linked_ids', 'buyer_types', 'investor'), axis=1
)

# Buyer asset IDs
deals['buyer_asset_ids'] = deals.apply(
    lambda row: extract_ids(row, 'buyer_linked_ids', 'buyer_types', 'asset'), axis=1
)

# Seller investor IDs
deals['seller_investor_ids'] = deals.apply(
    lambda row: extract_ids(row, 'seller_linked_ids', 'seller_types', 'investor'), axis=1
)

# Seller asset IDs
deals['seller_asset_ids'] = deals.apply(
    lambda row: extract_ids(row, 'seller_linked_ids', 'seller_types', 'asset'), axis=1
)

**We are extracting buyer/seller names based on types**

Not sure if we need this in the end


In [84]:
# Generic extraction function
def extract_names(row, name_col: str, type_col: str, match_type: str):
    # Extract names
    names = row[name_col] if isinstance(row[name_col], list) else []

    # Extract types
    raw_types = row[type_col]
    if isinstance(raw_types, list):
        types = raw_types
    elif isinstance(raw_types, str):
        try:
            types = ast.literal_eval(raw_types)
            if not isinstance(types, list):
                types = []
        except:
            types = []
    else:
        types = []

    # Extract matching names
    return [
        n.strip()
        for t, n in zip(types, names)
        if isinstance(t, str) and t.strip().lower() == match_type
           and isinstance(n, str) and n.strip()
    ] or pd.NA

# Apply to full `deals` DataFrame

# Extract seller investor names
deals['seller_investor_names'] = deals.apply(
    lambda row: extract_names(row, 'seller_names', 'seller_types', 'investor'), axis=1
)

# Extract seller asset names
deals['seller_asset_names'] = deals.apply(
    lambda row: extract_names(row, 'seller_names', 'seller_types', 'asset'), axis=1
)

# Extract buyer investor names
deals['buyer_investor_names'] = deals.apply(
    lambda row: extract_names(row, 'buyer_names', 'buyer_types', 'investor'), axis=1
)

# Extract buyer asset names
deals['buyer_asset_names'] = deals.apply(
    lambda row: extract_names(row, 'buyer_names', 'buyer_types', 'asset'), axis=1
)


### Further processing

<span style="color:orange"> **LATER SID TO CHECK: IF WE NEED COMBINES REASONS** </span>


In [85]:

def combine_reason_lists(row):
    # Parse stringified lists safely
    try:
        buyer = ast.literal_eval(row['buyer_reasons']) if isinstance(row['buyer_reasons'], str) else []
    except:
        buyer = []

    try:
        seller = ast.literal_eval(row['seller_reasons']) if isinstance(row['seller_reasons'], str) else []
    except:
        seller = []

    # Combine and filter out empty strings or non-string values
    combined = [r for r in buyer + seller if isinstance(r, str) and r.strip()]
    
    # Return stringified list (like: '["foo", "bar"]') or pd.NA
    return str(combined) if combined else pd.NA

# Apply to dataframe
deals['combined_reasons'] = deals.apply(combine_reason_lists, axis=1)

In [86]:
deals['combined_reasons'].value_counts().head(20)

combined_reasons
['vcRound']                                                                                                                  58227
['platform']                                                                                                                 27434
['vcRound', 'vcRound']                                                                                                       25318
['divestiture']                                                                                                              19355
['vcRound', 'vcRound', 'vcRound']                                                                                            18501
['vcRound', 'vcRound', 'vcRound', 'vcRound']                                                                                 13724
['vcRound', 'vcRound', 'vcRound', 'vcRound', 'vcRound']                                                                       9838
['vcRound', 'vcRound', 'vcRound', 'vcRound', 'vcRound', 'vcRound']

**Creating the majority buyer and seller column**

If there are two it takes the first one that is listed.

**Extracting info for majority deal buyer and seller**

There are some edge cases where there are 2 majority buyers in that case you take the first listed



In [87]:


df_summary = deals[['deal_id','buyer_linked_ids','buyer_names','buyer_share_values','buyer_types','buyer_share_pcts']]
df_summary = df_summary.explode(['buyer_linked_ids','buyer_names','buyer_share_values','buyer_types','buyer_share_pcts'])

df_summary = df_summary[df_summary['buyer_share_values'].isin(['majority','sharedMajority'])]
df_summary = df_summary.drop_duplicates(subset='deal_id', keep='first')

df_summary = df_summary.rename(columns={
    'buyer_linked_ids': 'majority_buyer_linked_id',
    'buyer_names': 'majority_buyer_name',
    'buyer_share_values': 'majority_buyer_share',
    'buyer_types': 'majority_buyer_type',
    'buyer_share_pcts': 'majority_buyer_share_pct'
})

deals = pd.merge (deals, df_summary, on ='deal_id', how = 'left')

deals['majority_buyer_linked_id'] = pd.to_numeric(deals['majority_buyer_linked_id'], errors='coerce').astype('Int64') 




<span style="color:orange"> **THERE IS A BIG ERROR HERE AS ALL LINKED ID's ARE NOT INVESTORS THEY ARE ASSETS AND OTHER TYPES AS WELL** </span>

The code is commented as a result for now


In [88]:
'''
df_summary = investors[['investor_id','investor_country_name','investor_sub_region','investor_region']].copy()

df_summary.rename(columns={
    "investor_id":'majority_buyer_linked_id',
    "investor_region": "majority_buyer_region",
    "investor_country_name": "majority_buyer_country_name",
    "investor_sub_region": "majority_buyer_sub_region"
}, inplace=True)

deals = pd.merge (deals, df_summary, on ='majority_buyer_linked_id', how = 'left')

'''

'\ndf_summary = investors[[\'investor_id\',\'investor_country_name\',\'investor_sub_region\',\'investor_region\']].copy()\n\ndf_summary.rename(columns={\n    "investor_id":\'majority_buyer_linked_id\',\n    "investor_region": "majority_buyer_region",\n    "investor_country_name": "majority_buyer_country_name",\n    "investor_sub_region": "majority_buyer_sub_region"\n}, inplace=True)\n\ndeals = pd.merge (deals, df_summary, on =\'majority_buyer_linked_id\', how = \'left\')\n\n'

In [89]:

df_summary = deals[['deal_id','seller_linked_ids','seller_names','seller_share_values','seller_types','seller_share_pcts']]

df_summary = df_summary.explode(['seller_linked_ids','seller_names','seller_share_values','seller_types','seller_share_pcts'])

df_summary = df_summary[df_summary['seller_share_values'].isin(['majority','sharedMajority'])]

df_summary = df_summary.drop_duplicates(subset='deal_id', keep='first')

df_summary = df_summary.rename(columns={
    'seller_linked_ids': 'majority_seller_linked_id',
    'seller_names': 'majority_seller_name',
    'seller_share_values': 'majority_seller_share',
    'seller_types': 'majority_seller_type',
    'seller_share_pcts': 'majority_seller_share_pct'
})

deals = pd.merge(deals, df_summary, on='deal_id', how='left')

deals['majority_seller_linked_id'] = pd.to_numeric(deals['majority_seller_linked_id'], errors='coerce').astype('Int64') 



<span style="color:orange"> **THERE IS A BIG ERROR HERE AS ALL LINKED ID's ARE NOT INVESTORS THEY ARE ASSETS AND OTHER TYPES AS WELL** </span>

Commenting out the mapping region part for now



In [90]:
'''df_summary = investors[['investor_id','investor_country_name','investor_sub_region','investor_region']].copy()

df_summary.rename(columns={
    "investor_id":'majority_seller_linked_id',
    "investor_region": "majority_seller_region",
    "investor_country_name": "majority_seller_country_name",
    "investor_sub_region": "majority_seller_sub_region"
}, inplace=True)

deals = pd.merge (deals, df_summary, on ='majority_seller_linked_id', how = 'left')
'''

'df_summary = investors[[\'investor_id\',\'investor_country_name\',\'investor_sub_region\',\'investor_region\']].copy()\n\ndf_summary.rename(columns={\n    "investor_id":\'majority_seller_linked_id\',\n    "investor_region": "majority_seller_region",\n    "investor_country_name": "majority_seller_country_name",\n    "investor_sub_region": "majority_seller_sub_region"\n}, inplace=True)\n\ndeals = pd.merge (deals, df_summary, on =\'majority_seller_linked_id\', how = \'left\')\n'

### Filtering the deals table

**Removing aborted and live deals more than 24 months ago from the dataset**

In [91]:
deals = deals[(deals['deal_status'] != 'aborted')]

deals = deals[~((deals['deal_status'] == 'live')&
             (deals['announcement_date']<this_mth_minus_24))]

**Creating a seperate database for automated + curated deals dataset and subsetting final deals dataset to curated only**

In [92]:
deals_all_inc_automated = deals.copy()

In [93]:
deals = deals[deals['type']=='curated']

## Merging Deals and Assets Dataset

In [94]:
#drop these columns to keep ebitda and revenue range at the time of deal intact rather than look up the latest

assets_merge = assets.drop(columns=['country_code','sector','subsector','currency','fte',
                                    'fte_year','revenue','revenue_eur','revenue_year','ebitda',
                                    'ebitda_eur','ebitda_year','ebit','ebit_eur','ebit_year','total_assets','total_assets_eur',
                                    'total_assets_year','country_name','region','sub_region', 'fte', 'fte_year','ebitda_range','revenue_range'])

In [95]:
deals = pd.merge(deals, assets_merge, left_on = 'linked_asset_id', right_on = 'asset_id', how="left")

In [96]:
deals['ev_ebitda_multiple'].notna().sum()

np.int64(6080)

In [97]:
deals['ev_revenue_multiple'].notna().sum()

np.int64(10706)

### Adding missing financials and multiples to deals

<span style="color:orange"> **LATER SID TO CHECK: DOES THIS RESULT IN A LOT OF UPLIFT? ALSO DO FOR AUTOMATED ASSETS** </span>


**Used Linked Asset id to fill sectors for deals if not present**

In [98]:
# Step 2: Fill remaining blanks from assets DataFrame based on linked_asset_id
# (Assumes 'assets' has columns: 'id', 'sector', 'subsector')
assets_lookup = assets[['asset_id', 'sector', 'subsector']].rename(columns={'asset_id': 'linked_asset_id'})
deals = deals.merge(assets_lookup, on='linked_asset_id', how='left', suffixes=('', '_from_assets'))

# Fill missing sector/subsector from assets
deals['sector'] = deals['sector'].fillna(deals['sector_from_assets'])
deals['subsector'] = deals['subsector'].fillna(deals['subsector_from_assets'])

# Drop helper columns
deals.drop(columns=['sector_from_assets', 'subsector_from_assets'], inplace=True)

In [99]:
deals['ev_revenue_multiple'].notna().sum()

np.int64(10706)

In [100]:
deals['ev_eur'].notna().sum()

np.int64(22752)

<span style="color:blue"> **SID TO DO: LOOKUP DEALS FROM AUTOMATED ASSETS NOT JUST CURATED ASSETS** </span>

Issue: that where the asset is automated it's not there; think about whether revenue should be announcment data year or one year before

<span style="color:orange"> **THINK WE DON'T NEED THIS NOW AS IT'S COVERED IN MIKO's CODE** </span>

Opportunity to use automated profiles data here too which you could lookup


In [101]:
'''

#Revenue at time of deal
# --- Step 1: Explode and convert revenue to EUR ---
assets_dealstest = (
    assets
    .explode(['years_array', 'revenue_array'])
    .rename(columns={
        'years_array': 'year_for_deals',
        'revenue_array': 'revenue_for_deals',
        'name': 'asset_name'
    })[
        ['asset_id', 'asset_name', 'year_for_deals', 'revenue_for_deals',
         'revenue_eur', 'currency', 'currency_to_eur']
    ]
)

# Clean and convert types
assets_dealstest['year_for_deals'] = pd.to_numeric(assets_dealstest['year_for_deals'], errors='coerce').astype('Int64')
assets_dealstest['revenue_for_deals'] = pd.to_numeric(assets_dealstest['revenue_for_deals'], errors='coerce')

# Convert to EUR if needed
assets_dealstest['revenue_at_that_year'] = np.where(
    (assets_dealstest['currency'] == 'EUR') | assets_dealstest['currency'].isna(),
    assets_dealstest['revenue_for_deals'],
    assets_dealstest['revenue_for_deals'] * assets_dealstest['currency_to_eur'].fillna(1)
)

# Keep only relevant columns for merge
assets_dealstest = assets_dealstest[['asset_id', 'asset_name', 'year_for_deals', 'revenue_at_that_year']]

# --- Step 2: Prepare deals_temp and calculate revenue_at_deal_year ---
deals_temp = deals[['deal_id', 'linked_asset_id', 'announcement_date_year', 'asset',
                    'revenue_eur', 'ev', 'ev_revenue_multiple',
                    'buyer_reasons', 'seller_reasons']].copy()

# Compute revenue_at_deal_year directly
deals_temp['revenue_at_deal_year'] = np.where(
    deals_temp['revenue_eur'].notna(),
    deals_temp['revenue_eur'],
    np.where(
        deals_temp['ev'].notna() &
        deals_temp['ev_revenue_multiple'].notna() &
        (deals_temp['ev_revenue_multiple'] != 0),
        deals_temp['ev'] / deals_temp['ev_revenue_multiple'],
        np.nan
    )
)

# --- Step 3: Merge historical revenue and backfill missing revenue_at_deal_year ---
merged_revenue = deals_temp.merge(
    assets_dealstest,
    how='left',
    left_on=['linked_asset_id', 'announcement_date_year'],
    right_on=['asset_id', 'year_for_deals']
)

merged_revenue['revenue_at_deal_year'] = np.where(
    merged_revenue['revenue_at_deal_year'].isna(),
    merged_revenue['revenue_at_that_year'],
    merged_revenue['revenue_at_deal_year']
)

# --- Step 4: Finalize deals_temp ---
deals_temp = merged_revenue.drop(columns=['asset_id', 'year_for_deals', 'revenue_at_that_year'])

deals = deals.merge(
    deals_temp[['deal_id', 'revenue_at_deal_year']],
    how='left',
    on='deal_id'
)
# --- Step 5: Continue with your workflow ---
replicator(deals)

'''

"\n\n#Revenue at time of deal\n# --- Step 1: Explode and convert revenue to EUR ---\nassets_dealstest = (\n    assets\n    .explode(['years_array', 'revenue_array'])\n    .rename(columns={\n        'years_array': 'year_for_deals',\n        'revenue_array': 'revenue_for_deals',\n        'name': 'asset_name'\n    })[\n        ['asset_id', 'asset_name', 'year_for_deals', 'revenue_for_deals',\n         'revenue_eur', 'currency', 'currency_to_eur']\n    ]\n)\n\n# Clean and convert types\nassets_dealstest['year_for_deals'] = pd.to_numeric(assets_dealstest['year_for_deals'], errors='coerce').astype('Int64')\nassets_dealstest['revenue_for_deals'] = pd.to_numeric(assets_dealstest['revenue_for_deals'], errors='coerce')\n\n# Convert to EUR if needed\nassets_dealstest['revenue_at_that_year'] = np.where(\n    (assets_dealstest['currency'] == 'EUR') | assets_dealstest['currency'].isna(),\n    assets_dealstest['revenue_for_deals'],\n    assets_dealstest['revenue_for_deals'] * assets_dealstest['curren

<span style="color:blue"> **SID TO DO: CHECK THIS CODE BY JAGADEESH** </span>


In [102]:
#EBITDA at time of deal
'''

# --- Step 1: Explode and convert EBITDA to EUR ---
assets_ebitdatest = (
    assets
    .explode(['years_array', 'ebitda_array'])
    .rename(columns={
        'years_array': 'year_for_deals',
        'ebitda_array': 'ebitda_for_deals',
        'name': 'asset_name'
    })[
        ['asset_id', 'asset_name', 'year_for_deals', 'ebitda_for_deals',
         'currency', 'currency_to_eur']
    ]
)

# Clean and convert types
assets_ebitdatest['year_for_deals'] = pd.to_numeric(assets_ebitdatest['year_for_deals'], errors='coerce').astype('Int64')
assets_ebitdatest['ebitda_for_deals'] = pd.to_numeric(assets_ebitdatest['ebitda_for_deals'], errors='coerce')

# Convert to EUR if needed
assets_ebitdatest['ebitda_at_that_year'] = np.where(
    (assets_ebitdatest['currency'] == 'EUR') | assets_ebitdatest['currency'].isna(),
    assets_ebitdatest['ebitda_for_deals'],
    assets_ebitdatest['ebitda_for_deals'] * assets_ebitdatest['currency_to_eur'].fillna(1)
)

# Keep only relevant columns for merge
assets_ebitdatest = assets_ebitdatest[['asset_id', 'asset_name', 'year_for_deals', 'ebitda_at_that_year']]

# --- Step 2: Prepare deals_temp and calculate ebitda_at_deal_year ---
deals_temp_ebitda = deals[['deal_id', 'linked_asset_id', 'announcement_date_year', 'asset',
                           'ebitda_eur', 'ev', 'ev_ebitda_multiple']].copy()

# Compute ebitda_at_deal_year directly
deals_temp_ebitda['ebitda_at_deal_year'] = np.where(
    deals_temp_ebitda['ebitda_eur'].notna(),
    deals_temp_ebitda['ebitda_eur'],
    np.where(
        deals_temp_ebitda['ev'].notna() &
        deals_temp_ebitda['ev_ebitda_multiple'].notna() &
        (deals_temp_ebitda['ev_ebitda_multiple'] != 0),
        deals_temp_ebitda['ev'] / deals_temp_ebitda['ev_ebitda_multiple'],
        np.nan
    )
)

# --- Step 3: Merge historical ebitda and backfill missing ebitda_at_deal_year ---
merged_ebitda = deals_temp_ebitda.merge(
    assets_ebitdatest,
    how='left',
    left_on=['linked_asset_id', 'announcement_date_year'],
    right_on=['asset_id', 'year_for_deals']
)

merged_ebitda['ebitda_at_deal_year'] = np.where(
    merged_ebitda['ebitda_at_deal_year'].isna(),
    merged_ebitda['ebitda_at_that_year'],
    merged_ebitda['ebitda_at_deal_year']
)

# --- Step 4: Finalize and merge into deals ---
deals_temp_ebitda = merged_ebitda.drop(columns=['asset_id', 'year_for_deals', 'ebitda_at_that_year'])



deals = deals.merge(
    deals_temp_ebitda[['deal_id', 'ebitda_at_deal_year']],
    how='left',
    on='deal_id'
)

# --- Step 5: Continue with your workflow ---
replicator(deals)

'''

"\n\n# --- Step 1: Explode and convert EBITDA to EUR ---\nassets_ebitdatest = (\n    assets\n    .explode(['years_array', 'ebitda_array'])\n    .rename(columns={\n        'years_array': 'year_for_deals',\n        'ebitda_array': 'ebitda_for_deals',\n        'name': 'asset_name'\n    })[\n        ['asset_id', 'asset_name', 'year_for_deals', 'ebitda_for_deals',\n         'currency', 'currency_to_eur']\n    ]\n)\n\n# Clean and convert types\nassets_ebitdatest['year_for_deals'] = pd.to_numeric(assets_ebitdatest['year_for_deals'], errors='coerce').astype('Int64')\nassets_ebitdatest['ebitda_for_deals'] = pd.to_numeric(assets_ebitdatest['ebitda_for_deals'], errors='coerce')\n\n# Convert to EUR if needed\nassets_ebitdatest['ebitda_at_that_year'] = np.where(\n    (assets_ebitdatest['currency'] == 'EUR') | assets_ebitdatest['currency'].isna(),\n    assets_ebitdatest['ebitda_for_deals'],\n    assets_ebitdatest['ebitda_for_deals'] * assets_ebitdatest['currency_to_eur'].fillna(1)\n)\n\n# Keep only 

<span style="color:blue"> **SID TO DO: CHECK THIS MULTIPLES CODE** </span>


In [103]:
deals['ebitda_eur'].describe()


count    14341.00
mean       109.30
std       3404.30
min     -66533.75
25%          1.34
50%          7.20
75%         36.23
max     360414.06
Name: ebitda_eur, dtype: float64

In [104]:
deals['ev_ebitda_multiple'].describe()

count    6080.00
mean       26.81
std       256.61
min      -177.01
25%         7.50
50%        10.87
75%        16.25
max     15375.00
Name: ev_ebitda_multiple, dtype: float64

In [105]:
# Here we are filling EV values if EBITDA and EV/EBITDA or Revenue and EV/Revenue numbers are available
mask_get_ev_from_rev = (
    deals['ev_eur'].isna() &
    (deals['revenue_eur'] > 0) &
    (deals['ev_revenue_multiple'] > 0)
)
deals.loc[mask_get_ev_from_rev, 'ev_eur'] = deals.loc[mask_get_ev_from_rev, 'revenue_eur'] * deals.loc[mask_get_ev_from_rev, 'ev_revenue_multiple']

mask_get_ev_from_ebitda = (
    deals['ev_eur'].isna() &
    (deals['ebitda_eur'] > 0) &
    (deals['ev_ebitda_multiple'] > 0)
)
deals.loc[mask_get_ev_from_ebitda, 'ev_eur'] = deals.loc[mask_get_ev_from_ebitda, 'ebitda_eur'] * deals.loc[mask_get_ev_from_ebitda, 'ev_ebitda_multiple']

In [106]:
# Fill revenue if EV and EV/Revenue multiple are known
mask_get_rev_from_ev = (
    deals['revenue_eur'].isna() &
    deals['ev_eur'].notna() & (deals['ev_eur'] > 0) &
    deals['ev_revenue_multiple'].notna() & (deals['ev_revenue_multiple'] > 0)
)
deals.loc[mask_get_rev_from_ev, 'revenue_eur'] = (
    deals.loc[mask_get_rev_from_ev, 'ev_eur'] / deals.loc[mask_get_rev_from_ev, 'ev_revenue_multiple']
)

# Fill EBITDA if EV and EV/EBITDA multiple are known
mask_get_ebitda_from_ev = (
    deals['ebitda_eur'].isna() &
    deals['ev_eur'].notna() & (deals['ev_eur'] > 0) &
    deals['ev_ebitda_multiple'].notna() & (deals['ev_ebitda_multiple'] > 0)
)
deals.loc[mask_get_ebitda_from_ev, 'ebitda_eur'] = (
    deals.loc[mask_get_ebitda_from_ev, 'ev_eur'] / deals.loc[mask_get_ebitda_from_ev, 'ev_ebitda_multiple']
)


In [107]:
# Get multiples from EV and Revenue and EBITDA

mask_mult_rev = (
    deals['ev_revenue_multiple'].isna() &
    (deals['ev_eur'] > 0) &
    (deals['revenue_eur'] > 0)
)
deals.loc[mask_mult_rev, 'ev_revenue_multiple'] = deals.loc[mask_mult_rev, 'ev_eur'] / deals.loc[mask_mult_rev, 'revenue_eur']

mask_mult_ebitda = (
    deals['ev_ebitda_multiple'].isna() &
    (deals['ev_eur'] > 0) &
    (deals['ebitda_eur'] > 0)
)
deals.loc[mask_mult_ebitda, 'ev_ebitda_multiple'] = deals.loc[mask_mult_ebitda, 'ev_eur'] / deals.loc[mask_mult_ebitda, 'ebitda_eur']

**Basically in this code, we're looking up Revenue, EBITDA and other metrics from the assets table to fill missing values**

In [108]:
#Here we are generating revenue, ebitda and other metrics for the year of the deal, 1 year before, 1 year after etc.

for metric in ['revenue', 'ebitda', 'fte', 'net_debt']:
    for offset in [-2, -1, 0, 1]:
        colname = f'{metric}_{offset:+d}'
        year_str = (deals['announcement_date_year'] + offset).astype(str)
        source_cols = metric + '_' + year_str

        deals[colname] = [
            deals[col].iloc[i] if col in deals.columns else np.nan
            for i, col in enumerate(source_cols)
        ]

In [109]:
deals['ebitda_eur'].describe()


count    15508.00
mean       118.02
std       3327.30
min     -66533.75
25%          1.50
50%          7.92
75%         38.80
max     360414.06
Name: ebitda_eur, dtype: float64

In [110]:
deals['ev_ebitda_multiple'].describe()

count    6300.00
mean       27.63
std       256.22
min      -177.01
25%         7.50
50%        10.86
75%        16.30
max     15375.00
Name: ev_ebitda_multiple, dtype: float64

In [111]:
#The approach we are taking here is that if the deal was announced in Q4 then we are taking the metric for that year, if it was announceed before Q4 we are taking the metric for previous year.

def fill_metric_and_year(deals, metric, metric_col, year_col):

    # Preallocate output Series
    filled_values = deals[metric_col].copy()
    filled_years = pd.Series(np.nan, index=deals.index)

    # FOR Q4 we are taking current year metrics, followed by previous year and then following year
    # FOR Q2 and Q3 we are taking previous year metrics, followed by current year then precedent year, and then next year
    # FOR Q1 we are taking current year metrics, followed by previous year and then precedent year
    
    qtr_hi = [(0, f'{metric}_+0'), (-1, f'{metric}_-1'), (+1, f'{metric}_+1')]
    qtr_lo = [(-1, f'{metric}_-1'), (0, f'{metric}_+0'), (-2, f'{metric}_-2'), (+1, f'{metric}_+1')]
    qtr_mid = [(-1, f'{metric}_-1'), (0, f'{metric}_+0'), (+1, f'{metric}_+1')]
 

    month = deals['announcement_date_month']
    base_year = deals['announcement_date_year']

    hi_mask = (month > 9) & filled_values.isna()
    lo_mask = (month < 4) & filled_values.isna()
    mid_mask = filled_values.isna() & ~(hi_mask | lo_mask)

    def fill_metrics(mask, options):
        for offset, metric_year in options:
            valid_mask = mask & deals[metric_year].notna() & filled_values.isna()
            filled_values.loc[valid_mask] = deals.loc[valid_mask, metric_year]
            filled_years.loc[valid_mask] = base_year.loc[valid_mask] + offset

    fill_metrics(hi_mask, qtr_hi)
    fill_metrics(lo_mask, qtr_lo)
    fill_metrics(mid_mask, qtr_mid)

    deals[metric_col] = filled_values
    deals[year_col] = filled_years


In [112]:
# Fill values and years in one go — fast and clean
fill_metric_and_year(deals, 'revenue', 'revenue_eur', 'revenue_year')
fill_metric_and_year(deals, 'ebitda', 'ebitda_eur', 'ebitda_year')
fill_metric_and_year(deals, 'fte', 'fte', 'fte_year')



# Skipping the net debt reassigment as it's a balance sheet metric
# deals['net_debt_year'] = np.nan
#fill_metric_and_year(deals, 'net_debt', 'net_debt_eur', 'net_debt_year')


In [113]:
deals['ebitda_eur'].describe()


count    31270.00
mean        78.73
std       2420.04
min     -66533.75
25%         -0.21
50%          3.70
75%         18.26
max     360414.06
Name: ebitda_eur, dtype: float64

In [114]:
deals['ev_ebitda_multiple'].describe()


count    6300.00
mean       27.63
std       256.22
min      -177.01
25%         7.50
50%        10.86
75%        16.30
max     15375.00
Name: ev_ebitda_multiple, dtype: float64

In [115]:
# ----------------------------------------
# STEP 7: Re-impute EV using now available metrics
# ----------------------------------------
mask_ev_from_rev_2 = (
    deals['ev_eur'].isna() &
    (deals['revenue_eur'] > 0) &
    (deals['ev_revenue_multiple'] > 0)
)
deals.loc[mask_ev_from_rev_2, 'ev_eur'] = deals.loc[mask_ev_from_rev_2, 'revenue_eur'] * deals.loc[mask_ev_from_rev_2, 'ev_revenue_multiple']

mask_ev_from_ebitda_2 = (
    deals['ev_eur'].isna() &
    (deals['ebitda_eur'] > 0) &
    (deals['ev_ebitda_multiple'] > 0)
)
deals.loc[mask_ev_from_ebitda_2, 'ev_eur'] = deals.loc[mask_ev_from_ebitda_2, 'ebitda_eur'] * deals.loc[mask_ev_from_ebitda_2, 'ev_ebitda_multiple']


In [116]:
# ----------------------------------------
# STEP 8: Re-impute multiples using updated metrics
# ----------------------------------------
mask_mult_rev_2 = (
    deals['ev_revenue_multiple'].isna() &
    (deals['ev_eur'] > 0) &
    (deals['revenue_eur'] > 0)
)
deals.loc[mask_mult_rev_2, 'ev_revenue_multiple'] = deals.loc[mask_mult_rev_2, 'ev_eur'] / deals.loc[mask_mult_rev_2, 'revenue_eur']

mask_mult_ebitda_2 = (
    deals['ev_ebitda_multiple'].isna() &
    (deals['ev_eur'] > 0) &
    (deals['ebitda_eur'] > 0)
)
deals.loc[mask_mult_ebitda_2, 'ev_ebitda_multiple'] = deals.loc[mask_mult_ebitda_2, 'ev_eur'] / deals.loc[mask_mult_ebitda_2, 'ebitda_eur']


In [117]:
deals['ebitda_eur'].describe()


count    31270.00
mean        78.73
std       2420.04
min     -66533.75
25%         -0.21
50%          3.70
75%         18.26
max     360414.06
Name: ebitda_eur, dtype: float64

In [118]:
deals['ev_ebitda_multiple'].describe()


count    6732.00
mean       39.05
std       729.39
min      -177.01
25%         7.50
50%        11.00
75%        16.67
max     55000.00
Name: ev_ebitda_multiple, dtype: float64

## Revenue and EBITDA range mapping for deals

In [119]:
deals['deal_revenue_range'] = deals['revenue_eur'].apply(revenue_range)

In [120]:
deals['deal_ebitda_range'] = deals['ebitda_eur'].apply(ebitda_range)

# Creating Currency Columns

In [121]:
deals['revenue_usd'] = deals['revenue_eur'] / USD_TO_EUR
deals['ebitda_usd'] = deals['ebitda_eur'] / USD_TO_EUR

# CREATING PE ENTRIES, EXITS AND ADD-ONS

## PE Entries 

### Filtering PE Entries dataset

**LOGIC**

- We don't want any VC Rounds
- We only want to include deals where atleast 1 buyer was an investor
- We also exclude current ownership as venture capital
- We don't want seller reason was an IPO because it's not a private market deal

In [122]:
deals_novc = deals[-deals['buyer_reasons'].str.contains("(?i)vcRound",na=False)]
PE_entries = deals_novc[deals_novc['buyer_types_str'].str.contains("(?i)investor",na=False)]
PE_entries = PE_entries[PE_entries['ownership']!='ventureCapital']
PE_entries = PE_entries[~PE_entries['seller_reasons'].str.contains("(?i)IPO",na=False)]
PE_entries = PE_entries[~PE_entries['funding_round_type'].str.contains("(?i)series|seed", na=False)]


### Assigning entry deal types

**LOGIC**

- For a primary deal the seller type should be null or other
- For carve-out the seller type should be an asset
- For secondary the seller type should be an investor


In [123]:
PE_entries.loc[:, 'entry_deal_type'] = 'unknown'

In [124]:
PE_entries.loc[(PE_entries['seller_types_str'] == '[]')| PE_entries['seller_types_str'].str.fullmatch(r'\["other"(,\s*"other")*\]', case=False),'entry_deal_type'] = 'primary'

PE_entries.loc[PE_entries['seller_reasons'].str.contains("(?i)divestiture",na=False), 'entry_deal_type'] = 'carve-out'
PE_entries.loc[PE_entries['seller_types_str'].str.contains("(?i)asset",na=False), 'entry_deal_type'] = 'carve-out'

PE_entries.loc[PE_entries['seller_types_str'].str.contains("(?i)investor",na=False), 'entry_deal_type'] = 'secondary'

PE_entries.loc[PE_entries['majority_seller_type'].str.contains("(?i)asset",na=False), 'entry_deal_type'] = 'carve-out'
PE_entries.loc[PE_entries['majority_seller_type'].str.contains("(?i)investor",na=False), 'entry_deal_type'] = 'secondary'

PE_entries.loc[PE_entries['buyer_reasons'].str.contains("(?i)publicToPrivate",na=False), 'entry_deal_type'] = 'publicToPrivate'

In [125]:
PE_entries['entry_deal_type'].value_counts(dropna = False)

entry_deal_type
primary            20423
secondary           9849
carve-out           4735
publicToPrivate     1053
Name: count, dtype: int64

**Case Studies**

In [126]:
df_summary = PE_entries[PE_entries['sub_region']=='UK']
df_summary[df_summary['announcement_date_year']==2021]['entry_deal_type'].value_counts()

Series([], Name: count, dtype: int64)

In [127]:
PE_entries['entry_deal_type'].value_counts(dropna = False)

entry_deal_type
primary            20423
secondary           9849
carve-out           4735
publicToPrivate     1053
Name: count, dtype: int64

**Re-classifying primary deals where there has been previous deal activity**

Note: The thinking here is that if there's been capital in the asset once all subsequet deals would be secondary.

In [128]:
# Step 1: Filter for investor-type deals only
investor_deals = deals[deals['buyer_types_str'].str.contains("(?i)investor", na=False)].copy()

# Step 2: Get the earliest investor-type deal per linked_asset_id
earliest_investor_deals = (
    investor_deals.sort_values(by='announcement_date')
    .groupby('linked_asset_id', as_index=False)
    .first()[['linked_asset_id', 'announcement_date']]
    .rename(columns={'announcement_date': 'earlier_investor_deal_date'})
)

# Step 3: Merge this into PE_entries
PE_entries = PE_entries.merge(earliest_investor_deals, on='linked_asset_id', how='left')

# Step 4: Vectorized update of entry_deal_type
mask = (
    (PE_entries['entry_deal_type'] == 'primary') &
    (PE_entries['earlier_investor_deal_date'].notna()) &
    (PE_entries['earlier_investor_deal_date'] < PE_entries['announcement_date'])
)

PE_entries.loc[mask, 'entry_deal_type'] = 'secondary'

PE_entries.drop(columns='earlier_investor_deal_date', inplace=True)


# Step 5: Clean up memory
del investor_deals, earliest_investor_deals, mask
gc.collect()


4818

In [129]:
PE_entries['entry_deal_type'].value_counts(dropna = False)

entry_deal_type
primary            15866
secondary          14406
carve-out           4735
publicToPrivate     1053
Name: count, dtype: int64

## PE Exits

**LOGIC**

- Any deal that had seller as an investor
- But you want to exclude VC Exits here
- Exclude secondary post listig transactions

In [130]:
# Want to exit VC exits here
PE_exits = deals_novc[deals_novc['seller_types_str'].str.contains("(?i)investor",na=False)]
PE_exits[~PE_exits['buyer_reasons'].str.contains("(?i)vcRound",na=False)]
PE_exits = PE_exits[PE_exits['ownership']!='ventureCapital']
PE_exits = PE_exits[~PE_exits['funding_round_type'].str.contains("(?i)series|seed", na=False)]

In [131]:
deals[deals['deal_id']==10545382].T.head(100)

Unnamed: 0,167041
deal_id,10545382
announcement_date_month,7
announcement_date_year,2024
asset,WeTransfer
buyers,"[\n {\n ""leading_party"": false,\n ""link..."
currency,EUR
deal_status,
ebit,
ebit_eur,
ebit_year,


In [132]:
PE_exits.shape

(21581, 514)

**Here we are excluding secondary trasnactions from PE's post IPO sale, as they are already listed and are secondary market transactions**

If it's an IPO usually reason type is given as IPO

In [133]:
PE_exits= PE_exits[~((PE_exits['buyer_types_str']=='[]') & (PE_exits['combined_reasons'].isna())  &(PE_exits['ownership']=='listed'))]

In [134]:
PE_exits.shape

(21498, 514)

**Here we are checking if the previous round was VC round so it could be a VC exit and not PE exit**

<span style="color:orange"> **LATER TO DO: NOT SURE IF WE NEED COMBINED REASON HERE CAN DO BUYER REASON** </span>


In [135]:
# Making sure we don't take VC exits in this list, so look if previous round was a VC round this exit round is most likely a VC exit, doing this across automated deals as well because all VC deals are often not curated

deals_previous_reason = deals_all_inc_automated.sort_values(by=['linked_asset_id', 'announcement_date'], ascending=[True, False])
previous_reasons = deals_previous_reason.groupby('linked_asset_id')[['combined_reasons','funding_round_type']].shift(-1)
deals_previous_reason['previous_deal_reason'] = previous_reasons['combined_reasons']
deals_previous_reason['previous_funding_round_type'] = previous_reasons['funding_round_type']


In [136]:
PE_exits = pd.merge(PE_exits,deals_previous_reason[['deal_id','previous_deal_reason','previous_funding_round_type']],how = 'left', on = 'deal_id')
PE_exits

Unnamed: 0,deal_id,announcement_date_month,announcement_date_year,asset,buyers,currency,deal_status,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_year,equity,equity_eur,equity_year,ev,ev_ebit_multiple,ev_ebit_multiple_year,ev_ebitda_multiple,ev_ebitda_multiple_year,ev_eur,ev_revenue_multiple,ev_revenue_multiple_year,ev_total_assets_multiple,ev_total_assets_multiple_year,ev_year,fte,fte_year,funding_round_amount_raised,funding_round_amount_raised_eur,funding_round_post_money_valuation,funding_round_post_money_valuation_eur,funding_round_post_money_valuation_year,funding_round_pre_money_valuation,funding_round_pre_money_valuation_eur,funding_round_pre_money_valuation_year,funding_round_type,gain_pro_url_x,linked_asset_id,publication_date,country_code,revenue,revenue_eur,revenue_year,sector,sellers,subsector,total_assets,total_assets_eur,total_assets_year,type,buyer_linked_ids,buyer_names,buyer_types,buyer_reasons,buyer_leading_parties,buyer_share_values,buyer_share_pcts,seller_linked_ids,seller_names,seller_types,seller_reasons,seller_leading_parties,seller_share_values,seller_share_pcts,buyer_names_str,seller_names_str,buyer_types_str,seller_types_str,buyer_share_values_str,seller_share_values_str,announcement_date,announcement_date_quarter,publication_date_year_month,country_name,region,sub_region,sub_region_2_x,buyer_investor_ids,buyer_asset_ids,seller_investor_ids,seller_asset_ids,seller_investor_names,seller_asset_names,buyer_investor_names,buyer_asset_names,combined_reasons,majority_buyer_linked_id,majority_buyer_name,majority_buyer_share,majority_buyer_type,majority_buyer_share_pct,majority_seller_linked_id,majority_seller_name,majority_seller_share,majority_seller_type,majority_seller_share_pct,name,asset_id,...,capex_to_sales,revenue_chg_2014_2013,revenue_chg_2015_2014,revenue_chg_2016_2015,revenue_chg_2017_2016,revenue_chg_2018_2017,revenue_chg_2019_2018,revenue_chg_2020_2019,revenue_chg_2021_2020,revenue_chg_2022_2021,revenue_chg_2023_2022,revenue_chg_2024_2023,revenue_chg_2022_2017,revenue_chg_2023_2018,revenue_chg_2022_2019,revenue_chg_2023_2020,revenue_chg_2023_2019,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,revenue_-2,revenue_-1,revenue_+0,revenue_+1,ebitda_-2,ebitda_-1,ebitda_+0,ebitda_+1,fte_-2,fte_-1,fte_+0,fte_+1,net_debt_-2,net_debt_-1,net_debt_+0,net_debt_+1,deal_revenue_range,deal_ebitda_range,previous_deal_reason,previous_funding_round_type
0,10590356,9,2023,MemberSuite,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590356,107119.00,2025-01-20 13:22:38.193000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],"[5408, 5366]","[Revolution, Arrowroot Capital]","[investor, investor]","[""strategicExit"",""strategicExit""]","[false, false]","[minority, minority]","[, ]","[""GrowthZone""]","[""Revolution"",""Arrowroot Capital""]","[""asset""]","[""investor"",""investor""]","[""majority""]","[""minority"",""minority""]",2023-09,2023Q3,2025-01,United States of America,North America,US,-,,[148434],"[5408, 5366]",,"[Revolution, Arrowroot Capital]",,,[GrowthZone],"['strategicExit', 'strategicExit']",148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown,"['vcRound', 'vcRound', 'vcRound', 'vcRound', '...",venture
1,10590357,5,2023,GrowthZone,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,150.00,2024.00,,,,,,,,,,https://app.gain.pro/deal/10590357,148434.00,2025-01-20 13:20:53.490000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,"[82, 8177, 17278]","[Lead Edge Capital, Saratoga Investment Corp, ...","[investor, investor, investor]","[""platform"",""platform"",""platform""]","[false, false, false]","[minority, minority, minority]","[, , ]",[8218],[Greenridge Growth Partners],[investor],"[""""]",[false],[minority],[],"[""Lead Edge Capital"",""Saratoga Investment Corp...","[""Greenridge Growth Partners""]","[""investor"",""investor"",""investor""]","[""investor""]","[""minority"",""minority"",""minority""]","[""minority""]",2023-05,2023Q2,2025-01,United States of America,North America,US,-,"[82, 8177, 17278]",,[8218],,[Greenridge Growth Partners],,"[Lead Edge Capital, Saratoga Investment Corp, ...",,"['platform', 'platform', 'platform']",,,,,,,,,,,GrowthZone,148434.00,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,125.50,150.00,60.00,51.33,5.40,,,,,,,,,,,,,,,,6.16,9.60,6.86,3.33,6.90,,,,,,"(0.0, 2.0]",1.00,3.00,29.00,,,,,,,,,,,,,150.00,,,,,5_unknown,5_unknown,,
2,10590367,10,2024,Siete Foods,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,1200.00,,,,,1051.06,,,,,2024.00,,,,,,,,,,,,https://app.gain.pro/deal/10590367,398973.00,2025-01-20 00:30:25.117000+00:00,US,,,,consumer,"[\n {\n ""leading_party"": false,\n ""link...",food,,,,curated,[31910],[PepsiCo],[asset],"[""""]",[false],[majority],[],"[2333, 8226]","[Stripes Group, AF Ventures]","[investor, investor]","["""",""""]","[false, false]","[minority, minority]","[, ]","[""PepsiCo""]","[""Stripes Group"",""AF Ventures""]","[""asset""]","[""investor"",""investor""]","[""majority""]","[""minority"",""minority""]",2024-10,2024Q4,2025-01,United States of America,North America,US,-,,[31910],"[2333, 8226]",,"[Stripes Group, AF Ventures]",,,[PepsiCo],,31910,PepsiCo,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown,,
3,10590368,9,2024,Fleet Complete,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,25.00,21.90,,,,,200.00,,,8.00,,175.18,1.90,,,,2024.00,,,,,,,,,,,,https://app.gain.pro/deal/10590368,4025049.00,2025-01-20 01:16:28.095000+00:00,CA,105.00,91.97,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[342853],[Powerfleet],[asset],"[""""]",[false],[majority],[],"[305, 412]","[Ontario Teachers' Pension Plan, Madison Dearb...","[investor, investor]","[""strategicExit"",""strategicExit""]","[false, false]","[minority, minority]","[, ]","[""Powerfleet""]","[""Ontario Teachers' Pension Plan"",""Madison Dea...","[""asset""]","[""investor"",""investor""]","[""majority""]","[""minority"",""minority""]",2024-09,2024Q3,2025-01,Canada,North America,Canada,-,,[342853],"[305, 412]",,"[Ontario Teachers' Pension Plan, Madison Dearb...",,,[Powerfleet],"['strategicExit', 'strategicExit']",342853,Powerfleet,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2_medium_50_250m_eur,2_medium_10_50m_eur,,
4,10590380,3,2025,Modernizing Medicine,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,5300.00,,,,,4642.17,,,,,2025.00,1710.00,2024.00,,,,,,,,,,https://app.gain.pro/deal/10590380,39514.00,2025-05-22 12:56:52.314000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[577],[Clearlake],[investor],"[""platform""]",[false],[majority],[],[76],[Warburg Pincus],[investor],"[""""]",[false],[majority],[],"[""Clearlake""]","[""Warburg Pincus""]","[""investor""]","[""investor""]","[""majority""]","[""majority""]",2025-03,2025Q1,2025-05,United States of America,North America,US,-,[577],,[76],,[Warburg Pincus],,[Clearlake],,['platform'],577,Clearlake,majority,investor,,76,Warburg Pincus,majority,investor,,Modernizing Medicine,39514.00,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.17,,,,,,,,,,,,,,,,,1710.00,500.00,427.76,70.00,,,,,,,,,,,,,,,,17.12,13.90,16.19,6.96,9.18,,,,,,"(2.0, 5.0]",1.00,3.00,28.00,,,,,,,,,,1460.00,1710.00,,,,,,,5_unknown,5_unknown,,otherUnknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21493,10394227,2,2023,Napier,"[\n {\n ""leading_party"": false,\n ""link...",NOK,,,,,,8.55,2022.00,,,,,,,,,,,,,,,75.00,,,,,,,,,,,https://app.gain.pro/deal/10394227,999241.00,2024-08-27 09:37:59.721000+00:00,NO,,15.60,2022.00,services,"[\n {\n ""leading_party"": false,\n ""link...",technicalServices,,,,curated,"[, 1597]","[Management, CapMan]","[other, investor]","["""",""platform""]","[false, false]","[minority, majority]","[, ]","[, ]","[Kverva, Amar Group]","[investor, investor]","[""divestiture"",""divestiture""]","[false, false]","[, ]","[, ]","[""Management"",""CapMan""]","[""Kverva"",""Amar Group""]","[""other"",""investor""]","[""investor"",""investor""]","[""minority"",""majority""]","["""",""""]",2023-02,2023Q1,2024-08,Norway,Europe,Nordics,-,[1597],,,,"[Kverva, Amar Group]",,[CapMan],,"['platform', 'divestiture', 'divestiture']",1597,CapMan,majority,investor,,,,,,,Napier,999241.00,...,0.56,,,,,,,1.35,1.23,1.16,1.06,,,,0.24,0.15,0.20,0.11,,,,0.31,0.17,0.23,0.13,,,,0.19,0.03,0.13,0.02,,,,,,,,0.47,0.49,0.50,0.55,0.51,,125.50,74.00,195.24,16.58,100.39,6.26,11.05,14.92,,,-0.35,12.76,16.77,,,-1.21,15.79,17.86,,,-2.63,2.09,3.37,17.24,17.24,,,,,,"(-1.0, 0.0]",1.00,1.00,30.00,8.52,13.44,15.60,16.58,,6.70,8.55,8.52,,71.00,76.00,74.00,,22.88,23.11,51.51,,1_small_lt_50m_eur,1_small_lt_10m_eur,,
21494,10394233,7,2022,Oliver,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,500.00,,,,,,,,,,,https://app.gain.pro/deal/10394233,256890.00,2025-06-25 05:31:00.802000+00:00,US,,,,industrials,"[\n {\n ""leading_party"": false,\n ""link...",manufacturing,,,,curated,[5291],[Tenex Capital Management],[investor],"[""platform""]",[false],[majority],[],"[9381, 20174]","[Pfingsten Partners, Dunsirn Partners]","[investor, investor]","["""",""""]","[false, false]","[majority, minority]","[, ]","[""Tenex Capital Management""]","[""Pfingsten Partners"",""Dunsirn Partners""]","[""investor""]","[""investor"",""investor""]","[""majority""]","[""majority"",""minority""]",2022-07,2022Q3,2025-06,United States of America,North America,US,-,[5291],,"[9381, 20174]",,"[Pfingsten Partners, Dunsirn Partners]",,[Tenex Capital Management],,['platform'],5291,Tenex Capital Management,majority,investor,,9381,Pfingsten Partners,majority,investor,,Oliver,256890.00,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,750.00,200.00,171.10,-4.00,,,,,,,,,,,,,,,,2.79,6.89,14.47,-3.16,-3.66,,,,,,"(2.0, 5.0]",1.00,1.00,101.00,,,,,,,,,,,,500.00,,,,,,5_unknown,5_unknown,"['platform', 'platform']",
21495,10394577,10,2018,Changan New Energy Automobile Technology,"[\n {\n ""leading_party"": false,\n ""link...",CNY,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10394577,1101404.00,2025-05-14 08:39:37.095000+00:00,CN,83.06,10.10,,industrials,"[\n {\n ""leading_party"": false,\n ""link...",automotive,1179.97,143.54,2018.00,curated,[],[Changan Automobile (长安汽车)],[asset],"[""""]",[false],[minority],[0.35],[12493],[China South Industries Assets Management (南方工...,[investor],"[""strategicExit""]",[false],[minority],[0.35],"[""Changan Automobile (长安汽车)""]","[""China South Industries Assets Management (南方...","[""asset""]","[""investor""]","[""minority""]","[""minority""]",2018-10,2018Q4,2025-05,China,Asia,,-,,,[12493],,[China South Industries Assets Management (南方工...,,,[Changan Automobile (长安汽车)],['strategicExit'],,,,,,,,,,,Changan Automobile,1101404.00,...,0.03,,,,,,,,1.24,1.16,1.25,1.06,,,,0.22,,0.20,,,,,0.73,,0.33,,,,,0.07,,0.08,,,,,,,,,0.02,0.06,0.10,0.07,0.04,,55119.00,155382.56,19802.11,6745.99,6.04,15.17,15.38,,,-32.95,-22.51,6.11,,,-80.43,-64.32,-16.77,,,12.22,13.36,9.10,41.53,57.55,-13.16,-2.22,11.24,,,"(5.0, 1000.0]",1.00,0.00,163.00,859.72,,,,,,,,,,,,,,,,,1_small_lt_50m_eur,5_unknown,,
21496,10394673,6,2023,DMC Power,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,350.00,,,,,306.56,,,,,2023.00,200.00,,,,,,,,,,,https://app.gain.pro/deal/10394673,170489.00,2025-06-04 09:03:56.510000+00:00,US,,,,industrials,"[\n {\n ""leading_party"": false,\n ""link...",manufacturing,,,,curated,[5433],[Golden Gate Capital],[investor],"[""platform""]",[false],[majority],[],[17],[Bridgepoint],[investor],"[""""]",[false],[majority],[],"[""Golden Gate Capital""]","[""Bridgepoint""]","[""investor""]","[""investor""]","[""majority""]","[""majority""]",2023-06,2023Q2,2025-06,United States of America,North America,US,-,[5433],,[17],,[Bridgepoint],,[Golden Gate Capital],,['platform'],5433,Golden Gate Capital,majority,investor,,17,Bridgepoint,majority,investor,,DMC Power,170489.00,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,350.50,350.00,200.00,171.10,24.00,,,,,,,,,,,,,,,,15.67,32.29,14.95,4.03,13.97,,,,,,"(-1.0, 0.0]",1.00,0.00,12.00,,,,,,,,,,,,200.00,,,,,,5_unknown,5_unknown,['platform'],


In [137]:
PE_exits = PE_exits[~PE_exits['previous_deal_reason'].astype(str).str.contains("vcRound", case=False, na=False)]
PE_exits = PE_exits[~PE_exits['previous_funding_round_type'].str.contains("(?i)series|seed", na=False)]

In [138]:
PE_exits.shape

(19535, 516)

**Classifying all deal types as unkown**

In [139]:
PE_exits['exit_deal_type'] ='unknown'

In [140]:
# Set 'exit_deal_type' to 'other' if 'buyer_types' contains 'other'
PE_exits.loc[
    PE_exits['buyer_types_str'].str.contains('other', case=False, na=False),
    'exit_deal_type'
] = 'MBOs'

# Set 'exit_deal_type' to 'uniwn' if 'buyer_types' contains 'Empty' # These usually are secondary or unkown and a lot of live deals — so not really MBOs
PE_exits.loc[
    PE_exits['buyer_types_str']=='[]',
    'exit_deal_type'
] = 'unknown'

In [141]:
#Strategic exit takes precedence over sponsor to sponsor and IPO takes precedence over everything else

PE_exits.loc[PE_exits['buyer_names_str'].str.contains("(?i)investor",na=False), 'exit_deal_type'] = 'secondary'
PE_exits.loc[PE_exits['buyer_types_str'].str.contains("(?i)investor",na=False), 'exit_deal_type'] = 'secondary'
PE_exits.loc[PE_exits['buyer_types_str'].str.contains("(?i)asset",na=False), 'exit_deal_type'] = 'strategicExit'

PE_exits.loc[PE_exits['majority_buyer_type'].str.contains("(?i)investor",na=False), 'exit_deal_type'] = 'secondary'
PE_exits.loc[PE_exits['majority_buyer_type'].str.contains("(?i)asset",na=False), 'exit_deal_type'] = 'strategicExit'

PE_exits.loc[(PE_exits['combined_reasons'].str.contains('(?i)strategicExit', case=False, na=False)), 'exit_deal_type'] = 'strategicExit'
PE_exits.loc[PE_exits['combined_reasons'].str.contains('IPO', case=False, na=False), 'exit_deal_type'] = 'IPO'

In [142]:
PE_exits['buyer_names']

1        [Lead Edge Capital, Saratoga Investment Corp, ...
2                                                [PepsiCo]
3                                             [Powerfleet]
4                                              [Clearlake]
5                                          [Pervan family]
                               ...                        
21493                                 [Management, CapMan]
21494                           [Tenex Capital Management]
21495                          [Changan Automobile (长安汽车)]
21496                                [Golden Gate Capital]
21497                                              [NYDIG]
Name: buyer_names, Length: 19535, dtype: object

In [143]:
# Some MBOs are actually not MBOs just wrongly classified public market deals, others are also secondary deals


PE_exits = PE_exits[~((PE_exits['buyer_names_str'].str.contains("(?i)public",na=False))&(PE_exits['exit_deal_type'].str.contains("(?i)MBO",na=False)))]

# Here where the buyer type is undicslosed it's actually undiclosed
PE_exits.loc[
    (PE_exits['buyer_names_str'].str.contains("undisclosed buyer", case=False, na=False)) &
    (PE_exits['exit_deal_type'].str.contains("(?i)MBO", case=False, na=False)),
    'exit_deal_type'
] = 'unknown'



In [144]:
PE_exits.shape

(19521, 517)

In [145]:
PE_exits['exit_deal_type'].value_counts(dropna = False)

exit_deal_type
secondary        9278
strategicExit    8291
IPO               927
MBOs              685
unknown           340
Name: count, dtype: int64

In [146]:
PE_exits[['exit_deal_type','deal_status']].value_counts(dropna = False)

exit_deal_type  deal_status
secondary       NaN            9255
strategicExit   NaN            8251
IPO             NaN             905
MBOs            NaN             684
unknown         live            218
                NaN             122
strategicExit   live             40
secondary       live             23
IPO             live             22
MBOs            live              1
Name: count, dtype: int64

## PE Entries Explode

In [147]:
PE_entries_explode = PE_entries.explode(['buyer_linked_ids','buyer_types','buyer_names','buyer_share_values','buyer_share_pcts'])
PE_entries_explode = PE_entries_explode[PE_entries_explode['buyer_types']=='investor']
PE_entries_explode = PE_entries_explode[~(PE_entries_explode['buyer_linked_ids']==0)]
PE_entries_explode['buyer_linked_ids'] = PE_entries_explode['buyer_linked_ids'].replace('', pd.NA)
PE_entries_explode['buyer_linked_ids'] = pd.to_numeric(PE_entries_explode['buyer_linked_ids'], errors='coerce').astype('Int64')
#Not sure about this one as they still might be valid entries such as live deals
PE_entries_explode = PE_entries_explode[~(PE_entries_explode['buyer_linked_ids'].isna())]
PE_entries_explode.shape

(42640, 515)

In [148]:
PE_entries_explode['buyer_linked_ids']

0           82
0         8177
0        17278
1           90
2         9900
         ...  
36055     5291
36056     5444
36057     5433
36058     2682
36059     3311
Name: buyer_linked_ids, Length: 42640, dtype: Int64

**Mapping missing buyer names to investor ids**

There are missing buyer linked ids, so investor are not tagged even if the buyer name is present e.g. Goldman Sachs etc.In this code we are trying to solve for that in the exploded version. Hard to do in in the array version of the code. We have also flagged it to Tech who are fixing this. You need to do this across both entry and exit dfs.

In [149]:
alias_to_id_override = {
    # Goldman Sachs
    "Goldman Sachs PIA": 2797,
    "Goldman Sachs - PIA": 2797,
    "Goldman Sachs - Private Equity": 2797,
    "Goldman Sachs - Growth": 2797,
    "Goldman Sachs - Buyout": 2797,
    "Goldman Sachs - Real Estate": 2797,
    "Goldman Sachs (US)": 2797,
    "GS Capital Partners": 2797,
    "GS Group": 2797,
    "Broad Street Principal Investments (Goldman Sachs)": 2797,

    # J.P. Morgan
    "J.P. Morgan Asset Management": 3085,
    "J.P. Morgan Partners": 3085,
    "JPMorgan Partners": 3085,

    # Crédit Mutuel CIC
    "Crédit Mutuel-CIC": 2001,
    "CIC Finance": 2001,
    "CM-CIC Capital Privé": 2001,
    "CM-CIC Investissement": 2001,
    "CM-CIC Capital": 2001,

    # Société Générale
    "Société Générale Asset Management Alternative Investments": 1082,
    "Société Générale Entrepreneurs": 1082,
    "Société Générale Capital Partenaires": 1082,

    # Crédit Agricole
    "Crédit Agricole - Alpes Développement": 1770,
    "Crédit Agricole Unexo": 1770,
    "Credit Agricole Assurances - Predica": 1770,
    "Crédit Agricole (FR)": 1770,
    "Credit Agricole": 1770,
    "Crédit Agricole Nord-Est Partenaires": 1770,
    "Crédit Agricole Assurances": 1770,

    # ABN AMRO
    "ABN AMRO Capital": 690,
    "ABN AMRO Sustainable Impact Fund (SIF)": 690,
    "ABN AMRO - Energy Transition Fund": 690,

    "Barclays Private Equity": 5669,

    "Macquarie Group": 284,
    
    
}


In [150]:
PE_entries_explode['buyer_linked_ids'] = PE_entries_explode['buyer_linked_ids'].fillna(PE_entries_explode['buyer_names'].map(alias_to_id))
PE_entries_explode['buyer_linked_ids'] = PE_entries_explode['buyer_linked_ids'].fillna(PE_entries_explode['buyer_names'].map(alias_to_id_override))

In [151]:
PE_entries_explode.rename(columns={'announcement_date': 'entry_date','buyer_linked_ids': 'investor_id'},inplace = True)

**Lookup buyer investor region**

This works because you are only looking at investors lookup in buyer linked ids

In [152]:
df_summary = investors[['investor_id','investor_country_name','investor_sub_region','investor_region']].copy()

df_summary.rename(columns={
    "investor_region": "buyer_region",
    "investor_country_name": "buyer_country_name",
    "investor_sub_region": "buyer_sub_region"
}, inplace=True)


In [153]:
PE_entries_explode = pd.merge(PE_entries_explode,df_summary, how = 'left',left_on = 'investor_id', right_on = 'investor_id')

### TO DO ADD DEALS PERCENT

**<span style="color:blue"> TODO: WE SHOULD ALSO MAP DEAL PERCENTS HERE**


In [154]:
#df_summary = PE_entries_EU_18_24.copy()

In [155]:
#df_summary['buyer_shares'] = df_summary.apply(lambda row: row['buyer_shares'][:len(row['buyer_investor_ids'])], axis=1)
#df_summary['buyer_investor_ids'] = df_summary.apply(lambda row: row['buyer_investor_ids'][:len(row['buyer_shares'])], axis=1)

In [156]:
#df_summary = df_summary.explode(['buyer_investor_ids','buyer_shares'])

In [157]:
# For purpose of this analysis sharedMajority acts as minority only

#df_summary.loc[df_summary['buyer_shares']=='sharedMajority','buyer_shares']='minority'

In [158]:
# Count how many majority and minority owners own an investment to get ownership share

'''
deal_ownership_counts = df_summary.groupby(['deal_id']).agg(
    majority_owner_count=('buyer_shares', lambda x: (x == 'majority').sum()),
    minority_owner_count=('buyer_shares', lambda x: ((x == 'minority') | (x == 'sharedMajority')).sum())
) '''

"\ndeal_ownership_counts = df_summary.groupby(['deal_id']).agg(\n    majority_owner_count=('buyer_shares', lambda x: (x == 'majority').sum()),\n    minority_owner_count=('buyer_shares', lambda x: ((x == 'minority') | (x == 'sharedMajority')).sum())\n) "

In [159]:
#df_summary = pd.merge(df_summary, deal_ownership_counts, left_on = ['deal_id'], right_on = ['deal_id'], how='left')

In [160]:
# Look up ownership percentage from deal_pct_mapping

#df_summary = pd.merge(df_summary, deal_pct_mapping, left_on = ['majority_owner_count','minority_owner_count'], right_on = ['majority_owner_count','minority_owner_count'], how='left')


In [161]:
# Assign ownership percent whether the deal buyer took a minority share or a majority share

#df_summary['ownership_pct'] = np.where(df_summary['buyer_shares'] == 'majority', df_summary['majority_pct'],df_summary['minority_pct'])



In [162]:
#normalize the ownership share

#sum_pct = df_summary.groupby('deal_id')['ownership_pct'].transform('sum')

#df_summary['normalized_ownership_pct'] = df_summary['ownership_pct'] / sum_pct


In [163]:
#creating dataframe of PE entries which has investor details

# Step 2: Merge with investors DataFrame

'''
merged_df = df_summary.merge(
    investors[['investor_id', 'investor_sub_region', 'investor_country_name']],
    left_on='buyer_investor_ids',
    right_on='investor_id',
    how='left'
)'''

"\nmerged_df = df_summary.merge(\n    investors[['investor_id', 'investor_sub_region', 'investor_country_name']],\n    left_on='buyer_investor_ids',\n    right_on='investor_id',\n    how='left'\n)"

### Creating Majority and Minority PE Entries


In [164]:
PE_entries.shape

(36060, 515)

In [165]:
PE_entries[PE_entries['buyer_share_values_str']=='[""]']['seller_share_values_str'].value_counts()

seller_share_values_str
[]                                                          212
[""]                                                         31
["majority"]                                                 12
["minority"]                                                  3
["minority","minority","minority","minority","minority"]      2
["minority","minority","minority"]                            2
["","",""]                                                    1
["sharedMajority","sharedMajority"]                           1
["","minority"]                                               1
["",""]                                                       1
Name: count, dtype: int64

In [166]:
PE_entries[PE_entries['buyer_share_values_str']=='[""]']

Unnamed: 0,deal_id,announcement_date_month,announcement_date_year,asset,buyers,currency,deal_status,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_year,equity,equity_eur,equity_year,ev,ev_ebit_multiple,ev_ebit_multiple_year,ev_ebitda_multiple,ev_ebitda_multiple_year,ev_eur,ev_revenue_multiple,ev_revenue_multiple_year,ev_total_assets_multiple,ev_total_assets_multiple_year,ev_year,fte,fte_year,funding_round_amount_raised,funding_round_amount_raised_eur,funding_round_post_money_valuation,funding_round_post_money_valuation_eur,funding_round_post_money_valuation_year,funding_round_pre_money_valuation,funding_round_pre_money_valuation_eur,funding_round_pre_money_valuation_year,funding_round_type,gain_pro_url_x,linked_asset_id,publication_date,country_code,revenue,revenue_eur,revenue_year,sector,sellers,subsector,total_assets,total_assets_eur,total_assets_year,type,buyer_linked_ids,buyer_names,buyer_types,buyer_reasons,buyer_leading_parties,buyer_share_values,buyer_share_pcts,seller_linked_ids,seller_names,seller_types,seller_reasons,seller_leading_parties,seller_share_values,seller_share_pcts,buyer_names_str,seller_names_str,buyer_types_str,seller_types_str,buyer_share_values_str,seller_share_values_str,announcement_date,announcement_date_quarter,publication_date_year_month,country_name,region,sub_region,sub_region_2_x,buyer_investor_ids,buyer_asset_ids,seller_investor_ids,seller_asset_ids,seller_investor_names,seller_asset_names,buyer_investor_names,buyer_asset_names,combined_reasons,majority_buyer_linked_id,majority_buyer_name,majority_buyer_share,majority_buyer_type,majority_buyer_share_pct,majority_seller_linked_id,majority_seller_name,majority_seller_share,majority_seller_type,majority_seller_share_pct,name,asset_id,...,capex_to_sales_2024,capex_to_sales,revenue_chg_2014_2013,revenue_chg_2015_2014,revenue_chg_2016_2015,revenue_chg_2017_2016,revenue_chg_2018_2017,revenue_chg_2019_2018,revenue_chg_2020_2019,revenue_chg_2021_2020,revenue_chg_2022_2021,revenue_chg_2023_2022,revenue_chg_2024_2023,revenue_chg_2022_2017,revenue_chg_2023_2018,revenue_chg_2022_2019,revenue_chg_2023_2020,revenue_chg_2023_2019,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,revenue_-2,revenue_-1,revenue_+0,revenue_+1,ebitda_-2,ebitda_-1,ebitda_+0,ebitda_+1,fte_-2,fte_-1,fte_+0,fte_+1,net_debt_-2,net_debt_-1,net_debt_+0,net_debt_+1,deal_revenue_range,deal_ebitda_range,entry_deal_type
785,10599451,3,2022,Clinias Dental Group,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,1.31,2022.00,,,,,,,,,,,,,,,176.00,2022.00,,,,,,,,,,https://app.gain.pro/deal/10599451,15343.00,2025-02-17 07:33:55.172000+00:00,NL,,23.17,2022.00,scienceHealth,[],healthcareServices,,,,curated,[15],[Bencis],[investor],"[""platform""]",[false],[],[],[],[],[],,[],[],[],"[""Bencis""]",[],"[""investor""]",[],"[""""]",[],2022-03,2022Q1,2025-02,Netherlands,Europe,Benelux,-,[15],,,,,,[Bencis],,['platform'],,,,,,,,,,,Clinias Dental Group,15343.00,...,,0.13,,,,,,,,,,1.88,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.06,0.06,,350.50,261.00,43.46,43.46,2.54,87.59,,,,,93.52,,,,,,,,,,48.30,45.57,226.75,7.69,27.27,90.74,,,,,"(5.0, 1000.0]",1.00,2.00,3.00,2.54,,,23.17,43.46,,,1.31,2.54,,,176.00,261.00,,,-2.43,-3.09,1_small_lt_50m_eur,1_small_lt_10m_eur,primary
1045,10602715,3,2022,North Penn Telephone Co,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10602715,3686956.00,2025-02-25 13:40:38.281000+00:00,US,,,,tmt,[],telecom,,,,curated,[91],[Antin],[investor],"[""platform""]",[false],[],[],[],[],[],,[],[],[],"[""Antin""]",[],"[""investor""]",[],"[""""]",[],2022-03,2022Q1,2025-02,United States of America,North America,US,-,[91],,,,,,[Antin],,['platform'],,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown,primary
1464,10607696,2,2025,Whip Media,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,110.00,2024.00,,,,,,,,,,https://app.gain.pro/deal/10607696,457534.00,2025-03-18 05:22:56.567000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[18182],[Blue Torch Capital],[investor],"[""platform""]",[false],[],[],"[1844, 806, 7216, 5899, 7157]","[Greycroft, Smedvig Capital, WME Ventures, Com...","[investor, investor, investor, investor, inves...","["""","""","""","""",""""]","[false, false, false, false, false]","[minority, minority, minority, minority, minor...","[, , , , ]","[""Blue Torch Capital""]","[""Greycroft"",""Smedvig Capital"",""WME Ventures"",...","[""investor""]","[""investor"",""investor"",""investor"",""investor"",""...","[""""]","[""minority"",""minority"",""minority"",""minority"",""...",2025-02,2025Q1,2025-03,United States of America,North America,US,-,[18182],,"[1844, 806, 7216, 5899, 7157]",,"[Greycroft, Smedvig Capital, WME Ventures, Com...",,[Blue Torch Capital],,['platform'],,,,,,,,,,,Whip Media,457534.00,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.04,,,,,,,,,,,,,125.50,110.00,40.00,34.22,8.00,,,,,,,,,,,,,,,,-18.03,-18.05,-17.63,-3.85,-3.85,,,,,,"(-1.0, 0.0]",1.00,5.00,11.00,,,,,,,,,,,110.00,,,,,,,5_unknown,5_unknown,secondary
2342,10616685,4,2025,Colonial Enterprises,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,1000.00,875.88,,,,,9000.00,,,9.00,2025.00,7882.94,,,,,2025.00,,,,,,,,,,,,https://app.gain.pro/deal/10616685,,2025-04-04 12:17:05.348000+00:00,US,,,,materialsEnergy,"[\n {\n ""leading_party"": false,\n ""link...",energy,,,,curated,[90],[Brookfield Infrastructure Partners - Colossus...,[investor],"[""platform""]",[false],[],[],"[349, 6623, 7235, 48, 647]","[Caisse de dépôt et placement du Québec, Shell...","[investor, asset, investor, investor, investor]","["""",""divestiture"","""","""",""""]","[false, false, false, false, false]","[minority, minority, minority, minority, minor...","[0.1655, 0.16125, 0.281, 0.23440000000000003, ...","[""Brookfield Infrastructure Partners - Colossu...","[""Caisse de dépôt et placement du Québec"",""She...","[""investor""]","[""investor"",""asset"",""investor"",""investor"",""inv...","[""""]","[""minority"",""minority"",""minority"",""minority"",""...",2025-04,2025Q2,2025-04,United States of America,North America,US,-,[90],,"[349, 7235, 48, 647]",[6623],"[Caisse de dépôt et placement du Québec, Koch ...",[Shell],[Brookfield Infrastructure Partners - Colossus...,,"['platform', 'divestiture']",,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,4_mega_large_gt_200m_eur,secondary
2896,10623015,4,2025,U.S. motor insurance business,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10623015,,2025-04-24 07:40:50.442000+00:00,US,,,,financial,"[\n {\n ""leading_party"": false,\n ""link...",insurance,,,,curated,[554],[J.C. Flowers & Co],[investor],"[""platform""]",[false],[],[],[1267167],[Admiral Group],[asset],"[""""]",[false],[],[],"[""J.C. Flowers & Co""]","[""Admiral Group""]","[""investor""]","[""asset""]","[""""]","[""""]",2025-04,2025Q2,2025-04,United States of America,North America,US,-,[554],,,[1267167],,[Admiral Group],[J.C. Flowers & Co],,['platform'],,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown,carve-out
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35931,10383361,5,2016,Advisor Group,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10383361,334497.00,2024-07-13 02:01:24.802000+00:00,US,,,,financial,[],assetManagement,,,,curated,[1767],[Lightyear Capital],[investor],"[""""]",[false],[],[],[],[],[],,[],[],[],"[""Lightyear Capital""]",[],"[""investor""]",[],"[""""]",[],2016-05,2016Q2,2024-07,United States of America,North America,US,-,[1767],,,,,,[Lightyear Capital],,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown,primary
35942,10383923,1,2015,Enbi Group,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10383923,31661.00,2024-07-13 02:01:24.802000+00:00,US,,,,industrials,[],manufacturing,,,,curated,[13401],[Platinum Equity],[investor],"[""""]",[false],[],[],[],[],[],,[],[],[],"[""Platinum Equity""]",[],"[""investor""]",[],"[""""]",[],2015-01,2015Q1,2024-07,United States of America,North America,US,-,[13401],,,,,,[Platinum Equity],,,,,,,,,,,,,Enbi Group,31661.00,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.22,8.04,,,,,,"(-1.0, 0.0]",1.00,1.00,75.00,,,,,,,,,,,,,,,,,,5_unknown,5_unknown,primary
35972,10386513,2,2019,Ultimate Software,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10386513,3845031.00,2024-07-13 02:01:24.802000+00:00,US,,,,tmt,[],software,,,,curated,[397],[Hellman & Friedman],[investor],"[""platform""]",[false],[],[],[],[],[],,[],[],[],"[""Hellman & Friedman""]",[],"[""investor""]",[],"[""""]",[],2019-02,2019Q1,2024-07,United States of America,North America,US,-,[397],,,,,,[Hellman & Friedman],,['platform'],,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown,secondary
36035,10392391,7,2021,ARE YOU NEIGHBORLY INC,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10392391,160828.00,2024-07-13 02:01:24.802000+00:00,US,,,,services,[],professionalServices,,,,curated,[48],[KKR],[investor],"[""platform""]",[false],[],[],[],[],[],,[],[],[],"[""KKR""]",[],"[""investor""]",[],"[""""]",[],2021-07,2021Q3,2024-07,United States of America,North America,US,-,[48],,,,,,[KKR],,['platform'],,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown,secondary


In [167]:
PE_entries_majority = PE_entries.loc[
    PE_entries['buyer_share_values_str'].str.contains("(?i)majority", na=False)]
PE_entries_majority.shape

(23322, 515)

In [168]:
PE_entries_minority = PE_entries.loc[~PE_entries['buyer_share_values_str'].str.contains("(?i)majority", na=False)]
print(PE_entries_minority.shape)
#PE_entries_minority = PE_entries_minority.loc[PE_entries_minority['buyer_share_values_str'].str.contains("(?i)minority", na=False)]
#print(PE_entries_minority.shape)

(12738, 515)


## PE Exits Explode

In [169]:
PE_exits_explode = PE_exits.explode(['seller_linked_ids','seller_types','seller_names','seller_share_values','seller_share_pcts'])
PE_exits_explode = PE_exits_explode[PE_exits_explode['seller_types']=='investor']
PE_exits_explode['seller_linked_ids'] = PE_exits_explode['seller_linked_ids'].replace('', pd.NA)
PE_exits_explode = PE_exits_explode[~(PE_exits_explode['seller_linked_ids']==0)]
PE_exits_explode.shape

(27262, 517)

**Looking up missing seller linked ids based on seller names**

In [170]:
PE_exits_explode['seller_linked_ids'] = PE_exits_explode['seller_linked_ids'].fillna(PE_exits_explode['seller_names'].map(alias_to_id))
PE_exits_explode['seller_linked_ids'] = PE_exits_explode['seller_linked_ids'].fillna(PE_exits_explode['seller_names'].map(alias_to_id_override))

PE_exits_explode['seller_linked_ids'] = pd.to_numeric(PE_exits_explode['seller_linked_ids'], errors='coerce').astype('Int64')

In [171]:
#Still some missing actually
PE_exits_explode[PE_exits_explode['seller_linked_ids'].isna()][['seller_linked_ids','seller_names']].value_counts(dropna = False)

seller_linked_ids  seller_names                
<NA>               Founder                         11
                   Rabo Participaties               8
                   Credit Mutuel Arkea              8
                   Vertex Ventures                  7
                   Caird Capital                    6
                                                   ..
                   Ferdinand Piëch Beteiligungs     1
                   Fenera Holding                   1
                   Fast-Up Partners                 1
                   Fashion Capital Partners         1
                   ​Italglobal Partners             1
Name: count, Length: 2328, dtype: int64

In [172]:
#Not sure about this one to be honest as there are sometimes no sellers but it could still be a valid PE exit

PE_exits_explode = PE_exits_explode[~(PE_exits_explode['seller_linked_ids'].isna())]

**Map the country of the seller**





In [173]:
PE_exits_explode.rename(columns={'announcement_date': 'exit_date','seller_linked_ids': 'investor_id'},inplace = True)

In [174]:
df_summary = investors[['investor_id','investor_country_name','investor_sub_region','investor_region']].copy()

df_summary.rename(columns={
    "investor_region": "seller_region",
    "investor_country_name": "seller_country_name",
    "investor_sub_region": "seller_sub_region"
}, inplace=True)


In [175]:
PE_exits_explode = pd.merge(PE_exits_explode,df_summary, how = 'left',left_on = 'investor_id', right_on = 'investor_id')


## PE Add-ons

PE add-ons by current PE owned assets. Another thing with Add-ons is that a EU owned asset could have global add-ons so apllying regional filter is not the right way to do things

In [176]:
assets['ownership'].value_counts()

ownership
listed            46513
private           14419
regular           13414
ventureCapital     9891
minority           6647
subsidiary         3126
other               572
bankrupt            322
government          109
Name: count, dtype: int64

In [177]:
PE_assets = assets[assets['ownership'].isin(['regular','minority'])]

In [178]:
#Here we are considering global add-ons so Europe firms even buying international but just owned by current PE assets, we are also considering add-ons here for assets that were bought before they came into PE acquisition but this would be a low number

PE_assets['asset_id'] = PE_assets['asset_id'].astype(str)

all_ids = set(PE_assets['asset_id'])

PE_add_ons = deals[
    deals['buyer_asset_ids'].apply(
        lambda x: isinstance(x, list) and any(item in all_ids for item in x)
    )
]

PE_add_ons.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PE_assets['asset_id'] = PE_assets['asset_id'].astype(str)


Unnamed: 0,deal_id,announcement_date_month,announcement_date_year,asset,buyers,currency,deal_status,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_year,equity,equity_eur,equity_year,ev,ev_ebit_multiple,ev_ebit_multiple_year,ev_ebitda_multiple,ev_ebitda_multiple_year,ev_eur,ev_revenue_multiple,ev_revenue_multiple_year,ev_total_assets_multiple,ev_total_assets_multiple_year,ev_year,fte,fte_year,funding_round_amount_raised,funding_round_amount_raised_eur,funding_round_post_money_valuation,funding_round_post_money_valuation_eur,funding_round_post_money_valuation_year,funding_round_pre_money_valuation,funding_round_pre_money_valuation_eur,funding_round_pre_money_valuation_year,funding_round_type,gain_pro_url_x,linked_asset_id,publication_date,country_code,revenue,revenue_eur,revenue_year,sector,sellers,subsector,total_assets,total_assets_eur,total_assets_year,type,buyer_linked_ids,buyer_names,buyer_types,buyer_reasons,buyer_leading_parties,buyer_share_values,buyer_share_pcts,seller_linked_ids,seller_names,seller_types,seller_reasons,seller_leading_parties,seller_share_values,seller_share_pcts,buyer_names_str,seller_names_str,buyer_types_str,seller_types_str,buyer_share_values_str,seller_share_values_str,announcement_date,announcement_date_quarter,publication_date_year_month,country_name,region,sub_region,sub_region_2_x,buyer_investor_ids,buyer_asset_ids,seller_investor_ids,seller_asset_ids,seller_investor_names,seller_asset_names,buyer_investor_names,buyer_asset_names,combined_reasons,majority_buyer_linked_id,majority_buyer_name,majority_buyer_share,majority_buyer_type,majority_buyer_share_pct,majority_seller_linked_id,majority_seller_name,majority_seller_share,majority_seller_type,majority_seller_share_pct,name,asset_id,...,net_debt_by_ebitda_2024,capex_to_sales_2024,capex_to_sales,revenue_chg_2014_2013,revenue_chg_2015_2014,revenue_chg_2016_2015,revenue_chg_2017_2016,revenue_chg_2018_2017,revenue_chg_2019_2018,revenue_chg_2020_2019,revenue_chg_2021_2020,revenue_chg_2022_2021,revenue_chg_2023_2022,revenue_chg_2024_2023,revenue_chg_2022_2017,revenue_chg_2023_2018,revenue_chg_2022_2019,revenue_chg_2023_2020,revenue_chg_2023_2019,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,revenue_-2,revenue_-1,revenue_+0,revenue_+1,ebitda_-2,ebitda_-1,ebitda_+0,ebitda_+1,fte_-2,fte_-1,fte_+0,fte_+1,net_debt_-2,net_debt_-1,net_debt_+0,net_debt_+1,deal_revenue_range,deal_ebitda_range
0,10590355,5,2024,JUNO,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590355,,2025-01-20 13:22:03.841000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],[],[Josh Hotsenpiller],[other],"[""""]",[false],[minority],[],"[""GrowthZone""]","[""Josh Hotsenpiller""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2024-05,2024Q2,2025-01,United States of America,North America,US,-,,[148434],,,,,,[GrowthZone],,148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
1,10590356,9,2023,MemberSuite,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590356,107119.0,2025-01-20 13:22:38.193000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],"[5408, 5366]","[Revolution, Arrowroot Capital]","[investor, investor]","[""strategicExit"",""strategicExit""]","[false, false]","[minority, minority]","[, ]","[""GrowthZone""]","[""Revolution"",""Arrowroot Capital""]","[""asset""]","[""investor"",""investor""]","[""majority""]","[""minority"",""minority""]",2023-09,2023Q3,2025-01,United States of America,North America,US,-,,[148434],"[5408, 5366]",,"[Revolution, Arrowroot Capital]",,,[GrowthZone],"['strategicExit', 'strategicExit']",148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
3,10590358,2,2020,Resource-One,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590358,,2025-01-20 16:16:11.060000+00:00,US,,,,services,[],technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Superior Environmental Solutions""]",[],"[""asset""]",[],"[""majority""]",[],2020-02,2020Q1,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
4,10590359,1,2023,Mountain Environmental Services,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590359,,2025-01-20 13:21:25.924000+00:00,US,,,,services,[],technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Superior Environmental Solutions""]",[],"[""asset""]",[],"[""majority""]",[],2023-01,2023Q1,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
5,10590360,5,2024,Arrowhead Environmental Services,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,50.0,,,,,,,,,,,https://app.gain.pro/deal/10590360,,2025-01-20 13:23:46.815000+00:00,US,,,,services,"[\n {\n ""leading_party"": false,\n ""link...",technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[Michael Luter],[other],"[""""]",[false],[minority],[],"[""Superior Environmental Solutions""]","[""Michael Luter""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2024-05,2024Q2,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown


In [179]:
PE_add_ons.shape

(50615, 514)

**Only take majority investments, minority investments are not neeeded or VC rounds, these are typically not acquisitions, e.g. Open AI with Microsoft or Claude with Amazon etc.**

In [180]:
PE_add_ons = PE_add_ons[
    PE_add_ons['buyer_share_values'].apply(
        lambda x: any(i in ['majority', 'sharedMajority'] for i in x) if isinstance(x, list) else False
    ) &
    (~PE_add_ons['buyer_reasons'].str.contains("(?i)vcRound", na=False))
]

PE_add_ons

Unnamed: 0,deal_id,announcement_date_month,announcement_date_year,asset,buyers,currency,deal_status,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_year,equity,equity_eur,equity_year,ev,ev_ebit_multiple,ev_ebit_multiple_year,ev_ebitda_multiple,ev_ebitda_multiple_year,ev_eur,ev_revenue_multiple,ev_revenue_multiple_year,ev_total_assets_multiple,ev_total_assets_multiple_year,ev_year,fte,fte_year,funding_round_amount_raised,funding_round_amount_raised_eur,funding_round_post_money_valuation,funding_round_post_money_valuation_eur,funding_round_post_money_valuation_year,funding_round_pre_money_valuation,funding_round_pre_money_valuation_eur,funding_round_pre_money_valuation_year,funding_round_type,gain_pro_url_x,linked_asset_id,publication_date,country_code,revenue,revenue_eur,revenue_year,sector,sellers,subsector,total_assets,total_assets_eur,total_assets_year,type,buyer_linked_ids,buyer_names,buyer_types,buyer_reasons,buyer_leading_parties,buyer_share_values,buyer_share_pcts,seller_linked_ids,seller_names,seller_types,seller_reasons,seller_leading_parties,seller_share_values,seller_share_pcts,buyer_names_str,seller_names_str,buyer_types_str,seller_types_str,buyer_share_values_str,seller_share_values_str,announcement_date,announcement_date_quarter,publication_date_year_month,country_name,region,sub_region,sub_region_2_x,buyer_investor_ids,buyer_asset_ids,seller_investor_ids,seller_asset_ids,seller_investor_names,seller_asset_names,buyer_investor_names,buyer_asset_names,combined_reasons,majority_buyer_linked_id,majority_buyer_name,majority_buyer_share,majority_buyer_type,majority_buyer_share_pct,majority_seller_linked_id,majority_seller_name,majority_seller_share,majority_seller_type,majority_seller_share_pct,name,asset_id,...,net_debt_by_ebitda_2024,capex_to_sales_2024,capex_to_sales,revenue_chg_2014_2013,revenue_chg_2015_2014,revenue_chg_2016_2015,revenue_chg_2017_2016,revenue_chg_2018_2017,revenue_chg_2019_2018,revenue_chg_2020_2019,revenue_chg_2021_2020,revenue_chg_2022_2021,revenue_chg_2023_2022,revenue_chg_2024_2023,revenue_chg_2022_2017,revenue_chg_2023_2018,revenue_chg_2022_2019,revenue_chg_2023_2020,revenue_chg_2023_2019,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,revenue_-2,revenue_-1,revenue_+0,revenue_+1,ebitda_-2,ebitda_-1,ebitda_+0,ebitda_+1,fte_-2,fte_-1,fte_+0,fte_+1,net_debt_-2,net_debt_-1,net_debt_+0,net_debt_+1,deal_revenue_range,deal_ebitda_range
0,10590355,5,2024,JUNO,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590355,,2025-01-20 13:22:03.841000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],[],[Josh Hotsenpiller],[other],"[""""]",[false],[minority],[],"[""GrowthZone""]","[""Josh Hotsenpiller""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2024-05,2024Q2,2025-01,United States of America,North America,US,-,,[148434],,,,,,[GrowthZone],,148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
1,10590356,9,2023,MemberSuite,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590356,107119.00,2025-01-20 13:22:38.193000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],"[5408, 5366]","[Revolution, Arrowroot Capital]","[investor, investor]","[""strategicExit"",""strategicExit""]","[false, false]","[minority, minority]","[, ]","[""GrowthZone""]","[""Revolution"",""Arrowroot Capital""]","[""asset""]","[""investor"",""investor""]","[""majority""]","[""minority"",""minority""]",2023-09,2023Q3,2025-01,United States of America,North America,US,-,,[148434],"[5408, 5366]",,"[Revolution, Arrowroot Capital]",,,[GrowthZone],"['strategicExit', 'strategicExit']",148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
3,10590358,2,2020,Resource-One,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590358,,2025-01-20 16:16:11.060000+00:00,US,,,,services,[],technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Superior Environmental Solutions""]",[],"[""asset""]",[],"[""majority""]",[],2020-02,2020Q1,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
4,10590359,1,2023,Mountain Environmental Services,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590359,,2025-01-20 13:21:25.924000+00:00,US,,,,services,[],technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Superior Environmental Solutions""]",[],"[""asset""]",[],"[""majority""]",[],2023-01,2023Q1,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
5,10590360,5,2024,Arrowhead Environmental Services,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,50.00,,,,,,,,,,,https://app.gain.pro/deal/10590360,,2025-01-20 13:23:46.815000+00:00,US,,,,services,"[\n {\n ""leading_party"": false,\n ""link...",technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[Michael Luter],[other],"[""""]",[false],[minority],[],"[""Superior Environmental Solutions""]","[""Michael Luter""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2024-05,2024Q2,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202528,10394375,4,2023,Nura Pain Clinics,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10394375,163390.00,2025-08-21 01:39:04.538000+00:00,US,,,,scienceHealth,"[\n {\n ""leading_party"": false,\n ""link...",healthcareServices,,,,curated,[256188],[Capitol Pain Institute],[asset],"[""""]",[false],[majority],[],[],[David Schultz],[other],"[""""]",[false],[minority],[],"[""Capitol Pain Institute""]","[""David Schultz""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2023-04,2023Q2,2025-08,United States of America,North America,US,-,,[256188],,,,,,[Capitol Pain Institute],,256188,Capitol Pain Institute,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
202531,10394416,12,2023,La Colombe,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,900.00,,,,,788.29,,,,,2023.00,,,,,,,,,,,,https://app.gain.pro/deal/10394416,159450.00,2025-08-11 09:29:41.639000+00:00,US,,,,consumer,"[\n {\n ""leading_party"": false,\n ""link...",food,,,,curated,[149515],[Chobani],[asset],"[""""]",[false],[majority],[],"[160203, , ]","[Keurig Dr Pepper, Todd Carmichael, JP Iberti]","[asset, other, other]","[""divestiture"","""",""""]","[false, false, false]","[minority, minority, minority]","[, , ]","[""Chobani""]","[""Keurig Dr Pepper"",""Todd Carmichael"",""JP Iber...","[""asset""]","[""asset"",""other"",""other""]","[""majority""]","[""minority"",""minority"",""minority""]",2023-12,2023Q4,2025-08,United States of America,North America,US,-,,[149515],,[160203],,[Keurig Dr Pepper],,[Chobani],['divestiture'],149515,Chobani,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
202534,10394452,5,2022,Advanced Assembly,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10394452,171783.00,2024-11-13 11:53:14.033000+00:00,US,,,,industrials,"[\n {\n ""leading_party"": false,\n ""link...",manufacturing,,,,curated,[272706],[Summit Interconnect],[asset],"[""""]",[false],[majority],[],[],[Lawrence Davis],[other],"[""""]",[false],[minority],[],"[""Summit Interconnect""]","[""Lawrence Davis""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2022-05,2022Q2,2024-11,United States of America,North America,US,-,,[272706],,,,,,[Summit Interconnect],,272706,Summit Interconnect,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
202564,10394756,4,2022,Chateaux,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10394756,163734.00,2024-11-21 05:21:04.191000+00:00,US,,,,services,"[\n {\n ""leading_party"": false,\n ""link...",professionalServices,,,,curated,[106143],[Coretelligent],[asset],"[""""]",[false],[majority],[],[],[Ken Zimmerman],[other],"[""""]",[false],[minority],[],"[""Coretelligent""]","[""Ken Zimmerman""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2022-04,2022Q2,2024-11,United States of America,North America,US,-,,[106143],,,,,,[Coretelligent],,106143,Coretelligent,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown


### Mapping Historical addons of PE owned companies that might have exited

In [181]:
#Not sure here if we should take the min appraoch — now we're taking the approach that every entry and exit is a holding period, maybe for add-ons works

PE_entry_date_add_ons = PE_entries_explode.groupby(
    ['linked_asset_id', 'investor_id', 'ownership', 'sector'], dropna=False
).agg(entry_date=('entry_date', 'min')).reset_index()

    
#There are historical entries that don't have a linked id so that means it's going to be hard to tell their add-on entries as well
PE_entry_date_add_ons = PE_entry_date_add_ons[PE_entry_date_add_ons['linked_asset_id'].notna()]
PE_entry_date_add_ons = PE_entry_date_add_ons[PE_entry_date_add_ons['investor_id'].notna()]

PE_entry_date_add_ons['investor_id'] = pd.to_numeric(PE_entry_date_add_ons['investor_id'], errors='coerce').astype('Int64')
PE_entry_date_add_ons['linked_asset_id'] = pd.to_numeric(PE_entry_date_add_ons['linked_asset_id'], errors='coerce').astype('Int64')


PE_entry_date_add_ons


Unnamed: 0,linked_asset_id,investor_id,ownership,sector,entry_date
0,1,4,regular,industrials,2015-07
1,1,33,regular,industrials,2015-07
2,1,60,regular,industrials,2015-07
3,2,33,regular,industrials,2017-12
4,3,57,bankrupt,industrials,1992-02
...,...,...,...,...,...
37301,4694735,21369,listed,materialsEnergy,2025-08
37302,4694748,76,,consumer,2020-07
37303,4694748,2132,,consumer,2020-07
37304,4694748,2691,,consumer,2020-07


In [182]:
PE_exit_date_add_ons = PE_exits_explode.groupby(['linked_asset_id','investor_id'],dropna = False).agg(exit_date = ('exit_date','max'))
PE_exit_date_add_ons = PE_exit_date_add_ons.reset_index()

PE_exit_date_add_ons = PE_exit_date_add_ons[PE_exit_date_add_ons['linked_asset_id'].notna()]
PE_exit_date_add_ons = PE_exit_date_add_ons[PE_exit_date_add_ons['investor_id'].notna()]

PE_exit_date_add_ons['investor_id'] = pd.to_numeric(PE_exit_date_add_ons['investor_id'], errors='coerce').astype('Int64')
PE_exit_date_add_ons['linked_asset_id'] = pd.to_numeric(PE_exit_date_add_ons['linked_asset_id'], errors='coerce').astype('Int64')



In [183]:
PE_exit_date_add_ons[PE_exit_date_add_ons['linked_asset_id']==5]

Unnamed: 0,linked_asset_id,investor_id,exit_date
0,5,52,2020-01


In [184]:
PE_entry_exit_add_ons = pd.merge(PE_entry_date_add_ons,PE_exit_date_add_ons,how = 'left',left_on = ['linked_asset_id','investor_id'],right_on = ['linked_asset_id','investor_id']).reset_index()
PE_entry_exit_add_ons.rename(columns={'linked_asset_id': 'asset_id'},inplace = True)

In [185]:
PE_entry_exit_add_ons[PE_entry_exit_add_ons['asset_id']==1][['entry_date','exit_date']]

Unnamed: 0,entry_date,exit_date
0,2015-07,
1,2015-07,
2,2015-07,


In [186]:
'''PE_entry_exit_majority = pd.merge(PE_entry_date_majority,PE_exit_date,how = 'left',left_on = ['linked_asset_id','investor_id'],right_on = ['linked_asset_id','investor_id']).reset_index()
PE_entry_exit_majority.rename(columns={'linked_asset_id': 'asset_id'},inplace = True)'''

"PE_entry_exit_majority = pd.merge(PE_entry_date_majority,PE_exit_date,how = 'left',left_on = ['linked_asset_id','investor_id'],right_on = ['linked_asset_id','investor_id']).reset_index()\nPE_entry_exit_majority.rename(columns={'linked_asset_id': 'asset_id'},inplace = True)"

In [187]:
assets['ownership'].value_counts()

ownership
listed            46513
private           14419
regular           13414
ventureCapital     9891
minority           6647
subsidiary         3126
other               572
bankrupt            322
government          109
Name: count, dtype: int64

In [188]:
PE_entry_date_add_ons['ownership'].value_counts(dropna = False)

ownership
regular       22187
minority       9034
subsidiary     2462
listed         1871
NaN             757
private         602
bankrupt        247
other           133
government       13
Name: count, dtype: int64

In [189]:
#PE_entry_date_majority['ownership'].value_counts()

In [190]:
PE_entry_exit_add_ons[(PE_entry_exit_add_ons['exit_date'].isna())]['ownership'].value_counts()

ownership
regular       15185
minority       7479
listed         1060
subsidiary      447
bankrupt        177
private          98
other            55
government       11
Name: count, dtype: int64

In [191]:
# Strip trailing ".0" and cast to string
PE_entry_exit_add_ons['asset_id'] = PE_entry_exit_add_ons['asset_id'].astype('Int64').astype(str)
PE_assets['asset_id'] = PE_assets['asset_id'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PE_assets['asset_id'] = PE_assets['asset_id'].astype(str)


In [192]:
PE_entry_exit_add_ons.shape

(37306, 7)

In [193]:
PE_entry_exit_historical_not_pe = PE_entry_exit_add_ons[(PE_entry_exit_add_ons['exit_date'].notna()) & (-PE_entry_exit_add_ons['asset_id'].isin(PE_assets['asset_id']))]
PE_entry_exit_historical_not_pe

Unnamed: 0,index,asset_id,investor_id,ownership,sector,entry_date,exit_date
22,22,12,15,subsidiary,materialsEnergy,2016-03,2019-05
23,23,13,15,subsidiary,consumer,2014-06,2022-08
32,32,19,15,subsidiary,services,2017-05,2024-11
41,41,25,15,subsidiary,scienceHealth,2015-08,2022-02
42,42,26,15,subsidiary,consumer,2016-02,2023-01
...,...,...,...,...,...,...,...
37241,37241,4692896,302,listed,services,2005-07,2010-01
37242,37242,4692896,330,listed,services,2012-11,2017-02
37244,37244,4692916,2733,,scienceHealth,2016-12,2021-09
37262,37262,4694530,160,listed,financial,2010-09,2021-01


In [194]:
PE_entry_exit_historical_not_pe['ownership'].value_counts()

ownership
subsidiary    2015
listed         811
private        504
other           78
bankrupt        70
government       2
Name: count, dtype: int64

In [195]:
deals['buyer_reasons']

0                                                      [""]
1                                                      [""]
2                        ["platform","platform","platform"]
3                                                      [""]
4                                                      [""]
                                ...                        
202569    ["vcRound","vcRound","vcRound","vcRound","vcRo...
202570                      ["vcRound","vcRound","vcRound"]
202571                                              ["",""]
202572                                                 [""]
202573    ["vcRound","vcRound","vcRound","vcRound","vcRo...
Name: buyer_reasons, Length: 202574, dtype: object

In [196]:
# Step 1: Keep only rows with buyer_share_values including majority/sharedMajority
# Step 2: Ensure buyer_types include 'asset' even when comma-separated
# Step 3: Exclude reasons containing 'vcRound' (case-insensitive)
deals_asset_majority = deals[
    deals['buyer_share_values'].apply(
        lambda x: isinstance(x, list) and any(i.lower() in ['majority', 'sharedmajority'] for i in x)
    ) &
    deals['buyer_types_str'].str.contains(r'\basset\b', case=False, na=False) &
    ~deals['buyer_reasons'].str.contains(r'vcRound', case=False, na=False)
]


# Step 4: Explode buyer_asset_ids (assumes it is a list or comma-separated field)
deals_asset_majority = deals_asset_majority.explode('buyer_asset_ids')
deals_asset_majority

Unnamed: 0,deal_id,announcement_date_month,announcement_date_year,asset,buyers,currency,deal_status,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_year,equity,equity_eur,equity_year,ev,ev_ebit_multiple,ev_ebit_multiple_year,ev_ebitda_multiple,ev_ebitda_multiple_year,ev_eur,ev_revenue_multiple,ev_revenue_multiple_year,ev_total_assets_multiple,ev_total_assets_multiple_year,ev_year,fte,fte_year,funding_round_amount_raised,funding_round_amount_raised_eur,funding_round_post_money_valuation,funding_round_post_money_valuation_eur,funding_round_post_money_valuation_year,funding_round_pre_money_valuation,funding_round_pre_money_valuation_eur,funding_round_pre_money_valuation_year,funding_round_type,gain_pro_url_x,linked_asset_id,publication_date,country_code,revenue,revenue_eur,revenue_year,sector,sellers,subsector,total_assets,total_assets_eur,total_assets_year,type,buyer_linked_ids,buyer_names,buyer_types,buyer_reasons,buyer_leading_parties,buyer_share_values,buyer_share_pcts,seller_linked_ids,seller_names,seller_types,seller_reasons,seller_leading_parties,seller_share_values,seller_share_pcts,buyer_names_str,seller_names_str,buyer_types_str,seller_types_str,buyer_share_values_str,seller_share_values_str,announcement_date,announcement_date_quarter,publication_date_year_month,country_name,region,sub_region,sub_region_2_x,buyer_investor_ids,buyer_asset_ids,seller_investor_ids,seller_asset_ids,seller_investor_names,seller_asset_names,buyer_investor_names,buyer_asset_names,combined_reasons,majority_buyer_linked_id,majority_buyer_name,majority_buyer_share,majority_buyer_type,majority_buyer_share_pct,majority_seller_linked_id,majority_seller_name,majority_seller_share,majority_seller_type,majority_seller_share_pct,name,asset_id,...,net_debt_by_ebitda_2024,capex_to_sales_2024,capex_to_sales,revenue_chg_2014_2013,revenue_chg_2015_2014,revenue_chg_2016_2015,revenue_chg_2017_2016,revenue_chg_2018_2017,revenue_chg_2019_2018,revenue_chg_2020_2019,revenue_chg_2021_2020,revenue_chg_2022_2021,revenue_chg_2023_2022,revenue_chg_2024_2023,revenue_chg_2022_2017,revenue_chg_2023_2018,revenue_chg_2022_2019,revenue_chg_2023_2020,revenue_chg_2023_2019,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,revenue_-2,revenue_-1,revenue_+0,revenue_+1,ebitda_-2,ebitda_-1,ebitda_+0,ebitda_+1,fte_-2,fte_-1,fte_+0,fte_+1,net_debt_-2,net_debt_-1,net_debt_+0,net_debt_+1,deal_revenue_range,deal_ebitda_range
0,10590355,5,2024,JUNO,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590355,,2025-01-20 13:22:03.841000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],[],[Josh Hotsenpiller],[other],"[""""]",[false],[minority],[],"[""GrowthZone""]","[""Josh Hotsenpiller""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2024-05,2024Q2,2025-01,United States of America,North America,US,-,,148434,,,,,,[GrowthZone],,148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
1,10590356,9,2023,MemberSuite,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590356,107119.00,2025-01-20 13:22:38.193000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],"[5408, 5366]","[Revolution, Arrowroot Capital]","[investor, investor]","[""strategicExit"",""strategicExit""]","[false, false]","[minority, minority]","[, ]","[""GrowthZone""]","[""Revolution"",""Arrowroot Capital""]","[""asset""]","[""investor"",""investor""]","[""majority""]","[""minority"",""minority""]",2023-09,2023Q3,2025-01,United States of America,North America,US,-,,148434,"[5408, 5366]",,"[Revolution, Arrowroot Capital]",,,[GrowthZone],"['strategicExit', 'strategicExit']",148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
3,10590358,2,2020,Resource-One,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590358,,2025-01-20 16:16:11.060000+00:00,US,,,,services,[],technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Superior Environmental Solutions""]",[],"[""asset""]",[],"[""majority""]",[],2020-02,2020Q1,2025-01,United States of America,North America,US,-,,89704,,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
4,10590359,1,2023,Mountain Environmental Services,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590359,,2025-01-20 13:21:25.924000+00:00,US,,,,services,[],technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Superior Environmental Solutions""]",[],"[""asset""]",[],"[""majority""]",[],2023-01,2023Q1,2025-01,United States of America,North America,US,-,,89704,,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
5,10590360,5,2024,Arrowhead Environmental Services,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,50.00,,,,,,,,,,,https://app.gain.pro/deal/10590360,,2025-01-20 13:23:46.815000+00:00,US,,,,services,"[\n {\n ""leading_party"": false,\n ""link...",technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[Michael Luter],[other],"[""""]",[false],[minority],[],"[""Superior Environmental Solutions""]","[""Michael Luter""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2024-05,2024Q2,2025-01,United States of America,North America,US,-,,89704,,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202536,10394568,8,2020,Inland Plywood Company,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,46.00,,,,,40.29,0.77,2020.00,,,2020.00,,,,,,,,,,,,https://app.gain.pro/deal/10394568,2159273.00,2024-09-19 06:52:35.001000+00:00,US,60.00,52.55,,industrials,"[\n {\n ""leading_party"": false,\n ""link...",manufacturing,,,,curated,[248962],[Patrick Industries],[asset],"[""""]",[false],[majority],[],"[, ]","[Tim MacEachern, Steve MacEachern]","[other, other]","["""",""""]","[false, false]","[minority, minority]","[, ]","[""Patrick Industries""]","[""Tim MacEachern"",""Steve MacEachern""]","[""asset""]","[""other"",""other""]","[""majority""]","[""minority"",""minority""]",2020-08,2020Q3,2024-09,United States of America,North America,US,-,,248962,,,,,,[Patrick Industries],,248962,Patrick Industries,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2_medium_50_250m_eur,5_unknown
202562,10394747,11,2021,Bottlepay,"[\n {\n ""leading_party"": false,\n ""link...",USD,,4.17,3.66,2021.00,3.93,3.44,,,,,300.00,,,76.39,,262.76,4149.38,,,,2021.00,26.00,,,,,,,,,,,https://app.gain.pro/deal/10394747,2411207.00,2025-04-10 09:04:22.134000+00:00,GB,0.07,0.06,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,9.97,8.73,2021.00,curated,[191412],[NYDIG],[asset],"[""""]",[false],[majority],[1],"[, 3047, , ]","[Pete Cheyne, FinTech Collective, Alan Howard,...","[other, investor, other, other]","["""",""strategicExit"","""",""strategicExit""]","[false, false, false, false]","[majority, minority, minority, minority]","[, , , ]","[""NYDIG""]","[""Pete Cheyne"",""FinTech Collective"",""Alan Howa...","[""asset""]","[""other"",""investor"",""other"",""other""]","[""majority""]","[""majority"",""minority"",""minority"",""minority""]",2021-11,2021Q4,2025-04,United Kingdom,Europe,UK&I,UK,,191412,[3047],,[FinTech Collective],,,[NYDIG],"['strategicExit', 'strategicExit']",191412,NYDIG,majority,asset,1,,Pete Cheyne,majority,other,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1_small_lt_50m_eur,1_small_lt_10m_eur
202564,10394756,4,2022,Chateaux,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10394756,163734.00,2024-11-21 05:21:04.191000+00:00,US,,,,services,"[\n {\n ""leading_party"": false,\n ""link...",professionalServices,,,,curated,[106143],[Coretelligent],[asset],"[""""]",[false],[majority],[],[],[Ken Zimmerman],[other],"[""""]",[false],[minority],[],"[""Coretelligent""]","[""Ken Zimmerman""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2022-04,2022Q2,2024-11,United States of America,North America,US,-,,106143,,,,,,[Coretelligent],,106143,Coretelligent,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
202567,10394761,2,2024,Grenzebach Glier and Associates,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,100.00,,,,,,,,,,,https://app.gain.pro/deal/10394761,,2025-07-17 09:23:40.119000+00:00,US,,,,services,"[\n {\n ""leading_party"": false,\n ""link...",professionalServices,,,,curated,[335788],[Huron],[asset],"[""""]",[false],[majority],[],"[, ]","[Martin Grenzebach, John Glier]","[other, other]","["""",""""]","[false, false]","[minority, minority]","[, ]","[""Huron""]","[""Martin Grenzebach"",""John Glier""]","[""asset""]","[""other"",""other""]","[""majority""]","[""minority"",""minority""]",2024-02,2024Q1,2025-07,United States of America,North America,US,-,,335788,,,,,,[Huron],,335788,Huron,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown


In [197]:
#exclude current PE assets
PE_entry_exit_addons = pd.merge(PE_entry_exit_historical_not_pe,deals_asset_majority, left_on = 'asset_id', right_on = 'buyer_asset_ids', how = 'left')
PE_entry_exit_addons = PE_entry_exit_addons[(PE_entry_exit_addons['announcement_date']>= PE_entry_exit_addons['entry_date']) & 
                                                 (PE_entry_exit_addons['announcement_date']<= PE_entry_exit_addons['exit_date'])]
PE_entry_exit_addons

Unnamed: 0,index,asset_id_x,investor_id,ownership_x,sector_x,entry_date,exit_date,deal_id,announcement_date_month,announcement_date_year,asset,buyers,currency,deal_status,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_year,equity,equity_eur,equity_year,ev,ev_ebit_multiple,ev_ebit_multiple_year,ev_ebitda_multiple,ev_ebitda_multiple_year,ev_eur,ev_revenue_multiple,ev_revenue_multiple_year,ev_total_assets_multiple,ev_total_assets_multiple_year,ev_year,fte,fte_year,funding_round_amount_raised,funding_round_amount_raised_eur,funding_round_post_money_valuation,funding_round_post_money_valuation_eur,funding_round_post_money_valuation_year,funding_round_pre_money_valuation,funding_round_pre_money_valuation_eur,funding_round_pre_money_valuation_year,funding_round_type,gain_pro_url_x,linked_asset_id,publication_date,country_code,revenue,revenue_eur,revenue_year,sector_y,sellers,subsector,total_assets,total_assets_eur,total_assets_year,type,buyer_linked_ids,buyer_names,buyer_types,buyer_reasons,buyer_leading_parties,buyer_share_values,buyer_share_pcts,seller_linked_ids,seller_names,seller_types,seller_reasons,seller_leading_parties,seller_share_values,seller_share_pcts,buyer_names_str,seller_names_str,buyer_types_str,seller_types_str,buyer_share_values_str,seller_share_values_str,announcement_date,announcement_date_quarter,publication_date_year_month,country_name,region,sub_region,sub_region_2_x,buyer_investor_ids,buyer_asset_ids,seller_investor_ids,seller_asset_ids,seller_investor_names,seller_asset_names,buyer_investor_names,buyer_asset_names,combined_reasons,majority_buyer_linked_id,majority_buyer_name,majority_buyer_share,majority_buyer_type,majority_buyer_share_pct,...,net_debt_by_ebitda_2024,capex_to_sales_2024,capex_to_sales,revenue_chg_2014_2013,revenue_chg_2015_2014,revenue_chg_2016_2015,revenue_chg_2017_2016,revenue_chg_2018_2017,revenue_chg_2019_2018,revenue_chg_2020_2019,revenue_chg_2021_2020,revenue_chg_2022_2021,revenue_chg_2023_2022,revenue_chg_2024_2023,revenue_chg_2022_2017,revenue_chg_2023_2018,revenue_chg_2022_2019,revenue_chg_2023_2020,revenue_chg_2023_2019,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,revenue_-2,revenue_-1,revenue_+0,revenue_+1,ebitda_-2,ebitda_-1,ebitda_+0,ebitda_+1,fte_-2,fte_-1,fte_+0,fte_+1,net_debt_-2,net_debt_-1,net_debt_+0,net_debt_+1,deal_revenue_range,deal_ebitda_range
1,22,12,15,subsidiary,materialsEnergy,2016-03,2019-05,12435.00,10,2018,CSL Silicones,"[\n {\n ""leading_party"": false,\n ""link...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/12435,,2020-08-10 19:31:48.071000+00:00,CA,,,,materialsEnergy,[],chemicals,,,,curated,[12],[BRB International],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""BRB International""]",[],"[""asset""]",[],"[""majority""]",[],2018-10,2018Q4,2020-08,Canada,North America,Canada,-,,12,,,,,,[BRB International],,12,BRB International,majority,asset,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
2,23,13,15,subsidiary,consumer,2014-06,2022-08,14922.00,8,2016,Telson,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/14922,,2020-10-13 09:55:58.450000+00:00,NL,,,,consumer,[],food,,,,curated,[13],[Shore],[asset],"[""""]",[false],[sharedMajority],[0.5],[],[],[],,[],[],[],"[""Shore""]",[],"[""asset""]",[],"[""sharedMajority""]",[],2016-08,2016Q3,2020-10,Netherlands,Europe,Benelux,-,,13,,,,,,[Shore],,13,Shore,sharedMajority,asset,0.5,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
3,23,13,15,subsidiary,consumer,2014-06,2022-08,45750.00,2,2016,Ristic,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/45750,,2022-08-16 21:03:03.911000+00:00,DE,,,,consumer,[],food,,,,curated,[13],[Shore],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Shore""]",[],"[""asset""]",[],"[""majority""]",[],2016-02,2016Q1,2022-08,Germany,Europe,DACH,-,,13,,,,,,[Shore],,13,Shore,majority,asset,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
6,42,26,15,subsidiary,consumer,2016-02,2023-01,18986.00,9,2020,Frank Delmote,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/18986,,2024-03-11 15:02:21.135000+00:00,BE,,,,consumer,"[\n {\n ""leading_party"": false,\n ""link...",food,,,,curated,[26],[Vleeswaren de Keyser],[asset],"[""""]",[false],[majority],[],[],[Willy Naessens Group],[other],"[""divestiture""]",[false],[majority],[],"[""Vleeswaren de Keyser""]","[""Willy Naessens Group""]","[""asset""]","[""other""]","[""majority""]","[""majority""]",2020-09,2020Q3,2024-03,Belgium,Europe,Benelux,-,,26,,,,,,[Vleeswaren de Keyser],['divestiture'],26,Vleeswaren de Keyser,majority,asset,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
7,42,26,15,subsidiary,consumer,2016-02,2023-01,18987.00,9,2020,Vleeswaren Peeters,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/18987,,2021-02-08 07:56:20.142000+00:00,BE,,,,consumer,"[\n {\n ""leading_party"": false,\n ""link...",food,,,,curated,[26],[Vleeswaren de Keyser],[asset],"[""""]",[false],[majority],[],[],[Willy Naessens Group],[other],"[""divestiture""]",[false],[majority],[],"[""Vleeswaren de Keyser""]","[""Willy Naessens Group""]","[""asset""]","[""other""]","[""majority""]","[""majority""]",2020-09,2020Q3,2021-02,Belgium,Europe,Benelux,-,,26,,,,,,[Vleeswaren de Keyser],['divestiture'],26,Vleeswaren de Keyser,majority,asset,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11711,36593,4587511,2007,subsidiary,consumer,2019-04,2025-05,10597634.00,7,2019,Ribeiro & Guimarães,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10597634,,2025-02-14 16:15:39.105000+00:00,PT,,,,consumer,[],food,,,,curated,[4587511],[Queijos Tavares Group],[asset],"[""""]",[false],[majority],[1],[],[],[],,[],[],[],"[""Queijos Tavares Group""]",[],"[""asset""]",[],"[""majority""]",[],2019-07,2019Q3,2025-02,Portugal,Europe,Iberia,-,,4587511,,,,,,[Queijos Tavares Group],,4587511,Queijos Tavares Group,majority,asset,1,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
11712,36593,4587511,2007,subsidiary,consumer,2019-04,2025-05,10597635.00,9,2019,Damar,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10597635,,2025-02-14 16:16:02.973000+00:00,PT,,,,consumer,[],food,,,,curated,[4587511],[Queijos Tavares Group],[asset],"[""""]",[false],[majority],[1],[],[],[],,[],[],[],"[""Queijos Tavares Group""]",[],"[""asset""]",[],"[""majority""]",[],2019-09,2019Q3,2025-02,Portugal,Europe,Iberia,-,,4587511,,,,,,[Queijos Tavares Group],,4587511,Queijos Tavares Group,majority,asset,1,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
11713,36593,4587511,2007,subsidiary,consumer,2019-04,2025-05,10597637.00,12,2020,Serqueijos,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10597637,,2025-02-14 16:19:14.649000+00:00,PT,,,,consumer,[],food,,,,curated,[4587511],[Queijos Tavares Group],[asset],"[""""]",[false],[majority],[1],[],[],[],,[],[],[],"[""Queijos Tavares Group""]",[],"[""asset""]",[],"[""majority""]",[],2020-12,2020Q4,2025-02,Portugal,Europe,Iberia,-,,4587511,,,,,,[Queijos Tavares Group],,4587511,Queijos Tavares Group,majority,asset,1,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
11714,36626,4603333,17832,subsidiary,services,2017-08,2025-04,10600149.00,9,2019,Luxfords Of Weybridge,"[\n {\n ""leading_party"": false,\n ""link...",GBP,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10600149,4550015.00,2025-02-19 16:44:43.897000+00:00,GB,,,,services,"[\n {\n ""leading_party"": false,\n ""link...",logistics,,,,curated,[4603333],[Doree Bonner ],[asset],"[""""]",[false],[majority],[],[],[Jak Luxford],[other],"[""""]",[false],[minority],[],"[""Doree Bonner ""]","[""Jak Luxford""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2019-09,2019Q3,2025-02,United Kingdom,Europe,UK&I,UK,,4603333,,,,,,[Doree Bonner],,4603333,Doree Bonner,majority,asset,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown


In [198]:
PE_historical_addon_ids = PE_entry_exit_addons['deal_id'].unique()
PE_historical_addons = deals[deals['deal_id'].isin(PE_historical_addon_ids)]
PE_historical_addons

Unnamed: 0,deal_id,announcement_date_month,announcement_date_year,asset,buyers,currency,deal_status,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_year,equity,equity_eur,equity_year,ev,ev_ebit_multiple,ev_ebit_multiple_year,ev_ebitda_multiple,ev_ebitda_multiple_year,ev_eur,ev_revenue_multiple,ev_revenue_multiple_year,ev_total_assets_multiple,ev_total_assets_multiple_year,ev_year,fte,fte_year,funding_round_amount_raised,funding_round_amount_raised_eur,funding_round_post_money_valuation,funding_round_post_money_valuation_eur,funding_round_post_money_valuation_year,funding_round_pre_money_valuation,funding_round_pre_money_valuation_eur,funding_round_pre_money_valuation_year,funding_round_type,gain_pro_url_x,linked_asset_id,publication_date,country_code,revenue,revenue_eur,revenue_year,sector,sellers,subsector,total_assets,total_assets_eur,total_assets_year,type,buyer_linked_ids,buyer_names,buyer_types,buyer_reasons,buyer_leading_parties,buyer_share_values,buyer_share_pcts,seller_linked_ids,seller_names,seller_types,seller_reasons,seller_leading_parties,seller_share_values,seller_share_pcts,buyer_names_str,seller_names_str,buyer_types_str,seller_types_str,buyer_share_values_str,seller_share_values_str,announcement_date,announcement_date_quarter,publication_date_year_month,country_name,region,sub_region,sub_region_2_x,buyer_investor_ids,buyer_asset_ids,seller_investor_ids,seller_asset_ids,seller_investor_names,seller_asset_names,buyer_investor_names,buyer_asset_names,combined_reasons,majority_buyer_linked_id,majority_buyer_name,majority_buyer_share,majority_buyer_type,majority_buyer_share_pct,majority_seller_linked_id,majority_seller_name,majority_seller_share,majority_seller_type,majority_seller_share_pct,name,asset_id,...,net_debt_by_ebitda_2024,capex_to_sales_2024,capex_to_sales,revenue_chg_2014_2013,revenue_chg_2015_2014,revenue_chg_2016_2015,revenue_chg_2017_2016,revenue_chg_2018_2017,revenue_chg_2019_2018,revenue_chg_2020_2019,revenue_chg_2021_2020,revenue_chg_2022_2021,revenue_chg_2023_2022,revenue_chg_2024_2023,revenue_chg_2022_2017,revenue_chg_2023_2018,revenue_chg_2022_2019,revenue_chg_2023_2020,revenue_chg_2023_2019,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,revenue_-2,revenue_-1,revenue_+0,revenue_+1,ebitda_-2,ebitda_-1,ebitda_+0,ebitda_+1,fte_-2,fte_-1,fte_+0,fte_+1,net_debt_-2,net_debt_-1,net_debt_+0,net_debt_+1,deal_revenue_range,deal_ebitda_range
92,10590456,1,2025,Iris Eye Clinics,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590456,2981686.00,2025-01-21 14:56:36.068000+00:00,NL,,,,scienceHealth,[],healthcareServices,,,,curated,"[183, 5901]","[Quadrum Capital, Optegra]","[investor, asset]","[""platform"",""""]","[false, false]","[minority, majority]","[0.49, 0.51]",[],[],[],,[],[],[],"[""Quadrum Capital"",""Optegra""]",[],"[""investor"",""asset""]",[],"[""minority"",""majority""]",[],2025-01,2025Q1,2025-01,Netherlands,Europe,Benelux,-,[183],[5901],,,,,[Quadrum Capital],[Optegra],['platform'],5901,Optegra,majority,asset,0.51,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
298,10590722,4,2024,4Com Technologies,"[\n {\n ""leading_party"": false,\n ""link...",GBP,,15.44,17.86,2024.00,25.09,29.01,,,,,215.00,13.92,2024.00,8.57,2024.00,248.64,1.57,2024.00,,,2024.00,571.00,,,,,,,,,,,https://app.gain.pro/deal/10590722,15065.00,2025-04-04 05:58:03.048000+00:00,GB,136.55,157.92,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",telecom,105.27,121.74,2024.00,curated,[2885],[Daisy Group],[asset],"[""""]",[false],[majority],[1],[],[Daron Hutt],[other],"[""""]",[false],[majority],[],"[""Daisy Group""]","[""Daron Hutt""]","[""asset""]","[""other""]","[""majority""]","[""majority""]",2024-04,2024Q2,2025-04,United Kingdom,Europe,UK&I,UK,,[2885],,,,,,[Daisy Group],,2885,Daisy Group,majority,asset,1,,Daron Hutt,majority,other,,4Com Technologies,15065.00,...,2.85,0.13,0.13,,,,,1.19,1.11,1.44,0.88,1.24,1.15,1.11,0.16,0.15,0.16,0.08,0.16,0.20,0.15,0.17,0.34,0.31,0.18,0.32,0.74,0.27,0.05,0.10,0.10,0.10,0.13,0.10,0.13,,,,,0.16,0.09,0.11,0.15,0.09,0.16,0.19,0.18,,571.00,136.74,157.98,25.09,11.14,13.02,16.69,,,6.32,20.02,47.46,,,2.43,20.74,73.67,,,14.20,18.59,11.22,2.55,7.00,4.04,14.82,19.50,,,"(2.0, 5.0]",1.00,0.00,26.00,28.99,123.69,142.14,157.98,,20.12,27.26,28.99,,406.00,500.00,571.00,,54.57,67.90,82.61,,2_medium_50_250m_eur,2_medium_10_50m_eur
363,10590802,1,2025,DW Facility Services BV,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590802,1026420.00,2025-08-20 20:32:20.711000+00:00,NL,,,,services,[],professionalServices,,,,curated,[11950],[Capital Cleaning Group],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Capital Cleaning Group""]",[],"[""asset""]",[],"[""majority""]",[],2025-01,2025Q1,2025-08,Netherlands,Europe,Benelux,-,,[11950],,,,,,[Capital Cleaning Group],,11950,Capital Cleaning Group,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
396,10590836,10,2021,Beton Tool Company,"[\n {\n ""leading_party"": false,\n ""link...",EUR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590836,3641952.00,2025-01-22 18:59:34.462000+00:00,IT,,,,industrials,[],manufacturing,,,,curated,[1898449],[Barikell Group],[asset],"[""""]",[false],[majority],[1],[],[],[],,[],[],[],"[""Barikell Group""]",[],"[""asset""]",[],"[""majority""]",[],2021-10,2021Q4,2025-01,Italy,Europe,Italy,-,,[1898449],,,,,,[Barikell Group],,1898449,Barikell Group,majority,asset,1,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
432,10590881,1,2022,Crest Physical Therapy,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590881,299577.00,2025-01-22 14:32:45.293000+00:00,US,,,,scienceHealth,"[\n {\n ""leading_party"": false,\n ""link...",healthcareServices,,,,curated,[261667],[JAG-ONE Physical Therapy],[asset],"[""""]",[false],[majority],[],[],[Brian Micheletti],[other],"[""""]",[false],[minority],[],"[""JAG-ONE Physical Therapy""]","[""Brian Micheletti""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2022-01,2022Q1,2025-01,United States of America,North America,US,-,,[261667],,,,,,[JAG-ONE Physical Therapy],,261667,JAG-ONE Physical Therapy,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200907,10374852,11,2022,MDcentric Technologies,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,21.00,,,,,,,,,,,https://app.gain.pro/deal/10374852,2333573.00,2024-11-11 08:39:15.171000+00:00,US,,,,services,[],professionalServices,,,,curated,[410742],[Medicus IT],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Medicus IT""]",[],"[""asset""]",[],"[""majority""]",[],2022-11,2022Q4,2024-11,United States of America,North America,US,-,,[410742],,,,,,[Medicus IT],,410742,Medicus IT,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
201255,10378461,8,2022,Dickstein Associates Agency,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10378461,2387377.00,2024-09-26 09:23:51.851000+00:00,US,,,,financial,"[\n {\n ""leading_party"": false,\n ""link...",insurance,,,,curated,[269885],[Accession Risk Management],[asset],"[""""]",[false],[majority],[],[],[Kevin McDonough],[other],"[""""]",[false],[minority],[],"[""Accession Risk Management""]","[""Kevin McDonough""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2022-08,2022Q3,2024-09,United States of America,North America,US,-,,[269885],,,,,,[Accession Risk Management],,269885,Accession Risk Management,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
201310,10379527,5,2023,International Insurance Brokers,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10379527,2254427.00,2024-09-26 09:39:48.476000+00:00,US,,,,financial,"[\n {\n ""leading_party"": false,\n ""link...",insurance,,,,curated,[269885],[Accession Risk Management],[asset],"[""""]",[false],[majority],[],[],[Caroline Sniff],[other],"[""""]",[false],[minority],[],"[""Accession Risk Management""]","[""Caroline Sniff""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2023-05,2023Q2,2024-09,United States of America,North America,US,-,,[269885],,,,,,[Accession Risk Management],,269885,Accession Risk Management,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
201399,10380443,12,2023,Setnor Byer Insurance & Risk,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10380443,2459424.00,2024-09-26 09:30:20.918000+00:00,US,,,,financial,"[\n {\n ""leading_party"": false,\n ""link...",insurance,,,,curated,[269885],[Accession Risk Management],[asset],"[""""]",[false],[majority],[],[],[Anita Byer],[other],"[""""]",[false],[minority],[],"[""Accession Risk Management""]","[""Anita Byer""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2023-12,2023Q4,2024-09,United States of America,North America,US,-,,[269885],,,,,,[Accession Risk Management],,269885,Accession Risk Management,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown


### Final Add-on Dataset

In [199]:
#Append Historical Add-ons

PE_add_ons = pd.concat([PE_add_ons,PE_historical_addons], ignore_index=True)
PE_add_ons = PE_add_ons[PE_add_ons['majority_buyer_type']=='asset']
PE_add_ons

Unnamed: 0,deal_id,announcement_date_month,announcement_date_year,asset,buyers,currency,deal_status,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_year,equity,equity_eur,equity_year,ev,ev_ebit_multiple,ev_ebit_multiple_year,ev_ebitda_multiple,ev_ebitda_multiple_year,ev_eur,ev_revenue_multiple,ev_revenue_multiple_year,ev_total_assets_multiple,ev_total_assets_multiple_year,ev_year,fte,fte_year,funding_round_amount_raised,funding_round_amount_raised_eur,funding_round_post_money_valuation,funding_round_post_money_valuation_eur,funding_round_post_money_valuation_year,funding_round_pre_money_valuation,funding_round_pre_money_valuation_eur,funding_round_pre_money_valuation_year,funding_round_type,gain_pro_url_x,linked_asset_id,publication_date,country_code,revenue,revenue_eur,revenue_year,sector,sellers,subsector,total_assets,total_assets_eur,total_assets_year,type,buyer_linked_ids,buyer_names,buyer_types,buyer_reasons,buyer_leading_parties,buyer_share_values,buyer_share_pcts,seller_linked_ids,seller_names,seller_types,seller_reasons,seller_leading_parties,seller_share_values,seller_share_pcts,buyer_names_str,seller_names_str,buyer_types_str,seller_types_str,buyer_share_values_str,seller_share_values_str,announcement_date,announcement_date_quarter,publication_date_year_month,country_name,region,sub_region,sub_region_2_x,buyer_investor_ids,buyer_asset_ids,seller_investor_ids,seller_asset_ids,seller_investor_names,seller_asset_names,buyer_investor_names,buyer_asset_names,combined_reasons,majority_buyer_linked_id,majority_buyer_name,majority_buyer_share,majority_buyer_type,majority_buyer_share_pct,majority_seller_linked_id,majority_seller_name,majority_seller_share,majority_seller_type,majority_seller_share_pct,name,asset_id,...,net_debt_by_ebitda_2024,capex_to_sales_2024,capex_to_sales,revenue_chg_2014_2013,revenue_chg_2015_2014,revenue_chg_2016_2015,revenue_chg_2017_2016,revenue_chg_2018_2017,revenue_chg_2019_2018,revenue_chg_2020_2019,revenue_chg_2021_2020,revenue_chg_2022_2021,revenue_chg_2023_2022,revenue_chg_2024_2023,revenue_chg_2022_2017,revenue_chg_2023_2018,revenue_chg_2022_2019,revenue_chg_2023_2020,revenue_chg_2023_2019,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,revenue_-2,revenue_-1,revenue_+0,revenue_+1,ebitda_-2,ebitda_-1,ebitda_+0,ebitda_+1,fte_-2,fte_-1,fte_+0,fte_+1,net_debt_-2,net_debt_-1,net_debt_+0,net_debt_+1,deal_revenue_range,deal_ebitda_range
0,10590355,5,2024,JUNO,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590355,,2025-01-20 13:22:03.841000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],[],[Josh Hotsenpiller],[other],"[""""]",[false],[minority],[],"[""GrowthZone""]","[""Josh Hotsenpiller""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2024-05,2024Q2,2025-01,United States of America,North America,US,-,,[148434],,,,,,[GrowthZone],,148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
1,10590356,9,2023,MemberSuite,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590356,107119.00,2025-01-20 13:22:38.193000+00:00,US,,,,tmt,"[\n {\n ""leading_party"": false,\n ""link...",software,,,,curated,[148434],[GrowthZone],[asset],"[""""]",[false],[majority],[],"[5408, 5366]","[Revolution, Arrowroot Capital]","[investor, investor]","[""strategicExit"",""strategicExit""]","[false, false]","[minority, minority]","[, ]","[""GrowthZone""]","[""Revolution"",""Arrowroot Capital""]","[""asset""]","[""investor"",""investor""]","[""majority""]","[""minority"",""minority""]",2023-09,2023Q3,2025-01,United States of America,North America,US,-,,[148434],"[5408, 5366]",,"[Revolution, Arrowroot Capital]",,,[GrowthZone],"['strategicExit', 'strategicExit']",148434,GrowthZone,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
2,10590358,2,2020,Resource-One,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590358,,2025-01-20 16:16:11.060000+00:00,US,,,,services,[],technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Superior Environmental Solutions""]",[],"[""asset""]",[],"[""majority""]",[],2020-02,2020Q1,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
3,10590359,1,2023,Mountain Environmental Services,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10590359,,2025-01-20 13:21:25.924000+00:00,US,,,,services,[],technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Superior Environmental Solutions""]",[],"[""asset""]",[],"[""majority""]",[],2023-01,2023Q1,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
4,10590360,5,2024,Arrowhead Environmental Services,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,50.00,,,,,,,,,,,https://app.gain.pro/deal/10590360,,2025-01-20 13:23:46.815000+00:00,US,,,,services,"[\n {\n ""leading_party"": false,\n ""link...",technicalServices,,,,curated,[89704],[Superior Environmental Solutions],[asset],"[""""]",[false],[majority],[],[],[Michael Luter],[other],"[""""]",[false],[minority],[],"[""Superior Environmental Solutions""]","[""Michael Luter""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2024-05,2024Q2,2025-01,United States of America,North America,US,-,,[89704],,,,,,[Superior Environmental Solutions],,89704,Superior Environmental Solutions,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51883,10374852,11,2022,MDcentric Technologies,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,21.00,,,,,,,,,,,https://app.gain.pro/deal/10374852,2333573.00,2024-11-11 08:39:15.171000+00:00,US,,,,services,[],professionalServices,,,,curated,[410742],[Medicus IT],[asset],"[""""]",[false],[majority],[],[],[],[],,[],[],[],"[""Medicus IT""]",[],"[""asset""]",[],"[""majority""]",[],2022-11,2022Q4,2024-11,United States of America,North America,US,-,,[410742],,,,,,[Medicus IT],,410742,Medicus IT,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
51884,10378461,8,2022,Dickstein Associates Agency,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10378461,2387377.00,2024-09-26 09:23:51.851000+00:00,US,,,,financial,"[\n {\n ""leading_party"": false,\n ""link...",insurance,,,,curated,[269885],[Accession Risk Management],[asset],"[""""]",[false],[majority],[],[],[Kevin McDonough],[other],"[""""]",[false],[minority],[],"[""Accession Risk Management""]","[""Kevin McDonough""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2022-08,2022Q3,2024-09,United States of America,North America,US,-,,[269885],,,,,,[Accession Risk Management],,269885,Accession Risk Management,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
51885,10379527,5,2023,International Insurance Brokers,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10379527,2254427.00,2024-09-26 09:39:48.476000+00:00,US,,,,financial,"[\n {\n ""leading_party"": false,\n ""link...",insurance,,,,curated,[269885],[Accession Risk Management],[asset],"[""""]",[false],[majority],[],[],[Caroline Sniff],[other],"[""""]",[false],[minority],[],"[""Accession Risk Management""]","[""Caroline Sniff""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2023-05,2023Q2,2024-09,United States of America,North America,US,-,,[269885],,,,,,[Accession Risk Management],,269885,Accession Risk Management,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown
51886,10380443,12,2023,Setnor Byer Insurance & Risk,"[\n {\n ""leading_party"": false,\n ""link...",USD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://app.gain.pro/deal/10380443,2459424.00,2024-09-26 09:30:20.918000+00:00,US,,,,financial,"[\n {\n ""leading_party"": false,\n ""link...",insurance,,,,curated,[269885],[Accession Risk Management],[asset],"[""""]",[false],[majority],[],[],[Anita Byer],[other],"[""""]",[false],[minority],[],"[""Accession Risk Management""]","[""Anita Byer""]","[""asset""]","[""other""]","[""majority""]","[""minority""]",2023-12,2023Q4,2024-09,United States of America,North America,US,-,,[269885],,,,,,[Accession Risk Management],,269885,Accession Risk Management,majority,asset,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5_unknown,5_unknown


In [200]:
PE_add_ons['majority_buyer_linked_id']

0        148434
1        148434
2         89704
3         89704
4         89704
          ...  
51883    410742
51884    269885
51885    269885
51886    269885
51887    158889
Name: majority_buyer_linked_id, Length: 51849, dtype: Int64

In [201]:
assets_automated

Unnamed: 0,id,name,country_code,sector,revenue,revenue_eur,investors,last_deal_month,last_deal_year,revenue_is_ai_generated,revenue_with_ai_generated,revenue_with_ai_generated_eur,revenue_year,subsector,subsidiary_asset_ids,year_founded,ebitda,ebitda_eur,ebitda_is_ai_generated,ebitda_pct_revenue,ebitda_with_ai_generated,ebitda_with_ai_generated_eur,ebitda_year,enterprise_value,enterprise_value_eur,fte,fte_range,fte_year,ownership,headquarters_region,headquarters_city,profile_type,asset_id,country_name,region,sub_region,sub_region_2
0,6547,IndustrieElektrik,DE,industrials,51.02,51.02,[],,,False,51.02,51.02,2022.00,automotive,[],1971.00,10.26,10.26,False,20.10,10.26,10.26,2022.00,,,28.00,11-50,2022.00,private,Bayern,Gundelfingen an der Donau,minimal,6547,Germany,Europe,DACH,-
1,6548,Doctor Care Anywhere,GB,scienceHealth,39.33,45.49,[],,,False,39.33,45.49,2024.00,healthcareServices,[],2013.00,-2.51,-2.90,False,-6.37,-2.51,-2.90,2024.00,,,610.00,"501-1,000",2024.00,listed,,London,limited,6548,United Kingdom,Europe,UK&I,UK
2,6549,Boer Group,NL,materialsEnergy,90.31,90.31,[],,,False,90.31,90.31,2023.00,rawMaterials,[\n 4091621\n],1908.00,14.64,14.64,False,16.21,14.64,14.64,2023.00,,,470.00,201-500,2023.00,private,Zuid-Holland,Dordrecht,full,6549,Netherlands,Europe,Benelux,-
3,6552,Pack-it Group,NL,consumer,130.56,130.56,[],,,False,130.56,130.56,2022.00,consumerGoods,[\n 3449199\n],1984.00,4.12,4.12,False,3.15,4.12,4.12,2022.00,,,85.58,51-200,2022.00,private,Zuid-Holland,Oud-Beijerland,full,6552,Netherlands,Europe,Benelux,-
4,6553,Finetech Group,DE,tmt,25.02,25.02,[],,,False,25.02,25.02,2023.00,technology,[],1992.00,3.40,3.40,False,13.58,3.40,3.40,2023.00,,,162.00,51-200,2023.00,private,Berlin,Berlin,full,6553,Germany,Europe,DACH,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4020590,644188,Arti Grafiche Julia S.p.A.,IT,tmt,,,[],,,True,8.00,8.00,2025.00,technology,[],1967.00,,,True,,0.72,0.72,2025.00,,,30.00,11-50,2025.00,private,Friuli-Venezia Giulia,San Dorligo della Valle,automated,644188,Italy,Europe,Italy,-
4020591,644189,Tetra Tech Proteus,AU,services,,,[],,,True,30.00,16.89,2025.00,professionalServices,[],,,,True,,2.70,1.52,2025.00,,,65.00,51-200,2025.00,private,Western Australia,Perth,automated,644189,Australia,Australia,Australia,-
4020592,644190,Innovative Tutors,IN,services,,,[],,,True,10.00,0.10,2025.00,education,[],2012.00,,,True,,1.90,0.02,2025.00,,,2.00,1-10,2025.00,private,Uttar Pradesh,Ghaziabad,automated,644190,India,Asia,Asia,-
4020593,644191,Röpa Römer-Metallbau GmbH,DE,industrials,,,[],,,False,,,,construction,[],1981.00,,,False,,,,,,,,51-200,,private,Nordrhein-Westfalen,Drolshagen,automated,644191,Germany,Europe,DACH,-


In [202]:
df_summary = assets_automated[['asset_id','country_name','sub_region','region','subsector','sector']]

df_summary = df_summary.rename(columns={"country_name":"buyer_country_name",
                                       "sub_region":"buyer_sub_region",
                                       "region":"buyer_region",
                                       "subsector":"buyer_subsector",
                                       "sector":"buyer_sector",})


In [203]:
PE_add_ons = pd.merge(
    PE_add_ons,
    df_summary,
    left_on='majority_buyer_linked_id',
    right_on='asset_id', 
    how = 'left')

PE_entries.drop(columns='asset_id', inplace=True)



In [204]:
PE_add_ons_EU = PE_add_ons[PE_add_ons['buyer_region']=='Europe']

## Creating combined PE deals datasets

In [205]:
PE_combined_entries_exits = pd.concat([PE_entries,PE_exits],ignore_index = True)
PE_combined_entries_exits = PE_combined_entries_exits.drop_duplicates(subset='deal_id')


PE_combined_entries_exits_add_ons = pd.concat([PE_entries,PE_exits,PE_add_ons],ignore_index = True)
PE_combined_entries_exits_add_ons = PE_combined_entries_exits_add_ons.drop_duplicates(subset='deal_id')

## Holding Period

**<span style="color:blue"> TODO: A LOT OF DUPLICATES HERE**


In [206]:
PE_entry_exit = pd.merge(
    PE_entries_explode,
    PE_exits_explode,
    on=['linked_asset_id', 'investor_id'],
    suffixes=('_entry', '_exit'),
    how = 'left'
).sort_values(by=['linked_asset_id', 'investor_id'])
len(PE_entry_exit)

46447

In [207]:
PE_entry_exit['entry_date'] = pd.to_datetime(PE_entry_exit['entry_date'], format='%Y-%m', errors='coerce')
PE_entry_exit['exit_date'] = pd.to_datetime(PE_entry_exit['exit_date'], format='%Y-%m', errors='coerce')


PE_entry_exit['exit_year'] = PE_entry_exit['exit_date'].dt.year
PE_entry_exit['entry_year'] = PE_entry_exit['entry_date'].dt.year


In [208]:

PE_entry_exit['holding_period_years'] = (PE_entry_exit['exit_date'] - PE_entry_exit['entry_date']).dt.days/365
PE_entry_exit['holding_period_years'] = (PE_entry_exit['exit_date'] - PE_entry_exit['entry_date']).dt.days/365
PE_entry_exit['holding_period_months'] = (PE_entry_exit['exit_date'] - PE_entry_exit['entry_date']) / pd.Timedelta(days=30)


In [209]:
# We get rid of NaN asset ids -> these holdings do not make sense as the logic is to combine entries and exits
# based on the same linked_asset_id so random deals without asset ID get combined
PE_entry_exit.dropna(subset='linked_asset_id', inplace=True)
len(PE_entry_exit)

42614

In [210]:
PE_entry_exit = PE_entry_exit[-(PE_entry_exit['holding_period_years']<=0)]
len(PE_entry_exit)

41896

In [211]:
PE_entry_exit = pd.merge(PE_entry_exit ,assets[['asset_id','name','sub_region','region', 'country_code', 'ownership','owner_names','owner_ids']], left_on = 'linked_asset_id', right_on = 'asset_id', how = 'left')

**Creating unsold investors dataabase**

In [212]:
PE_entry_exit_unsold=PE_entry_exit.copy()

#Delete entries where there is clearly an exit but we haven't tracked the exit clearly, if there is no investor owner that means it's been sold

PE_entry_exit_unsold = PE_entry_exit_unsold[-((PE_entry_exit_unsold['exit_year'].isna()) & (PE_entry_exit_unsold['owner_ids'].isna()))]

print(PE_entry_exit_unsold.shape)

#This step is done to remove assets that might have not had a exit deal clearly marked. So check if the investor id is still in a current owner to make sure it's unsold 

mask = PE_entry_exit_unsold.apply(
    lambda r: str(r['investor_id']) in {str(x) for x in (r['owner_ids'] if isinstance(r['owner_ids'], list) else [])},
    axis=1)

PE_entry_exit_unsold = PE_entry_exit_unsold[ (PE_entry_exit_unsold['exit_year'].notna()) | (mask & PE_entry_exit_unsold['exit_year'].isna()) ]


print(PE_entry_exit_unsold.shape)

#We are creating this as we would be removing duplicates dealid and investor id pairs.. as we don't one 1 deal with 10 minority investors to be there 10 times but just 1 time
PE_entry_exit_unsold_investor = PE_entry_exit_unsold.copy()

PE_entry_exit_unsold = PE_entry_exit_unsold.drop_duplicates(subset=['deal_id_entry', 'deal_id_exit'])

print(PE_entry_exit_unsold.shape)


(41188, 1048)
(38772, 1048)
(32642, 1048)


In [213]:
PE_entry_exit.shape

(41896, 1048)

In [214]:
PE_entry_exit = PE_entry_exit[PE_entry_exit['exit_date'] > PE_entry_exit['entry_date']].copy()
print(PE_entry_exit.shape)
PE_entry_exit = PE_entry_exit[PE_entry_exit['exit_date'].notna()]
print(PE_entry_exit.shape)

(14059, 1048)
(14059, 1048)


In [215]:
PE_entry_exit['holding_period_years'].describe()

count   14059.00
mean        5.70
std         3.54
min         0.08
25%         3.42
50%         4.92
75%         7.09
max        68.71
Name: holding_period_years, dtype: float64

In [216]:
# Not sure about this one to be honest

PE_entry_exit = PE_entry_exit[~(PE_entry_exit['holding_period_years']>15)]

In [217]:

#Need this dataset for investor level analysis
PE_entry_exit_investor = PE_entry_exit.copy()


#We are creating this as we would be removing duplicates dealid and investor id pairs.. as we don't one 1 deal with 10 minority investors to be there 10 times but just 1 time

PE_entry_exit = PE_entry_exit.drop_duplicates(subset=['deal_id_entry', 'deal_id_exit'])

**<span style="color:blue"> TODO: CREATE A MAJORITY ONE**


In [218]:
# Use another code to create majority here

#deals_entry_exit_holding_majority = PE_entry_exit_majority[PE_entry_exit_majority['exit_date'].notna()]
#deals_entry_exit_holding_majority

# INVESTOR RANKING

### Entry and exit count

Last 6 years of data to filter only for active investors later

In [542]:
# Creating datasets to count entries for the last 6 years

entries_count_explode = PE_entries_explode[PE_entries_explode['announcement_date_year']>=(current_year-6)]
exits_count_explode = PE_exits_explode[PE_exits_explode['announcement_date_year']>=(current_year-6)]

In [543]:
entries_count = entries_count_explode.groupby(['investor_id','region','sub_region','country_name','sector'],dropna=False)[['asset']].nunique()
entries_count = entries_count.rename(columns={"asset": "entries_count"})

exits_count = exits_count_explode.groupby(['investor_id','region','sub_region','country_name','sector'],dropna=False)[['asset']].nunique()
exits_count = exits_count.rename(columns={"asset": "exits_count"})

entries_exits_count = pd.concat(
    [entries_count, exits_count],axis =1)

entries_exits_count = entries_exits_count.reset_index()

In [544]:
# Rolling up the values to get a good summary table

entries_exits_count_sector = entries_exits_count.groupby(['investor_id','sector'])[["entries_count", "exits_count"]].sum().reset_index()
entries_exits_count_region =  entries_exits_count.groupby(['investor_id','region'])[["entries_count", "exits_count"]].sum().reset_index()
entries_exits_count_sub_region =  entries_exits_count.groupby(['investor_id','sub_region'])[["entries_count", "exits_count"]].sum().reset_index()
entries_exits_count_country =  entries_exits_count.groupby(['investor_id','country_name'])[["entries_count", "exits_count"]].sum().reset_index()

In [545]:
entries_exits_count_sector

Unnamed: 0,investor_id,sector,entries_count,exits_count
0,1,consumer,8.00,6.00
1,1,financial,0.00,1.00
2,1,industrials,2.00,4.00
3,1,materialsEnergy,3.00,3.00
4,1,other,1.00,2.00
...,...,...,...,...
13202,21366,services,1.00,0.00
13203,21369,materialsEnergy,1.00,0.00
13204,21370,industrials,1.00,1.00
13205,21371,industrials,0.00,1.00


In [546]:
entries_exits_count_US = entries_exits_count_sub_region[entries_exits_count_sub_region['sub_region']=='US'].reset_index(drop = True)
entries_exits_count_EU = entries_exits_count_region[entries_exits_count_region['region']=='Europe'].reset_index(drop = True)

### One Off Cleaning

**<span style="color:blue"> TODO: Follow-up with Vincent Miko Basil on this to fix Platinum Equity**


**Platinum Equity Correct number of entries**

In [547]:
entries_exits_count.loc[entries_exits_count['investor_id'] == 13401, 'entries_count'] = 9

In [548]:
#replicator(PE_entries[['deal_id','buyer_names_str','buyer_linked_ids']])

**Reclassify sector of Mileaway**

In [549]:
assets.loc[assets['asset_id']==10982, 'sector'] = 'industrials'
assets.loc[assets['asset_id'] == 10982, 'subsector'] = 'logistics'

**Headquarters Change for a few investors**

**<span style="color:blue"> TODO: Samyam Checking**


In [550]:
investors.loc[investors['investor_id'] == 1134, 'operational_hq_country_code'] = 'GB'
investors.loc[investors['investor_id'] == 8650, 'operational_hq_country_code'] = 'SE'
investors.loc[investors['investor_id'] == 2364, 'operational_hq_country_code'] = 'SE'
investors.loc[investors['investor_id'] == 8922, 'operational_hq_country_code'] = 'LU'
investors.loc[investors['investor_id'] == 8137, 'operational_hq_country_code'] = 'PL'
investors.loc[investors['investor_id'] == 265, 'operational_hq_country_code'] = 'SE'

**Renaming investor names**

In [551]:
# Update investor_name where investor_id is 4736
investors.loc[investors['investor_id'] == 3269, 'investor_name'] = 'CDC France'

### Investors Explode

Steps: 

1. Explode investors data so you have investor level holding
2. Merge with assets data to get asset financials

In [552]:
## Explode investors table to individual shareholding

investors_explode = investors[['investor_id','investor_name','investor_hq_city','investor_country_name',
                               'investor_region','investor_sub_region','funds_raised_last_five_years_eur','asset_id']
                             ].explode('asset_id')

In [553]:
investors_explode['asset_id'] = pd.to_numeric(investors_explode['asset_id'], errors='coerce').astype('Int64') 
investors_explode['asset_id'] = investors_explode['asset_id'].fillna(-1).astype(int)

**Merging the Asset Dataset:**

We are merging the asset dataset to get underlying portfolio trends such as growth rates or EBITDA margins and add on count at the portfolio level.

Note: We are taking some automated datasets here but not all VC-backed assets fully in terms of count. The issue is for PEI 300 all assets should be well curated can see later in assets_all_automated.

At the moment automated assets are not high quality. And sometimes they could be PE Minority or Majority as well or private so they need to be curated.

**<span style="color:orange"> FOR LATER: INCLUDE AUTOMATED VC-BACKED ASSETS FOR ANALYSIS**


In [554]:
#CODE TO TEST FOR AUTOMATED ASSETS

df_summary = assets_automated[['asset_id','name','region','ownership']]
df_summary = df_summary.rename(columns={"name": "asset_automated_name","region": "asset_automated_region","ownership": "asset_automated_ownership"  })
investors_explode_test = pd.merge(investors_explode, df_summary, left_on = ['asset_id'], right_on = ['asset_id'], how='left')


In [555]:
#Export automated assets with profile type automated

#replicator(investors_explode_test[investors_explode_test['profile_type']=='automated'])

In [556]:
#Export automated assets with profile type is also automated but is not in the original assets table


#replicator(investors_explode_test[investors_explode_test['profile_type'].isna()])

In [557]:
#replicator(investors_explode_test[investors_explode_test['ownership'].isna()])

**<span style="color:blue"> TODO: ASK ASSETS TO BE CURATED THAT ARE NOT CURATED AND PEI 300**


In [558]:
assets_explode = assets.explode(['owner_ids','owner_shares','owner_names'])
assets_explode = assets_explode.rename(columns={"name": "asset_name"})
assets_explode['owner_ids'] = pd.to_numeric(assets_explode['owner_ids'], errors='coerce').astype('Int64') 
assets_explode['owner_ids'] = assets_explode['owner_ids'].fillna(-1).astype(int)

In [559]:
investors_explode = pd.merge(investors_explode, assets_explode, left_on = ['investor_id','asset_id'], right_on = ['owner_ids','asset_id'], how='left')

### Ranking Methodology Note 

The ranking methodology is based on EV at the asset level and we sum this bottoms up to get the investor EV. 

For Europe: Where we have EV that is reported by a deal that happened in the last 2 years so 2023 and 2024 we use this as a basis to calculate the Asset EV. All the other times we estimate the EV based on the below formula below


Estimated EV == Last reported EBITDA * ownership share * predicted EXIT multiple

Ownership share:  If there are 1 majority owned 10 minority owners we divide their ownership in a way that is representative. Owner shares tell if that asset by an investor is owned as majority or minority

Estimating EBITDA: Last Reported EBITDA coverage is good in Europe but at time there are missing values. Here is how we impute it: 

- Option 1: where revenue is available assume terminal subsector EBITDA margin and based on that impute the EBITDA

- Option 2: I think we should move this to revenue/FTE appraoch... Impute based on the investors other EBITDAs (atleast 3 values should be there for majority and minority seperate imputation and cap at 0.5m * FTE)

CHECK: If we are using 0.5m

For USA: Use last 6 years of deal reported EVs as the metric coverage for EBITDAs etc. is quite bad


**<span style="color:blue"> TODO: UPDATE OWNERSHIP SHARE TO GIVE 90% share for majority ownership with single holding as well**


### Fill ownership pct

In [560]:
# Count how many majority and minority owners own an investment to get ownership share and merge it back

investor_ownership_counts = investors_explode.groupby(['asset_id']).agg(
    majority_owner_count=('owner_shares', lambda x: (x == 'majority').sum()),
    minority_owner_count=('owner_shares', lambda x: (x == 'minority').sum())
)

investors_explode = pd.merge(investors_explode, investor_ownership_counts, left_on = ['asset_id'], right_on = ['asset_id'], how='left')

In [561]:
# Look up the percentage that you need to assign for majority and minority holdings

investors_explode = pd.merge(investors_explode, owner_pct_mapping, left_on = ['ownership','majority_owner_count','minority_owner_count'], right_on = ['ownership','majority_owner_count','minority_owner_count'], how='left')

#Assign values based on whether the asset is majority or minority owned

investors_explode['ownership_pct'] = np.where(investors_explode['owner_shares'] == 'majority', investors_explode['majority_pct'],investors_explode['minority_pct'])

#Fill values where because of some errors the vaues is not there

investors_explode['ownership_pct'] = investors_explode['ownership_pct'].fillna(
    0.9 / investors_explode['count_owners'])



**Merging actual deal share**

Where we have actual values using them instead of estimated values. Also using the last value where the data is available.

In [562]:
deal_pcts = deals.copy()

deal_pcts=deals.explode(['buyer_linked_ids','buyer_share_pcts','buyer_types'])[['linked_asset_id','announcement_date','buyer_linked_ids','buyer_share_pcts','buyer_types']]

deal_pcts = deal_pcts[deal_pcts['buyer_types']=='investor']


deal_pcts['buyer_share_pcts'] = pd.to_numeric(deal_pcts['buyer_share_pcts'], errors='coerce')

deal_pcts = deal_pcts[deal_pcts['buyer_share_pcts'].notna()]

deal_pcts = deal_pcts.sort_values(by=['announcement_date','buyer_share_pcts'], ascending= [False, False])


deal_pcts = (
    deal_pcts
    .groupby(['linked_asset_id', 'buyer_linked_ids'], dropna=False)
    .agg({
        'buyer_share_pcts': lambda x: [f"{v*100:.0f}%" for v in x]  # convert to %
    })
    .reset_index()
)



#deal_pcts.drop(columns=['announcement_date'], inplace=True)

deal_pcts['buyer_linked_ids'] = pd.to_numeric(deal_pcts['buyer_linked_ids'], errors='coerce').astype('Int64') 


deal_pcts

Unnamed: 0,linked_asset_id,buyer_linked_ids,buyer_share_pcts
0,6.00,52,[56%]
1,14.00,2193,[52%]
2,20.00,15,[41%]
3,22.00,15,[60%]
4,24.00,15,[50%]
...,...,...,...
10167,,974,"[100%, 85%]"
10168,,980,[100%]
10169,,9898,[47%]
10170,,992,[40%]


In [563]:
#Merge actual deal pcts with investors explode to cross check data

investors_explode = pd.merge(investors_explode,deal_pcts, how = 'left', left_on = ['investor_id','asset_id'], right_on = ['buyer_linked_ids','linked_asset_id'])
investors_explode.drop(columns=['linked_asset_id','buyer_linked_ids'], inplace=True)

### Family office cleaning

These are holding companies and we don't the biggest asset of the holding to be there — just secondary assets 

- Lego Group by KIRKBI 448 
- SHV Energy by SHV 694
- Tencent with Supercell
- Heartland with BESTSELLER
- Exclude second profile for idiCo

In [564]:
investors_explode = investors_explode[investors_explode['asset_id'] != 685042]
investors_explode = investors_explode[investors_explode['asset_id'] != 4071]
investors_explode = investors_explode[investors_explode['asset_id'] != 792131]
investors_explode = investors_explode[investors_explode['asset_id'] != 1460297]
investors_explode = investors_explode[investors_explode['asset_id'] != 3180]

### Creating Regional Investor Explode

In [594]:
# Include only PE ownership assets in Europe for the ranking

investors_mm_europe = investors_explode[
    (investors_explode['ownership'].isin(['regular','minority']) ) &
    (investors_explode['region']=='Europe')]

investors_mm_europe = investors_mm_europe.copy()
investors_mm_europe['majority'] = (investors_mm_europe['owner_shares'] == 'majority').astype(int)

In [595]:
# Include only PE ownership assets in US for the ranking

investors_mm_us = investors_explode[
    (investors_explode['ownership'].isin(['regular','minority']) ) &
    (investors_explode['country_code']=='US')]


investors_mm_us = investors_mm_us.copy() 
investors_mm_us['majority'] = (investors_mm_us['owner_shares'] == 'majority').astype(int)

In [596]:
# Include only PE ownership assets in NA for the ranking

investors_mm_na = investors_explode[
    (investors_explode['ownership'].isin(['regular','minority']) ) &
    (investors_explode['region']=='North America')]

investors_mm_na = investors_mm_na.copy() 
investors_mm_na['majority'] = (investors_mm_na['owner_shares'] == 'majority').astype(int)

In [597]:
investors_vc_europe = investors_explode[
    (investors_explode['ownership'].isin(['ventureCapital'])) &
    (investors_explode['region']=='Europe')]

**Getting Median Metrics**


In [598]:
investor_dfs_explode = [investors_mm_europe, investors_mm_us, investors_mm_na]

#Q: do we need this fill na terminology — it's a bit complex...

# Get median EBTIDA at an investor level

for df in investor_dfs_explode:
    df['median_ebitda_pos'] = df.groupby('investor_id')['ebitda_eur_pos'].transform('median')
    df['ebitda_eur_pos_fill_na']=df['ebitda_eur_pos']
    df['predicted_exit_multiple_fill_na']=df['predicted_exit_multiple']
    df['revenue_eur_fill_na']=df['revenue_eur']
    

In [599]:
# Maybe we don't need it by country as US not as much data anyway so can do a bit more global here for now
# Think about EV to Sales methodology and take maybe for software assets which is an increasing 

subsector_margin = assets_EU_PE.groupby('subsector')['ebitda_pct_revenue'].median()
subsector_multiple = assets_EU_PE.groupby('subsector')['predicted_exit_multiple'].mean()

In [600]:
# Testing if we can seperately take US subsector data and tht answer seems to be no as the margins are a lot lower in the US generally particularly in Software so the results might be understated or not enough data points so fine to take European subsector average for now

# Very small assets in ths US tend to have negative margin and that is expected maybe move towards a more EV to sales approach

df_summary = (
    assets
    .groupby(['ownership', 'subsector', 'region'])
    .agg(
        median_ebitda_pct_revenue=('ebitda_pct_revenue', 'median'),
        count=('ebitda_pct_revenue', 'count')
    )
    .reset_index()
)

replicator(df_summary)


HBox(children=(Button(description='Copy', style=ButtonStyle()), Dropdown(options=('Full DataFrame/Series', 'Ex…

Unnamed: 0,ownership,subsector,region,median_ebitda_pct_revenue,count
0,bankrupt,agriculture,Europe,-0.24,4
1,bankrupt,agriculture,North America,-18.70,1
2,bankrupt,automotive,Europe,4.73,8
3,bankrupt,automotive,North America,-10682.62,1
4,bankrupt,banking,Europe,-147.73,5
...,...,...,...,...,...
817,ventureCapital,technology,LatAm,,0
818,ventureCapital,technology,North America,-170.96,6
819,ventureCapital,telecom,Asia,,0
820,ventureCapital,telecom,Europe,-46.30,9


In [601]:
# Testing data for accurarcy

replicator(assets[assets['ownership']=='listed'][['asset_id','name','ebitda_pct_revenue','region','subsector']])

HBox(children=(Button(description='Copy', style=ButtonStyle()), Dropdown(options=('Full DataFrame/Series', 'Ex…

Unnamed: 0,asset_id,name,ebitda_pct_revenue,region,subsector
1,4263690,Sheffield Resources Ltd,,Australia,energy
2,567107,KANADEN CORPORATION,4.06,Asia,construction
3,488144,KEN ENTERPRISES PRIVATE LIMITED,4.87,Asia,manufacturing
4,4275541,Berli Jucker Public Company Limited (BJC),5.73,Asia,food
5,4273880,Wan Hwa Enterprise Co. Ltd.,60.09,Asia,
...,...,...,...,...,...
94998,4307614,Canvest Environmental Protection Group Co. Ltd.,56.86,Asia,
95002,4305350,"Guizhou Colorful New Media Co., Ltd.",35.26,Asia,
95003,532790,Energi Mega Persada,55.74,Asia,energy
95004,4277238,"Ministop Co., Ltd.",0.53,Asia,


**<span style="color:blue"> TODO: NEED TO MAKE THIS CODE A BIT MORE MODULAR FOR THE REGIONS SUCH AS US RANKING**



**<span style="color:blue"> TODO: THINK ABOUT EV TO SALES FOR SOFTWARE ASSETS**

-  Think about EV to Sales methodology and take maybe for software assets which is an increasing 



**<span style="color:blue"> TODO: USE AI EBITDA ESTIMATES AHEAD OF THIS FILLING BY TERMINAL MARGIN**


### Fill EBITDA for Europe

Steps to fill EBITDA

1. Fill AI Generated EBITDA
2. Fill revenue (incl.) AI Generated (higher priority) and estimated based on Revenue/FTE * Subsector Margin
3. Fill based on investors other holdings



**Check fill rates for AI generated values in Europe and US**

In [602]:
#Checking fill rates for revenue and ebitda normal and ai generated in Europe

print(investors_mm_europe[['revenue_with_ai_generated']].notna().value_counts(normalize = True))
print("\n")
print(investors_mm_europe[['ebitda_with_ai_generated']].notna().value_counts(normalize = True))
print("\n")
print(investors_mm_europe[['revenue_with_ai_generated','ebitda_with_ai_generated']].notna().value_counts(normalize = True))


revenue_with_ai_generated
True                        0.93
False                       0.07
Name: proportion, dtype: float64


ebitda_with_ai_generated
True                       0.82
False                      0.18
Name: proportion, dtype: float64


revenue_with_ai_generated  ebitda_with_ai_generated
True                       True                       0.79
                           False                      0.14
False                      False                      0.04
                           True                       0.03
Name: proportion, dtype: float64


In [603]:
#Checking fill rates for revenue and ebitda normal and ai generated in US

print(investors_mm_us[['revenue_with_ai_generated']].notna().value_counts(normalize = True))
print("\n")
print(investors_mm_us[['ebitda_with_ai_generated']].notna().value_counts(normalize = True))
print("\n")
print(investors_mm_us[['revenue_with_ai_generated','ebitda_with_ai_generated']].notna().value_counts(normalize = True))


revenue_with_ai_generated
True                        0.96
False                       0.04
Name: proportion, dtype: float64


ebitda_with_ai_generated
True                       0.79
False                      0.21
Name: proportion, dtype: float64


revenue_with_ai_generated  ebitda_with_ai_generated
True                       True                       0.77
                           False                      0.19
False                      False                      0.02
                           True                       0.02
Name: proportion, dtype: float64


**Fill NA values based on AI EBITDAs and Revenue values**

In [604]:
# STEPS TO FILL EBITDAs where value is not available

# 1st — Fill using AI EBITDA Estimates

# 2nd — Fill using AI Revenue * Subsector Margin

# 3rd — Fill using Est. Revenue (Revenue/FTE approach) * Subsector Margin


mask = (
    (investors_mm_europe['ebitda_eur'].isna()) &
    (investors_mm_europe['ebitda_with_ai_generated_eur'].notna()) & (investors_mm_europe['ebitda_with_ai_generated_eur']>0)
)

investors_mm_europe.loc[mask,'ebitda_eur_pos_fill_na'] = investors_mm_europe['ebitda_with_ai_generated_eur']


# First fill by AI revenue where the data is available, if not then use revenue to FTE approach

# Fill revenue eur based on AI estimates

mask = (
    (investors_mm_europe['revenue_eur'].isna()) &
    (investors_mm_europe['revenue_with_ai_generated_eur'].notna())
)

investors_mm_europe.loc[mask,'revenue_eur_fill_na'] = investors_mm_europe['revenue_with_ai_generated_eur']

# Fill revenue eur based on calculated FTE estimates based on FTE Range

mask = (
    (investors_mm_europe['revenue_eur_fill_na'].isna()) &
    (investors_mm_europe['estimated_revenues_calc_eur'].notna())
)

investors_mm_europe.loc[mask,'revenue_eur_fill_na'] = investors_mm_europe['estimated_revenues_calc_eur']

# Estimate EBITDA based on terminal margin where revenue is present

investors_mm_europe['subsector_margin'] = investors_mm_europe['subsector'].map(subsector_margin)/100

mask = (
    (investors_mm_europe['ebitda_eur_pos_fill_na'].isna()) &
    (investors_mm_europe['revenue_eur_fill_na'].notna())
)

investors_mm_europe.loc[mask,'ebitda_eur_pos_fill_na'] = investors_mm_europe['revenue_eur_fill_na'] * investors_mm_europe['subsector_margin']



In [605]:
# Fill Multiple where it's missing

investors_mm_europe['subsector_multiple'] = investors_mm_europe['subsector'].map(subsector_multiple)

investors_mm_europe.loc[investors_mm_europe['predicted_exit_multiple'].isna(),'predicted_exit_multiple_fill_na'] = investors_mm_europe['subsector_multiple']


In [606]:
# Now where we don't have both revenue and EBITDA fill based on the 10th percentile of assets other holding (these assets tend to be small anyway because they don't have any reporting)

def fill_missing_ebitda(df):
    percentile_10th_ebitda = df.groupby(['investor_id'])['ebitda_eur_pos_fill_na'].transform(lambda x: x.quantile(0.1))
    df['ebitda_eur_pos_fill_na'] = df['ebitda_eur_pos_fill_na'].fillna(percentile_10th_ebitda)
    return df

investors_mm_europe = fill_missing_ebitda(investors_mm_europe)


In [607]:

# Exit Multiple Missing Fill
def fill_missing_multiple(df):

    mean_multiple = df.groupby(['investor_id'])['predicted_exit_multiple_fill_na'].transform('mean')
    df['predicted_exit_multiple_fill_na'] = df['predicted_exit_multiple_fill_na'].fillna(mean_multiple)
    return df

investors_mm_europe = fill_missing_multiple(investors_mm_europe)

### Last Reported Deal EV

In [608]:
#Estimating EV based on last reported deal value if it was reported in 2023 or 2024 (i.e. recently)
#merge last deal data


# Search for last reported deal values
deals_ev = deals.sort_values(by='announcement_date',ascending = False).drop_duplicates(subset='linked_asset_id', keep='first')

deals_ev = deals_ev.rename(columns={
    'ev_eur': 'ev_eur_last_deal',
    'ev_year': 'ev_year_last_deal',
    'equity_eur': 'equity_eur_last_deal',
    'equity_year': 'equity_year_last_deal',
    'ebitda_eur': 'ebitda_eur_last_deal',
    'ebitda_year': 'ebitda_year_last_deal',
    'revenue_eur': 'revenue_eur_last_deal',
    'revenue_year': 'revenue_year_last_deal',
    'ev_ebitda_multiple':'ev_ebitda_multiple_last_deal' })


# Add extra historical deal columns before the last reported date


deals_ev_array = (
    deals.sort_values(by="announcement_date", ascending = False)
    .groupby("linked_asset_id")
    .agg(
        deals_ev_eur_array=("ev_eur", lambda x: [round(v, 1) for v in x]),
        deals_ev_eur_max=("ev_eur", lambda x: round(x.max(), 1)),
        deals_ev_ebitda_max=("ev_ebitda_multiple", lambda x: round(x.max(), 1)),
        deals_ev_years_array=("ev_year", lambda x: [round(v, 0) for v in x]),
        deals_equity_eur_array=("equity_eur", lambda x: [round(v, 1) for v in x]),
        deals_equity_eur_max=("equity_eur", lambda x: round(x.max(), 1)))
    .reset_index()
)


# Add the max date separately

# For EV
ev_max_idx = (
    deals.dropna(subset=['ev_eur'])
         .groupby('linked_asset_id')['ev_eur']
         .idxmax()
)

ev_max_dates = deals.loc[ev_max_idx, ['linked_asset_id', 'announcement_date']]
ev_max_dates = ev_max_dates.rename(columns={'announcement_date': 'deals_ev_eur_max_date'})

# For Equity
equity_max_idx = (
    deals.dropna(subset=['equity_eur'])
         .groupby('linked_asset_id')['equity_eur']
         .idxmax()
)

equity_max_dates = deals.loc[equity_max_idx, ['linked_asset_id', 'announcement_date']]
equity_max_dates = equity_max_dates.rename(columns={'announcement_date': 'deals_equity_eur_max_date'})

# Merge back into deals_ev_array
deals_ev_array = (
    deals_ev_array
    .merge(ev_max_dates, on='linked_asset_id', how='left')
    .merge(equity_max_dates, on='linked_asset_id', how='left')
)



deals_ev = pd.merge(deals_ev,deals_ev_array, how = 'left', on ='linked_asset_id')

deals_ev = deals_ev[['linked_asset_id','ev_eur_last_deal','ev_year_last_deal','ev_ebitda_multiple_last_deal','deals_ev_ebitda_max','equity_eur_last_deal','equity_year_last_deal','deals_ev_eur_array','deals_ev_eur_max','deals_ev_years_array','deals_equity_eur_array','deals_equity_eur_max','ebitda_eur_last_deal','ebitda_year_last_deal','revenue_eur_last_deal','revenue_year_last_deal','deals_ev_eur_max_date','deals_equity_eur_max_date']]


#Where EV not available use equity value
deals_ev['ev_eur_last_deal'] = deals_ev['ev_eur_last_deal'].fillna(deals_ev['equity_eur_last_deal'])
deals_ev['ev_year_last_deal'] = deals_ev['ev_year_last_deal'].fillna(deals_ev['equity_year_last_deal'])

deals_ev['deals_ev_eur_max'] = deals_ev['deals_ev_eur_max'].fillna(deals_ev['deals_equity_eur_max'])
deals_ev['deals_ev_eur_max_date'] = deals_ev['deals_ev_eur_max_date'].fillna(deals_ev['deals_equity_eur_max_date'])
deals_ev['deals_ev_eur_max_date'] = (deals_ev['deals_ev_eur_max_date'].astype(str).str[:4].replace('NaT', '').replace('nan', ''))



investors_mm_europe = pd.merge(investors_mm_europe,deals_ev, how = 'left' ,  left_on='asset_id', right_on='linked_asset_id')




### Estimate EV

In [610]:
investors_mm_europe['years_since_last_deal'] = 2024-investors_mm_europe['ev_year_last_deal']

# FTE Multiplier
investors_mm_europe.loc[investors_mm_europe['years_since_last_deal'] < 2, 'fte_multiplier'] = 1
investors_mm_europe.loc[investors_mm_europe['years_since_last_deal'] == 2, 'fte_multiplier'] = (1+ investors_mm_europe['fte_growth_twoyears']/100) ** investors_mm_europe['years_since_last_deal']
investors_mm_europe.loc[(investors_mm_europe['years_since_last_deal'] >= 3) & (investors_mm_europe['years_since_last_deal'] <= 6), 'fte_multiplier'] = (1+ investors_mm_europe['fte_growth_threeyears']/100) ** investors_mm_europe['years_since_last_deal']
investors_mm_europe['fte_multiplier'] = investors_mm_europe['fte_multiplier'].clip(upper=2)


# Revenue Growth Multiplier
investors_mm_europe.loc[investors_mm_europe['years_since_last_deal'] < 2, 'revenue_multiplier'] = 1
investors_mm_europe.loc[investors_mm_europe['years_since_last_deal'] == 2, 'revenue_multiplier'] = (1+ investors_mm_europe['revenue_growth_twoyears']/100) ** investors_mm_europe['years_since_last_deal']
investors_mm_europe.loc[(investors_mm_europe['years_since_last_deal'] >= 3) & (investors_mm_europe['years_since_last_deal'] <= 6), 'revenue_multiplier'] = (1+ investors_mm_europe['revenue_growth_threeyears']/100) ** investors_mm_europe['years_since_last_deal']
investors_mm_europe['revenue_multiplier'] = investors_mm_europe['revenue_multiplier'].clip(upper=2)

# Use revenue multiplier first if not there use fte multiplier
investors_mm_europe['multiplier'] = investors_mm_europe['revenue_multiplier']
investors_mm_europe['multiplier'] = investors_mm_europe['multiplier'].fillna(investors_mm_europe['fte_multiplier'])


In [611]:
# Estimate EV managed post 2023 based on deal value
investors_mm_europe.loc[investors_mm_europe['ev_year_last_deal'] >= 2023, 'estd_ev_managed'] = investors_mm_europe['ownership_pct'] * investors_mm_europe['ev_eur_last_deal'] * investors_mm_europe['multiplier']

# Compare to deal value reported whenever 
#investors_mm_europe['verify_historical_estd_ev_managed'] = (investors_mm_europe['ownership_pct'] * investors_mm_europe['ev_eur_last_deal'])
investors_mm_europe['verify_historical_estd_ev_managed_based_on_max_deal_ev'] = (investors_mm_europe['ownership_pct'] * investors_mm_europe['deals_ev_eur_max'])


In [612]:
#Estimating rest of EV based on formula (where we don't fill based on last reported EVs)


investors_mm_europe['estd_ev_managed'] = investors_mm_europe['estd_ev_managed'].fillna(
    investors_mm_europe['ebitda_eur_pos_fill_na'] * 
    investors_mm_europe['predicted_exit_multiple_fill_na'] * 
    investors_mm_europe['ownership_pct']
)

# Delta with last reported historical values

investors_mm_europe['ratio_to_deal_max'] = (investors_mm_europe['estd_ev_managed']/investors_mm_europe['verify_historical_estd_ev_managed_based_on_max_deal_ev']) 

### Check for overrides

In [616]:
columns_filter = [
'ratio_to_median_ev',
'percentage_of_ev',
'count',
'asset_id',
'asset_name',
'investor_id',
'investor_name',
'estd_ev_managed',
'verify_historical_estd_ev_managed_based_on_max_deal_ev',
'ratio_to_deal_max',
'ownership_pct',
'buyer_share_pcts',
'owner_shares',
'deals_ev_eur_max',
'ev_eur_last_deal',
'deals_ev_eur_max_date',
'multiplier',
'ev_year_last_deal',
'last_deal_year',
'predicted_exit_multiple_fill_na',
'ev_ebitda_multiple_last_deal',
'deals_ev_ebitda_max',
'equity_eur_last_deal',
'equity_year_last_deal',
'ebitda_eur_last_deal',
'ebitda_year_last_deal',
'revenue_eur_last_deal',
'revenue_year_last_deal',
'sector',
'subsector',
'ownership',
'sub_region',
'predicted_exit_multiple',
'revenue_eur',
'ebitda_eur',
'net_debt_eur',
'total_ownership_pct',
'median_ebitda_pos',
'ebitda_eur_pos_fill_na',
'revenue_eur_fill_na',
'fte_multiplier',
'revenue_multiplier',
'investor_country_name',
'investor_sub_region',
'above_10_pct_flag',
'above_25_pct_flag',
'above_50_pct_flag',
'total_estd_ev_managed',
'median_ev'
]

#### Ratio to Median EV

In [617]:
#Check for OUTLIERS so assets where the investment EV is 10x the median (only check manually check ownership shares over 1bn EV)

investor_medians = (
    investors_mm_europe.groupby(['investor_id'])['estd_ev_managed']
    .agg(median_ev='median', count='count')
    .reset_index()
)


df_summary = pd.merge(investors_mm_europe, investor_medians, left_on = 'investor_id', right_on = 'investor_id', how='left')
df_summary['ratio_to_median_ev'] = df_summary['estd_ev_managed']/df_summary['median_ev'] 
df_summary['ev_eur_last_deal_share'] = df_summary['ev_eur_last_deal'] * df_summary['median_ev'] 




#### Percentage of EV

In [618]:
# Get percentage of EV each asset represents

df_summary['total_estd_ev_managed'] = df_summary.groupby('investor_id')['estd_ev_managed'].transform('sum')
#df_summary['rank'] = df_summary.groupby('investor_id')['estd_ev_managed'].rank(method='first', ascending=False)
df_summary['percentage_of_ev'] = (df_summary['estd_ev_managed'] / df_summary['total_estd_ev_managed']) * 100
df_summary['owner_shares'] = df_summary['owner_shares'].apply(lambda x: 'MINORITY' if x == 'minority' else x)

df_summary['above_10_pct_flag'] = df_summary['percentage_of_ev'] > 10
df_summary['above_25_pct_flag'] = df_summary['percentage_of_ev'] > 25
df_summary['above_50_pct_flag'] = df_summary['percentage_of_ev'] > 50



df_summary['above_10_pct_flag'] = df_summary['above_10_pct_flag'].astype(int)
df_summary['above_25_pct_flag'] = df_summary['above_25_pct_flag'].astype(int)
df_summary['above_50_pct_flag'] = df_summary['above_50_pct_flag'].astype(int)

df_summary = df_summary[columns_filter]

df_summary


Unnamed: 0,ratio_to_median_ev,percentage_of_ev,count,asset_id,asset_name,investor_id,investor_name,estd_ev_managed,verify_historical_estd_ev_managed_based_on_max_deal_ev,ratio_to_deal_max,ownership_pct,buyer_share_pcts,owner_shares,deals_ev_eur_max,ev_eur_last_deal,deals_ev_eur_max_date,multiplier,ev_year_last_deal,last_deal_year,predicted_exit_multiple_fill_na,ev_ebitda_multiple_last_deal,deals_ev_ebitda_max,equity_eur_last_deal,equity_year_last_deal,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,sector,subsector,ownership,sub_region,predicted_exit_multiple,revenue_eur,ebitda_eur,net_debt_eur,total_ownership_pct,median_ebitda_pos,ebitda_eur_pos_fill_na,revenue_eur_fill_na,fte_multiplier,revenue_multiplier,investor_country_name,investor_sub_region,above_10_pct_flag,above_25_pct_flag,above_50_pct_flag,total_estd_ev_managed,median_ev
0,0.31,0.17,32,13325,Konges Sløjd,1,3i,56.44,,,0.60,,majority,,,,,,2022.00,9.00,,,,,10.78,2021.00,49.84,2022.00,consumer,consumerGoods,regular,Nordics,9.00,49.84,10.45,-6.41,0.90,37.01,10.45,49.84,,,United Kingdom,UK&I,0,0,0,33020.85,184.60
1,0.06,0.04,32,13318,Digital Barriers,1,3i,11.65,,,0.60,,majority,,,,,,2022.00,13.79,,,,,2.50,,30.67,,tmt,software,regular,UK&I,,9.04,-9.06,54.38,0.90,37.01,1.41,9.04,,,United Kingdom,UK&I,0,0,0,33020.85,184.60
2,1.04,0.58,32,10595,Mepal,1,3i,192.46,,,0.90,,majority,,,,,,2021.00,15.00,,,,,5.89,2021.00,65.00,,consumer,consumerGoods,regular,Benelux,15.00,83.59,14.26,-11.04,0.90,37.01,14.26,83.59,,,United Kingdom,UK&I,0,0,0,33020.85,184.60
3,118.96,66.50,32,200,Action,1,3i,21960.00,6150.00,3.57,0.60,[80%],majority,10250.00,10250.00,2019,2.00,2019.00,2019.00,15.00,18.95,18.90,,,541.00,,5114.00,,consumer,retail,regular,Benelux,15.00,13781.00,2440.00,1641.00,0.90,37.01,2440.00,13781.00,2.00,2.00,United Kingdom,UK&I,1,1,1,33020.85,184.60
4,8.76,4.90,32,198,Royal Sanders,1,3i,1616.72,,,0.90,,majority,,,,,,2018.00,15.00,,,,,25.21,2017.00,127.00,,consumer,consumerGoods,regular,Benelux,15.00,517.07,119.76,370.45,0.90,37.01,119.76,517.07,,,United Kingdom,UK&I,0,0,0,33020.85,184.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15858,1.00,100.00,1,2855581,Dourogás GNV,20997,Mello RDC,9.00,9.00,1.00,0.30,,MINORITY,30.00,30.00,2025,1.00,2025.00,2025.00,11.00,10.30,10.30,,,2.90,,37.10,,materialsEnergy,energy,regular,Iberia,11.00,37.13,2.95,,0.90,2.95,2.95,37.13,1.00,1.00,Portugal,Iberia,1,1,1,9.00,9.00
15859,1.00,100.00,1,1371317,FeelEverywhere,21063,Menlo Capital,6.64,,,0.90,[60%],majority,,,,,,2025.00,5.00,,,,,1.48,,5.56,,consumer,leisure,regular,Iberia,5.00,5.56,1.48,3.46,0.90,1.48,1.48,5.56,,,Portugal,Iberia,1,1,1,6.64,6.64
15860,1.00,100.00,1,30773,Questback,21085,Veld Capital,26.41,,,0.90,[75%],majority,,,,,,2023.00,12.00,,,,,2.33,2023.00,11.04,2023.00,tmt,software,regular,Nordics,12.00,10.99,2.44,-2.56,0.90,2.44,2.44,10.99,,,United Kingdom,UK&I,1,1,1,26.41,26.41
15861,1.00,100.00,1,653685,Fabergé,21283,SMG Capital,39.41,111.96,0.35,0.90,,majority,124.40,43.79,2012,1.00,2025.00,2025.00,10.15,,,,,,,,,consumer,consumerGoods,regular,UK&I,,18.43,-2.79,-1.50,0.90,,1.80,18.43,1.00,1.00,United States of America,US,1,1,1,39.41,39.41


#### Largest 250 Holdings Check

In [619]:

df_summary = df_summary.sort_values(by='estd_ev_managed', ascending = False).reset_index(drop = True)

replicator(df_summary)


HBox(children=(Button(description='Copy', style=ButtonStyle()), Dropdown(options=('Full DataFrame/Series', 'Ex…

Unnamed: 0,ratio_to_median_ev,percentage_of_ev,count,asset_id,asset_name,investor_id,investor_name,estd_ev_managed,verify_historical_estd_ev_managed_based_on_max_deal_ev,ratio_to_deal_max,ownership_pct,buyer_share_pcts,owner_shares,deals_ev_eur_max,ev_eur_last_deal,deals_ev_eur_max_date,multiplier,ev_year_last_deal,last_deal_year,predicted_exit_multiple_fill_na,ev_ebitda_multiple_last_deal,deals_ev_ebitda_max,equity_eur_last_deal,equity_year_last_deal,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,sector,subsector,ownership,sub_region,predicted_exit_multiple,revenue_eur,ebitda_eur,net_debt_eur,total_ownership_pct,median_ebitda_pos,ebitda_eur_pos_fill_na,revenue_eur_fill_na,fte_multiplier,revenue_multiplier,investor_country_name,investor_sub_region,above_10_pct_flag,above_25_pct_flag,above_50_pct_flag,total_estd_ev_managed,median_ev
0,3599.56,66.54,337,1081931,CMA CGM,659,Bpifrance,39867.88,656.91,60.69,0.30,[6%],MINORITY,2189.70,,2010,,,2021.00,9.88,,,,,5429.33,,27542.20,,services,logistics,minority,France,,55475.80,13447.50,15907.70,0.30,10.00,13447.50,55475.80,,,France,France,1,1,1,59918.36,11.08
1,47.44,62.63,23,4638501,The Citco Group,135,GIC,38670.04,508.02,76.12,0.30,,MINORITY,1693.40,,2011,,,2025.00,13.22,,,,,,,,,financial,assetManagement,minority,UK&I,,,,,0.30,357.81,9752.85,17110.26,,,Singapore,Asia,1,1,1,61740.12,815.10
2,36.11,79.89,11,2287522,Aleatica Group,647,IFM Investors,32495.85,1942.20,16.73,0.90,[100%],majority,2158.00,2158.00,2018,2.00,2018.00,2018.00,15.00,,,2158.00,2018.00,916.60,2018.00,1223.93,2018.00,other,infrastructure,regular,Iberia,15.00,3220.19,2407.10,8402.63,0.90,379.32,2407.10,3220.19,1.47,2.00,Australia,Australia,1,1,1,40677.86,900.00
3,35.41,97.23,3,882553,NEO Energy,2639,HitecVision,26966.75,,,0.90,,majority,,,,,,2019.00,15.00,,,,,139.66,2019.00,199.78,2019.00,materialsEnergy,energy,regular,UK&I,15.00,2579.37,1997.54,556.17,0.90,999.78,1997.54,2579.37,,,Norway,Nordics,1,1,1,27733.85,761.65
4,118.96,66.50,32,200,Action,1,3i,21960.00,6150.00,3.57,0.60,[80%],majority,10250.00,10250.00,2019,2.00,2019.00,2019.00,15.00,18.95,18.90,,,541.00,,5114.00,,consumer,retail,regular,Benelux,15.00,13781.00,2440.00,1641.00,0.90,37.01,2440.00,13781.00,2.00,2.00,United Kingdom,UK&I,1,1,1,33020.85,184.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15858,,,1,12242,Imagination Technologies,2721,Canyon Bridge,0.00,572.49,0.00,0.90,[100%],majority,636.10,636.06,2017,,2017.00,2017.00,12.59,,,636.06,2017.00,,,138.78,,tmt,technology,regular,UK&I,,,0.00,-0.00,0.90,,0.00,0.00,,,United States of America,US,0,0,0,0.00,0.00
15859,0.00,0.00,23,4308158,CareHub,1768,Quadrivio Group,0.00,,,0.90,[95%],majority,,,,,,2024.00,10.97,,,,,,,,,scienceHealth,healthcareServices,regular,Italy,,,,,0.90,4.48,0.00,0.00,,,Italy,Italy,0,0,0,1047.94,28.07
15860,,,0,922935,Magency,3598,500 Global,,,,0.23,,MINORITY,,,,,,,12.38,,,,,,,,,services,education,minority,France,,,,,0.45,,,,,,United States of America,US,0,0,0,0.00,
15861,,,0,1461625,Bio Vitos,7734,Craven House Capital,,,,0.30,[30%],MINORITY,,,,,,2020.00,11.19,,,,,,,,,scienceHealth,pharmaceuticals,minority,UK&I,,,,,0.30,,,,,,United Kingdom,UK&I,0,0,0,0.00,


#### Case Studies

In [273]:
investors_mm_europe[investors_mm_europe['asset_id']==985249]

Unnamed: 0,investor_id,investor_name,investor_hq_city,investor_country_name,investor_region,investor_sub_region,funds_raised_last_five_years_eur,asset_id,asset_name,sector,subsector,ownership,add_on_deal_count_l3y,add_on_deal_count_l5y,business_activity,capex,capex_eur,capex_year,capital,capital_eur,capital_year,cash,cash_conversion_cycle,cash_conversion_cycle_year,cash_eur,cash_year,ceo_age,ceo_tenure,competitor_asset_ids,consolidated_net_income,consolidated_net_income_eur,consolidated_net_income_year,currency,currency_to_eur,customer_base,debt,debt_eur,debt_year,earnings_per_share,earnings_per_share_eur,earnings_per_share_year,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_is_ai_generated,ebitda_minus_capex,ebitda_minus_capex_eur,ebitda_minus_capex_year,ebitda_pct_revenue,ebitda_with_ai_generated,ebitda_with_ai_generated_eur,ebitda_year,enterprise_value,enterprise_value_eur,esg,esg_outperformer,financials_at,free_cash_flow,free_cash_flow_eur,free_cash_flow_year,fte,fte_range,fte_year,gain_pro_url,gross_margin,gross_margin_eur,gross_margin_year,headquarters_city,headquarters_country_code,headquarters_region,inventory,inventory_eur,inventory_year,last_deal_month,last_deal_year,latest_deal_post_money_valuation_eur,latest_deal_post_money_valuation_year,latest_deal_pre_money_valuation_eur,latest_deal_pre_money_valuation_year,latest_deal_round_size_eur,latest_deal_round_type,latest_deal_round_year,latest_industry_rating_environmental,latest_industry_rating_overall,latest_industry_rating_social,latest_share_price,latest_share_price_date,latest_share_price_eur,linkedin_external_id,managers_linked_in_urls,market_capitalization,market_capitalization_eur,net_debt,net_debt_ebitda_ratio,net_debt_ebitda_ratio_year,net_debt_eur,net_debt_year,next_year_predicted_ev,...,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,revenue_range,ebitda_range,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,majority_owner_count,minority_owner_count,total_ownership_pct,majority_pct,minority_pct,ownership_pct,buyer_share_pcts,majority,median_ebitda_pos,ebitda_eur_pos_fill_na,predicted_exit_multiple_fill_na,revenue_eur_fill_na,subsector_margin,subsector_multiple,linked_asset_id,ev_eur_last_deal,ev_year_last_deal,ev_ebitda_multiple,equity_eur_last_deal,equity_year_last_deal,deals_ev_eur_array,deals_ev_eur_max,deals_ev_years_array,deals_equity_eur_array,deals_equity_eur_max,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,estd_ev_managed,verify_historical_estd_ev_managed,delta_with_verify,years_since_last_deal,fte_multiplier,revenue_multiplier
1682,71,Triton,London,United Kingdom,Europe,UK&I,3445.00,985249,Fraikin Group,industrials,automotive,minority,0.0,1.0,"[\n ""services""\n]",579.86,579.86,2023.0,-103.46,-103.46,2023.0,101.32,,,101.32,2023.0,56.0,3.0,"[\n 1149704,\n 10745,\n 1224236,\n 1172820...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",244.14,244.14,2023.0,,,,63.6,63.6,2023.0,496.04,496.04,False,-83.83,-83.83,2023.0,49.79,496.04,496.04,2023.0,,,Fraikin claims to be classified among the top ...,False,2025-08-29 00:15:15.080,,,,3200.0,"1,001-5,000",2024.0,https://app.gain.pro/asset/985249/fraikin-group,639.66,639.66,2023.0,Colombes,FR,Île-de-France,7.72,7.72,2023.0,11.0,2017.0,,,,,,,,4.0,3.0,2.0,,,,25065075,"[\n ""https://www.linkedin.com/in/yvespetin"",\...",,,142.82,0.29,2023.0,142.82,2023.0,5470.0,...,0.02,,,,,0.14,,0.17,,0.02,,0.04,0.02,0.02,-0.03,0.03,,,,,,,,0.42,0.38,0.4,0.5,,,3200.0,996.19,996.19,496.04,3.2,1.58,7.76,,,27.68,16.99,13.83,,,64.15,101.31,38.41,,,6.99,1.44,0.12,1.23,1.81,1.71,1.56,6.19,,,3_large_250_1000m_eur,4_mega_large_gt_200m_eur,"(0, 2]",1.0,5.0,81.0,496.04,0,5,0.45,0.0,0.09,0.09,,0,68.44,496.04,8.18,996.19,0.09,8.18,985249.0,,,,,,"[nan, 1350.0, nan, nan, nan]",1350.0,"[nan, 2006.0, nan, nan, nan]","[nan, nan, nan, 550.0, nan]",550.0,,,683.0,,365.02,,,,,
9595,1301,Värde Partners,Minneapolis,United States of America,North America,US,-,985249,Fraikin Group,industrials,automotive,minority,0.0,1.0,"[\n ""services""\n]",579.86,579.86,2023.0,-103.46,-103.46,2023.0,101.32,,,101.32,2023.0,56.0,3.0,"[\n 1149704,\n 10745,\n 1224236,\n 1172820...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",244.14,244.14,2023.0,,,,63.6,63.6,2023.0,496.04,496.04,False,-83.83,-83.83,2023.0,49.79,496.04,496.04,2023.0,,,Fraikin claims to be classified among the top ...,False,2025-08-29 00:15:15.080,,,,3200.0,"1,001-5,000",2024.0,https://app.gain.pro/asset/985249/fraikin-group,639.66,639.66,2023.0,Colombes,FR,Île-de-France,7.72,7.72,2023.0,11.0,2017.0,,,,,,,,4.0,3.0,2.0,,,,25065075,"[\n ""https://www.linkedin.com/in/yvespetin"",\...",,,142.82,0.29,2023.0,142.82,2023.0,5470.0,...,0.02,,,,,0.14,,0.17,,0.02,,0.04,0.02,0.02,-0.03,0.03,,,,,,,,0.42,0.38,0.4,0.5,,,3200.0,996.19,996.19,496.04,3.2,1.58,7.76,,,27.68,16.99,13.83,,,64.15,101.31,38.41,,,6.99,1.44,0.12,1.23,1.81,1.71,1.56,6.19,,,3_large_250_1000m_eur,4_mega_large_gt_200m_eur,"(0, 2]",1.0,5.0,81.0,496.04,0,5,0.45,0.0,0.09,0.09,,0,302.41,496.04,8.18,996.19,0.09,8.18,985249.0,,,,,,"[nan, 1350.0, nan, nan, nan]",1350.0,"[nan, 2006.0, nan, nan, nan]","[nan, nan, nan, 550.0, nan]",550.0,,,683.0,,365.02,,,,,
10211,1574,Barings,Charlotte,United States of America,North America,US,744.50,985249,Fraikin Group,industrials,automotive,minority,0.0,1.0,"[\n ""services""\n]",579.86,579.86,2023.0,-103.46,-103.46,2023.0,101.32,,,101.32,2023.0,56.0,3.0,"[\n 1149704,\n 10745,\n 1224236,\n 1172820...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",244.14,244.14,2023.0,,,,63.6,63.6,2023.0,496.04,496.04,False,-83.83,-83.83,2023.0,49.79,496.04,496.04,2023.0,,,Fraikin claims to be classified among the top ...,False,2025-08-29 00:15:15.080,,,,3200.0,"1,001-5,000",2024.0,https://app.gain.pro/asset/985249/fraikin-group,639.66,639.66,2023.0,Colombes,FR,Île-de-France,7.72,7.72,2023.0,11.0,2017.0,,,,,,,,4.0,3.0,2.0,,,,25065075,"[\n ""https://www.linkedin.com/in/yvespetin"",\...",,,142.82,0.29,2023.0,142.82,2023.0,5470.0,...,0.02,,,,,0.14,,0.17,,0.02,,0.04,0.02,0.02,-0.03,0.03,,,,,,,,0.42,0.38,0.4,0.5,,,3200.0,996.19,996.19,496.04,3.2,1.58,7.76,,,27.68,16.99,13.83,,,64.15,101.31,38.41,,,6.99,1.44,0.12,1.23,1.81,1.71,1.56,6.19,,,3_large_250_1000m_eur,4_mega_large_gt_200m_eur,"(0, 2]",1.0,5.0,81.0,496.04,0,5,0.45,0.0,0.09,0.09,,0,53.65,496.04,8.18,996.19,0.09,8.18,985249.0,,,,,,"[nan, 1350.0, nan, nan, nan]",1350.0,"[nan, 2006.0, nan, nan, nan]","[nan, nan, nan, 550.0, nan]",550.0,,,683.0,,365.02,,,,,
14268,5447,Franklin Templeton,San Mateo,United States of America,North America,US,2900.00,985249,Fraikin Group,industrials,automotive,minority,0.0,1.0,"[\n ""services""\n]",579.86,579.86,2023.0,-103.46,-103.46,2023.0,101.32,,,101.32,2023.0,56.0,3.0,"[\n 1149704,\n 10745,\n 1224236,\n 1172820...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",244.14,244.14,2023.0,,,,63.6,63.6,2023.0,496.04,496.04,False,-83.83,-83.83,2023.0,49.79,496.04,496.04,2023.0,,,Fraikin claims to be classified among the top ...,False,2025-08-29 00:15:15.080,,,,3200.0,"1,001-5,000",2024.0,https://app.gain.pro/asset/985249/fraikin-group,639.66,639.66,2023.0,Colombes,FR,Île-de-France,7.72,7.72,2023.0,11.0,2017.0,,,,,,,,4.0,3.0,2.0,,,,25065075,"[\n ""https://www.linkedin.com/in/yvespetin"",\...",,,142.82,0.29,2023.0,142.82,2023.0,5470.0,...,0.02,,,,,0.14,,0.17,,0.02,,0.04,0.02,0.02,-0.03,0.03,,,,,,,,0.42,0.38,0.4,0.5,,,3200.0,996.19,996.19,496.04,3.2,1.58,7.76,,,27.68,16.99,13.83,,,64.15,101.31,38.41,,,6.99,1.44,0.12,1.23,1.81,1.71,1.56,6.19,,,3_large_250_1000m_eur,4_mega_large_gt_200m_eur,"(0, 2]",1.0,5.0,81.0,496.04,0,5,0.45,0.0,0.09,0.09,,0,23.23,496.04,8.18,996.19,0.09,8.18,985249.0,,,,,,"[nan, 1350.0, nan, nan, nan]",1350.0,"[nan, 2006.0, nan, nan, nan]","[nan, nan, nan, 550.0, nan]",550.0,,,683.0,,365.02,,,,,
14547,6488,Canyon Partners,Dallas,United States of America,North America,US,-,985249,Fraikin Group,industrials,automotive,minority,0.0,1.0,"[\n ""services""\n]",579.86,579.86,2023.0,-103.46,-103.46,2023.0,101.32,,,101.32,2023.0,56.0,3.0,"[\n 1149704,\n 10745,\n 1224236,\n 1172820...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",244.14,244.14,2023.0,,,,63.6,63.6,2023.0,496.04,496.04,False,-83.83,-83.83,2023.0,49.79,496.04,496.04,2023.0,,,Fraikin claims to be classified among the top ...,False,2025-08-29 00:15:15.080,,,,3200.0,"1,001-5,000",2024.0,https://app.gain.pro/asset/985249/fraikin-group,639.66,639.66,2023.0,Colombes,FR,Île-de-France,7.72,7.72,2023.0,11.0,2017.0,,,,,,,,4.0,3.0,2.0,,,,25065075,"[\n ""https://www.linkedin.com/in/yvespetin"",\...",,,142.82,0.29,2023.0,142.82,2023.0,5470.0,...,0.02,,,,,0.14,,0.17,,0.02,,0.04,0.02,0.02,-0.03,0.03,,,,,,,,0.42,0.38,0.4,0.5,,,3200.0,996.19,996.19,496.04,3.2,1.58,7.76,,,27.68,16.99,13.83,,,64.15,101.31,38.41,,,6.99,1.44,0.12,1.23,1.81,1.71,1.56,6.19,,,3_large_250_1000m_eur,4_mega_large_gt_200m_eur,"(0, 2]",1.0,5.0,81.0,496.04,0,5,0.45,0.0,0.09,0.09,,0,500.44,496.04,8.18,996.19,0.09,8.18,985249.0,,,,,,"[nan, 1350.0, nan, nan, nan]",1350.0,"[nan, 2006.0, nan, nan, nan]","[nan, nan, nan, 550.0, nan]",550.0,,,683.0,,365.02,,,,,


In [274]:
investors_mm_europe[investors_mm_europe['asset_id']==5532]

Unnamed: 0,investor_id,investor_name,investor_hq_city,investor_country_name,investor_region,investor_sub_region,funds_raised_last_five_years_eur,asset_id,asset_name,sector,subsector,ownership,add_on_deal_count_l3y,add_on_deal_count_l5y,business_activity,capex,capex_eur,capex_year,capital,capital_eur,capital_year,cash,cash_conversion_cycle,cash_conversion_cycle_year,cash_eur,cash_year,ceo_age,ceo_tenure,competitor_asset_ids,consolidated_net_income,consolidated_net_income_eur,consolidated_net_income_year,currency,currency_to_eur,customer_base,debt,debt_eur,debt_year,earnings_per_share,earnings_per_share_eur,earnings_per_share_year,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_is_ai_generated,ebitda_minus_capex,ebitda_minus_capex_eur,ebitda_minus_capex_year,ebitda_pct_revenue,ebitda_with_ai_generated,ebitda_with_ai_generated_eur,ebitda_year,enterprise_value,enterprise_value_eur,esg,esg_outperformer,financials_at,free_cash_flow,free_cash_flow_eur,free_cash_flow_year,fte,fte_range,fte_year,gain_pro_url,gross_margin,gross_margin_eur,gross_margin_year,headquarters_city,headquarters_country_code,headquarters_region,inventory,inventory_eur,inventory_year,last_deal_month,last_deal_year,latest_deal_post_money_valuation_eur,latest_deal_post_money_valuation_year,latest_deal_pre_money_valuation_eur,latest_deal_pre_money_valuation_year,latest_deal_round_size_eur,latest_deal_round_type,latest_deal_round_year,latest_industry_rating_environmental,latest_industry_rating_overall,latest_industry_rating_social,latest_share_price,latest_share_price_date,latest_share_price_eur,linkedin_external_id,managers_linked_in_urls,market_capitalization,market_capitalization_eur,net_debt,net_debt_ebitda_ratio,net_debt_ebitda_ratio_year,net_debt_eur,net_debt_year,next_year_predicted_ev,...,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,revenue_range,ebitda_range,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,majority_owner_count,minority_owner_count,total_ownership_pct,majority_pct,minority_pct,ownership_pct,buyer_share_pcts,majority,median_ebitda_pos,ebitda_eur_pos_fill_na,predicted_exit_multiple_fill_na,revenue_eur_fill_na,subsector_margin,subsector_multiple,linked_asset_id,ev_eur_last_deal,ev_year_last_deal,ev_ebitda_multiple,equity_eur_last_deal,equity_year_last_deal,deals_ev_eur_array,deals_ev_eur_max,deals_ev_years_array,deals_equity_eur_array,deals_equity_eur_max,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,estd_ev_managed,verify_historical_estd_ev_managed,delta_with_verify,years_since_last_deal,fte_multiplier,revenue_multiplier
1992,91,Antin,Paris,France,Europe,France,13600.0,5532,Babilou,services,education,regular,4.0,9.0,"[\n ""services""\n]",142.65,142.65,2023.0,-14.8,-14.8,2023.0,28.99,,,28.99,2023.0,,,"[\n 213830,\n 371606,\n 1150688,\n 1149064...",,,,EUR,1.0,"[\n ""businessToBusiness"",\n ""businessToConsu...",22.05,22.05,2023.0,,,,60.11,60.11,2023.0,168.21,168.21,False,25.56,25.56,2023.0,19.44,168.21,168.21,2023.0,,,,False,2024-06-05 09:30:22.408,,,,13800.0,"10,001+",2025.0,https://app.gain.pro/asset/5532/babilou,779.19,779.19,2023.0,Bois-Colombes,FR,Île-de-France,,,,8.0,2020.0,,,,,20.0,venture,2015.0,4.5,3.5,2.5,,,,1055000,"[\n ""https://www.linkedin.com/in/cfond"",\n ""...",,,-6.94,-0.04,2023.0,-6.94,2023.0,3780.0,...,0.2,,0.23,0.44,-0.2,1.43,0.21,0.25,,0.39,0.11,0.14,0.11,0.12,0.03,,,,,,0.08,0.08,0.19,0.05,0.18,0.05,0.19,,,13800.0,865.07,865.07,168.21,11.75,19.78,52.68,,,322.9,24.61,142.77,,,,66.06,323.04,,,15.9,5.05,5.08,3.23,6.4,11.22,19.15,49.08,,,3_large_250_1000m_eur,3_large_50_200m_eur,"(5, 1000]",1.0,3.0,22.0,168.21,1,2,0.9,0.6,0.15,0.6,,1,85.88,168.21,13.0,865.07,0.16,12.38,5532.0,1500.0,2020.0,21.43,,,"[1500.0, nan, nan, nan, nan]",1500.0,"[2020.0, nan, nan, nan, nan]","[nan, nan, nan, nan, nan]",,70.0,,500.0,,1312.06,900.0,0.31,4.0,1.22,2.0
2725,180,TA Associates,Boston,United States of America,North America,US,27892.85,5532,Babilou,services,education,regular,4.0,9.0,"[\n ""services""\n]",142.65,142.65,2023.0,-14.8,-14.8,2023.0,28.99,,,28.99,2023.0,,,"[\n 213830,\n 371606,\n 1150688,\n 1149064...",,,,EUR,1.0,"[\n ""businessToBusiness"",\n ""businessToConsu...",22.05,22.05,2023.0,,,,60.11,60.11,2023.0,168.21,168.21,False,25.56,25.56,2023.0,19.44,168.21,168.21,2023.0,,,,False,2024-06-05 09:30:22.408,,,,13800.0,"10,001+",2025.0,https://app.gain.pro/asset/5532/babilou,779.19,779.19,2023.0,Bois-Colombes,FR,Île-de-France,,,,8.0,2020.0,,,,,20.0,venture,2015.0,4.5,3.5,2.5,,,,1055000,"[\n ""https://www.linkedin.com/in/cfond"",\n ""...",,,-6.94,-0.04,2023.0,-6.94,2023.0,3780.0,...,0.2,,0.23,0.44,-0.2,1.43,0.21,0.25,,0.39,0.11,0.14,0.11,0.12,0.03,,,,,,0.08,0.08,0.19,0.05,0.18,0.05,0.19,,,13800.0,865.07,865.07,168.21,11.75,19.78,52.68,,,322.9,24.61,142.77,,,,66.06,323.04,,,15.9,5.05,5.08,3.23,6.4,11.22,19.15,49.08,,,3_large_250_1000m_eur,3_large_50_200m_eur,"(5, 1000]",1.0,3.0,22.0,168.21,1,2,0.9,0.6,0.15,0.15,[0.2],0,38.07,168.21,13.0,865.07,0.16,12.38,5532.0,1500.0,2020.0,21.43,,,"[1500.0, nan, nan, nan, nan]",1500.0,"[2020.0, nan, nan, nan, nan]","[nan, nan, nan, nan, nan]",,70.0,,500.0,,328.02,225.0,0.31,4.0,1.22,2.0
8476,1053,RAISE,Paris,France,Europe,France,240.0,5532,Babilou,services,education,regular,4.0,9.0,"[\n ""services""\n]",142.65,142.65,2023.0,-14.8,-14.8,2023.0,28.99,,,28.99,2023.0,,,"[\n 213830,\n 371606,\n 1150688,\n 1149064...",,,,EUR,1.0,"[\n ""businessToBusiness"",\n ""businessToConsu...",22.05,22.05,2023.0,,,,60.11,60.11,2023.0,168.21,168.21,False,25.56,25.56,2023.0,19.44,168.21,168.21,2023.0,,,,False,2024-06-05 09:30:22.408,,,,13800.0,"10,001+",2025.0,https://app.gain.pro/asset/5532/babilou,779.19,779.19,2023.0,Bois-Colombes,FR,Île-de-France,,,,8.0,2020.0,,,,,20.0,venture,2015.0,4.5,3.5,2.5,,,,1055000,"[\n ""https://www.linkedin.com/in/cfond"",\n ""...",,,-6.94,-0.04,2023.0,-6.94,2023.0,3780.0,...,0.2,,0.23,0.44,-0.2,1.43,0.21,0.25,,0.39,0.11,0.14,0.11,0.12,0.03,,,,,,0.08,0.08,0.19,0.05,0.18,0.05,0.19,,,13800.0,865.07,865.07,168.21,11.75,19.78,52.68,,,322.9,24.61,142.77,,,,66.06,323.04,,,15.9,5.05,5.08,3.23,6.4,11.22,19.15,49.08,,,3_large_250_1000m_eur,3_large_50_200m_eur,"(5, 1000]",1.0,3.0,22.0,168.21,1,2,0.9,0.6,0.15,0.15,,0,17.8,168.21,13.0,865.07,0.16,12.38,5532.0,1500.0,2020.0,21.43,,,"[1500.0, nan, nan, nan, nan]",1500.0,"[2020.0, nan, nan, nan, nan]","[nan, nan, nan, nan, nan]",,70.0,,500.0,,328.02,225.0,0.31,4.0,1.22,2.0


In [275]:
investors_mm_europe[investors_mm_europe['asset_id']==16630]

Unnamed: 0,investor_id,investor_name,investor_hq_city,investor_country_name,investor_region,investor_sub_region,funds_raised_last_five_years_eur,asset_id,asset_name,sector,subsector,ownership,add_on_deal_count_l3y,add_on_deal_count_l5y,business_activity,capex,capex_eur,capex_year,capital,capital_eur,capital_year,cash,cash_conversion_cycle,cash_conversion_cycle_year,cash_eur,cash_year,ceo_age,ceo_tenure,competitor_asset_ids,consolidated_net_income,consolidated_net_income_eur,consolidated_net_income_year,currency,currency_to_eur,customer_base,debt,debt_eur,debt_year,earnings_per_share,earnings_per_share_eur,earnings_per_share_year,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_is_ai_generated,ebitda_minus_capex,ebitda_minus_capex_eur,ebitda_minus_capex_year,ebitda_pct_revenue,ebitda_with_ai_generated,ebitda_with_ai_generated_eur,ebitda_year,enterprise_value,enterprise_value_eur,esg,esg_outperformer,financials_at,free_cash_flow,free_cash_flow_eur,free_cash_flow_year,fte,fte_range,fte_year,gain_pro_url,gross_margin,gross_margin_eur,gross_margin_year,headquarters_city,headquarters_country_code,headquarters_region,inventory,inventory_eur,inventory_year,last_deal_month,last_deal_year,latest_deal_post_money_valuation_eur,latest_deal_post_money_valuation_year,latest_deal_pre_money_valuation_eur,latest_deal_pre_money_valuation_year,latest_deal_round_size_eur,latest_deal_round_type,latest_deal_round_year,latest_industry_rating_environmental,latest_industry_rating_overall,latest_industry_rating_social,latest_share_price,latest_share_price_date,latest_share_price_eur,linkedin_external_id,managers_linked_in_urls,market_capitalization,market_capitalization_eur,net_debt,net_debt_ebitda_ratio,net_debt_ebitda_ratio_year,net_debt_eur,net_debt_year,next_year_predicted_ev,...,revenue_chg_2023_2021,revenue_chg_2024_2019,ebitda_chg_2022_2017,ebitda_chg_2023_2018,ebitda_chg_2022_2019,ebitda_chg_2023_2020,ebitda_chg_2023_2019,ebitda_chg_2023_2021,ebitda_chg_2024_2019,fte_chg_2022_2017,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,revenue_range,ebitda_range,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,majority_owner_count,minority_owner_count,total_ownership_pct,majority_pct,minority_pct,ownership_pct,buyer_share_pcts,majority,median_ebitda_pos,ebitda_eur_pos_fill_na,predicted_exit_multiple_fill_na,revenue_eur_fill_na,subsector_margin,subsector_multiple,linked_asset_id,ev_eur_last_deal,ev_year_last_deal,ev_ebitda_multiple,equity_eur_last_deal,equity_year_last_deal,deals_ev_eur_array,deals_ev_eur_max,deals_ev_years_array,deals_equity_eur_array,deals_equity_eur_max,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,estd_ev_managed,verify_historical_estd_ev_managed,delta_with_verify,years_since_last_deal,fte_multiplier,revenue_multiplier
8797,1132,Crédit Mutuel Arkéa,Le Relecq-Kerhuon,France,Europe,France,95.00,16630,Avril,consumer,food,minority,5.0,8.0,"[\n ""manufacturing"",\n ""distribution""\n]",241.0,241.0,2023.0,798.1,798.1,2023.0,296.3,,,296.3,2023.0,,1.0,"[\n 837434,\n 825996,\n 2318154,\n 717396,...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",1455.1,1455.1,2023.0,,,,185.7,185.7,2023.0,341.2,341.2,False,100.2,100.2,2023.0,4.25,341.2,341.2,2023.0,,,,False,2024-07-25 00:00:00.000,,,,8000.0,"5,001-10,000",2024.0,https://app.gain.pro/asset/16630/avril,1788.7,1788.7,2023.0,Paris,FR,Île-de-France,1162.0,1162.0,2023.0,9.0,2022.0,,,,,145.0,otherUnknown,2022.0,1.5,2.75,4.0,,,,1238517,"[\n ""https://www.linkedin.com/in/jean-philipp...",,,1158.8,3.4,2023.0,1158.8,2023.0,3340.0,...,0.05,,0.37,0.17,0.51,0.12,0.19,-0.02,,-0.01,-0.01,-0.01,-0.01,-0.01,-0.02,0.01,,,,,0.02,0.03,0.03,0.04,0.05,0.06,0.04,,,8000.0,8023.8,8023.8,341.2,-16.89,5.45,10.25,,,-41.48,-2.09,11.98,,,-57.02,-6.41,21.47,,,5.53,3.0,0.45,3.03,6.32,-18.04,4.7,6.96,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(5, 1000]",1.0,4.0,42.0,341.2,0,4,0.45,0.0,0.11,0.11,,0,10.34,341.2,8.76,8023.8,0.1,8.76,16630.0,,,,,,[nan],,[nan],[nan],,355.9,2021.0,2350.0,,336.11,,,,,
10818,1770,Crédit Agricole,Paris,France,Europe,France,300.00,16630,Avril,consumer,food,minority,5.0,8.0,"[\n ""manufacturing"",\n ""distribution""\n]",241.0,241.0,2023.0,798.1,798.1,2023.0,296.3,,,296.3,2023.0,,1.0,"[\n 837434,\n 825996,\n 2318154,\n 717396,...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",1455.1,1455.1,2023.0,,,,185.7,185.7,2023.0,341.2,341.2,False,100.2,100.2,2023.0,4.25,341.2,341.2,2023.0,,,,False,2024-07-25 00:00:00.000,,,,8000.0,"5,001-10,000",2024.0,https://app.gain.pro/asset/16630/avril,1788.7,1788.7,2023.0,Paris,FR,Île-de-France,1162.0,1162.0,2023.0,9.0,2022.0,,,,,145.0,otherUnknown,2022.0,1.5,2.75,4.0,,,,1238517,"[\n ""https://www.linkedin.com/in/jean-philipp...",,,1158.8,3.4,2023.0,1158.8,2023.0,3340.0,...,0.05,,0.37,0.17,0.51,0.12,0.19,-0.02,,-0.01,-0.01,-0.01,-0.01,-0.01,-0.02,0.01,,,,,0.02,0.03,0.03,0.04,0.05,0.06,0.04,,,8000.0,8023.8,8023.8,341.2,-16.89,5.45,10.25,,,-41.48,-2.09,11.98,,,-57.02,-6.41,21.47,,,5.53,3.0,0.45,3.03,6.32,-18.04,4.7,6.96,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(5, 1000]",1.0,4.0,42.0,341.2,0,4,0.45,0.0,0.11,0.11,,0,10.43,341.2,8.76,8023.8,0.1,8.76,16630.0,,,,,,[nan],,[nan],[nan],,355.9,2021.0,2350.0,,336.11,,,,,
12689,3195,Natixis Investment Managers,Paris,France,Europe,France,-,16630,Avril,consumer,food,minority,5.0,8.0,"[\n ""manufacturing"",\n ""distribution""\n]",241.0,241.0,2023.0,798.1,798.1,2023.0,296.3,,,296.3,2023.0,,1.0,"[\n 837434,\n 825996,\n 2318154,\n 717396,...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",1455.1,1455.1,2023.0,,,,185.7,185.7,2023.0,341.2,341.2,False,100.2,100.2,2023.0,4.25,341.2,341.2,2023.0,,,,False,2024-07-25 00:00:00.000,,,,8000.0,"5,001-10,000",2024.0,https://app.gain.pro/asset/16630/avril,1788.7,1788.7,2023.0,Paris,FR,Île-de-France,1162.0,1162.0,2023.0,9.0,2022.0,,,,,145.0,otherUnknown,2022.0,1.5,2.75,4.0,,,,1238517,"[\n ""https://www.linkedin.com/in/jean-philipp...",,,1158.8,3.4,2023.0,1158.8,2023.0,3340.0,...,0.05,,0.37,0.17,0.51,0.12,0.19,-0.02,,-0.01,-0.01,-0.01,-0.01,-0.01,-0.02,0.01,,,,,0.02,0.03,0.03,0.04,0.05,0.06,0.04,,,8000.0,8023.8,8023.8,341.2,-16.89,5.45,10.25,,,-41.48,-2.09,11.98,,,-57.02,-6.41,21.47,,,5.53,3.0,0.45,3.03,6.32,-18.04,4.7,6.96,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(5, 1000]",1.0,4.0,42.0,341.2,0,4,0.45,0.0,0.11,0.11,,0,10.0,341.2,8.76,8023.8,0.1,8.76,16630.0,,,,,,[nan],,[nan],[nan],,355.9,2021.0,2350.0,,336.11,,,,,
13290,3801,Esfin Gestion,Nanterre,France,Europe,France,-,16630,Avril,consumer,food,minority,5.0,8.0,"[\n ""manufacturing"",\n ""distribution""\n]",241.0,241.0,2023.0,798.1,798.1,2023.0,296.3,,,296.3,2023.0,,1.0,"[\n 837434,\n 825996,\n 2318154,\n 717396,...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",1455.1,1455.1,2023.0,,,,185.7,185.7,2023.0,341.2,341.2,False,100.2,100.2,2023.0,4.25,341.2,341.2,2023.0,,,,False,2024-07-25 00:00:00.000,,,,8000.0,"5,001-10,000",2024.0,https://app.gain.pro/asset/16630/avril,1788.7,1788.7,2023.0,Paris,FR,Île-de-France,1162.0,1162.0,2023.0,9.0,2022.0,,,,,145.0,otherUnknown,2022.0,1.5,2.75,4.0,,,,1238517,"[\n ""https://www.linkedin.com/in/jean-philipp...",,,1158.8,3.4,2023.0,1158.8,2023.0,3340.0,...,0.05,,0.37,0.17,0.51,0.12,0.19,-0.02,,-0.01,-0.01,-0.01,-0.01,-0.01,-0.02,0.01,,,,,0.02,0.03,0.03,0.04,0.05,0.06,0.04,,,8000.0,8023.8,8023.8,341.2,-16.89,5.45,10.25,,,-41.48,-2.09,11.98,,,-57.02,-6.41,21.47,,,5.53,3.0,0.45,3.03,6.32,-18.04,4.7,6.96,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(5, 1000]",1.0,4.0,42.0,341.2,0,4,0.45,0.0,0.11,0.11,,0,16.75,341.2,8.76,8023.8,0.1,8.76,16630.0,,,,,,[nan],,[nan],[nan],,355.9,2021.0,2350.0,,336.11,,,,,


#### Fill using manual overrides

In [280]:
#Overide estimated multiple and ownership share based on manual checks (this is neccsary for BPI France type owners where they own maybe only 3% in a very large company)
# Need to add multiplier override

#Column overide is 0 where it was manually checked but no overide decsison was made it was 1 where manually checked and override decision was made

investors_mm_europe = pd.merge(investors_mm_europe,investor_ranking_override, on = ['investor_id','asset_id'], how = 'left', suffixes=('', '_override'))

investors_mm_europe.loc[investors_mm_europe['override'] == 1, ['ownership_pct', 'predicted_exit_multiple_fill_na']] = \
    investors_mm_europe.loc[investors_mm_europe['override'] == 1, ['ownership_pct_override', 'predicted_exit_multiple_fill_na_override']].values

# Now, apply overrides for ev_eur_last_deal and ev_year_last_deal ONLY when their override values are not null
mask = (investors_mm_europe['override'] == 1) & (investors_mm_europe['ev_eur_last_deal_override'].notnull())

investors_mm_europe.loc[mask, 'ev_eur_last_deal'] = investors_mm_europe.loc[mask, 'ev_eur_last_deal_override']

mask = (investors_mm_europe['override'] == 1) & (investors_mm_europe['ev_year_last_deal_override'].notnull())

investors_mm_europe.loc[mask, 'ev_year_last_deal'] = investors_mm_europe.loc[mask, 'ev_year_last_deal_override']

#Manually correct a few EBITDAs for BPI France and deal value for Ardonagh Group 2333

investors_mm_europe.loc[investors_mm_europe['asset_id']==1081931,'ebitda_eur_pos_fill_na'] = 8531

investors_mm_europe.loc[investors_mm_europe['override'] == 1, 'estd_ev_managed'] = (
    investors_mm_europe['ebitda_eur_pos_fill_na'] * 
    investors_mm_europe['predicted_exit_multiple_fill_na'] * 
    investors_mm_europe['ownership_pct']
)

investors_mm_europe.loc[investors_mm_europe['ev_year_last_deal'] >= 2023, 'estd_ev_managed'] = investors_mm_europe['ownership_pct'] * investors_mm_europe['ev_eur_last_deal'] * investors_mm_europe['multiplier'] 

investors_mm_europe['verify_historical_estd_ev_managed'] = (investors_mm_europe['ownership_pct'] * investors_mm_europe['ev_eur_last_deal'])


investors_mm_europe['delta_with_verify'] = (investors_mm_europe['estd_ev_managed'] - investors_mm_europe['verify_historical_estd_ev_managed']) / investors_mm_europe['estd_ev_managed']

#where 'estd_ev_managed' is less than historical value use the current value

investors_mm_europe.loc[
    investors_mm_europe['estd_ev_managed'] < investors_mm_europe['verify_historical_estd_ev_managed'],
    'estd_ev_managed'
] = investors_mm_europe['verify_historical_estd_ev_managed']




In [281]:
#Number of assets where manually overwritten

investors_mm_europe['override'].value_counts()

override
1.00    278
0.00    187
Name: count, dtype: int64

In [282]:
investors_mm_europe = investors_mm_europe.sort_values(by='estd_ev_managed', ascending = False)

In [283]:
# Check post manual override which still have more than 10x EV
#Check for OUTLIERS so assets where the investment EV is 10x the median (only check manually check ownership shares over 1bn EV)

investor_medians = investors_mm_europe.groupby(['investor_id'])['estd_ev_managed'].median().reset_index()
investor_medians = investor_medians.rename(columns={"estd_ev_managed":"median_ev"})

df_summary = pd.merge(investors_mm_europe, investor_medians, left_on = 'investor_id', right_on = 'investor_id', how='left')

# Step 3: Filter investors where their median estd_ev is at least 10 times the overall median
df_summary = df_summary[df_summary['estd_ev_managed'] >= 10 * df_summary['median_ev']]

df_summary = df_summary[[
'asset_id',
'asset_name',
'investor_id',
'investor_name',
'last_deal_year',
'ownership_pct', 
'predicted_exit_multiple_fill_na',
'estd_ev_managed',
'median_ev',
'ev_eur_last_deal',
'ev_year_last_deal',
'ev_ebitda_multiple',
'owner_shares',
#'highlighted_buyer_share_pct',                         
'equity_eur_last_deal',
'equity_year_last_deal',
'ebitda_eur_last_deal',
'ebitda_year_last_deal',
'revenue_eur_last_deal',
'revenue_year_last_deal',
'sector',
'subsector',
'ownership',
'predicted_exit_multiple',
'revenue_eur',
'ebitda_eur',
'net_debt_eur',
'total_ownership_pct',
'median_ebitda_pos',
'ebitda_eur_pos_fill_na',
'override'
]]

df_summary


Unnamed: 0,asset_id,asset_name,investor_id,investor_name,last_deal_year,ownership_pct,predicted_exit_multiple_fill_na,estd_ev_managed,median_ev,ev_eur_last_deal,ev_year_last_deal,ev_ebitda_multiple,owner_shares,equity_eur_last_deal,equity_year_last_deal,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,sector,subsector,ownership,predicted_exit_multiple,revenue_eur,ebitda_eur,net_debt_eur,total_ownership_pct,median_ebitda_pos,ebitda_eur_pos_fill_na,override
0,4638501,The Citco Group,135,GIC,2025.00,0.30,13.22,38670.04,815.10,,,,minority,,,,,,,financial,assetManagement,minority,,,,,0.30,357.81,9752.85,
1,200,Action,1,3i,2019.00,0.60,15.00,21960.00,206.90,10250.00,2019.00,18.95,majority,,,541.00,,5114.00,,consumer,retail,regular,15.00,13781.00,2440.00,1641.00,0.90,37.01,2440.00,0.00
2,12286,Mundys,160,Blackstone,2022.00,0.35,14.00,20300.00,559.11,58000.00,2021.00,14.50,minority,19000.00,2021.00,4000.00,,6400.00,,other,infrastructure,regular,8.00,8036.00,3434.00,30943.00,0.90,79.86,3434.00,1.00
4,6384,Visma,40,Hg Capital,2023.00,0.60,17.00,11400.00,258.95,19000.00,2024.00,,majority,19000.00,2024.00,892.65,2024.00,2804.40,2024.00,tmt,software,regular,17.00,2804.40,892.65,2097.25,0.90,44.25,892.65,0.00
5,1252,IVC Evidensia,27,EQT,2021.00,0.60,14.00,10841.04,756.54,,,,majority,,,254.74,2020.00,1527.15,2020.00,scienceHealth,healthcareServices,regular,14.00,3933.96,1290.60,3997.84,0.90,94.20,1290.60,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15849,735997,ETU,13954,Pacific Lake Partners,,0.30,12.38,-4.83,-4.83,,,,minority,,,,,,,services,education,minority,,,,,0.30,,-1.30,
15850,717460,"RocketRoute, an APG Company",6131,AFV Partners,,0.45,13.79,-5.74,-5.74,,,,minority,,,,,,,tmt,software,regular,,,,,0.90,,-0.92,
15851,717460,"RocketRoute, an APG Company",4085,BP Ventures,,0.45,13.79,-5.74,-5.74,,,,minority,,,,,,,tmt,software,regular,,,,,0.90,,-0.92,
15856,1404159,Phorest Salon Software,11808,CIBC Innovation Banking,,0.23,13.79,-13.04,-2.82,,,,minority,,,,,,,tmt,software,minority,,,,,0.45,1.89,-4.20,


In [284]:
df_summary['override'].value_counts()

override
1.00    88
0.00    59
Name: count, dtype: int64

In [285]:
# Check for largest 100 ownership shares

df_summary = investors_mm_europe.sort_values(by='estd_ev_managed', ascending = False).head(100)

df_summary = df_summary[[
'asset_id',
'asset_name',
'investor_id',
'investor_name',
'last_deal_year',
'ownership_pct', 
'predicted_exit_multiple_fill_na',
'estd_ev_managed',
'ev_eur_last_deal',
'ev_year_last_deal',
'ev_ebitda_multiple',
'owner_shares',
#'buyer_share_pcts',                         
'equity_eur_last_deal',
'equity_year_last_deal',
'ebitda_eur_last_deal',
'ebitda_year_last_deal',
'revenue_eur_last_deal',
'revenue_year_last_deal',
'sector',
'subsector',
'ownership',
'predicted_exit_multiple',
'revenue_eur',
'ebitda_eur',
'net_debt_eur',
'total_ownership_pct',
'median_ebitda_pos',
'ebitda_eur_pos_fill_na',
]]

df_summary

Unnamed: 0,asset_id,asset_name,investor_id,investor_name,last_deal_year,ownership_pct,predicted_exit_multiple_fill_na,estd_ev_managed,ev_eur_last_deal,ev_year_last_deal,ev_ebitda_multiple,owner_shares,equity_eur_last_deal,equity_year_last_deal,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,sector,subsector,ownership,predicted_exit_multiple,revenue_eur,ebitda_eur,net_debt_eur,total_ownership_pct,median_ebitda_pos,ebitda_eur_pos_fill_na
2211,4638501,The Citco Group,135,GIC,2025.0,0.3,13.22,38670.04,,,,minority,,,,,,,financial,assetManagement,minority,,,,,0.3,357.81,9752.85
3,200,Action,1,3i,2019.0,0.6,15.0,21960.0,10250.0,2019.0,18.95,majority,,,541.0,,5114.0,,consumer,retail,regular,15.0,13781.0,2440.0,1641.0,0.9,37.01,2440.0
2485,12286,Mundys,160,Blackstone,2022.0,0.35,14.0,20300.0,58000.0,2021.0,14.5,minority,19000.0,2021.0,4000.0,,6400.0,,other,infrastructure,regular,8.0,8036.0,3434.0,30943.0,0.9,79.86,3434.0
4853,5794,Verisure,397,Hellman & Friedman,2019.0,0.6,17.0,13200.0,22000.0,2025.0,16.95,majority,,,1298.1,2023.0,3089.97,2023.0,services,technicalServices,regular,17.0,3089.97,1298.1,7342.11,0.9,386.09,1298.1
1006,6384,Visma,40,Hg Capital,2023.0,0.6,17.0,11400.0,19000.0,2024.0,,majority,19000.0,2024.0,892.65,2024.0,2804.4,2024.0,tmt,software,regular,17.0,2804.4,892.65,2097.25,0.9,44.25,892.65
706,1252,IVC Evidensia,27,EQT,2021.0,0.6,14.0,10841.04,,,,majority,,,254.74,2020.0,1527.15,2020.0,scienceHealth,healthcareServices,regular,14.0,3933.96,1290.6,3997.84,0.9,94.2,1290.6
11631,1271660,CETIN,2237,PPF Group,2024.0,0.9,16.0,10483.2,,,,majority,,,728.0,2023.0,1246.0,2023.0,tmt,telecom,regular,16.0,1246.0,728.0,1631.0,0.9,728.0,728.0
2821,4706,Finastra,182,Vista Equity Partners,2012.0,0.9,14.0,9242.31,3035.2,2017.0,7.02,majority,,,432.52,,1303.24,,tmt,software,regular,14.0,1561.74,733.52,5008.6,0.9,424.73,733.52
986,743,Boskalis,39,HAL Investments,2022.0,0.9,8.0,9085.18,4330.0,2022.0,9.37,majority,,,462.0,,2957.0,,other,infrastructure,regular,8.0,4362.23,1261.83,-518.17,0.9,37.09,1261.83
12162,882553,NEO Energy,2639,HitecVision,2019.0,0.9,5.0,8988.92,,,,majority,,,139.66,2019.0,199.78,2019.0,materialsEnergy,energy,regular,15.0,2579.37,1997.54,556.17,0.9,999.78,1997.54


### Post Overide Lookup

In [286]:
#Check for OUTLIERS so assets where the investment EV is 10x the median (only check manually check ownership shares over 1bn EV)

investor_medians = (
    investors_mm_europe.groupby(['investor_id'])['estd_ev_managed']
    .agg(median_ev='median', count='count')
    .reset_index()
)

df_summary = pd.merge(investors_mm_europe, investor_medians, left_on = 'investor_id', right_on = 'investor_id', how='left')

# Step 3: Filter investors where their median estd_ev is at least 10 times the overall median
df_summary = df_summary.sort_values(by='estd_ev_managed', ascending = False)

df_summary['10x'] = df_summary['estd_ev_managed']/df_summary['median_ev'] 

df_summary = df_summary[columns_filter]


### Case Studies

In [287]:
columns = [
'asset_id',
'asset_name',
'investor_id',
'investor_name',
'last_deal_year',
'ownership_pct', 
'predicted_exit_multiple_fill_na',
'estd_ev_managed',
'ev_eur_last_deal',
'ev_year_last_deal',
'ev_ebitda_multiple',
'owner_shares',
'buyer_share_pcts',                         
'equity_eur_last_deal',
'equity_year_last_deal',
'ebitda_eur_last_deal',
'ebitda_year_last_deal',
'revenue_eur_last_deal',
'revenue_year_last_deal',
'sector',
'subsector',
'ownership_old',
'predicted_exit_multiple',
'revenue_eur',
'ebitda_eur',
'net_debt_eur',
'total_ownership_pct',
'median_ebitda_pos',
'ebitda_eur_pos_fill_na',
]

In [288]:
investors_mm_europe[investors_mm_europe['override'].notna()]

Unnamed: 0,investor_id,investor_name,investor_hq_city,investor_country_name,investor_region,investor_sub_region,funds_raised_last_five_years_eur,asset_id,asset_name,sector,subsector,ownership,add_on_deal_count_l3y,add_on_deal_count_l5y,business_activity,capex,capex_eur,capex_year,capital,capital_eur,capital_year,cash,cash_conversion_cycle,cash_conversion_cycle_year,cash_eur,cash_year,ceo_age,ceo_tenure,competitor_asset_ids,consolidated_net_income,consolidated_net_income_eur,consolidated_net_income_year,currency,currency_to_eur,customer_base,debt,debt_eur,debt_year,earnings_per_share,earnings_per_share_eur,earnings_per_share_year,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_is_ai_generated,ebitda_minus_capex,ebitda_minus_capex_eur,ebitda_minus_capex_year,ebitda_pct_revenue,ebitda_with_ai_generated,ebitda_with_ai_generated_eur,ebitda_year,enterprise_value,enterprise_value_eur,esg,esg_outperformer,financials_at,free_cash_flow,free_cash_flow_eur,free_cash_flow_year,fte,fte_range,fte_year,gain_pro_url,gross_margin,gross_margin_eur,gross_margin_year,headquarters_city,headquarters_country_code,headquarters_region,inventory,inventory_eur,inventory_year,last_deal_month,last_deal_year,latest_deal_post_money_valuation_eur,latest_deal_post_money_valuation_year,latest_deal_pre_money_valuation_eur,latest_deal_pre_money_valuation_year,latest_deal_round_size_eur,latest_deal_round_type,latest_deal_round_year,latest_industry_rating_environmental,latest_industry_rating_overall,latest_industry_rating_social,latest_share_price,latest_share_price_date,latest_share_price_eur,linkedin_external_id,managers_linked_in_urls,market_capitalization,market_capitalization_eur,net_debt,net_debt_ebitda_ratio,net_debt_ebitda_ratio_year,net_debt_eur,net_debt_year,next_year_predicted_ev,...,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,revenue_range,ebitda_range,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,majority_owner_count,minority_owner_count,total_ownership_pct,majority_pct,minority_pct,ownership_pct,buyer_share_pcts,majority,median_ebitda_pos,ebitda_eur_pos_fill_na,predicted_exit_multiple_fill_na,revenue_eur_fill_na,subsector_margin,subsector_multiple,linked_asset_id,ev_eur_last_deal,ev_year_last_deal,ev_ebitda_multiple,equity_eur_last_deal,equity_year_last_deal,deals_ev_eur_array,deals_ev_eur_max,deals_ev_years_array,deals_equity_eur_array,deals_equity_eur_max,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,estd_ev_managed,verify_historical_estd_ev_managed,delta_with_verify,years_since_last_deal,fte_multiplier,revenue_multiplier,asset_name_override,investor_name_override,last_deal_year_override,ownership_pct_override,predicted_exit_multiple_fill_na_override,ev_eur_last_deal_override,ev_year_last_deal_override,override,notes,notes_detail
3,1,3i,London,United Kingdom,Europe,UK&I,652.30,200,Action,consumer,retail,regular,0.00,0.00,"[\n ""retail""\n]",360.00,360.00,2024.00,-105.00,-105.00,2024.00,755.00,,,755.00,2024.00,45.00,3.00,"[\n 17436,\n 240,\n 16571,\n 16464,\n 164...",,,,EUR,1.00,"[\n ""businessToConsumer""\n]",2396.00,2396.00,2024.00,,,,1879.00,1879.00,2024.00,2440.00,2440.00,False,2080.00,2080.00,2024.00,17.71,2440.00,2440.00,2024.00,,,,False,2024-05-13 15:59:50.564,,,,79681.00,"10,001+",2024.00,https://app.gain.pro/asset/200/action,4558.00,4558.00,2024.00,Zwaagdijk,NL,Noord-Holland,1567.00,1567.00,2024.00,11.00,2019.00,,,,,,,,3.00,3.50,4.00,,,,623842,"[\n ""https://www.linkedin.com/in/hajir-hajji-...",,,1641.00,0.67,2024.00,1641.00,2024.00,52710.00,...,0.30,0.40,0.42,0.33,0.54,0.29,0.11,0.11,0.11,0.12,0.11,0.11,0.14,0.14,0.15,0.16,0.17,0.18,,79681.00,13781.00,13781.00,2440.00,21.70,24.72,26.38,,,26.03,29.95,32.50,,,30.40,36.46,40.95,,,15.41,15.19,40.06,5.44,12.63,22.46,26.29,27.50,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(-1, 0]",1.00,2.00,32.00,2440.00,1,1,0.90,0.60,0.30,0.60,[0.8],1,37.01,2440.00,15.00,13781.00,0.07,7.51,200.00,10250.00,2019.00,18.95,,,"[10250.0, 650.0]",10250.00,"[2019.0, 2011.0]","[nan, nan]",,541.00,,5114.00,,21960.00,6150.00,0.72,5.00,2.00,2.00,Action,3i,2019.00,0.60,15.00,,,0.00,No change,
2485,160,Blackstone,Manhattan,United States of America,North America,US,140455.62,12286,Mundys,other,infrastructure,regular,1.00,4.00,"[\n ""services"",\n ""operator""\n]",307.00,307.00,2024.00,1364.00,1364.00,2024.00,5717.00,71.62,2024.00,5717.00,2024.00,62.00,9.00,"[\n 1125185,\n 1082935,\n 4610151,\n 45867...",-36.00,-36.00,2025.00,EUR,1.00,"[\n ""businessToBusiness"",\n ""businessToConsu...",37826.00,37826.00,2024.00,0.26,0.26,2025.00,-876.00,-876.00,2025.00,3434.00,3434.00,False,3062.00,3062.00,2024.00,42.73,3434.00,3434.00,2025.00,39067.08,39067.08,,False,2025-06-08 21:16:33.831,71.62,71.62,2024.00,23108.00,"10,001+",2024.00,https://app.gain.pro/asset/12286/mundys,3790.00,3790.00,2024.00,Roma,IT,Lazio,83.00,83.00,2024.00,4.00,2022.00,,,,,,,,,,,22.99,2023-03-24 00:00:00.000,22.99,6093168,"[\n ""https://www.linkedin.com/in/giampiero-ma...",18833.08,18833.08,30943.00,9.18,2024.00,30943.00,2024.00,25190.00,...,,,-0.08,,0.08,,,,,,,,,0.25,0.49,0.44,0.39,0.42,,23108.00,8036.00,8036.00,3434.00,4.24,7.65,10.05,,,11.89,5.41,5.01,,,-520.10,-266.77,,,,-2.15,5.88,4.37,0.63,7.43,7.92,5.22,2.02,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(2, 5]",1.00,1.00,75.00,3434.00,0,1,0.90,0.60,0.30,0.35,,0,79.86,3434.00,14.00,8036.00,0.30,10.86,12286.00,58000.00,2021.00,14.50,19000.00,2021.00,"[58000.0, nan, nan]",58000.00,"[2021.0, nan, nan]","[19000.0, 8300.0, nan]",19000.00,4000.00,,6400.00,,20300.00,20300.00,-0.21,3.00,1.14,1.33,Atlantia Group,Blackstone,2022.00,0.35,14.00,,,1.00,Change Ownership,
4853,397,Hellman & Friedman,San Francisco,United States of America,North America,US,40903.69,5794,Verisure,services,technicalServices,regular,1.00,1.00,"[\n ""services"",\n ""manufacturing"",\n ""distr...",884.00,884.00,2023.00,325.82,325.82,2023.00,21.40,,,21.40,2023.00,60.00,11.00,"[\n 1336391,\n 16559,\n 1380406,\n 420398,...",,,,EUR,1.00,"[\n ""businessToBusiness"",\n ""businessToConsu...",7363.51,7363.51,2023.00,,,,213.04,213.04,2023.00,1298.10,1298.10,False,414.09,414.09,2023.00,42.01,1298.10,1298.10,2023.00,,,,False,2025-03-28 14:24:37.320,,,,24404.00,"10,001+",2023.00,https://app.gain.pro/asset/5794/verisure,1485.92,1485.92,2023.00,Versoix,CH,Genève,296.44,296.44,2023.00,4.00,2019.00,,,,,,,,4.50,3.25,2.00,,,,22322941,"[\n ""https://www.linkedin.com/in/austinlally""...",,,7342.11,5.66,2023.00,7342.11,2023.00,31420.00,...,0.10,0.10,0.09,0.09,0.10,,,,,0.36,0.37,0.38,0.40,0.43,0.41,0.39,0.42,,,24404.00,3089.97,3089.97,1298.10,9.30,10.98,13.05,,,16.28,12.63,12.18,,,59.16,28.42,-13.05,,,7.55,9.82,8.58,3.32,6.96,11.57,9.27,12.49,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(0, 2]",1.00,3.00,27.00,1298.10,1,2,0.90,0.60,0.15,0.60,,1,386.09,1298.10,17.00,3089.97,0.12,12.09,5794.00,22000.00,2025.00,16.95,,,"[22000.0, 11200.0, nan, nan, 2300.0, 1392.7]",22000.00,"[2025.0, 2019.0, nan, nan, 2011.0, 2007.0]","[nan, 7400.0, nan, 1313.8, nan, nan]",7400.00,1298.10,2023.00,3089.97,2023.00,13200.00,13200.00,0.00,-1.00,1.00,1.00,Verisure,Hellman & Friedman,2019.00,0.60,17.00,,,0.00,No change,
1006,40,Hg Capital,London,United Kingdom,Europe,UK&I,19910.91,6384,Visma,tmt,software,regular,50.00,85.00,"[\n ""services"",\n ""engineering""\n]",27.54,27.54,2024.00,197.33,197.33,2024.00,1143.00,,,1143.00,2024.00,48.00,5.00,"[\n 15821,\n 6650,\n 109,\n 239,\n 714,\n...",,,,EUR,1.00,"[\n ""businessToBusiness"",\n ""businessToGover...",3240.25,3240.25,2024.00,,,,457.68,457.68,2024.00,892.65,892.65,False,865.10,865.10,2024.00,31.83,892.65,892.65,2024.00,,,,False,2023-03-20 13:28:12.591,,,,16395.00,"10,001+",2024.00,https://app.gain.pro/asset/6384/visma,,,,Oslo,NO,Oslo,0.69,0.69,2024.00,12.00,2023.00,,,,,,,,4.50,2.50,0.50,,,,166815,"[\n ""https://www.linkedin.com/in/merete-hverv...",,,2097.25,2.35,2024.00,2097.25,2024.00,19210.00,...,0.10,0.07,0.10,0.08,0.03,0.08,,,0.23,0.24,0.25,0.28,0.28,0.29,0.28,0.29,0.29,0.32,,16395.00,2804.40,2804.40,892.65,17.26,16.78,10.46,,,26.67,23.34,14.81,,,57.16,37.08,27.46,,,8.53,8.68,4.94,-0.61,0.55,,,,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(5, 1000]",1.00,8.00,29.00,892.65,1,7,0.90,0.60,0.04,0.60,,1,44.25,892.65,17.00,2804.40,0.16,13.79,6384.00,19000.00,2024.00,,19000.00,2024.00,"[nan, 19000.0, 16000.0, 9320.4, 6500.0, 4642.2...",19000.00,"[nan, 2023.0, 2021.0, 2020.0, 2019.0, 2017.0, ...","[19000.0, nan, nan, nan, nan, nan, nan, nan, nan]",19000.00,892.65,2024.00,2804.40,2024.00,11400.00,11400.00,0.00,0.00,1.00,1.00,Visma,Hg,2023.00,0.60,17.00,,,0.00,No change,
706,27,EQT,Stockholm,Sweden,Europe,Nordics,91867.93,1252,IVC Evidensia,scienceHealth,healthcareServices,regular,0.00,5.00,"[\n ""services""\n]",134.50,155.39,2023.00,59.00,68.16,2023.00,496.40,,,573.50,2023.00,53.00,2.00,"[\n 1286853,\n 1104957,\n 4603639,\n 45862...",,,,GBP,1.16,"[\n ""businessToBusiness"",\n ""businessToConsu...",3956.80,4571.34,2023.00,,,,798.30,922.29,2023.00,1117.10,1290.60,False,982.60,1135.21,2023.00,32.81,1117.10,1290.60,2023.00,,,,False,2024-12-13 07:00:21.651,,,,41017.00,"10,001+",2023.00,https://app.gain.pro/asset/1252/ivc-evidensia,1504.10,1737.71,2023.00,Bristol,GB,,99.10,114.49,2023.00,6.00,2021.00,,,,,,,,4.50,4.00,3.50,,,,13034752,"[\n ""https://www.linkedin.com/in/simon-smith-...",,,3460.40,3.10,2023.00,3997.84,2023.00,22270.00,...,0.28,0.28,0.28,0.24,0.32,,0.11,0.13,0.12,0.14,0.08,0.12,0.13,0.17,0.21,0.33,0.33,,,41017.00,3405.10,3933.96,1117.10,18.78,38.27,37.08,,,18.06,73.10,71.75,,,15.51,90.06,92.99,,,12.05,31.61,27.54,4.29,8.36,16.69,37.64,39.17,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(2, 5]",1.00,2.00,14.00,1290.60,1,1,0.90,0.60,0.30,0.60,,1,94.20,1290.60,14.00,3933.96,0.12,10.97,1252.00,,,,,,"[nan, nan, nan, 3000.0, nan, nan]",3000.00,"[nan, nan, nan, 2019.0, nan, nan]","[nan, nan, nan, nan, nan, nan]",,254.74,2020.00,1527.15,2020.00,10841.04,,,,,,IVC Evidensia,EQT,2021.00,0.60,14.00,,,0.00,No change,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2322,146,PGGM,Zeist,Netherlands,Europe,Benelux,-,759680,Thames Water,other,infrastructure,regular,0.00,0.00,"[\n ""operator"",\n ""services""\n]",2039.60,2356.38,2024.00,548.20,633.34,2024.00,306.40,24.26,2024.00,353.99,2024.00,,1.00,"[\n 1029148,\n 1806380,\n 690231,\n 838776...",-1513.80,-1748.91,2024.00,GBP,1.16,"[\n ""businessToConsumer"",\n ""businessToBusin...",17100.40,19756.31,2024.00,,,,-606.10,-700.24,2024.00,175.60,202.87,False,-1864.00,-2153.50,2024.00,6.13,175.60,202.87,2024.00,,,,False,2025-08-17 21:36:24.966,24.26,28.03,2024.00,7000.00,"5,001-10,000",2024.00,https://app.gain.pro/asset/759680/thames-water,2336.20,2699.04,2024.00,Reading,GB,,22.50,26.00,2024.00,12.00,2021.00,,,,,,,,,,,,,,6957,"[\n ""https://www.linkedin.com/in/chris-weston...",,,16794.00,95.64,2024.00,19402.33,2024.00,18410.00,...,,,0.04,,0.07,,,,,,,,,0.49,0.49,0.45,0.44,0.06,,7000.00,2864.70,3309.63,175.60,9.47,10.44,8.63,,,-84.80,-59.34,-45.53,,,-243.63,-93.15,-51.96,,,-12.50,0.00,0.00,2.51,5.29,12.03,13.09,8.69,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(-1, 0]",1.00,8.00,36.00,202.87,0,8,0.90,0.00,0.11,0.05,,0,133.16,202.87,3.00,3309.62,0.30,10.86,759680.00,,,,,,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",9830.00,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, 8085.3, nan, nan, nan, na...",8085.30,202.87,2024.00,3309.63,2024.00,30.43,,,,,,Thames Water,PGGM,2025.00,0.05,3.00,,,1.00,Change Ownership and Multiple,
13948,4809,Moira Capital Partners,Madrid,Spain,Europe,Iberia,-,2079850,Gases Research Innovation & Technology Group,materialsEnergy,chemicals,regular,0.00,1.00,"[\n ""services"",\n ""manufacturing"",\n ""distr...",0.83,0.83,2020.00,4.75,4.75,2020.00,6.53,,,6.53,2020.00,,6.00,"[\n 1403874,\n 1895035,\n 1083140,\n 98104...",,,,EUR,1.00,"[\n ""businessToBusiness""\n]",3.36,3.36,2020.00,,,,2.68,2.68,2020.00,5.23,5.23,False,4.40,4.40,2020.00,19.99,5.23,5.23,2020.00,,,,False,2024-09-19 16:11:06.453,,,,37.50,11-50,2020.00,https://app.gain.pro/asset/2079850/gases-resea...,11.07,11.07,2020.00,Avinyó,ES,Barcelona,1.86,1.86,2020.00,3.00,2018.00,,,,,,,,,,,,,,20363242,"[\n ""https://www.linkedin.com/in/jos%C3%A9lui...",,,-3.18,-0.61,2020.00,-3.18,2020.00,30.00,...,,,,,,,,,,,,,0.24,0.20,,,,,30.50,37.50,26.17,26.17,5.23,-16.89,,,,,-29.76,,,,,-46.13,,,,,33.93,9.18,4.71,3.33,10.71,-17.68,,,,,1_small_lt_50m_eur,1_small_lt_10m_eur,"(0, 2]",1.00,1.00,24.00,5.23,1,0,0.90,0.90,0.00,0.90,[0.65],1,5.23,5.23,6.00,26.17,0.13,9.50,2079850.00,30.77,2018.00,4.13,,,[30.8],30.80,[2018.0],[nan],,7.45,,30.00,,28.25,27.69,0.02,6.00,1.32,,Gases Research Innovation & Technology Group,Moira Capital Partners,2018.00,0.90,6.00,,,0.00,No change,
11253,1991,Galia Gestion,Bordeaux,France,Europe,France,-,459092,H&A Location,services,technicalServices,minority,0.00,0.00,"[\n ""services""\n]",,,,,,,,,,,,51.00,21.00,"[\n 1993712,\n 913315,\n 3318124,\n 240993...",,,,EUR,1.00,"[\n ""businessToBusiness""\n]",,,,,,,,,,,,False,,,,,,,,,,,False,2023-03-12 20:33:59.284,,,,80.00,51-200,2024.00,https://app.gain.pro/asset/459092/h-a-location,,,,Bordeaux,FR,Nouvelle-Aquitaine,,,,3.00,2023.00,,,,,,,,,,,,,,1728177,"[\n ""https://www.linkedin.com/in/florent-arro...",,,,,,,,,...,,,0.17,,,,,,,,,,,,,,,,125.50,80.00,350.00,350.00,,-12.50,4.26,7.09,,,,,,,,,,,,,0.00,0.00,13.15,5.26,6.38,,,,,,3_large_250_1000m_eur,5_unknown,"(-1, 0]",1.00,2.00,21.00,,0,2,0.45,0.00,0.23,0.15,,0,3.31,40.73,4.00,350.00,0.12,12.09,459092.00,40.00,2022.00,,,,"[40.0, nan]",40.00,"[2022.0, nan]","[nan, nan]",,,,350.00,,24.44,6.00,0.75,2.00,1.00,1.09,H&A Location,Galia Gestion,2023.00,0.15,4.00,,,1.00,Change multiple,
6515,659,Bpifrance,Maisons-Alfort,France,Europe,France,400.00,12004,Kintaro Group,consumer,food,minority,0.00,0.00,"[\n ""services"",\n ""retail""\n]",,,,,,,,,,,,45.00,16.00,"[\n 2027917,\n 2184156,\n 4598437,\n 45462...",,,,EUR,1.00,"[\n ""businessToConsumer""\n]",,,,,,,,,,,,True,,,,,2.40,2.40,2020.00,,,,False,2025-03-27 05:24:43.012,,,,160.00,51-200,2020.00,https://app.gain.pro/asset/12004/kintaro-group,,,,Paris,FR,Île-de-France,,,,3.00,2022.00,,,,,,,,4.00,3.75,3.50,,,,77662480,"[\n ""https://www.linkedin.com/in/emmanuel-dav...",,,,,,,,18.00,...,,,,,,,,,,,,,,,,,,,125.50,160.00,30.00,30.00,2.40,,,,,,,,,,,,,,,,28.57,38.81,49.94,0.00,-10.00,,,,,,5_unknown,5_unknown,"(-1, 0]",1.00,2.00,16.00,,0,2,0.45,0.00,0.23,0.23,,0,10.00,2.40,8.76,30.00,0.10,8.76,12004.00,,,,,,[nan],,[nan],[nan],,,,,,4.73,,,,,,Kintaro Group,Bpifrance,2022.00,0.23,8.44,,,0.00,No change,


In [289]:
#Case Study
investors_mm_europe[investors_mm_europe['investor_id']==1].sort_values(by='estd_ev_managed',ascending = False)

Unnamed: 0,investor_id,investor_name,investor_hq_city,investor_country_name,investor_region,investor_sub_region,funds_raised_last_five_years_eur,asset_id,asset_name,sector,subsector,ownership,add_on_deal_count_l3y,add_on_deal_count_l5y,business_activity,capex,capex_eur,capex_year,capital,capital_eur,capital_year,cash,cash_conversion_cycle,cash_conversion_cycle_year,cash_eur,cash_year,ceo_age,ceo_tenure,competitor_asset_ids,consolidated_net_income,consolidated_net_income_eur,consolidated_net_income_year,currency,currency_to_eur,customer_base,debt,debt_eur,debt_year,earnings_per_share,earnings_per_share_eur,earnings_per_share_year,ebit,ebit_eur,ebit_year,ebitda,ebitda_eur,ebitda_is_ai_generated,ebitda_minus_capex,ebitda_minus_capex_eur,ebitda_minus_capex_year,ebitda_pct_revenue,ebitda_with_ai_generated,ebitda_with_ai_generated_eur,ebitda_year,enterprise_value,enterprise_value_eur,esg,esg_outperformer,financials_at,free_cash_flow,free_cash_flow_eur,free_cash_flow_year,fte,fte_range,fte_year,gain_pro_url,gross_margin,gross_margin_eur,gross_margin_year,headquarters_city,headquarters_country_code,headquarters_region,inventory,inventory_eur,inventory_year,last_deal_month,last_deal_year,latest_deal_post_money_valuation_eur,latest_deal_post_money_valuation_year,latest_deal_pre_money_valuation_eur,latest_deal_pre_money_valuation_year,latest_deal_round_size_eur,latest_deal_round_type,latest_deal_round_year,latest_industry_rating_environmental,latest_industry_rating_overall,latest_industry_rating_social,latest_share_price,latest_share_price_date,latest_share_price_eur,linkedin_external_id,managers_linked_in_urls,market_capitalization,market_capitalization_eur,net_debt,net_debt_ebitda_ratio,net_debt_ebitda_ratio_year,net_debt_eur,net_debt_year,next_year_predicted_ev,...,fte_chg_2023_2018,fte_chg_2022_2019,fte_chg_2023_2020,fte_chg_2023_2019,fte_chg_2023_2021,fte_chg_2024_2019,EBITDA_Margin_2013,EBITDA_Margin_2014,EBITDA_Margin_2015,EBITDA_Margin_2016,EBITDA_Margin_2017,EBITDA_Margin_2018,EBITDA_Margin_2019,EBITDA_Margin_2020,EBITDA_Margin_2021,EBITDA_Margin_2022,EBITDA_Margin_2023,EBITDA_Margin_2024,fte_range_avg,estimated_fte,estimated_revenues,estimated_revenues_calc_eur,estimated_ebitda,revenue_growth_oneyear,revenue_growth_twoyears,revenue_growth_threeyears,revenue_growth_threemonths,revenue_growth_sixmonths,ebitda_growth_oneyear,ebitda_growth_twoyears,ebitda_growth_threeyears,ebitda_growth_threemonths,ebitda_growth_sixmonths,ebit_growth_oneyear,ebit_growth_twoyears,ebit_growth_threeyears,ebit_growth_threemonths,ebit_growth_sixmonths,fte_growth_oneyear,fte_growth_twoyears,fte_growth_threeyears,fte_growth_threemonths,fte_growth_sixmonths,gross_margin_growth_oneyear,gross_margin_growth_twoyears,gross_margin_growth_threeyears,gross_margin_growth_threemonths,gross_margin_growth_sixmonths,revenue_range,ebitda_range,add_on_band,cnt,count_owners,company_age,ebitda_eur_pos,majority_owner_count,minority_owner_count,total_ownership_pct,majority_pct,minority_pct,ownership_pct,buyer_share_pcts,majority,median_ebitda_pos,ebitda_eur_pos_fill_na,predicted_exit_multiple_fill_na,revenue_eur_fill_na,subsector_margin,subsector_multiple,linked_asset_id,ev_eur_last_deal,ev_year_last_deal,ev_ebitda_multiple,equity_eur_last_deal,equity_year_last_deal,deals_ev_eur_array,deals_ev_eur_max,deals_ev_years_array,deals_equity_eur_array,deals_equity_eur_max,ebitda_eur_last_deal,ebitda_year_last_deal,revenue_eur_last_deal,revenue_year_last_deal,estd_ev_managed,verify_historical_estd_ev_managed,delta_with_verify,years_since_last_deal,fte_multiplier,revenue_multiplier,asset_name_override,investor_name_override,last_deal_year_override,ownership_pct_override,predicted_exit_multiple_fill_na_override,ev_eur_last_deal_override,ev_year_last_deal_override,override,notes,notes_detail
3,1,3i,London,United Kingdom,Europe,UK&I,652.3,200,Action,consumer,retail,regular,0.0,0.0,"[\n ""retail""\n]",360.0,360.0,2024.0,-105.0,-105.0,2024.0,755.0,,,755.0,2024.0,45.0,3.0,"[\n 17436,\n 240,\n 16571,\n 16464,\n 164...",,,,EUR,1.0,"[\n ""businessToConsumer""\n]",2396.0,2396.0,2024.0,,,,1879.0,1879.0,2024.0,2440.0,2440.0,False,2080.0,2080.0,2024.0,17.71,2440.0,2440.0,2024.0,,,,False,2024-05-13 15:59:50.564,,,,79681.0,"10,001+",2024.0,https://app.gain.pro/asset/200/action,4558.0,4558.0,2024.0,Zwaagdijk,NL,Noord-Holland,1567.0,1567.0,2024.0,11.0,2019.0,,,,,,,,3.0,3.5,4.0,,,,623842.0,"[\n ""https://www.linkedin.com/in/hajir-hajji-...",,,1641.0,0.67,2024.0,1641.0,2024.0,52710.0,...,0.3,0.4,0.42,0.33,0.54,0.29,0.11,0.11,0.11,0.12,0.11,0.11,0.14,0.14,0.15,0.16,0.17,0.18,,79681.0,13781.0,13781.0,2440.0,21.7,24.72,26.38,,,26.03,29.95,32.5,,,30.4,36.46,40.95,,,15.41,15.19,40.06,5.44,12.63,22.46,26.29,27.5,,,4_mega_large_gt_1bn_eur,4_mega_large_gt_200m_eur,"(-1, 0]",1.0,2.0,32.0,2440.0,1,1,0.9,0.6,0.3,0.6,[0.8],1,37.01,2440.0,15.0,13781.0,0.07,7.51,200.0,10250.0,2019.0,18.95,,,"[10250.0, 650.0]",10250.0,"[2019.0, 2011.0]","[nan, nan]",,541.0,,5114.0,,21960.0,6150.0,0.72,5.0,2.0,2.0,Action,3i,2019.0,0.6,15.0,,,0.0,No change,
16,1,3i,London,United Kingdom,Europe,UK&I,652.3,5259,TCR,other,infrastructure,regular,1.0,1.0,"[\n ""services""\n]",68.37,68.37,2016.0,3.43,3.43,2016.0,17.37,,,17.37,2016.0,,0.0,"[\n 1411545,\n 1461652,\n 1556962,\n 6294,...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",186.29,186.29,2016.0,,,,26.54,26.54,2016.0,165.0,165.0,False,-11.28,-11.28,2016.0,66.0,165.0,165.0,2023.0,,,,False,2022-06-23 06:35:24.356,,,,1500.0,"1,001-5,000",2025.0,https://app.gain.pro/asset/5259/tcr,164.36,164.36,2018.0,Steenokkerzeel,BE,Vlaams-Brabant,6.39,6.39,2016.0,6.0,2022.0,,,,,,,,,,,,,,848145.0,"[\n ""https://www.linkedin.com/in/jason-w-b080...",,,168.93,2.96,2016.0,168.93,2016.0,2420.0,...,0.08,0.01,,0.06,,,,0.41,0.41,0.37,0.4,0.4,0.4,,,,0.66,,,1500.0,250.0,250.0,165.0,,,,,,,,,,,5.54,13.66,,,,12.84,0.0,7.72,3.12,5.72,12.1,15.95,17.23,,,3_large_250_1000m_eur,3_large_50_200m_eur,"(0, 2]",1.0,1.0,41.0,165.0,1,0,0.9,0.9,0.0,0.9,"[0.48, 0.46]",1,37.01,165.0,12.0,250.0,0.3,10.86,5259.0,804.91,2022.0,,804.91,2022.0,"[nan, 650.0, nan, nan]",650.0,"[nan, 2016.0, nan, nan]","[804.9, nan, nan, nan]",804.9,165.0,2023.0,250.0,2023.0,1782.0,724.42,0.59,2.0,1.0,,,,,,,,,,,
4,1,3i,London,United Kingdom,Europe,UK&I,652.3,198,Royal Sanders,consumer,consumerGoods,regular,2.0,5.0,"[\n ""manufacturing"",\n ""retail""\n]",13.54,13.54,2024.0,57.32,57.32,2024.0,34.0,,,34.0,2024.0,54.0,18.0,"[\n 31181,\n 714961,\n 13974,\n 14081,\n ...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",404.45,404.45,2024.0,,,,109.25,109.25,2024.0,119.76,119.76,False,106.21,106.21,2024.0,23.16,119.76,119.76,2024.0,,,,False,2025-01-07 13:34:08.908,,,,1176.0,"1,001-5,000",2024.0,https://app.gain.pro/asset/198/royal-sanders,181.33,181.33,2024.0,Vlijmen,NL,Noord-Brabant,52.56,52.56,2024.0,2.0,2018.0,,,,,,,,2.0,3.0,4.0,,,,77181.0,"[\n ""https://www.linkedin.com/in/pieter-a-de-...",,,370.45,3.09,2024.0,370.45,2024.0,2580.0,...,,0.2,0.15,0.14,0.05,0.14,0.15,0.19,,0.19,0.2,,0.16,0.18,0.24,0.17,0.17,0.23,,1176.0,517.07,517.07,119.76,19.13,25.7,22.02,,,63.01,45.48,20.64,,,71.4,49.83,20.8,,,12.21,4.4,7.15,0.49,7.85,48.89,38.59,22.77,,,3_large_250_1000m_eur,3_large_50_200m_eur,"(2, 5]",1.0,1.0,174.0,119.76,1,0,0.9,0.9,0.0,0.9,,1,37.01,119.76,15.0,517.07,0.1,10.15,198.0,,,,,,"[nan, nan, nan]",,"[nan, nan, nan]","[nan, nan, nan]",,25.21,2017.0,127.0,,1616.72,,,,,,,,,,,,,,,
12,1,3i,London,United Kingdom,Europe,UK&I,652.3,31228,Scandlines,services,logistics,regular,0.0,0.0,"[\n ""services"",\n ""retail""\n]",41.9,41.9,2024.0,39.4,39.4,2024.0,58.3,,,58.3,2024.0,,1.0,"[\n 1317268,\n 1295246,\n 3312478,\n 16726...",,,,EUR,1.0,"[\n ""businessToBusiness"",\n ""businessToConsu...",872.6,872.6,2024.0,,,,148.4,148.4,2024.0,181.0,181.0,False,139.1,139.1,2024.0,38.61,181.0,181.0,2024.0,,,Scandlines attempts to minimize its environmen...,True,2025-05-15 23:38:59.679,,,,1377.0,"1,001-5,000",2024.0,https://app.gain.pro/asset/31228/scandlines,373.9,373.9,2024.0,København,DK,,21.2,21.2,2024.0,3.0,2018.0,,,,,,,,,,,,,,457559.0,"[\n ""https://www.linkedin.com/in/eric-gr%C3%A...",,,814.3,4.5,2024.0,814.3,2024.0,3440.0,...,-0.01,-0.03,0.02,-0.01,0.08,-0.02,,,,,0.39,0.39,0.39,0.3,0.38,0.4,0.38,0.39,,1377.0,468.8,468.8,181.0,-0.55,0.3,11.68,,,1.12,-1.09,12.33,,,2.27,-0.4,20.18,,,-5.23,-0.51,3.64,1.32,6.66,0.43,1.59,11.22,,,3_large_250_1000m_eur,3_large_50_200m_eur,"(-1, 0]",1.0,2.0,27.0,181.0,0,2,0.9,0.0,0.45,0.45,"[0.49, 0.1, 0.4]",0,37.01,181.0,15.0,468.8,0.11,9.88,31228.0,2560.0,2018.0,13.4,1700.0,2018.0,"[2560.0, nan, nan, nan]",2560.0,"[2018.0, nan, nan, nan]","[1700.0, nan, nan, nan]",1700.0,191.0,,493.6,,1221.75,1152.0,0.06,6.0,1.24,1.94,,,,,,,,,,
11,1,3i,London,United Kingdom,Europe,UK&I,652.3,6704,Esvagt,services,logistics,regular,0.0,0.0,"[\n ""services""\n]",397.6,53.27,2024.0,129.35,17.33,2024.0,138.97,,,18.62,2024.0,68.0,7.0,"[\n 1289,\n 424316,\n 1023826,\n 1023363,\...",,,,DKK,0.13,"[\n ""businessToBusiness""\n]",3432.78,459.9,2024.0,,,,168.52,22.58,2024.0,490.0,65.65,False,92.4,12.38,2024.0,33.27,490.0,65.65,2024.0,,,,False,2025-07-18 02:54:27.871,,,,1064.0,"1,001-5,000",2024.0,https://app.gain.pro/asset/6704/esvagt,1067.85,143.06,2024.0,Esbjerg,DK,,13.76,1.84,2024.0,5.0,2022.0,,,,,,,,,,,,,,800367.0,"[\n ""https://www.linkedin.com/in/søren-karas-...",,,3293.82,6.72,2024.0,441.29,2024.0,8060.0,...,0.02,0.01,0.02,0.02,0.05,0.03,,,,0.41,0.37,0.31,0.35,0.33,0.34,0.3,0.34,0.33,,1064.0,1472.73,197.31,490.0,2.26,3.71,10.84,,,-1.21,8.96,10.42,,,-5.96,26.2,24.08,,,6.93,6.56,5.31,1.53,2.87,6.32,8.44,12.57,,,2_medium_50_250m_eur,3_large_50_200m_eur,"(-1, 0]",1.0,1.0,44.0,65.65,1,0,0.9,0.9,0.0,0.9,"[0.5, 0.5]",1,37.01,65.65,13.0,197.31,0.11,9.88,6704.0,622.39,2022.0,,622.39,2022.0,"[nan, 804.0, 549.4]",804.0,"[nan, 2021.0, 2015.0]","[622.4, nan, nan]",622.4,48.76,,144.9,,768.07,560.15,0.27,2.0,1.14,1.08,,,,,,,,,,
31,1,3i,London,United Kingdom,Europe,UK&I,652.3,38,European Bakery Group,consumer,food,regular,2.0,3.0,"[\n ""manufacturing"",\n ""distribution""\n]",15.02,15.02,2024.0,27.23,27.23,2024.0,16.51,,,16.51,2024.0,53.0,5.0,"[\n 617,\n 591,\n 16849,\n 13958,\n 8577,...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",144.54,144.54,2024.0,,,,36.89,36.89,2024.0,56.62,56.62,False,41.6,41.6,2024.0,14.14,56.62,56.62,2024.0,,,,False,2022-02-15 17:04:16.668,,,,1277.3,"1,001-5,000",2024.0,https://app.gain.pro/asset/38/european-bakery-...,225.88,225.88,2024.0,Tilburg,NL,Noord-Brabant,23.16,23.16,2024.0,9.0,2021.0,,,,,,,,1.5,2.75,4.0,,,,101865670.0,"[\n ""https://www.linkedin.com/in/raoulvorage""...",,,128.04,2.26,2024.0,128.04,2024.0,1230.0,...,0.31,0.01,0.54,0.38,0.83,0.31,,,,,,0.02,0.05,0.09,0.12,0.2,0.15,0.14,,1277.3,400.37,400.37,56.62,64.12,85.52,54.05,,,58.35,55.35,64.25,,,35.86,55.15,82.04,,,4.71,92.61,52.13,0.0,0.0,75.06,92.24,55.1,,,3_large_250_1000m_eur,3_large_50_200m_eur,"(2, 5]",1.0,1.0,2.0,56.62,1,0,0.9,0.9,0.0,0.9,,1,37.01,56.62,15.0,400.37,0.1,8.76,38.0,,,,,,"[nan, nan, nan]",,"[nan, nan, nan]","[nan, nan, nan]",,9.5,,103.0,,764.36,,,,,,,,,,,,,,,
13,1,3i,London,United Kingdom,Europe,UK&I,652.3,6580,Evernex,tmt,technology,regular,2.0,4.0,"[\n ""services""\n]",,,,,,,,,,,,,2.0,"[\n 1048413,\n 999760,\n 2623,\n 9249,\n ...",,,,EUR,1.0,"[\n ""businessToBusiness""\n]",,,,,,,,,,90.0,90.0,False,,,,,90.0,90.0,2025.0,,,,False,2021-02-11 19:45:36.839,,,,1000.0,"501-1,000",2021.0,https://app.gain.pro/asset/6580/evernex,,,,Courbevoie,FR,Île-de-France,,,,6.0,2020.0,,,,,,,,,,,,,,697726.0,"[\n ""https://www.linkedin.com/in/vianney-du-p...",,,,,,,,1210.0,...,,,,,,,,,,,0.11,,,,,,,,,1000.0,183.09,183.09,90.0,-1.8,1.99,4.38,,,,,,,,,,,,,4.11,14.2,19.52,0.1,1.02,,,,,,2_medium_50_250m_eur,3_large_50_200m_eur,"(2, 5]",1.0,2.0,42.0,90.0,1,1,0.9,0.6,0.3,0.6,,1,37.01,90.0,12.59,183.09,0.14,12.59,6580.0,,,,,,"[nan, nan, 400.0, 137.0, 70.0, 40.0]",400.0,"[nan, nan, 2019.0, 2015.0, 2013.0, 2011.0]","[nan, nan, nan, nan, nan, nan]",,90.0,,,,680.12,,,,,,,,,,,,,,,
18,1,3i,London,United Kingdom,Europe,UK&I,652.3,5349,East Surrey Pipelines,other,infrastructure,regular,0.0,0.0,"[\n ""services"",\n ""engineering""\n]",52.17,60.28,2023.0,-5.99,-6.92,2023.0,58.19,,,67.23,2023.0,49.0,6.0,"[\n 1833855,\n 793021,\n 1936747,\n 959631...",,,,GBP,1.16,"[\n ""businessToBusiness"",\n ""businessToConsu...",696.43,804.59,2023.0,,,,41.8,48.29,2023.0,53.77,62.12,False,1.59,1.84,2023.0,43.59,53.77,62.12,2023.0,,,,False,2024-09-30 00:00:00.000,,,,126.0,51-200,2023.0,https://app.gain.pro/asset/5349/east-surrey-pi...,61.89,71.5,2023.0,Leatherhead,GB,,,,,12.0,2016.0,,,,,,,,,,,,,,1877255.0,"[\n ""https://www.linkedin.com/in/kevin-o-conn...",,,638.24,11.87,2023.0,737.36,2023.0,1160.0,...,0.1,0.13,0.13,0.11,0.1,,,,0.65,0.63,0.59,0.58,0.54,0.51,0.47,0.41,0.44,,125.5,126.0,123.34,142.5,53.77,13.14,17.96,16.38,,,20.31,13.29,10.64,,,24.0,14.98,12.01,,,4.13,9.54,12.71,-3.65,-5.04,19.58,13.54,12.24,,,2_medium_50_250m_eur,3_large_50_200m_eur,"(-1, 0]",1.0,1.0,25.0,62.12,1,0,0.9,0.9,0.0,0.9,,1,37.01,62.12,12.0,142.5,0.3,10.86,5349.0,260.21,2006.0,,,,"[260.2, 523.9]",523.9,"[2006.0, 2005.0]","[nan, nan]",,,,,,670.87,234.19,0.65,18.0,,,East Surrey Pipelines,3i,2016.0,0.9,12.0,,,1.0,Change multiple,
27,1,3i,London,United Kingdom,Europe,UK&I,652.3,5441,Infinis,materialsEnergy,energy,regular,0.0,0.0,"[\n ""services""\n]",43.12,49.82,2024.0,0.66,0.77,2024.0,6.59,,,7.61,2024.0,56.0,8.0,"[\n 1105624,\n 1157505,\n 31167,\n 132017,...",,,,GBP,1.16,"[\n ""businessToBusiness""\n]",437.56,505.52,2024.0,,,,25.84,29.85,2024.0,67.4,77.87,False,24.28,28.05,2024.0,46.69,67.4,77.87,2024.0,,,,False,2024-09-03 12:33:30.006,,,,292.0,201-500,2024.0,https://app.gain.pro/asset/5441/infinis,57.7,66.67,2024.0,Northampton,GB,,6.22,7.19,2024.0,10.0,2016.0,,,,,,,,,,,,,,225626.0,"[\n ""https://www.linkedin.com/in/shane-picker...",,,430.97,6.39,2024.0,497.91,2024.0,400.0,...,0.0,-0.04,-0.02,-0.03,-0.01,-0.01,,,,0.65,0.63,0.52,0.48,0.48,0.5,0.46,0.48,0.47,350.5,292.0,144.34,166.76,67.4,-9.62,-3.5,-0.98,,,-12.58,-2.94,-3.14,,,-26.22,-2.93,4.17,,,5.8,3.99,1.29,2.55,2.93,-14.33,1.12,0.23,,,2_medium_50_250m_eur,3_large_50_200m_eur,"(-1, 0]",1.0,1.0,19.0,77.87,1,0,0.9,0.9,0.0,0.9,[1.0],1,37.01,77.87,6.0,166.76,0.12,8.97,5441.0,213.95,2016.0,,213.95,2016.0,"[nan, nan]",,"[nan, nan]","[213.9, nan]",213.9,112.41,2016.0,172.37,2016.0,420.49,192.55,0.54,8.0,,,,,,,,,,,,
14,1,3i,London,United Kingdom,Europe,UK&I,652.3,732399,Global Cloud Xchange,tmt,technology,regular,0.0,0.0,"[\n ""services"",\n ""engineering""\n]",21.93,18.76,2022.0,46.41,39.7,2022.0,32.13,,,27.48,2022.0,72.0,5.0,"[\n 775371,\n 776249,\n 4585062,\n 3970338...",,,,USD,0.86,"[\n ""businessToBusiness""\n]",230.22,196.95,2022.0,,,,-3.69,-3.16,2022.0,43.26,37.01,False,21.33,18.25,2022.0,18.0,43.26,37.01,2022.0,,,,False,2023-01-07 00:00:00.000,,,,880.0,"501-1,000",2024.0,https://app.gain.pro/asset/732399/global-cloud...,160.35,137.18,2022.0,Hounslow,GB,,,,,9.0,2021.0,,,,,,,,,,,,,,237331.0,"[\n ""https://www.linkedin.com/in/carl-grivner...",,,198.09,4.58,2022.0,169.47,2022.0,430.0,...,,,,,,,,,,,,,,,0.08,0.18,,,,880.0,240.31,205.59,43.26,2.39,,-13.71,,,130.74,,,,,,,,,,-3.3,-2.9,-2.37,-3.49,-8.73,4.13,,,,,2_medium_50_250m_eur,2_medium_10_50m_eur,"(-1, 0]",1.0,1.0,37.0,37.01,1,0,0.9,0.9,0.0,0.9,[1.0],1,37.01,37.01,10.0,205.59,0.14,12.59,732399.0,448.45,2021.0,27.83,330.21,2022.0,"[448.5, 181.3, nan]",448.5,"[2021.0, 2003.0, nan]","[330.2, nan, nan]",330.2,16.12,,205.57,,403.61,403.61,-0.21,3.0,0.93,0.64,,,,,,,,,,


### Europe 250

In [290]:
investor_ranking = investors_mm_europe.groupby(
    ['investor_id']).agg(
    name=('investor_name', 'first'),
    count=('investor_id', 'count'),
    investor_hq_city=('investor_hq_city', 'first'),
    investor_country_name=('investor_country_name', 'first'),
    investor_region=('investor_region', 'first'),
    investor_sub_region=('investor_sub_region', 'first'),
    funds_raised_last_five_years_eur=('funds_raised_last_five_years_eur', 'first'),
    mean_ebitda_pos = ('ebitda_eur_pos','mean'),
    median_ebitda_pos = ('ebitda_eur_pos','median'),
    count_ebitda_pos = ('ebitda_eur_pos','count'),
    estd_ev_managed = ('estd_ev_managed','sum')
)

In [291]:
df_summary = investors_mm_europe.copy()

df_summary['total_estd_ev_managed'] = df_summary.groupby('investor_id')['estd_ev_managed'].transform('sum')
df_summary['rank'] = df_summary.groupby('investor_id')['estd_ev_managed'].rank(method='first', ascending=False)
df_summary['asset_percentage'] = (df_summary['estd_ev_managed'] / df_summary['total_estd_ev_managed']) * 100
df_summary = df_summary.sort_values(['investor_id', 'rank'])
df_summary = df_summary[df_summary['rank'] <= 10].copy()

df_summary['owner_shares'] = df_summary['owner_shares'].apply(lambda x: 'MINORITY' if x == 'minority' else x)

df_summary['above_25_pct_flag'] = df_summary['asset_percentage'] > 25
df_summary['above_50_pct_flag'] = df_summary['asset_percentage'] > 50

df_summary['above_25_pct_flag'] = df_summary['above_25_pct_flag'].astype(int)
df_summary['above_50_pct_flag'] = df_summary['above_50_pct_flag'].astype(int)

df_summary['top_5_info'] = df_summary.apply(
    lambda row: f"{row['asset_name']} ({row['asset_percentage']:.1f}% | {row['owner_shares']} | {row['estd_ev_managed']:,.0f})", axis=1
)


# df_summary['is_infrastructure'] = (df_summary['subsector'] == 'infrastructure').astype(int)

# Group by investor_id and aggregate both top_5_info and the flags


df_summary = df_summary.groupby('investor_id').agg({
    'top_5_info': lambda x: ', '.join(x),
    'above_25_pct_flag': 'max',  # Keep the maximum value of flag_25
    'above_50_pct_flag': 'max',
    'owner_shares': 'first'
}).reset_index()

df_summary['largest_holding_is_minority'] = (df_summary['owner_shares'] == 'MINORITY').astype(int)
df_summary = df_summary.drop(columns=['owner_shares'])


df_summary

Unnamed: 0,investor_id,top_5_info,above_25_pct_flag,above_50_pct_flag,largest_holding_is_minority
0,1,"Action (65.2% | majority | 21,960), TCR (5.3% ...",1,1,0
1,2,"Corendon (46.4% | MINORITY | 44), De IJsvogel ...",1,0,1
2,3,"Oogwereld Groep (58.7% | majority | 126), Lubb...",1,1,0
3,4,"Yielder Group (17.5% | majority | 404), Open L...",0,0,0
4,5,"CleanLease (66.9% | majority | 373), Codi Grou...",1,1,0
...,...,...,...,...,...
2948,20997,Dourogás GNV (100.0% | MINORITY | 9),1,1,1
2949,21063,FeelEverywhere (100.0% | majority | 7),1,1,0
2950,21085,Questback (100.0% | majority | 26),1,1,0
2951,21283,Fabergé (100.0% | majority | 39),1,1,0


In [292]:
df_summary[df_summary['investor_id']==160]

Unnamed: 0,investor_id,top_5_info,above_25_pct_flag,above_50_pct_flag,largest_holding_is_minority
137,160,"Mundys (29.5% | MINORITY | 20,300), iQ Student...",1,0,1


In [293]:

investor_ranking = pd.merge(investor_ranking,df_summary,on = ['investor_id'], how = 'left')

In [294]:
df_summary = investors_mm_europe.copy()

def weighted_avg(group):
    return (group['ebitda_eur_pos_fill_na'] * group['ownership_pct']).sum() / group['ownership_pct'].sum()

# Apply the function to each group
investor_level_avg = df_summary.groupby('investor_id').apply(weighted_avg).reset_index(name='weighted_avg_ebitda')

investor_level_avg

  investor_level_avg = df_summary.groupby('investor_id').apply(weighted_avg).reset_index(name='weighted_avg_ebitda')


Unnamed: 0,investor_id,weighted_avg_ebitda
0,1,100.09
1,2,4.40
2,3,5.85
3,4,7.02
4,5,10.42
...,...,...
2951,20997,2.95
2952,21063,1.48
2953,21085,2.44
2954,21283,1.80


In [295]:
investor_ranking_summary = investor_ranking.sort_values(by = 'estd_ev_managed',ascending = False)

In [296]:
investor_ranking_summary = pd.merge(investor_ranking_summary, entries_exits_count_EU, how='left', on = 'investor_id')

In [297]:
#investor_ranking_summary['is_infrastructure'] = investor_ranking_summary['is_infrastructure']/investor_ranking_summary['count']

In [298]:
europe_all = investor_ranking_summary[
                (investor_ranking_summary['count']>=5) & 
                (investor_ranking_summary['count_ebitda_pos']>=3) &
                (investor_ranking_summary['entries_count']>=5)]

europe_all = europe_all.copy()
europe_all[['entries_count','exits_count']] = europe_all[['entries_count', 'exits_count']].fillna(0)

In [299]:
europe_all['funds_raised_last_five_years_eur'] = europe_all['funds_raised_last_five_years_eur'].replace(0, "-")


In [300]:
europe_all = europe_all.copy()
europe_all['rank'] = europe_all['estd_ev_managed'].rank(method='first', ascending=False)

In [301]:
europe_all = europe_all.reset_index(drop=True)

In [302]:
europe_500 = europe_all.head(500)
europe_250 = europe_all.head(250)

In [303]:
europe_500[['investor_id', 'name']].to_clipboard(index=False)

In [310]:
replicator(europe_250)

HBox(children=(Button(description='Copy', style=ButtonStyle()), Dropdown(options=('Full DataFrame/Series', 'Ex…

Unnamed: 0,investor_id,name,count,investor_hq_city,investor_country_name,investor_region,investor_sub_region,funds_raised_last_five_years_eur,mean_ebitda_pos,median_ebitda_pos,count_ebitda_pos,estd_ev_managed,top_5_info,above_25_pct_flag,above_50_pct_flag,largest_holding_is_minority,region,entries_count,exits_count,rank
0,48,KKR,79,Manhattan,United States of America,North America,US,98858.69,197.42,112.97,63,93329.76,"FiberCop (9.2% | MINORITY | 8,550), ContourGlo...",0.00,0.00,1.00,Europe,90.00,39.00,1.00
1,23,CVC Capital Partners,98,London,United Kingdom,Europe,UK&I,50678.00,129.22,58.68,82,78384.50,"Multiversity Group (5.2% | majority | 4,104), ...",0.00,0.00,0.00,Europe,91.00,38.00,2.00
2,27,EQT,67,Stockholm,Sweden,Europe,Nordics,91867.93,161.31,94.20,50,75152.79,"IVC Evidensia (14.4% | majority | 10,841), IFS...",0.00,0.00,0.00,Europe,76.00,73.00,3.00
3,160,Blackstone,48,Manhattan,United States of America,North America,US,140455.62,276.15,79.86,38,68847.24,"Mundys (29.5% | MINORITY | 20,300), iQ Student...",1.00,0.00,1.00,Europe,45.00,15.00,4.00
4,135,GIC,23,Singapore,Singapore,Asia,Asia,-,453.44,357.81,17,61078.96,"The Citco Group (63.3% | MINORITY | 38,670), V...",1.00,1.00,1.00,Europe,28.00,12.00,5.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,32,G Square,15,London,United Kingdom,Europe,UK&I,1000.00,12.75,7.07,12,1328.94,"Keys Group (37.9% | majority | 503), 3B Scient...",1.00,0.00,0.00,Europe,9.00,3.00,246.00
246,786,Orlando Capital,14,München,Germany,Europe,DACH,255.00,17.88,15.35,8,1325.84,"Westhouse Group (16.7% | majority | 221), Well...",0.00,0.00,0.00,Europe,13.00,3.00,247.00
247,1984,Innova Capital,16,Warszawa,Poland,Europe,CEE,407.00,9.98,8.40,12,1320.71,ProService FinTeco Group (17.0% | majority | 2...,0.00,0.00,0.00,Europe,15.00,7.00,248.00
248,418,NORD Holding,18,Hanover,Germany,Europe,DACH,800.00,10.93,5.98,13,1319.76,all inclusive Fitness Group (42.3% | majority ...,1.00,0.00,0.00,Europe,18.00,10.00,249.00


# REGIONAL RANKING

In [305]:
investor_ranking_region = investors_mm_europe.groupby(
    ['sub_region','investor_id'], as_index=False).agg(
     name=('investor_name', 'first'),
    count=('investor_id', 'count'),
    investor_hq_city=('investor_hq_city', 'first'),
    investor_country_name=('investor_country_name', 'first'),
    investor_region=('investor_region', 'first'),
    investor_sub_region=('investor_sub_region', 'first'),
    funds_raised_last_five_years_eur=('funds_raised_last_five_years_eur', 'first'),
    mean_ebitda_pos = ('ebitda_eur_pos','mean'),
    median_ebitda_pos = ('ebitda_eur_pos','median'),
    count_ebitda_pos = ('ebitda_eur_pos','count'),
    estd_ev_managed = ('estd_ev_managed','sum')
)


In [306]:

df_summary = investors_mm_europe.copy()
df_summary['total_estd_ev_managed'] = df_summary.groupby(['investor_id', 'sub_region'])['estd_ev_managed'].transform('sum')
df_summary['rank'] = df_summary.groupby(['investor_id', 'sub_region'])['estd_ev_managed'].rank(method='first', ascending=False)
df_summary['asset_percentage'] = (df_summary['estd_ev_managed'] / df_summary['total_estd_ev_managed']) * 100
df_summary = df_summary.sort_values(['investor_id', 'sub_region', 'rank'])
df_summary = df_summary[df_summary['rank'] <= 10].copy()

df_summary['owner_shares'] = df_summary['owner_shares'].apply(lambda x: 'MINORITY' if x == 'minority' else x)

df_summary['top_5_info'] = df_summary.apply(
    lambda row: f"{row['asset_name']} ({row['asset_percentage']:.1f}% | {row['owner_shares']} | {row['estd_ev_managed']:,.0f})", axis=1
)
df_summary = df_summary.groupby(['investor_id', 'sub_region'])['top_5_info'] \
                      .apply(lambda x: ', '.join(x)) \
                      .reset_index()

df_summary


Unnamed: 0,investor_id,sub_region,top_5_info
0,1,Benelux,"Action (82.5% | majority | 21,960), TCR (6.7% ..."
1,1,DACH,"Formel D Group (31.1% | majority | 300), DNS:N..."
2,1,France,"Evernex (59.4% | majority | 680), Ionisos Grou..."
3,1,Nordics,"Scandlines (46.8% | MINORITY | 1,222), Esvagt ..."
4,1,UK&I,East Surrey Pipelines (28.7% | majority | 671)...
...,...,...,...
4439,20997,Iberia,Dourogás GNV (100.0% | MINORITY | 9)
4440,21063,Iberia,FeelEverywhere (100.0% | majority | 7)
4441,21085,Nordics,Questback (100.0% | majority | 26)
4442,21283,UK&I,Fabergé (100.0% | majority | 39)


In [307]:
df_summary[df_summary['investor_id']==223]

Unnamed: 0,investor_id,sub_region,top_5_info
565,223,DACH,Kalle Group (100.0% | majority | 428)
566,223,France,"Opella (69.2% | majority | 8,000), BUT (19.5% ..."
567,223,UK&I,"Morrisons (37.3% | majority | 7,390), Motor Fu..."


In [308]:
investor_ranking_region = pd.merge(investor_ranking_region,df_summary,on = ['investor_id','sub_region'], how = 'left')

In [311]:
investor_ranking_summary_region = pd.merge(investor_ranking_region, entries_exits_count_sub_region, on=['sub_region', 'investor_id'],how='left', suffixes=('', '_y'))
investor_ranking_summary_region.drop(investor_ranking_summary_region.filter(regex='_y$').columns, axis=1, inplace=True)

In [312]:
investor_ranking_summary_region = investor_ranking_summary_region.sort_values(by = ['sub_region','estd_ev_managed'],ascending = False)

### Top 50

In [None]:
#Extra Entries condition for individual rankings
region = investor_ranking_summary_region[
                (investor_ranking_summary_region['count']>=3) & 
                (investor_ranking_summary_region['count_ebitda_pos']>=2) &
                (investor_ranking_summary_region['entries_count']>=2)
]

In [None]:
region_50 =  region.groupby('sub_region',as_index= False).apply(lambda x: x.head(50)).sort_values(by ='estd_ev_managed', ascending = False).reset_index(drop=True)

region_50['rank'] = region_50.groupby('sub_region')['estd_ev_managed'].rank(method='dense', ascending=False)

# Reorder the columns
columns = list(region_50.columns)
columns.insert(0, columns.pop(columns.index('rank')))
region_50 = region_50[columns]

region_50['funds_raised_last_five_years_eur'] = region_50['funds_raised_last_five_years_eur'].replace(0, "-")


In [None]:
investors_mm_europe[investors_mm_europe['asset_id']==1361277][['ownership_pct','estd_ev_managed','ebitda_eur']]

In [None]:
region_50_investors_explode =  investors_mm_europe[(investors_mm_europe['investor_id'].isin(region_50.index))]

### Top 100

In [None]:
region_100 =  region.groupby('sub_region',as_index= False).apply(lambda x: x.head(100)).sort_values(by ='estd_ev_managed', ascending = False).reset_index(drop=True)

region_100['rank'] = region_100.groupby('sub_region')['estd_ev_managed'].rank(method='dense', ascending=False)

# Reorder the columns
columns = list(region_100.columns)
columns.insert(0, columns.pop(columns.index('rank')))
region_100 = region_100[columns]

region_100['funds_raised_last_five_years_eur'] = region_100['funds_raised_last_five_years_eur'].replace(0, "-")


In [None]:
region_100_investors_explode =  investors_mm_europe[(investors_mm_europe['investor_id'].isin(region_100.index))]

# COUNTRY RANKING

In [None]:
investor_ranking_country = investors_mm_europe.groupby(
    ['country_name','investor_id'], as_index=False).agg(
     name=('investor_name', 'first'),
    count=('investor_id', 'count'),
    investor_hq_city=('investor_hq_city', 'first'),
    investor_country_name=('investor_country_name', 'first'),
    investor_region=('investor_region', 'first'),
    investor_sub_region=('investor_sub_region', 'first'),
    funds_raised_last_five_years_eur=('funds_raised_last_five_years_eur', 'first'),
    mean_ebitda_pos = ('ebitda_eur_pos','mean'),
    median_ebitda_pos = ('ebitda_eur_pos','median'),
    count_ebitda_pos = ('ebitda_eur_pos','count'),
    estd_ev_managed = ('estd_ev_managed','sum')
)


In [None]:

df_summary = investors_mm_europe.copy()
df_summary['total_estd_ev_managed'] = df_summary.groupby(['investor_id', 'country_name'])['estd_ev_managed'].transform('sum')
df_summary['rank'] = df_summary.groupby(['investor_id', 'country_name'])['estd_ev_managed'].rank(method='first', ascending=False)
df_summary['asset_percentage'] = (df_summary['estd_ev_managed'] / df_summary['total_estd_ev_managed']) * 100
df_summary = df_summary.sort_values(['investor_id', 'country_name', 'rank'])
df_summary = df_summary[df_summary['rank'] <= 10].copy()

df_summary['owner_shares'] = df_summary['owner_shares'].apply(lambda x: 'MINORITY' if x == 'minority' else x)

df_summary['top_5_info'] = df_summary.apply(
    lambda row: f"{row['asset_name']} ({row['asset_percentage']:.1f}% | {row['owner_shares']} | {row['estd_ev_managed']:,.0f})", axis=1
)
df_summary = df_summary.groupby(['investor_id', 'country_name'])['top_5_info'] \
                      .apply(lambda x: ', '.join(x)) \
                      .reset_index()

df_summary


In [None]:
investor_ranking_country = pd.merge(investor_ranking_country,df_summary,on = ['investor_id','country_name'], how = 'left')

In [None]:
investor_ranking_summary_country = pd.merge(investor_ranking_country, entries_exits_count_country, on=['country_name', 'investor_id'],how='left', suffixes=('', '_y'))
investor_ranking_summary_country.drop(investor_ranking_summary_country.filter(regex='_y$').columns, axis=1, inplace=True)

In [None]:
investor_ranking_summary_country = investor_ranking_summary_country.sort_values(by = ['country_name','estd_ev_managed'],ascending = False)

## Top 50

In [None]:
#Extra Entries condition for individual rankings
country = investor_ranking_summary_country[
                (investor_ranking_summary_country['count']>=3) & 
                (investor_ranking_summary_country['count_ebitda_pos']>=2) &
                (investor_ranking_summary_country['entries_count']>=2)
]

In [None]:
country_50 =  country.groupby('country_name',as_index= False).apply(lambda x: x.head(50)).sort_values(by ='estd_ev_managed', ascending = False).reset_index(drop=True)

country_50['rank'] = country_50.groupby('country_name')['estd_ev_managed'].rank(method='dense', ascending=False)

# Reorder the columns
columns = list(country_50.columns)
columns.insert(0, columns.pop(columns.index('rank')))
country_50 = country_50[columns]

country_50['funds_raised_last_five_years_eur'] = country_50['funds_raised_last_five_years_eur'].replace(0, "-")


In [None]:
country_50_investors_explode = investors_mm_europe[(investors_mm_europe['investor_id'].isin(country_50.index))]

# SECTOR RANKING

In [None]:
investor_ranking_sector = investors_mm_europe.groupby(
    ['sector','investor_id'], as_index=False).agg(
     name=('investor_name', 'first'),
    count=('investor_id', 'count'),
    investor_hq_city=('investor_hq_city', 'first'),
    investor_country_name=('investor_country_name', 'first'),
    investor_region=('investor_region', 'first'),
    investor_sub_region=('investor_sub_region', 'first'),
    funds_raised_last_five_years_eur=('funds_raised_last_five_years_eur', 'first'),
    mean_ebitda_pos = ('ebitda_eur_pos','mean'),
    median_ebitda_pos = ('ebitda_eur_pos','median'),
    count_ebitda_pos = ('ebitda_eur_pos','count'),
    estd_ev_managed = ('estd_ev_managed','sum')
)


In [None]:

df_summary = investors_mm_europe.copy()
df_summary['total_estd_ev_managed'] = df_summary.groupby(['investor_id', 'sector'])['estd_ev_managed'].transform('sum')
df_summary['rank'] = df_summary.groupby(['investor_id', 'sector'])['estd_ev_managed'].rank(method='first', ascending=False)
df_summary['asset_percentage'] = (df_summary['estd_ev_managed'] / df_summary['total_estd_ev_managed']) * 100
df_summary = df_summary.sort_values(['investor_id', 'sector', 'rank'])
df_summary = df_summary[df_summary['rank'] <= 10].copy()

df_summary['owner_shares'] = df_summary['owner_shares'].apply(lambda x: 'MINORITY' if x == 'minority' else x)

df_summary['top_5_info'] = df_summary.apply(
    lambda row: f"{row['asset_name']} ({row['asset_percentage']:.1f}% | {row['owner_shares']} | {row['estd_ev_managed']:,.0f})", axis=1
)
df_summary = df_summary.groupby(['investor_id', 'sector'])['top_5_info'] \
                      .apply(lambda x: ', '.join(x)) \
                      .reset_index()

df_summary


In [None]:
investor_ranking_sector = pd.merge(investor_ranking_sector,df_summary,on = ['investor_id','sector'], how = 'left')

In [None]:
investor_ranking_summary_sector = pd.merge(investor_ranking_sector, entries_exits_count_sector, on=['sector', 'investor_id'],how='left', suffixes=('', '_y'))
investor_ranking_summary_sector.drop(investor_ranking_summary_sector.filter(regex='_y$').columns, axis=1, inplace=True)

In [None]:
investor_ranking_summary_sector = investor_ranking_summary_sector.sort_values(by = ['sector','estd_ev_managed'],ascending = False)

### Top 50

In [None]:
#Extra Entries condition for individual rankings
sector = investor_ranking_summary_sector[
                (investor_ranking_summary_sector['count']>=3) & 
                (investor_ranking_summary_sector['count_ebitda_pos']>=2) &
                (investor_ranking_summary_sector['entries_count']>=2)
]

In [None]:
sector_50 =  sector.groupby('sector',as_index= False).apply(lambda x: x.head(50)).sort_values(by ='estd_ev_managed', ascending = False).reset_index(drop=True)

sector_50['rank'] = sector_50.groupby('sector')['estd_ev_managed'].rank(method='dense', ascending=False)

# Reorder the columns
columns = list(sector_50.columns)
columns.insert(0, columns.pop(columns.index('rank')))
sector_50 = sector_50[columns]

sector_50['funds_raised_last_five_years_eur'] = sector_50['funds_raised_last_five_years_eur'].replace(0, "-")


In [None]:
sector_50_investors_explode =  investors_mm_europe[(investors_mm_europe['investor_id'].isin(sector_50.index))]

### Top 100

In [None]:
sector_100 =  sector.groupby('sector',as_index= False).apply(lambda x: x.head(100)).sort_values(by ='estd_ev_managed', ascending = False).reset_index(drop=True)

sector_100['rank'] = sector_100.groupby('sector')['estd_ev_managed'].rank(method='dense', ascending=False)

# Reorder the columns
columns = list(sector_100.columns)
columns.insert(0, columns.pop(columns.index('rank')))
sector_100 = sector_100[columns]

sector_100['funds_raised_last_five_years_eur'] = sector_100['funds_raised_last_five_years_eur'].replace(0, "-")




In [None]:
sector_100_investors_explode =  investors_mm_europe[(investors_mm_europe['investor_id'].isin(sector_100.index))]