# Import Libraries

In [753]:
import pandas as pd
pd.set_option('display.max_columns', None)

# Importind DataFrames

In [754]:
%store -r all_verticals_deals_df
%store -r contacts_df
%store -r events_df
%store -r private_equity_companies_df

# Mapping to 'Company' Target Data Model

In [755]:
#Create columns for the 'Company' DataFrame
columns = [
    "Id",
    "CompanyName",
    "CompanyTypeId",
    "PrivateEquityCompanySectors",
    "PrivateEquityPortfolioCompanies",
    "VerticalID",
    "SubVerticalID",
    "NumberOfEmployees",
    "LTMRevenue",
    "LTM EBITA",
    "CurrencyId",
    "EnterpriseValue",
    "YearFounded",
    "ParentCompany",
    "AssetsUnderManagement",
    "AddressLine1",
    "AddressLine2",
    "City",
    "PostCode",
    "Country",
    "ContactId",
    "Website",
    "AdditionalDescription",
    "CreatedAt",
    "CreatedBy",
    "UpdatedAt",
    "UpdatedBy",
    "File"
]

# Create an empty DataFrame with the columns
company_df = pd.DataFrame(columns=columns)

In [756]:
# Populate the Company DataFrame with Deal's Company Information
transform_company_df = pd.DataFrame()
transform_company_df['CompanyName'] = all_verticals_deals_df['Company Name']
transform_company_df['CompanyTypeId'] = 'Private/Public'
transform_company_df['LTMRevenue'] = all_verticals_deals_df['LTM Revenue']
transform_company_df['LTM EBITA'] = all_verticals_deals_df['LTM EBITDA']
transform_company_df['EnterpriseValue'] = all_verticals_deals_df['Enterprise Value']
transform_company_df['VerticalID'] = all_verticals_deals_df['Vertical']
transform_company_df['SubVerticalID'] = all_verticals_deals_df['Sub Vertical']
transform_company_df['File'] = "all_verticals_deals"

In [757]:
# Populate the Company DataFrame with Deals Investment Bank
transform_investment_df = pd.DataFrame()
transform_investment_df['CompanyName'] = all_verticals_deals_df['Invest. Bank']
transform_investment_df['CompanyTypeId'] = 'Investment Bank'
transform_investment_df['File'] = "all_verticals_deals"

In [758]:
# Populate the Company DataFrame with Private Equity Companies
transform_pe_df = pd.DataFrame()
transform_pe_df['CompanyName'] = all_verticals_deals_df['Current Owner']
transform_pe_df['CompanyTypeId'] = 'Private Equity'
transform_pe_df['File'] = "all_verticals_deals"

In [759]:
# Populate the Company DataFrame with Investment Bank
transform_investment_contact_df = pd.DataFrame()
transform_investment_contact_df['CompanyName'] = contacts_df['Firm']
transform_investment_contact_df['CompanyTypeId'] = 'Investment Bank'
transform_investment_contact_df['File'] = 'contacts'

In [760]:
#Populate the Company DataFrame with Private Equity Companies
transform_private_equity_df = pd.DataFrame()
transform_private_equity_df['CompanyName'] = private_equity_companies_df['Company Name']
transform_private_equity_df['CompanyTypeId'] = 'Private Equity Company'
transform_private_equity_df['Website'] = private_equity_companies_df['Website']
transform_private_equity_df['CurrencyId'] = 'USD'
transform_private_equity_df['AssetsUnderManagement'] = private_equity_companies_df['AUM\n(Bns)']
transform_private_equity_df['PrivateEquityCompanySectors'] = private_equity_companies_df['Sectors']
transform_private_equity_df['PrivateEquityPortfolioCompanies'] = private_equity_companies_df['Sample Portfolio Companies']
transform_private_equity_df['AdditionalDescription'] = private_equity_companies_df['Comments']
transform_investment_contact_df['File'] = 'private_equity_companies'


In [761]:
# Concatenate all transformed DataFrames into a single DataFrame
all_company_transformed_df = pd.concat([
    transform_company_df,
    transform_investment_df,
    transform_pe_df,
    transform_investment_contact_df,
    transform_private_equity_df
], ignore_index=True)

In [762]:
all_company_transformed_df = all_company_transformed_df.fillna('')

In [763]:
#Display the first few rows of the transformed DataFrame
all_company_transformed_df.head()

Unnamed: 0,CompanyName,CompanyTypeId,LTMRevenue,LTM EBITA,EnterpriseValue,VerticalID,SubVerticalID,File,Website,CurrencyId,AssetsUnderManagement,PrivateEquityCompanySectors,PrivateEquityPortfolioCompanies,AdditionalDescription
0,Shermco,Private/Public,,,267.0,Business Services,"Testing, Inspection & Certificaiton",all_verticals_deals,,,,,,
1,Kastle Systems,Private/Public,,,,Business Services,Facilities Services,all_verticals_deals,,,,,,
2,CLEAResult,Private/Public,,,,Business Services,Facilities Services,all_verticals_deals,,,,,,
3,PLH,Private/Public,,,680.0,Business Services,Industrial & Environmental Services,all_verticals_deals,,,,,,
4,BBB Industries,Private/Public,,,1000.0,Business Services,Specialty Distribution,all_verticals_deals,,,,,,


In [764]:
#Check for each column in the transformed DataFrame is part of the original Company DataFrame
if not set(all_company_transformed_df.columns).issubset(company_df.columns):
    raise ValueError(f"all_company_transformed_df has columns not in company_df: {set(all_company_transformed_df.columns) - set(company_df.columns)}")
else:
    print("All columns matched successfully.")

All columns matched successfully.


In [765]:
# Group by 'all_company_transformed_df' and concatenate the results
all_company_df = all_company_transformed_df.groupby('CompanyName').agg(lambda x: ' '.join(x.astype(str))).reset_index()
all_company_df.head()

Unnamed: 0,CompanyName,CompanyTypeId,LTMRevenue,LTM EBITA,EnterpriseValue,VerticalID,SubVerticalID,File,Website,CurrencyId,AssetsUnderManagement,PrivateEquityCompanySectors,PrivateEquityPortfolioCompanies,AdditionalDescription
0,,Private/Public Investment Bank Investment Bank...,...,...,...,Business Services ...,Marketing Services ...,all_verticals_deals all_verticals_deals all_ve...,...,...,...,...,...,...
1,100% employee owned (ESOP),Private Equity,,,,,,all_verticals_deals,,,,,,
2,3i,Private Equity,,,,,,all_verticals_deals,,,,,,
3,5-Hour Energy,Private/Public,,265.0,,Consumer Retail,Food & Beverage,all_verticals_deals,,,,,,
4,A Place for Mom,Private/Public,,,269,Business Services,Marketing Services,all_verticals_deals,,,,,,


In [766]:
# Fill NaN values with empty strings
all_company_df = all_company_df.fillna('')

In [767]:
# Create a new column based on substring matching
all_company_df.loc[all_company_df['CompanyTypeId'].str.contains('Investment Bank', case=False, na=False), 'CompanyTypeId'] = 'Investment Bank'
all_company_df.loc[all_company_df['CompanyTypeId'].str.contains('Private Equity', case=False, na=False), 'CompanyTypeId'] = 'Private Equity'
all_company_df.loc[~all_company_df['CompanyTypeId'].isin(['Investment Bank', 'Private Equity']), 'CompanyTypeId'] = 'Private/Public'


In [768]:
all_company_df['CompanyTypeId'].value_counts()

Private/Public     318
Private Equity     155
Investment Bank    129
Name: CompanyTypeId, dtype: int64

In [769]:
all_company_df['Id'] = ['COM_{:03d}'.format(i) for i in range(1, len(all_company_df) + 1)]

In [770]:
all_company_df

Unnamed: 0,CompanyName,CompanyTypeId,LTMRevenue,LTM EBITA,EnterpriseValue,VerticalID,SubVerticalID,File,Website,CurrencyId,AssetsUnderManagement,PrivateEquityCompanySectors,PrivateEquityPortfolioCompanies,AdditionalDescription,Id
0,,Investment Bank,...,...,...,Business Services ...,Marketing Services ...,all_verticals_deals all_verticals_deals all_ve...,...,...,...,...,...,...,COM_001
1,100% employee owned (ESOP),Private Equity,,,,,,all_verticals_deals,,,,,,,COM_002
2,3i,Private Equity,,,,,,all_verticals_deals,,,,,,,COM_003
3,5-Hour Energy,Private/Public,,265.0,,Consumer Retail,Food & Beverage,all_verticals_deals,,,,,,,COM_004
4,A Place for Mom,Private/Public,,,269,Business Services,Marketing Services,all_verticals_deals,,,,,,,COM_005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597,Zoë's Kitchen,Private/Public,,30.8,346.5,Consumer Retail,Restaurants,all_verticals_deals,,,,,,,COM_598
598,iCracked Inc.,Private/Public,,50.0,,Consumer Retail,Restaurants,all_verticals_deals,,,,,,,COM_599
599,iHerb,Private/Public,,,,Consumer Retail,Food & Beverage,all_verticals_deals,,,,,,,COM_600
600,littleBits,Private/Public,,66.3,,Consumer Retail,Food & Beverage,all_verticals_deals,,,,,,,COM_601


In [771]:
# Concatenate result DataFrames with company_df to bring all columns together
company_transformed= pd.concat([
    company_df,
    all_company_df
], ignore_index=True)

In [772]:
# Fill NaN values with empty strings
company_transformed = company_transformed.fillna('')

# Validate Company Data Model

In [773]:
#Check for each column in the transformed DataFrame is part of the original Company DataFrame
if not set(company_df.columns).issubset(company_transformed.columns):
    raise ValueError(f"company_df has columns not in company_transformed: {set(company_df.columns) - set(company_transformed.columns)}")
else:
    print("All columns matched successfully.")

All columns matched successfully.


In [774]:
company_transformed = company_transformed.drop('File', axis=1)

In [775]:
company_transformed

Unnamed: 0,Id,CompanyName,CompanyTypeId,PrivateEquityCompanySectors,PrivateEquityPortfolioCompanies,VerticalID,SubVerticalID,NumberOfEmployees,LTMRevenue,LTM EBITA,CurrencyId,EnterpriseValue,YearFounded,ParentCompany,AssetsUnderManagement,AddressLine1,AddressLine2,City,PostCode,Country,ContactId,Website,AdditionalDescription,CreatedAt,CreatedBy,UpdatedAt,UpdatedBy
0,COM_001,,Investment Bank,...,...,Business Services ...,Marketing Services ...,,...,...,...,...,,,...,,,,,,,...,...,,,,
1,COM_002,100% employee owned (ESOP),Private Equity,,,,,,,,,,,,,,,,,,,,,,,,
2,COM_003,3i,Private Equity,,,,,,,,,,,,,,,,,,,,,,,,
3,COM_004,5-Hour Energy,Private/Public,,,Consumer Retail,Food & Beverage,,,265.0,,,,,,,,,,,,,,,,,
4,COM_005,A Place for Mom,Private/Public,,,Business Services,Marketing Services,,,,,269,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597,COM_598,Zoë's Kitchen,Private/Public,,,Consumer Retail,Restaurants,,,30.8,,346.5,,,,,,,,,,,,,,,
598,COM_599,iCracked Inc.,Private/Public,,,Consumer Retail,Restaurants,,,50.0,,,,,,,,,,,,,,,,,
599,COM_600,iHerb,Private/Public,,,Consumer Retail,Food & Beverage,,,,,,,,,,,,,,,,,,,,
600,COM_601,littleBits,Private/Public,,,Consumer Retail,Food & Beverage,,,66.3,,,,,,,,,,,,,,,,,
