# Import Libraries

In [121]:
import pandas as pd
# Show every column when a DataFrame is rendered (None means no column limit).
pd.options.display.max_columns = None

# Importing DataFrames

In [122]:
# Restore `all_verticals_deals_df` from IPython's %store persistence. The variable
# must have been saved beforehand with `%store all_verticals_deals_df`
# (presumably by an upstream notebook -- confirm which one produces it).
%store -r all_verticals_deals_df

# Mapping to 'Deals' Target Data Model

In [123]:
# Target schema of the 'Deals' data model; list order is the column order of the
# final table.
columns = [
    # Key and core deal fields
    "Id", "DealName", "CompanyId", "DealDate",
    # Origination fields
    "InvestmentBankId", "SourcingId", "TransactionTypeId", "EstimateEquityInvestment",
    # Sale fields
    "SaleDate", "SalePrice", "SaleType", "BuyingSponserId", "SellingSponserId",
    # Status fields
    "StatusId", "PortfolioCompanyStatus", "ActiveStagesId", "PassedRationale",
    # Lead and description
    "DealLeadId", "AdditionalDescription",
    # Audit fields
    "CreatedAt", "CreatedBy", "UpdatedAt", "UpdatedBy",
    # Name of the source frame each row came from
    "File",
]

# Zero-row frame that carries only the target schema.
deals_df = pd.DataFrame(columns=columns)


In [124]:
# Map the source columns of all_verticals_deals_df onto the 'Deals' target names.
# Keys are source column names, values are target column names; the dict order
# determines the column order of the mapped frame.
# Note: the *Id columns receive the raw source values here, not resolved keys.
source_to_target = {
    'Project Name': 'DealName',
    'Company Name': 'CompanyId',
    'Date Added': 'DealDate',
    'Sourcing': 'SourcingId',
    'Invest. Bank': 'InvestmentBankId',
    'Transaction Type': 'TransactionTypeId',
    'Equity Investment Est.': 'EstimateEquityInvestment',
    'Status': 'StatusId',
    'Portfolio Company Status': 'PortfolioCompanyStatus',
    'Active Stage': 'ActiveStagesId',
    'Passed Rationale': 'PassedRationale',
    'Lead MD': 'DealLeadId',
    'Business Description': 'AdditionalDescription',
}

transform_deals_df = pd.DataFrame(
    {target: all_verticals_deals_df[source] for source, target in source_to_target.items()}
)

# Tag every row with the name of the frame it was read from.
transform_deals_df['File'] = "all_verticals_deals_df"

In [125]:
# Display the mapped frame to inspect the result of the column mapping.
transform_deals_df

Unnamed: 0,DealName,CompanyId,DealDate,SourcingId,InvestmentBankId,TransactionTypeId,EstimateEquityInvestment,StatusId,PortfolioCompanyStatus,ActiveStagesId,PassedRationale,DealLeadId,AdditionalDescription,File
0,,Shermco,2018-02-02 00:00:00,Auction,Harris Williams,Sponsor to Sponsor,133.5,Active,,,,Jeannie Blackwood,"Electrical testing, maintenance, and commissio...",all_verticals_deals_df
1,,Kastle Systems,2018-02-02 00:00:00,Trusted Netwok,,Sponsor to Sponsor,,Active,,,,Andrew Mah,"Provider of comprehensive, turnkey security so...",all_verticals_deals_df
2,,CLEAResult,2018-02-02 00:00:00,Trusted Netwok,,Sponsor to Sponsor,,Active,,,,Kripa Shah,Provider of energy efficiency and demand manag...,all_verticals_deals_df
3,,PLH,2018-02-02 00:00:00,Auction,Barclays,Sponsor to Sponsor,340.0,Active,,,,Russ Barner,Specialty contractor serving the electric powe...,all_verticals_deals_df
4,,BBB Industries,2018-02-02 00:00:00,Auction,"Baird, Jefferies",Sponsor to Sponsor,500.0,Active,,,,Matthew Kordonowy,Provider of remanufactured replacement parts t...,all_verticals_deals_df
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314,,Schweiger Dermatology,2018-01-17 00:00:00,Trusted Network,,Sponsor to Sponsor,,Passed/Dead,,New deal,On hold,Andrew Mah,Roll-up of dermatology practices / owned by LL...,all_verticals_deals_df
315,,Firebirds,2017-12-01 00:00:00,,North Point Advisors,,,Passed/Dead,,,,Kripa Shah,Owner and operator of 45 Firebirds branded res...,all_verticals_deals_df
316,,Pacon,2017-12-01 00:00:00,,Baird,,,Passed/Dead,,,,Russ Barner,Producer and marketer of arts and crafts products,all_verticals_deals_df
317,,Potpourri Group,2017-12-01 00:00:00,,Lincoln International,,,Passed/Dead,,,,Matthew Kordonowy,Direct-to-consumer marketer of women's apparel...,all_verticals_deals_df


In [126]:
# Guard: every column produced by the mapping must exist in the Deals target schema.
unexpected_columns = set(transform_deals_df.columns) - set(deals_df.columns)
if unexpected_columns:
    raise ValueError(f"transform_deals_df has columns not in deals_df: {unexpected_columns}")
print("All columns matched successfully.")

All columns matched successfully.


# Data Cleansing

In [127]:
# Assign sequential keys DT_001, DT_002, ... following the current row order.
row_count = len(transform_deals_df)
transform_deals_df['Id'] = [f'DT_{seq:03d}' for seq in range(1, row_count + 1)]

In [128]:
# Display the mapped frame again to confirm the new 'Id' column.
transform_deals_df

Unnamed: 0,DealName,CompanyId,DealDate,SourcingId,InvestmentBankId,TransactionTypeId,EstimateEquityInvestment,StatusId,PortfolioCompanyStatus,ActiveStagesId,PassedRationale,DealLeadId,AdditionalDescription,File,Id
0,,Shermco,2018-02-02 00:00:00,Auction,Harris Williams,Sponsor to Sponsor,133.5,Active,,,,Jeannie Blackwood,"Electrical testing, maintenance, and commissio...",all_verticals_deals_df,DT_001
1,,Kastle Systems,2018-02-02 00:00:00,Trusted Netwok,,Sponsor to Sponsor,,Active,,,,Andrew Mah,"Provider of comprehensive, turnkey security so...",all_verticals_deals_df,DT_002
2,,CLEAResult,2018-02-02 00:00:00,Trusted Netwok,,Sponsor to Sponsor,,Active,,,,Kripa Shah,Provider of energy efficiency and demand manag...,all_verticals_deals_df,DT_003
3,,PLH,2018-02-02 00:00:00,Auction,Barclays,Sponsor to Sponsor,340.0,Active,,,,Russ Barner,Specialty contractor serving the electric powe...,all_verticals_deals_df,DT_004
4,,BBB Industries,2018-02-02 00:00:00,Auction,"Baird, Jefferies",Sponsor to Sponsor,500.0,Active,,,,Matthew Kordonowy,Provider of remanufactured replacement parts t...,all_verticals_deals_df,DT_005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314,,Schweiger Dermatology,2018-01-17 00:00:00,Trusted Network,,Sponsor to Sponsor,,Passed/Dead,,New deal,On hold,Andrew Mah,Roll-up of dermatology practices / owned by LL...,all_verticals_deals_df,DT_315
315,,Firebirds,2017-12-01 00:00:00,,North Point Advisors,,,Passed/Dead,,,,Kripa Shah,Owner and operator of 45 Firebirds branded res...,all_verticals_deals_df,DT_316
316,,Pacon,2017-12-01 00:00:00,,Baird,,,Passed/Dead,,,,Russ Barner,Producer and marketer of arts and crafts products,all_verticals_deals_df,DT_317
317,,Potpourri Group,2017-12-01 00:00:00,,Lincoln International,,,Passed/Dead,,,,Matthew Kordonowy,Direct-to-consumer marketer of women's apparel...,all_verticals_deals_df,DT_318


In [129]:
# Align the mapped rows to the Deals target schema: schema columns first (in schema
# order), followed by any mapped column that is not part of the schema, with a fresh
# 0..n-1 index. Schema columns that were not mapped are created and filled with NaN.
#
# reindex replaces the former pd.concat([deals_df, transform_deals_df]): concatenating
# with an empty frame is deprecated (FutureWarning since pandas 2.1) and the way it
# influences the result dtypes is slated to change.
target_columns = list(deals_df.columns) + [
    name for name in transform_deals_df.columns if name not in deals_df.columns
]
deal_transformed = (
    transform_deals_df
    .reindex(columns=target_columns)
    .reset_index(drop=True)
)

# Validate Deals Data Model

In [130]:
# Guard: every column of the Deals target schema must be present in the aligned
# result frame. The error message names the frames actually compared here
# (it previously referred to contact_df / contact_transformed).
missing_columns = set(deals_df.columns) - set(deal_transformed.columns)
if missing_columns:
    raise ValueError(f"deals_df has columns not in deal_transformed: {missing_columns}")
print("All columns matched successfully.")

All columns matched successfully.


In [131]:
# Remove the 'File' source-tracking column before the frame is stored.
# errors='ignore' keeps this cell re-runnable: without it a second execution raises
# KeyError because the column has already been dropped.
deal_transformed = deal_transformed.drop(columns='File', errors='ignore')

In [132]:
# Replace every missing value with an empty string.
# NOTE(review): columns that mix numbers and gaps (e.g. EstimateEquityInvestment)
# end up holding both floats and '' after this step -- confirm the consumer expects that.
deal_transformed = deal_transformed.fillna(value='')

In [133]:
# Strip leading/trailing whitespace from every string cell; non-string values pass
# through unchanged. Series.map is applied column by column because
# DataFrame.applymap is deprecated (FutureWarning since pandas 2.1), while this form
# works on both older and newer pandas versions.
def _strip_if_str(value):
    """Return ``value`` stripped when it is a str, otherwise return it unchanged."""
    return value.strip() if isinstance(value, str) else value


deal_transformed = deal_transformed.apply(lambda column: column.map(_strip_if_str))

In [134]:
# Display the final Deals frame (schema order, 'File' removed, blanks filled, strings trimmed).
deal_transformed

Unnamed: 0,Id,DealName,CompanyId,DealDate,InvestmentBankId,SourcingId,TransactionTypeId,EstimateEquityInvestment,SaleDate,SalePrice,SaleType,BuyingSponserId,SellingSponserId,StatusId,PortfolioCompanyStatus,ActiveStagesId,PassedRationale,DealLeadId,AdditionalDescription,CreatedAt,CreatedBy,UpdatedAt,UpdatedBy
0,DT_001,,Shermco,2018-02-02 00:00:00,Harris Williams,Auction,Sponsor to Sponsor,133.5,,,,,,Active,,,,Jeannie Blackwood,"Electrical testing, maintenance, and commissio...",,,,
1,DT_002,,Kastle Systems,2018-02-02 00:00:00,,Trusted Netwok,Sponsor to Sponsor,,,,,,,Active,,,,Andrew Mah,"Provider of comprehensive, turnkey security so...",,,,
2,DT_003,,CLEAResult,2018-02-02 00:00:00,,Trusted Netwok,Sponsor to Sponsor,,,,,,,Active,,,,Kripa Shah,Provider of energy efficiency and demand manag...,,,,
3,DT_004,,PLH,2018-02-02 00:00:00,Barclays,Auction,Sponsor to Sponsor,340.0,,,,,,Active,,,,Russ Barner,Specialty contractor serving the electric powe...,,,,
4,DT_005,,BBB Industries,2018-02-02 00:00:00,"Baird, Jefferies",Auction,Sponsor to Sponsor,500.0,,,,,,Active,,,,Matthew Kordonowy,Provider of remanufactured replacement parts t...,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314,DT_315,,Schweiger Dermatology,2018-01-17 00:00:00,,Trusted Network,Sponsor to Sponsor,,,,,,,Passed/Dead,,New deal,On hold,Andrew Mah,Roll-up of dermatology practices / owned by LL...,,,,
315,DT_316,,Firebirds,2017-12-01 00:00:00,North Point Advisors,,,,,,,,,Passed/Dead,,,,Kripa Shah,Owner and operator of 45 Firebirds branded res...,,,,
316,DT_317,,Pacon,2017-12-01 00:00:00,Baird,,,,,,,,,Passed/Dead,,,,Russ Barner,Producer and marketer of arts and crafts products,,,,
317,DT_318,,Potpourri Group,2017-12-01 00:00:00,Lincoln International,,,,,,,,,Passed/Dead,,,,Matthew Kordonowy,Direct-to-consumer marketer of women's apparel...,,,,


# Export Deals Data

In [135]:
# Persist `deal_transformed` via IPython's %store so another notebook/session can
# load it with `%store -r deal_transformed`.
%store deal_transformed

Stored 'deal_transformed' (DataFrame)
