# Import Libraries

In [134]:
import pandas as pd
pd.set_option('display.max_columns', None)

# Importind DataFrames

In [135]:
%store -r all_verticals_deals_df
%store -r contacts_df

# Mapping to 'Employee' Target Data Model

In [136]:
# Define the Events columns
columns = [
    "Id",
    "EmployeeName",
    "Location",
    "File"
]

# Create an empty DataFrame with the columns
employees_df = pd.DataFrame(columns=columns)


In [137]:
# Populate the Contacts DataFrame with Contact Information
transform_employee_vertical_df = pd.DataFrame()

transform_employee_vertical_df['EmployeeName'] = all_verticals_deals_df['Lead MD']
transform_employee_vertical_df['File'] = "events"

In [138]:
# Populate the Contacts DataFrame with Contact Information
transform_employee_contact_df = pd.DataFrame()

transform_employee_contact_df['EmployeeName'] = contacts_df['Coverage Person']
transform_employee_contact_df['File'] = "contact"

In [139]:
# Concatenate all transformed DataFrames into a single DataFrame
employees_transformed_df = pd.concat([
    transform_employee_vertical_df,
    transform_employee_contact_df
], ignore_index=True)

In [140]:
employees_transformed_df

Unnamed: 0,EmployeeName,File
0,Jeannie Blackwood,events
1,Andrew Mah,events
2,Kripa Shah,events
3,Russ Barner,events
4,Matthew Kordonowy,events
...,...,...
625,Emily Royal,contact
626,Kripa Shah,contact
627,Hannah Jumper,contact
628,Jeannie Blackwood,contact


In [141]:
#Check for each column in the transformed DataFrame is part of the original Company DataFrame
if not set(employees_transformed_df.columns).issubset(employees_df.columns):
    raise ValueError(f"employees_transformed_df has columns not in employees_df: {set(employees_transformed_df.columns) - set(employees_df.columns)}")
else:
    print("All columns matched successfully.")

All columns matched successfully.


# Data Cleansing

In [142]:

all_employees_transformed_df = employees_transformed_df.groupby(['EmployeeName']).agg(lambda x: ' '.join(x.astype(str))).reset_index()
all_employees_transformed_df

Unnamed: 0,EmployeeName,File
0,Andrew Mah,events events events events events events even...
1,Daniel Ding,contact contact contact contact contact contac...
2,Emily Royal,contact contact contact contact contact contac...
3,Hannah Jumper,contact contact contact contact contact contac...
4,Hannah Wilson,events events events events events events even...
5,Jeannie Blackwood,events events events events events events even...
6,Kripa Shah,events events events events events events even...
7,Matthew Kordonowy,events events events events events events even...
8,Russ Barner,events events events events events events even...


In [143]:
all_employees_transformed_df['Id'] = ['EMP_{:03d}'.format(i) for i in range(1, len(all_employees_transformed_df) + 1)]

In [144]:
all_employees_transformed_df

Unnamed: 0,EmployeeName,File,Id
0,Andrew Mah,events events events events events events even...,EMP_001
1,Daniel Ding,contact contact contact contact contact contac...,EMP_002
2,Emily Royal,contact contact contact contact contact contac...,EMP_003
3,Hannah Jumper,contact contact contact contact contact contac...,EMP_004
4,Hannah Wilson,events events events events events events even...,EMP_005
5,Jeannie Blackwood,events events events events events events even...,EMP_006
6,Kripa Shah,events events events events events events even...,EMP_007
7,Matthew Kordonowy,events events events events events events even...,EMP_008
8,Russ Barner,events events events events events events even...,EMP_009


In [145]:
# Concatenate result DataFrames with company_df to bring all columns together
employees_transformed_df= pd.concat([
    employees_df,
    all_employees_transformed_df
], ignore_index=True)

# Validate Contact Data Model

In [146]:
#Check for each column in the transformed DataFrame is part of the original Company DataFrame
if not set(employees_df.columns).issubset(employees_transformed_df.columns):
    raise ValueError(f"employees_df has columns not in employees_transformed_df: {set(employees_df.columns) - set(employees_transformed_df.columns)}")
else:
    print("All columns matched successfully.")

All columns matched successfully.


In [147]:
employees_transformed_df = employees_transformed_df.drop('File', axis=1)

In [148]:
employees_transformed_df = employees_transformed_df.fillna('')

In [149]:
employees_transformed_df

Unnamed: 0,Id,EmployeeName,Location
0,EMP_001,Andrew Mah,
1,EMP_002,Daniel Ding,
2,EMP_003,Emily Royal,
3,EMP_004,Hannah Jumper,
4,EMP_005,Hannah Wilson,
5,EMP_006,Jeannie Blackwood,
6,EMP_007,Kripa Shah,
7,EMP_008,Matthew Kordonowy,
8,EMP_009,Russ Barner,


# Export Companies Data

In [150]:
%store employees_transformed_df

Stored 'employees_transformed_df' (DataFrame)
