# Import Libraries

In [202]:
import pandas as pd
pd.set_option('display.max_columns', None)

# Importind DataFrames

In [203]:
%store -r events_df

# Mapping to 'Events & Participants' Target Data Model

In [204]:
# Define the Events columns
columns = [
    "Id",
    "EventName",
    "EventTypeID",
    "Description",
    "Location",
    "EventStatusID",
    "StartDateTime",
    "EndDateTime",
    "AdditionalDescription",
    "CreatedAt",
    "CreatedBy",
    "UpdatedAt",
    "UpdatedBy",
    "File"
]

# Create an empty DataFrame with the columns
all_events_df = pd.DataFrame(columns=columns)


In [205]:
# Define the Participants columns
columns = [
    "Id",
    "ParticipantID",
    "EventID",
    "RSVPStatusId",
    "File"
]

# Create an empty DataFrame with Participants columns
participants_df = pd.DataFrame(columns=columns)

In [206]:
# Populate the Contacts DataFrame with Contact Information
transform_events_df = pd.DataFrame()

transform_events_df['EventName'] = events_df['EventType']
transform_events_df['File'] = "events"

In [207]:
# Populate the Contacts DataFrame with Contact Information
transform_participants_df = pd.DataFrame()

transform_participants_df['ParticipantID'] = events_df['Name']
transform_participants_df['EventID'] = events_df['EventType']
transform_participants_df['RSVPStatusId'] = events_df['Attendee Status']
transform_participants_df['File'] = "events"

In [208]:
transform_events_df

Unnamed: 0,EventName,File
0,Leaders and Partners Dinner,events
1,Leaders and Partners Dinner,events
2,Leaders and Partners Dinner,events
3,Leaders and Partners Dinner,events
4,Leaders and Partners Dinner,events
...,...,...
105,2019 Market Re-Cap,events
106,2019 Market Re-Cap,events
107,2019 Market Re-Cap,events
108,2019 Market Re-Cap,events


In [209]:
transform_participants_df

Unnamed: 0,ParticipantID,EventID,RSVPStatusId,File
0,Rob Baltimore,Leaders and Partners Dinner,RSVP'd,events
1,Brian Lucas,Leaders and Partners Dinner,Declined,events
2,Luke Semple,Leaders and Partners Dinner,Checked In,events
3,Andrew Spitzer,Leaders and Partners Dinner,No Show,events
4,Derek Lewis,Leaders and Partners Dinner,Declined,events
...,...,...,...,...
105,Greg Urban,2019 Market Re-Cap,Checked In,events
106,Aftab Shahsingh,2019 Market Re-Cap,Checked In,events
107,Brendan Ryan,2019 Market Re-Cap,Checked In,events
108,Garrett DeNinno,2019 Market Re-Cap,Checked In,events


In [210]:
#Check for each column in the transformed DataFrame is part of the original Company DataFrame
if not set(transform_events_df.columns).issubset(all_events_df.columns):
    raise ValueError(f"transform_events_df has columns not in all_events_df: {set(transform_events_df.columns) - set(all_events_df.columns)}")
else:
    print("All columns matched successfully.")

All columns matched successfully.


# Data Cleansing

In [211]:

all_transform_events_df = transform_events_df.groupby(['EventName']).agg(lambda x: ' '.join(x.astype(str))).reset_index()
all_transform_events_df

Unnamed: 0,EventName,File
0,2019 Market Re-Cap,events events events events events events even...
1,Leaders and Partners Dinner,events events events events events events even...


In [212]:
all_transform_events_df['Id'] = ['ET_{:03d}'.format(i) for i in range(1, len(all_transform_events_df) + 1)]
transform_participants_df['Id'] = ['PT_{:03d}'.format(i) for i in range(1, len(transform_participants_df) + 1)]

In [213]:
all_transform_events_df

Unnamed: 0,EventName,File,Id
0,2019 Market Re-Cap,events events events events events events even...,ET_001
1,Leaders and Partners Dinner,events events events events events events even...,ET_002


In [214]:
transform_participants_df

Unnamed: 0,ParticipantID,EventID,RSVPStatusId,File,Id
0,Rob Baltimore,Leaders and Partners Dinner,RSVP'd,events,PT_001
1,Brian Lucas,Leaders and Partners Dinner,Declined,events,PT_002
2,Luke Semple,Leaders and Partners Dinner,Checked In,events,PT_003
3,Andrew Spitzer,Leaders and Partners Dinner,No Show,events,PT_004
4,Derek Lewis,Leaders and Partners Dinner,Declined,events,PT_005
...,...,...,...,...,...
105,Greg Urban,2019 Market Re-Cap,Checked In,events,PT_106
106,Aftab Shahsingh,2019 Market Re-Cap,Checked In,events,PT_107
107,Brendan Ryan,2019 Market Re-Cap,Checked In,events,PT_108
108,Garrett DeNinno,2019 Market Re-Cap,Checked In,events,PT_109


In [215]:
# Concatenate result DataFrames with company_df to bring all columns together
transform_events= pd.concat([
    all_events_df,
    all_transform_events_df
], ignore_index=True)

In [216]:
# Concatenate result DataFrames with company_df to bring all columns together
transform_participants= pd.concat([
    participants_df,
    transform_participants_df
], ignore_index=True)

# Validate Contact Data Model

In [217]:
#Check for each column in the transformed DataFrame is part of the original Company DataFrame
if not set(all_events_df.columns).issubset(transform_events.columns):
    raise ValueError(f"all_events_df has columns not in transform_events: {set(all_events_df.columns) - set(transform_events.columns)}")
else:
    print("All columns matched successfully.")

All columns matched successfully.


In [218]:
#Check for each column in the transformed DataFrame is part of the original Company DataFrame
if not set(participants_df.columns).issubset(transform_participants.columns):
    raise ValueError(f"participants_df has columns not in transform_participants: {set(participants_df.columns) - set(transform_participants.columns)}")
else:
    print("All columns matched successfully.")

All columns matched successfully.


In [219]:
transform_events = transform_events.drop('File', axis=1)

In [220]:
transform_participants = transform_participants.drop('File', axis=1)

In [221]:
transform_events

Unnamed: 0,Id,EventName,EventTypeID,Description,Location,EventStatusID,StartDateTime,EndDateTime,AdditionalDescription,CreatedAt,CreatedBy,UpdatedAt,UpdatedBy
0,ET_001,2019 Market Re-Cap,,,,,,,,,,,
1,ET_002,Leaders and Partners Dinner,,,,,,,,,,,


In [222]:
transform_participants

Unnamed: 0,Id,ParticipantID,EventID,RSVPStatusId
0,PT_001,Rob Baltimore,Leaders and Partners Dinner,RSVP'd
1,PT_002,Brian Lucas,Leaders and Partners Dinner,Declined
2,PT_003,Luke Semple,Leaders and Partners Dinner,Checked In
3,PT_004,Andrew Spitzer,Leaders and Partners Dinner,No Show
4,PT_005,Derek Lewis,Leaders and Partners Dinner,Declined
...,...,...,...,...
105,PT_106,Greg Urban,2019 Market Re-Cap,Checked In
106,PT_107,Aftab Shahsingh,2019 Market Re-Cap,Checked In
107,PT_108,Brendan Ryan,2019 Market Re-Cap,Checked In
108,PT_109,Garrett DeNinno,2019 Market Re-Cap,Checked In


# Export Companies Data

In [223]:
%store transform_events

Stored 'transform_events' (DataFrame)


In [224]:
%store transform_participants

Stored 'transform_participants' (DataFrame)
