# This is where I use the purchase and transaction data to calculate the total profits for each event and append the attendance data to include total profits

In [85]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [86]:
# Load the purchase/transaction export. The generic 'Date' column is renamed
# to 'Purchase_Date', and both date columns are converted to datetimes.
df_sales = (
    pd.read_csv('Cityspace_Purchase_Transaction.csv')
    .rename(columns={'Date': 'Purchase_Date'})
    .assign(**{
        'Purchase_Date': lambda sales: pd.to_datetime(sales['Purchase_Date']),
        'Show Date': lambda sales: pd.to_datetime(sales['Show Date']),
    })
)

In [87]:
# Show the column labels of the sales frame.
df_sales.columns

Index(['Order ID', 'City', 'State', 'Zip code', 'Country', 'Delivery Method',
       'Categories', 'Sale Type', 'Order Status', 'Purchase_Date', 'Time',
       'Ticket Quantity', 'Ticket Amount', 'Donations', 'Other Items',
       'Total Charge', 'Patron Created', 'Patron Id', 'Detail', 'Ticket Type',
       'Section Name', 'Promotion Name', 'Show Name', 'Show Date', 'Show Time',
       'Price', 'Facility Fee', 'Conv. Fee', 'Total'],
      dtype='object')

In [88]:
# Load the attendance sheet and convert its 'Date' column to datetimes.
df_attendance = pd.read_csv('Cityspace_Attendance_Updated.csv').assign(
    Date=lambda attendance: pd.to_datetime(attendance['Date'])
)

In [89]:
# Show the column labels of the attendance frame.
df_attendance.columns

Index(['Date', 'Event', 'Registrations', 'All_Attendance', 'Livestream',
       'Room Capacity', 'Category', 'Guest_Speaker', 'in_person', 'Genres'],
      dtype='object')

In [90]:
# Count the distinct, non-missing show names in the sales data.
num_unique_show_names = len(df_sales['Show Name'].dropna().unique())
print("Number of unique show names in 'df_sales':", num_unique_show_names)

Number of unique show names in 'df_sales': 379


In [91]:
# Attach the attendance sheet's event name to every sales row by matching the
# show date against the attendance date. Only 'Date' and 'Event' are taken from
# df_attendance, so no other attendance columns need dropping afterwards.
# NOTE(review): the join key is the date alone. If two attendance rows share a
# date, every sale on that date is paired with both events -- confirm that
# dates are unique in df_attendance.
merged_df = pd.merge(
    df_sales,
    df_attendance[['Date', 'Event']],
    left_on='Show Date',
    right_on='Date',
    how='inner',
)

# After the join 'Date' duplicates 'Show Date', and 'Event' takes the place of
# 'Show Name', so both columns are dropped.
merged_df = merged_df.drop(columns=['Date', 'Show Name'])

# Convert 'Total' from currency text (e.g. '$25.00') to float.
# regex=False makes '$' a literal character instead of the regex end-of-string
# anchor, independent of the pandas version's default. Thousands separators
# are stripped too, so values such as '$1,250.00' parse.
merged_df['Total'] = (
    merged_df['Total']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

In [92]:
# Display the merged sales frame, which now carries the matched 'Event' name.
merged_df

Unnamed: 0,Order ID,City,State,Zip code,Country,Delivery Method,Categories,Sale Type,Order Status,Purchase_Date,...,Ticket Type,Section Name,Promotion Name,Show Date,Show Time,Price,Facility Fee,Conv. Fee,Total,Event
0,PR95605853,Allston,MA,02134,US,Eticket (In-person),,Web,complete,2024-01-01,...,Student,CitySpace Student Tickets,,2024-01-04,06:30 PM,$5.00,$0.00,$0.00,5.0,Real Self Care: How to redefine wellness in th...
1,PR95600140,Roslindale,MA,02131,US,Eticket (In-person),,Web,complete,2023-12-31,...,Premiere,CitySpace Premiere Tickets,,2024-01-04,06:30 PM,$25.00,$0.00,$0.00,25.0,Real Self Care: How to redefine wellness in th...
2,PR95599811,Eastham,MA,02642,US,Eticket (In-person),,Web,complete,2023-12-31,...,General,CitySpace General Tickets,WBUR Staff Discount,2024-01-04,06:30 PM,$0.00,$0.00,$0.00,0.0,Real Self Care: How to redefine wellness in th...
3,PR95599811,Eastham,MA,02642,US,Eticket (In-person),,Web,complete,2023-12-31,...,General,CitySpace General Tickets,WBUR Staff Discount,2024-01-04,06:30 PM,$0.00,$0.00,$0.00,0.0,Real Self Care: How to redefine wellness in th...
4,PR95598564,Brighton,MA,02135,US,Eticket (In-person),,Web,complete,2023-12-31,...,General,CitySpace General Tickets,,2024-01-04,06:30 PM,$15.00,$0.00,$0.00,15.0,Real Self Care: How to redefine wellness in th...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50293,PR70857532,Arlington,MA,02476,US,Eticket (In-person),,Web,complete,2019-02-01,...,General Admission,General Admission,,2019-03-04,06:30 PM,$20.00,$0.00,$0.00,20.0,Curated Cuisine: Andy Husbands
50294,PR70850308,Boxford,MA,01921,US,Eticket (In-person),"Sustainer, Import 01/08/2019",Web,complete,2019-01-31,...,General Admission,General Admission,Sustainer,2019-03-04,06:30 PM,$18.00,$0.00,$0.00,18.0,Curated Cuisine: Andy Husbands
50295,PR70850308,Boxford,MA,01921,US,Eticket (In-person),"Sustainer, Import 01/08/2019",Web,complete,2019-01-31,...,General Admission,General Admission,Sustainer,2019-03-04,06:30 PM,$18.00,$0.00,$0.00,18.0,Curated Cuisine: Andy Husbands
50296,PR70850308,Boxford,MA,01921,US,Eticket (In-person),"Sustainer, Import 01/08/2019",Web,complete,2019-01-31,...,General Admission,General Admission,Sustainer,2019-03-04,06:30 PM,$18.00,$0.00,$0.00,18.0,Curated Cuisine: Andy Husbands


In [93]:
# Count the distinct, non-missing event names in the attendance data.
num_unique_names = len(df_attendance['Event'].dropna().unique())
print("Number of unique names in 'Event' column:", num_unique_names)

Number of unique names in 'Event' column: 324


In [94]:
# Inspect the dtype of each column in the merged frame.
merged_df.dtypes

Order ID                   object
City                       object
State                      object
Zip code                   object
Country                    object
Delivery Method            object
Categories                 object
Sale Type                  object
Order Status               object
Purchase_Date      datetime64[ns]
Time                       object
Ticket Quantity             int64
Ticket Amount             float64
Donations                 float64
Other Items                 int64
Total Charge               object
Patron Created             object
Patron Id                   int64
Detail                     object
Ticket Type                object
Section Name               object
Promotion Name             object
Show Date          datetime64[ns]
Show Time                  object
Price                      object
Facility Fee               object
Conv. Fee                  object
Total                     float64
Event                      object
dtype: object

In [95]:
# Add up the 'Total' column for each distinct event name and print the result.
event_totals = merged_df['Total'].groupby(merged_df['Event']).sum()
print(event_totals)

Event
"Bear Brook: A True Crime Story" Live                                           800.0
"Fellow Travelers" The Opera; Covert Romance, Politics and Power                230.0
"Lenox Street" The Film Screening                                                 0.0
"Mortified" Live                                                               4425.0
"Radiolab" Live: How the Cassette Tape Changed Us                              3460.0
                                                                                ...  
Who Should Sing Ol' Man River? Portraying Race Through Musical Theatre            0.0
Yaa Gyasi: Transcendent Kingdom                                                   0.0
Young Mungo: Douglas Stuart                                                    1035.0
Youth Takeover: Arts and Culture                                                  0.0
Youth Takeover: The missing student voices on gun control and school safety       0.0
Name: Total, Length: 308, dtype: float64


In [96]:
# Sum ticket totals per event occurrence (event name + show date). The date is
# part of the key because df_attendance lists some event names on more than one
# date (e.g. 'The ARTery 25'). Keyed on the name alone, every occurrence would
# receive the combined total of all its dates.
event_totals = merged_df.groupby(['Event', 'Show Date'], as_index=False)['Total'].sum()

# Left-merge so every attendance row is kept; rows with no matching sales
# records get NaN in 'Total_Profit'. A 'Total_Profit' column left over from a
# previous run of this cell is dropped first, so re-running the cell does not
# produce duplicate columns.
df_attendance = (
    df_attendance
    .drop(columns=['Total_Profit'], errors='ignore')
    .merge(
        event_totals.rename(columns={'Show Date': 'Date', 'Total': 'Total_Profit'}),
        on=['Event', 'Date'],
        how='left',
    )
)
df_attendance

Unnamed: 0,Date,Event,Registrations,All_Attendance,Livestream,Room Capacity,Category,Guest_Speaker,in_person,Genres,Total_Profit
0,2019-12-17,LFOD x The ARTery: State of New England Hip Hop,190.0,60,0,254,ARTery,No,60,Arts & Culture,0.0
1,2021-09-23,The ARTery 25,367.0,227,123,254,ARTery,No,104,Entertainment,0.0
2,2019-03-28,The ARTery 25,367.0,227,0,254,ARTery,No,227,Entertainment,0.0
3,2019-08-23,The ARTery's Massachusetts Favorite of the Tin...,151.0,129,0,254,ARTery,No,129,Entertainment,958.0
4,2020-07-30,Black Boston: Building Healthy Communities,402.0,146,0,254,Boston,No,146,Health & Wellness,0.0
...,...,...,...,...,...,...,...,...,...,...,...
324,2021-02-23,WBUR Town Hall: The Success and Challenges of ...,120.0,66,0,254,Town Hall,No,66,Politics,0.0
325,2021-02-16,WBUR Town Hall: The Vaccine Is Here,124.0,65,0,254,Town Hall,No,65,Health & Wellness,0.0
326,2022-03-08,WBUR Town Hall: Understanding the Ukraine Crisis,367.0,192,170,254,Town Hall,No,22,Politics,0.0
327,2023-11-08,Bob Oakes Legacy,173.0,138,19,254,Tribute,No,119,Journalism & Media,


In [97]:
# Keep only the rows that have a 'Total_Profit' value, then renumber the index
# from zero so it is contiguous again.
has_profit = df_attendance['Total_Profit'].notna()
df_attendance = df_attendance.loc[has_profit].reset_index(drop=True)
df_attendance

Unnamed: 0,Date,Event,Registrations,All_Attendance,Livestream,Room Capacity,Category,Guest_Speaker,in_person,Genres,Total_Profit
0,2019-12-17,LFOD x The ARTery: State of New England Hip Hop,190.0,60,0,254,ARTery,No,60,Arts & Culture,0.0
1,2021-09-23,The ARTery 25,367.0,227,123,254,ARTery,No,104,Entertainment,0.0
2,2019-03-28,The ARTery 25,367.0,227,0,254,ARTery,No,227,Entertainment,0.0
3,2019-08-23,The ARTery's Massachusetts Favorite of the Tin...,151.0,129,0,254,ARTery,No,129,Entertainment,958.0
4,2020-07-30,Black Boston: Building Healthy Communities,402.0,146,0,254,Boston,No,146,Health & Wellness,0.0
...,...,...,...,...,...,...,...,...,...,...,...
308,2020-11-17,WBUR Town Hall: Now What? The Impact of the 20...,501.0,369,0,254,Town Hall,No,369,Politics,0.0
309,2021-02-23,WBUR Town Hall: The Success and Challenges of ...,120.0,66,0,254,Town Hall,No,66,Politics,0.0
310,2021-02-16,WBUR Town Hall: The Vaccine Is Here,124.0,65,0,254,Town Hall,No,65,Health & Wellness,0.0
311,2022-03-08,WBUR Town Hall: Understanding the Ukraine Crisis,367.0,192,170,254,Town Hall,No,22,Politics,0.0


In [98]:
# Write the attendance table to a new CSV file; index=False leaves the row index out.
df_attendance.to_csv('Cityspace_Attendance_with_Profits.csv', index=False)