In [7]:
%matplotlib inline

import pandas as pd
import numpy as np
import datetime as dt

## User Input

In [149]:
# First day of the fiscal year under analysis; performances dated on or after
# this moment are treated as "current" further down.
fy_start = dt.datetime(year=2017, month=7, day=1)

# Total marketing expenses for that fiscal year (numerator of the CAC figure).
fy_marketing_expenses = 2877622

## Import Ticketing Data (3 fiscal years)

In [55]:
# Import ticketing files
def _read_ticketing_csv(path):
    """Read one ticketing CSV export, skipping its first 3 rows."""
    return pd.read_csv(path, skiprows=3)


def _read_and_stack(*paths):
    """Read every path in the given order and stack the rows under a fresh index."""
    return pd.concat([_read_ticketing_csv(path) for path in paths], ignore_index=True)


# One frame per series; multi-file series are stacked newest file first.
clx = _read_and_stack(
    "../PSData/Clx18.csv",
    "../PSData/Clx17.csv",
    "../PSData/Clx16.csv",
)

pop = _read_and_stack(
    "../PSData/Pops18.csv",
    "../PSData/Pops17.csv",
    "../PSData/Pops16.csv",
)

smr = _read_and_stack(
    "../PSData/Summer18.csv",
    "../PSData/Summer17.csv",
    "../PSData/Summer16.csv",
)

# Specials arrive as a single file, so no stacking is needed.
spc = _read_ticketing_csv("../PSData/Specials16-18.csv")

# The remaining series are pooled into one "other" frame.
oth = _read_and_stack(
    "../PSData/Chamber16-18.csv",
    "../PSData/Connections16-18.csv",
    "../PSData/Organ16-18.csv",
)

In [56]:
# Insert a series column
# Tag every row with the series its source frame belongs to. On a first run the
# label is inserted as the leading column, exactly as before. If the cell is
# re-run on frames that already carry the column, it is overwritten in place
# instead of letting DataFrame.insert raise ValueError for a duplicate column.
for _frame, _label in (
    (clx, "clx"),
    (pop, "pop"),
    (smr, "smr"),
    (spc, "spc"),
    (oth, "oth"),
):
    if "series" in _frame.columns:
        _frame["series"] = _label
    else:
        _frame.insert(0, "series", _label)

In [57]:
# Stack the five per-series frames into a single raw dataframe with a fresh
# 0..n-1 index (order: clx, pop, smr, spc, oth).
df_raw = pd.concat(
    objs=[clx, pop, smr, spc, oth],
    ignore_index=True,
)

In [58]:
# Row count of the combined raw dataframe
df_raw.shape[0]

523977

In [92]:
# Work on an independent (deep) copy so df_raw stays untouched by later steps
df_manip = df_raw.copy(deep=True)

## Format the Dataframe

##### Remove unsold tickets

In [93]:
# Boolean mask: a row counts as a sold ticket when its summary_cust_id is positive
sold_ticket = df_manip['summary_cust_id'].gt(0)

In [94]:
# Keep only the rows flagged as sold
df_manip = df_manip.loc[sold_ticket]

In [95]:
# Row count after dropping unsold tickets
df_manip.shape[0]

353597

In [96]:
# Helper that converts a column of date values (e.g. perf_dt) to datetimes
def date_conv(s):
    """Convert the values of Series ``s`` with ``pd.to_datetime``.

    Each distinct value is parsed only once; the parsed results are then
    mapped back onto every row, so the returned Series keeps the index of ``s``.
    """
    parsed_by_raw = {}
    for raw_value in s.unique():
        parsed_by_raw[raw_value] = pd.to_datetime(raw_value)
    return s.map(parsed_by_raw)

In [97]:
# Replace perf_dt with its datetime version. assign() builds a new frame with
# the converted column in the same position rather than writing into the
# filtered frame.
df_manip = df_manip.assign(perf_dt=date_conv(df_manip['perf_dt']))

##### Remove old perf dates and store as df_current

In [98]:
# Boolean mask: True where the performance date falls on or after fy_start.
# NOTE(review): no upper bound is applied, so any performance dated after the
# end of the fiscal year would also count as current - confirm the data holds none.
is_correct_fy = df_manip['perf_dt'].ge(fy_start)

In [99]:
# Current-fiscal-year rows only; df_manip itself is left unchanged
df_current = df_manip.loc[is_correct_fy]

In [100]:
# Row count for the current fiscal year
df_current.shape[0]

111623

##### Remove current dates and store as df_historical

In [101]:
# Historical rows are the complement of the current-fiscal-year mask. Invert the
# boolean Series with ~ (vectorized, index-aligned) instead of building a Python
# list element by element; the same rows are selected.
df_historical = df_manip[~is_correct_fy]

In [102]:
# Row count for performances before fy_start
df_historical.shape[0]

241974

## Count the total number of new customers in current fiscal year

In [104]:
# Distinct customer ids seen in the historical rows, in order of first appearance
historical_customers = pd.unique(df_historical['summary_cust_id'])

In [105]:
# Distinct customer ids seen in the current-fiscal-year rows, in order of first appearance
current_customers = pd.unique(df_current['summary_cust_id'])

##### Identify whether each customer in current_customers also exists in historical_customers

In [128]:
# Map each current-year customer id to True when it is new, i.e. absent from the
# historical ids. Membership is tested against a set built once; testing against
# the array directly would rescan every historical id for every current customer.
_historical_ids = set(historical_customers)
customers_dict = {
    customer: customer not in _historical_ids
    for customer in current_customers
}

##### Isolate new customers only and assign total number of new customers to a variable

In [146]:
# Keep only the entries whose flag is truthy, i.e. the new customers
new_customers_dict = {
    customer: is_new
    for customer, is_new in customers_dict.items()
    if is_new
}

In [147]:
# Total number of new customers: one dict entry per customer flagged as new
new_customers = len(new_customers_dict)

In [148]:
# Display the new-customer count
new_customers

8407

## Customer Acquisition Cost

In [151]:
# Customer acquisition cost: total fiscal-year marketing expenses divided by
# the number of new customers acquired in that year
customer_acquisition_cost = fy_marketing_expenses / new_customers
customer_acquisition_cost

342.2888069465921