In [309]:
import pandas as pd
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 200)
import numpy as np
import math

In [351]:
class Final:
    
    def __init__(self, excel_file = None):
        self.excel_file = excel_file
        
    def read_excel(self):
        purchase_exit_survey_data_df = pd.read_excel(self.excel_file, sheet_name = 'Purchase Exit Survey Data', header=[0,1,2,3,4], index_col=[0,1])
        airings_df = pd.read_excel(self.excel_file, sheet_name = 'Airings')
        lookup_df = pd.read_excel(self.excel_file, sheet_name = 'Lookup',header=1)
        return purchase_exit_survey_data_df, airings_df, lookup_df
    
    def prepare_data(self):
        df1, df2, df3 = self.read_excel()
        
        # Preare df1/purchase_exit_survey_data_df for use
        
        # Reformat purchase_exit_survey_data_df
        df1 = df1.stack().stack().stack().stack()
        # Set Up Names inpurchase_exit_survey_data_df
        df1.index.set_names(['Campaign_Type','TV_Network','Submitted_Application_Num','Month', 'Quarter','Year'],level=[0,1,2,3,4,5], inplace=True)
        # Reset index in purchase_exit_survey_data_df
        #return df1.reset_index()
        df1 = df1.reset_index()
        # Change last column name to Value in purchase_exit_survey_data_df
        df1 = df1.rename(columns={'Submitted Application Timestamp':'Value'})
        # Change Year to Int
        df1['Year'] = pd.to_datetime(df1.Year, format='%Y').dt.year
        # Change Month Column to Ints
        df1['Month'] = pd.to_datetime(df1.Month, format='%B').dt.month
        # Finalize purchase_exit_survey_data_df columns
        df1 = df1[['TV_Network','Year','Quarter','Month','Submitted_Application_Num','Value']]
        
        # Prepare df2/airings_df for use
        
        # Change Date/Time ET Column Name to Datetime
        df2 = df2.rename(columns={'Date/Time ET': 'Datetime'})
        df2['Year'] = pd.DatetimeIndex(df2['Datetime']).year
        df2['Month'] = pd.DatetimeIndex(df2['Datetime']).month
        
        # Prepare df3/lookup for use
        
        # Make firt column lowercase
        df3['Exit Survey'] = df3['Exit Survey'].str.lower()
        
        return df1, df2, df3
    
    def get_monthly_stats(self):
        df1, df2, df3 = self.prepare_data()
        
        # Compute Visits Per Month
        visits_per_month = df1.groupby(['Year','Quarter','Month','TV_Network'])['Value'].sum()
        visits_per_month = visits_per_month.to_frame()
        visits_per_month = visits_per_month.reset_index()
        visits_per_month = visits_per_month.rename(columns = {'Value':'Purchases'})
        
        # Merge visits_per_month to Lookup Table
        visits_per_month = pd.merge(visits_per_month, df3[['Exit Survey','Airings']],left_on= ['TV_Network'], right_on = ['Exit Survey'], how='left')
        
        # Compute Spend and Lift Per Month
        lift_spend_df = df2.groupby(['Year','Month','Network'])['Spend','Lift'].sum()
        lift_spend_df = lift_spend_df.reset_index()
        
        # Merge Spend and Lift to Lookup Tab;e
        lift_spend_df = pd.merge(lift_spend_df, df3[['Exit Survey','Airings']],left_on= ['Network'], right_on = ['Airings'], how='left')
        
        # Merge visit data and lift/spend data
        monthly_stats = pd.merge(visits_per_month, lift_spend_df, on=['Year','Month','Exit Survey'], how='outer')
        monthly_stats = monthly_stats[['Year','Quarter','Month','TV_Network','Network','Purchases','Spend','Lift']]
        monthly_stats = monthly_stats[monthly_stats.Year.notna()]
        
        return monthly_stats
    
    def get_metrics(self):
        # Get Monthly Stats Df
        monthly_stats = self.get_monthly_stats()
        
        # Compute Cost Per Visitor
        monthly_stats['cpv'] = np.where(df.Spend.isnull(),0, np.where((df.Purchases.isnull()),1000000,(df.Spend/df.Purchases)))
        
        # Compute Conversion Rate 
        monthly_stats['conversion_rate'] = monthly_stats['Purchases']/monthly_stats['Lift']
        
        # Compute Cost Per Acquisition
        monthly_stats['cpa'] = monthly_stats['Spend']/monthly_stats['Purchases']
        
        return monthly_stats
    

In [352]:
test = Final('Analyst_Dataset.xlsx')


In [353]:
monthly_stats = test.get_metrics()

  lift_spend_df = df2.groupby(['Year','Month','Network'])['Spend','Lift'].sum()


In [354]:
monthly_stats

Unnamed: 0,Year,Quarter,Month,TV_Network,Network,Purchases,Spend,Lift,cpv,conversion_rate,cpa
0,2017,Q3,9,aapka_colors,,1.0,,,0.0,,
1,2017,Q3,9,baby_first,,1.0,,,0.0,,
2,2017,Q3,9,bloomberg,BLOM,1.0,4966.72,199.0,4966.72,0.005025,4966.72
3,2017,Q3,9,cbs_sports,,1.0,,,0.0,,
4,2017,Q3,9,cnbc,CNBC,5.0,6481.42,406.0,1296.284,0.012315,1296.284
5,2017,Q3,9,cnn,CNN,13.0,9159.6,507.0,704.584615,0.025641,704.584615
6,2017,Q3,9,dish_network,DISH,4.0,2513.09,976.0,628.2725,0.004098,628.2725
7,2017,Q3,9,diy,,2.0,,,0.0,,
8,2017,Q3,9,fox_news,,5.0,,,0.0,,
9,2017,Q3,9,fox_sports,FS1,1.0,4258.5,183.0,4258.5,0.005464,4258.5


# Cost Per Vistor 

The Cost Per Vistor is the total dollar amount spent per visit to a company's website/physical location. In order to compute the Cost Per Vistor, the number of visitors to a company website/physical location needs to be recorded. Company XYZ only has records from people who have made a purchase. If the number of survey recordings (which is also equivalent to the number of sales made) is used in place of total visits, the Cost Per Visit calculated will be incorrect. If the Cost Per Visit were to be computed with the existing data, it would be equivalent to the Cost Per Acquisition.

# Cost Per Acquisition

In [295]:
monthly_stats.head()

Unnamed: 0,Year,Quarter,Month,TV_Network,Network,Purchases,Spend,Lift,cpv,conversion_rate,cpa
0,2017,Q3,9,aapka_colors,,1.0,,,,,
1,2017,Q3,9,baby_first,,1.0,,,,,
2,2017,Q3,9,bloomberg,BLOM,1.0,4966.72,199.0,4966.72,0.005025,4966.72
3,2017,Q3,9,cbs_sports,,1.0,,,,,
4,2017,Q3,9,cnbc,CNBC,5.0,6481.42,406.0,1296.284,0.012315,1296.284


In [315]:
df = monthly_stats
df.head()

Unnamed: 0,Year,Quarter,Month,TV_Network,Network,Purchases,Spend,Lift,cpv,conversion_rate,cpa,test
0,2017,Q3,9,aapka_colors,,1.0,,,,,,0
1,2017,Q3,9,baby_first,,1.0,,,,,,0
2,2017,Q3,9,bloomberg,BLOM,1.0,4966.72,199.0,4966.72,0.005025,4966.72,0
3,2017,Q3,9,cbs_sports,,1.0,,,,,,0
4,2017,Q3,9,cnbc,CNBC,5.0,6481.42,406.0,1296.284,0.012315,1296.284,0


In [323]:

math.isnan(df['Spend'][0])

True

In [347]:
#df['d'] = np.where(df.a.isnull(),np.nan, np.where((df.b == "N")&(~df.c.isnull()),df.a*df.c,df.a))

df['test'] = np.where(df.Spend.isnull(),0, np.where((df.Purchases.isnull()),10000,(df.Spend/df.Purchases)))

#df['test'] = np.where((df.Lift >= 0)&(df.Spend.isnull()), df.Spend/df.Lift, 0)

In [348]:
df.head()

Unnamed: 0,Year,Quarter,Month,TV_Network,Network,Purchases,Spend,Lift,cpv,conversion_rate,cpa,test
0,2017,Q3,9,aapka_colors,,1.0,,,,,,0.0
1,2017,Q3,9,baby_first,,1.0,,,,,,0.0
2,2017,Q3,9,bloomberg,BLOM,1.0,4966.72,199.0,4966.72,0.005025,4966.72,4966.72
3,2017,Q3,9,cbs_sports,,1.0,,,,,,0.0
4,2017,Q3,9,cnbc,CNBC,5.0,6481.42,406.0,1296.284,0.012315,1296.284,1296.284


In [None]:
spend is nan and purchases >= 1 then cpv is 0

In [306]:
for i in range(len(monthly_stats['Purchases'])):
    if monthly_stats['Purchases'][i]>= 1 and monthly_stats['Spend'][i].isna():
        print(monthly_stats['Purchases'][i], monthly_stats['Spend'][i])

AttributeError: 'numpy.float64' object has no attribute 'isna'

In [None]:
df.head()

In [105]:
df2.head()

Unnamed: 0,Company,Date/Time ET,Rotation,Creative,Network,Spend,Lift,Program
0,Company XYZ,2017-10-30 22:50:00,MSNB Weekday Prime,ISCICODE0015H,MSNB,980.0,152,THE LAST WORD WITH LAWRENCE O'DONNEL
1,Company XYZ,2017-10-30 22:27:50,HIST Everyday Prime (mirrored),ISCICODE0015H,HIST,1500.0,77,PAWN STARS
2,Company XYZ,2017-10-30 21:42:20,TWC Everyday Prime,ISCICODE0015H,TWC,300.0,15,WEATHER HACKS
3,Company XYZ,2017-10-30 21:17:22,MSNB Weekday Prime,ISCICODE0015H,MSNB,1020.0,123,THE RACHEL MADDOW SHOW
4,Company XYZ,2017-10-30 20:28:46,MSNB Weekday Prime,ISCICODE0015H,MSNB,1020.0,121,ALL IN WITH CHRIS HAYES


In [119]:
df1.head()

Unnamed: 0,TV_Network,Year,Quarter,Month,Submitted_Application_Num,Value
0,(blank),2017,Q4,October,14,1.0
1,(blank),2017,Q4,October,29,1.0
2,(blank),2017,Q4,October,8,1.0
3,aapka_colors,2017,Q4,October,1,1.0
4,aapka_colors,2017,Q3,September,11,1.0


In [None]:
class Final:
    
    def __init__(self, excel_file = None):
        self.excel_file
    
    def read_excel(self, excel_file):
        purchase_exit_survey_data_df = pd.read_excel(excel_file, sheet_name = 'Purchase Exit Survey Data', header=[0,1,2,3,4], index_col=[0,1])
        airings_df = pd.read_excel('Analyst_Dataset.xlsx', sheet_name = 'Airings')
        lookup_df = pd.read_excel('Analyst_Dataset.xlsx', sheet_name = 'Lookup',header=1)
        return purchase_exit_survey_data_df, airings_df, lookup_df
    
    def clean_purchase_exit_survey_data(self, excel_file):
        df1, df2, df3 = read_excel(excel_file)
        #df1 = df1.stack().stack().stack().stack()
        #df1.index.set_names(['Campaign_Type','TV_Network','Submitted_Application_Num','Month', 'Quarter','Year'],level=[0,1,2,3,4,5], inplace=True)
        #df1 = df1.reset_index()
        #df1 = df1.rename(columns={'Submitted Application Timestamp':'Value'})
        #df1 = df1[['TV_Network','Year','Quarter','Month','Submitted_Application_Num','Value']]
        return 'hi'