## Read in Libraries

In [1]:
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore') 

## Defining import / export file paths

In [2]:
# STUDENT INPUT REQUIRED - Modify the path directly below so it points to the location of the
# "04a. Facebook - Missing values - Raw data.csv" file on your laptop/desktop.
# A raw string (r'...') is used so the Windows backslashes are not treated as escape sequences.
raw_data_path = r'C:\Users\drvis\Downloads\MMM-2\04a. Facebook - Missing values - Raw data.csv'

# Folder (written with a trailing slash) and file name that together form the path of the
# processed CSV exported at the end of the notebook.
# NOTE(review): this folder is also machine-specific - presumably it needs the same adjustment
# as the path above; confirm before running.
export_data_path = 'C:/Users/drvis/Downloads/MMM-2/Processed Data/'
file_name = 'final_facebook.csv'

## Read in Facebook/Instagram Spend and Impressions Data

In [3]:
# Load the raw Facebook/Instagram CSV into the "data_for_imputation" DataFrame.
data_for_imputation = pd.read_csv(filepath_or_buffer=raw_data_path)

# Display the dtype pandas inferred for each column, to see how the data is stored
# before any conversion is applied.
data_for_imputation.dtypes

week_starting_date     object
fbig_spend            float64
fbig_imp              float64
dtype: object

In [4]:
# Convert the "week_starting_date" column from text to datetime with pd.to_datetime(),
# leaving the other columns as they are.
data_for_imputation = data_for_imputation.assign(
    week_starting_date=lambda frame: pd.to_datetime(frame['week_starting_date'])
)

# STUDENT INPUT REQUIRED - Use the head function to print the first 10 rows of the
# data_for_imputation dataframe - HINT: see how the head function is used in the separate
# Outdoor Campaigns python script. Inspecting the first rows confirms the conversion worked
# and gives a general feel for the data.
print(data_for_imputation.head(n=10))

  week_starting_date  fbig_spend  fbig_imp
0         2019-01-07         0.0       0.0
1         2019-01-14         0.0       0.0
2         2019-01-21         0.0       0.0
3         2019-01-28         0.0       0.0
4         2019-02-04         0.0       0.0
5         2019-02-11         0.0       0.0
6         2019-02-18         0.0       0.0
7         2019-02-25         0.0       0.0
8         2019-03-04         0.0       0.0
9         2019-03-11         0.0       0.0


In [5]:
# Show the weeks where money was spent on Facebook/Instagram ads (fbig_spend > 0)
# but no impressions were recorded (fbig_imp == 0).
# The two boolean Series are combined element-wise with `&` (logical AND) and passed to .loc[]
# so that only the rows satisfying both conditions are returned.
data_for_imputation.loc[
    data_for_imputation['fbig_spend'].gt(0) & data_for_imputation['fbig_imp'].eq(0)
]

Unnamed: 0,week_starting_date,fbig_spend,fbig_imp
21,2019-06-03,13444.23715,0.0
22,2019-06-10,17661.09506,0.0
23,2019-06-17,5969.195235,0.0
24,2019-06-24,6456.33976,0.0


## Use Overall cost per impression to impute missing impression values

In [6]:
# Estimate an overall cost per impression for Facebook/Instagram ads.
# Only rows where BOTH impressions (fbig_imp) and spend (fbig_spend) are greater than zero
# are kept in data_for_imputation_filtered, so weeks with spend but no recorded impressions
# do not distort the ratio.
data_for_imputation_filtered = data_for_imputation.loc[
    (data_for_imputation['fbig_imp'] > 0) & (data_for_imputation['fbig_spend'] > 0)
]

# Without at least one usable row the ratio below would be 0 / 0 = NaN, and that NaN would
# silently propagate into the imputed impressions. Fail loudly instead.
if data_for_imputation_filtered.empty:
    raise ValueError(
        'No rows with both fbig_spend > 0 and fbig_imp > 0 - cannot compute a cost per impression'
    )

# Overall cost per impression = total spend / total impressions over the filtered rows.
cost_per_imp_for_imputation = (
    data_for_imputation_filtered['fbig_spend'].sum()
    / data_for_imputation_filtered['fbig_imp'].sum()
)

print('Cost per Impression : ' + str(cost_per_imp_for_imputation))

Cost per Impression : 0.006257490152210633


In [7]:
# Impute impressions for weeks that have spend but no recorded impressions.
# A week needs imputation when fbig_spend > 0 and fbig_imp is either 0 or missing (NaN).
# Checking only `== 0` would skip NaN values, because NaN never compares equal to 0.
needs_imputation = (
    (data_for_imputation['fbig_spend'] > 0)
    & (data_for_imputation['fbig_imp'].isna() | (data_for_imputation['fbig_imp'] == 0))
)

# Where imputation is needed, impressions = spend / overall cost per impression;
# every other row keeps its original fbig_imp value.
data_for_imputation['fbig_imp'] = np.where(
    needs_imputation,
    data_for_imputation['fbig_spend'] / cost_per_imp_for_imputation,
    data_for_imputation['fbig_imp'],
)

# Use the week as the index for the following cells and the export.
# The column check keeps the cell re-runnable: once the column has become the index,
# calling set_index again would raise a KeyError.
if 'week_starting_date' in data_for_imputation.columns:
    data_for_imputation = data_for_imputation.set_index('week_starting_date')

In [8]:
# Re-run the earlier check after imputation: list any rows where
#   1. spend on Facebook/Instagram ads (fbig_spend) is greater than 0, and
#   2. impressions for those ads (fbig_imp) are still equal to 0.
# An empty result means no week with spend is left without impressions.
data_for_imputation[
    data_for_imputation['fbig_spend'].gt(0) & data_for_imputation['fbig_imp'].eq(0)
]

Unnamed: 0_level_0,fbig_spend,fbig_imp
week_starting_date,Unnamed: 1_level_1,Unnamed: 2_level_1


In [9]:
# Sanity check: sum the 'fbig_spend' column to get the total amount spent on
# Facebook/Instagram advertising and print it with a label.
print(f"Total Spend : {data_for_imputation['fbig_spend'].sum()!s}")

Total Spend : 3271267.472244


In [10]:
# STUDENT INPUT REQUIRED - Create code similar to the total-spend check to print and
# check the total sum of impressions.
print(f"Total Impressions : {data_for_imputation['fbig_imp'].sum()!s}")

Total Impressions : 522776287.7242937


## Export processed data

In [11]:
# Save the processed data_for_imputation DataFrame as a CSV named file_name inside the
# export_data_path folder, so it can be shared, imported into other programs or analysed further.
# to_csv does not create missing folders, so make sure the target folder exists first.
os.makedirs(export_data_path, exist_ok=True)

# os.path.join builds the same path whether or not export_data_path ends with a slash.
data_for_imputation.to_csv(os.path.join(export_data_path, file_name))