## Read in Libraries

In [1]:
from datetime import datetime
import pandas as pd
import numpy as np
import warnings

# Install a blanket "ignore" filter so no warnings are shown for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/numpy - consider
# narrowing it with a `category=` argument if warnings ever need to be investigated.
warnings.filterwarnings('ignore') 

## Defining export file paths

In [2]:
# Destination of the processed outdoor data.
# `file_name` is the name of the CSV file that the notebook writes at the end.
# `export_data_path` is the folder that file is written into. It is an absolute,
# machine-specific path, so adjust it to your own environment before running.
# The trailing "/" matters: the folder and file name are joined by plain string
# concatenation when the data is exported.
file_name = "final_outdoor.csv"
export_data_path = "C:/Users/drvis/Downloads/MMM-2/Processed Data/"

## Calculate the total impressions + spend for this campaign

In [3]:
# Campaign totals, built from the figures of the five individual activations.
# Each list holds one value per activation; summing a list gives the campaign total.
activation_impressions = [5500000, 6200000, 2000000, 10000000, 3500000]
activation_spend = [30000, 15000, 10000, 40000, 10000]

total_impressions = sum(activation_impressions)

# STUDENT INPUT REQUIRED - create a new variable labeled total_spend by adding together spend for all 5 activations
total_spend = sum(activation_spend)


In [4]:
# Show both campaign totals so they can be eyeballed against the source figures.
print(f'total outdoor impressions: {total_impressions}')

# STUDENT INPUT REQUIRED - use the code directly above for total_impressions to create a similar print command for total_spend
print(f'total spend: {total_spend}')

total outdoor impressions: 27200000
total spend: 105000


## Create weekly table

In [5]:
# Boundaries of the weekly table, as ISO date strings (YYYY-MM-DD).
# `first_week` is the first week-starting date to include and `last_week` the last one.
# Both dates fall on a Monday, matching the Monday week start used when the weekly range is built.
first_week, last_week = '2019-01-07', '2020-12-28'

In [6]:
# Build a pandas DatetimeIndex with one entry per week between first_week and last_week.
#   start / end : the first and last dates of the range (inclusive).
#   freq        : 'W-MON' anchors the weekly dates on Mondays. The plain 'W' alias is
#                 anchored on Sundays - swap it in to see the difference.
#   name        : label carried by the index; it becomes the column name once the
#                 index is turned into a DataFrame.
weekly_range_options = dict(
    start=first_week,
    end=last_week,
    freq='W-MON',
    name='week_starting_date',
)
weeks_index = pd.date_range(**weekly_range_options)


In [7]:
# Turn the weekly DatetimeIndex into a DataFrame called df_outdoor.
# The result has a default 0..n-1 row index and a single column that takes its name
# from the index ('week_starting_date'), holding one week-starting date per row.
# The final line previews the first five rows.
df_outdoor = weeks_index.to_frame(index=False)
df_outdoor.head()

Unnamed: 0,week_starting_date
0,2019-01-07
1,2019-01-14
2,2019-01-21
3,2019-01-28
4,2019-02-04


## Allocate total impressions and spend across the outdoor campaign duration
#### Hint: this outdoor campaign runs for exactly 4 full weeks, and its first day is a Monday (i.e. the start of a week)

In [8]:
# Spread the campaign's total impressions evenly over the weeks the campaign ran.
# The campaign covers four Monday-start weeks in August 2020 (3rd, 10th, 17th, 24th).
# The weeks are listed once and the per-week share is derived from the length of that
# list, so the divisor can never drift out of sync with the dates.
# The date strings are converted to timestamps explicitly so the comparison with the
# datetime column does not rely on implicit string parsing.
outdoor_campaign_weeks = pd.to_datetime(
    ['2020-08-03', '2020-08-10', '2020-08-17', '2020-08-24']
)

# True for rows whose week falls inside the campaign, False everywhere else.
in_campaign = df_outdoor['week_starting_date'].isin(outdoor_campaign_weeks)

# Campaign weeks receive an equal share of the total; every other week gets 0.
df_outdoor['outdoor_impressions'] = np.where(
    in_campaign,
    total_impressions / len(outdoor_campaign_weeks),
    0.0,
)

# Preview the first rows (these are early-2019 weeks, so they are expected to show 0.0).
df_outdoor.head()

Unnamed: 0,week_starting_date,outdoor_impressions
0,2019-01-07,0.0
1,2019-01-14,0.0
2,2019-01-21,0.0
3,2019-01-28,0.0
4,2019-02-04,0.0


In [9]:
# STUDENT INPUT REQUIRED - use the code directly above for outdoor_impressions to initialize (with 0's) and allocate spend in outdoor_spend column

# Spread the campaign's total spend evenly over the weeks the campaign ran.
# The campaign covers four Monday-start weeks in August 2020 (3rd, 10th, 17th, 24th).
# The weeks are listed once and the per-week share is derived from the length of that
# list, so the divisor can never drift out of sync with the dates.
# The date strings are converted to timestamps explicitly so the comparison with the
# datetime column does not rely on implicit string parsing.
outdoor_spend_weeks = pd.to_datetime(
    ['2020-08-03', '2020-08-10', '2020-08-17', '2020-08-24']
)

# True for rows whose week falls inside the campaign, False everywhere else.
in_spend_window = df_outdoor['week_starting_date'].isin(outdoor_spend_weeks)

# Campaign weeks receive an equal share of the total; every other week gets 0.
df_outdoor['outdoor_spend'] = np.where(
    in_spend_window,
    total_spend / len(outdoor_spend_weeks),
    0.0,
)

# Preview the first rows (these are early-2019 weeks, so they are expected to show 0.0).
df_outdoor.head()


Unnamed: 0,week_starting_date,outdoor_impressions,outdoor_spend
0,2019-01-07,0.0,0.0
1,2019-01-14,0.0,0.0
2,2019-01-21,0.0,0.0
3,2019-01-28,0.0,0.0
4,2019-02-04,0.0,0.0


## Check that your sums match original totals

In [10]:
# Sanity check on the allocation of impressions.
# `allocated_impressions` adds up every value in the 'outdoor_impressions' column,
# i.e. everything that was distributed across the weekly table.
# `total_impressions` is the campaign total computed earlier from the five activations.
# Their difference should be 0: any other value means some impressions were lost or
# double-counted during allocation (for example, a campaign week missing from the table).
allocated_impressions = df_outdoor['outdoor_impressions'].sum()
allocated_impressions - total_impressions

0.0

In [11]:
# STUDENT INPUT REQUIRED - use code directly above to create a similar check for spend data - sum the spend column in your dataframe and subtract the original total_spend value

# Sanity check on the allocation of spend: the sum of the 'outdoor_spend' column minus
# the original campaign total should be 0.
allocated_spend = df_outdoor['outdoor_spend'].sum()
allocated_spend - total_spend

0.0

In [12]:
# Use the week-starting date as the row label of df_outdoor.
# set_index returns a new DataFrame in which 'week_starting_date' has moved from a
# regular column to the index (drop=True removes it from the columns), so the result
# is assigned back to df_outdoor.
df_outdoor = df_outdoor.set_index(keys='week_starting_date', drop=True)

## Export processed data

In [13]:
# Write df_outdoor to a CSV file.
# The destination is the export folder followed directly by the file name; the two are
# joined by plain string concatenation, so `export_data_path` must end with a "/".
# The DataFrame's index is written as the first column of the CSV.
output_csv = f'{export_data_path}{file_name}'
df_outdoor.to_csv(output_csv)