<a href="https://colab.research.google.com/github/amfei/Marketing-Mix-Modeling/blob/main/Marketing_mix_modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Marketing Mix Modeling (MMM)**
MMM is a statistical analysis technique that helps marketers understand the impact of various marketing activities on sales or other key performance indicators (KPIs). Ordinary Least Squares (OLS) regression is a common method used for this purpose.

In MMM, we use historical data to assess the influence of different marketing channels, like TV, radio, digital advertising, and other factors, on a target variable like sales. OLS regression allows us to determine the relationship between these variables.

The cells below walk through a Python example of marketing mix modeling with OLS regression, using the `statsmodels` library.

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt

# Load the source spreadsheet and show which columns it contains
df = pd.read_excel("data0.xlsx")
print(df.columns)

# Keep only the modelling columns, then replace the spaces in their
# names with underscores
df = df.loc[:, [
    'Week', 'Revenue', 'Newspaper Ads', 'Search Ads', 'Social Ads',
    'Price Change', 'Temperature', 'Holiday',
]].rename(columns=lambda label: label.replace(' ', '_'))
df.head()

In [None]:
# Pairwise scatter plots with fitted regression lines; KDE curves on the diagonal
sns.pairplot(data=df, kind='reg', diag_kind='kde')
plt.show()

In [None]:
# Split the frame into the response series (y) and the predictor matrix (X)
y = df.loc[:, 'Revenue']
X = df.loc[:, [
    'Newspaper_Ads', 'Search_Ads', 'Social_Ads',
    'Price_Change', 'Temperature', 'Holiday',
]]

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Rescale every predictor column with a min-max scaler (default settings)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# fit_transform returns a bare NumPy array, so restore both the column names
# and the original row index. Keeping X.index (instead of a fresh 0..n-1
# index) guarantees the scaled rows stay aligned with y in later
# index-based operations.
X_scaled = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)


In [None]:
# Pairwise correlations between the scaled predictors
corr = X_scaled.corr()

# Draw the matrix as an annotated heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(data=corr, cmap='coolwarm', annot=True, fmt=".2f", linewidths=0.5)
plt.title("Correlation Heatmap")
plt.show()

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

def calc_vif(X):
    """Compute the variance inflation factor (VIF) of every column of ``X``.

    Parameters
    ----------
    X : pandas.DataFrame
        Predictor matrix; ``X.values`` is handed to statsmodels'
        ``variance_inflation_factor`` once per column.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``variables`` (the column names of ``X``) and ``VIF``
        (the matching variance inflation factors), in the column order of ``X``.
    """
    # Take the labels from the argument itself rather than from the global
    # X_scaled, so the names always match the frame whose VIFs are computed.
    return pd.DataFrame({
        "variables": X.columns,
        "VIF": [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
    })


# VIF table for the scaled predictors, highest VIF first
f = calc_vif(X_scaled).sort_values('VIF', ascending=False)
f

In [None]:
# Remove Search_Ads from the predictor set (presumably because of its VIF in
# the table above -- confirm). Reassigning instead of using inplace=True, and
# ignoring an already-missing column, keeps this cell safe to re-run.
X_scaled = X_scaled.drop(columns=['Search_Ads'], errors='ignore')

In [None]:
# Prepend an intercept column so the regression can estimate a baseline term
X_scaled = sm.add_constant(X_scaled, prepend=True)

X_scaled.head()

In [None]:
# Ordinary least squares: revenue regressed on the scaled predictors plus intercept
model = sm.OLS(endog=y, exog=X_scaled).fit()

# Print the regression summary table
print(model.summary())

In [None]:
# In-sample predictions for the same rows the model was fitted on
predicted_revenue = model.predict(exog=X_scaled)
print("Predicted Revenue:")
predicted_revenue.head()

In [None]:
# Slope terms only: every fitted parameter after the leading intercept entry
coefficients = model.params.iloc[1:]
print('coefficients: ', coefficients)

# Weight each scaled predictor column by its coefficient, then attach the
# intercept as a constant 'Baseline' column
contributions = (
    X_scaled.iloc[:, 1:]
    .mul(coefficients, axis=1)
    .assign(Baseline=model.params['const'])
)

# Put Baseline first and number the rows starting from 1
contributions = contributions.loc[:, [
    'Baseline', 'Price_Change', 'Temperature',
    'Newspaper_Ads', 'Social_Ads', 'Holiday',
]]
contributions.index = range(1, len(contributions) + 1)
contributions.head()

In [None]:
# Table of predicted vs. observed revenue, indexed like the predictions
result = predicted_revenue.to_frame(name='Predicted Revenue')
result['Revenue'] = y

# Signed error (prediction minus observed) and the same error as a
# percentage of observed revenue
result['Error'] = result['Predicted Revenue'] - result['Revenue']
result['Error %'] = 100 * result['Error'] / y

# Number the rows starting from 1
result.index = range(1, len(result) + 1)
result.head()

In [None]:
# Overlay predicted and observed revenue on one set of axes
plt.figure(figsize=(8, 6))
plt.plot(result.index, result['Predicted Revenue'], label='Predicted Revenue')
plt.plot(result.index, result['Revenue'], label='Revenue')
plt.legend()

# Label both axes and title the figure so it can be read on its own,
# consistent with the other plots in this notebook
plt.xlabel("Week")
plt.ylabel("Revenue")
plt.title("Predicted vs. Actual Revenue")

plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.show()

In [None]:
# Bar chart of the percentage error for each row of the result table
plt.bar(x=result.index, height=result['Error %'], color='r')
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.xlabel("Week")
plt.ylabel("Error %")
plt.show()

In [None]:
# Stack plot showing how much each driver contributes to predicted revenue

# One colour per stacked layer. Requesting fewer colours than layers makes
# matplotlib cycle through the palette, so two layers would end up with the
# same colour and be indistinguishable in the legend.
colors = sns.color_palette("Set2", len(contributions.columns))

plt.figure(figsize=(12, 8))
plt.stackplot(
    contributions.index,
    contributions.T,  # one row per layer, as stackplot expects
    labels=contributions.columns,
    colors=colors,
    alpha=0.7,
)
# Observed revenue drawn on top of the stack for comparison
plt.plot(result.index, result['Revenue'], c='k', label='Revenue')
plt.xlabel("Time")
plt.ylabel("Revenue")
plt.title("Key Driver Stack Plot for Revenue")
plt.legend(loc="upper left")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.show()