In [1]:
# imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
import cleaner
import preprocesser
import trainer

Specific Accepted Campaign Models

In [2]:
# Feature Engineering:
# The six promotion columns (AcceptedCmp1..AcceptedCmp5 and Response) are combined into a
# single target column, CampaignAccepted:
#   0   -> none of the six columns equals 1 (the customer accepted no promotion)
#   1-5 -> AcceptedCmp<N> == 1
#   6   -> Response == 1 and none of AcceptedCmp1..AcceptedCmp5 equals 1
# When several columns equal 1 for the same customer, the lowest label wins.

marketing_data = pd.read_csv("marketing_campaign.csv", delimiter="\t")
marketing_data = marketing_data.dropna()  # discard every row with a missing value

marketing_data = cleaner.clean(marketing_data)

# Promotion columns in precedence order; a column's 1-based position is its label.
promotion_columns = ["AcceptedCmp" + str(i) for i in range(1, 6)] + ["Response"]

# np.select takes the choice of the FIRST condition that holds for a row, so
# earlier columns override later ones; rows matching no condition get 0.
campaign_accepted = np.select(
    [(marketing_data[column_name] == 1).to_numpy() for column_name in promotion_columns],
    list(range(1, len(promotion_columns) + 1)),
    default=0,
)

# Swap the six source columns for the combined target. drop() returns a new
# frame here instead of mutating in place, so the assignment below never
# writes into an object shared with an earlier step.
marketing_data = marketing_data.drop(columns=promotion_columns)
marketing_data["CampaignAccepted"] = campaign_accepted

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = (data[col] - _min) / (_max - _min)


In [3]:
# Run the two categorical conversions from the preprocesser module in order:
# marital status first, then education. Each step receives the frame returned
# by the previous one.
for convert in (preprocesser.convert_marital_status, preprocesser.convert_education):
    marketing_data = convert(marketing_data)

In [5]:
# Keep only the rows whose label is non-zero: this section predicts WHICH
# campaign (label 1-6) was accepted, so label 0 rows are left out.
campaign_data = marketing_data[marketing_data["CampaignAccepted"] != 0]

# ID, the target itself and Dt_Customer are excluded from the feature set.
excluded_columns = {"ID", "CampaignAccepted", "Dt_Customer"}
feature_columns = [col for col in campaign_data.columns if col not in excluded_columns]
x_data = campaign_data[feature_columns]
y_data = campaign_data["CampaignAccepted"]

# A fixed seed makes the split, and therefore the scores reported below,
# identical on every Restart & Run All.
train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, random_state=42)


In [6]:
# Experiment: multi-class target, features as produced by the cells above.
# The heading is printed first because perform_cross_validation writes its
# own score line to the output before the test accuracy is reported.
print("Specific Campaign Accepted: All Spending Values")

svc_model = SVC()  # scikit-learn defaults
best_model = trainer.perform_cross_validation(svc_model, train_x, train_y)

# Score the model returned by cross-validation on the held-out split.
svc_predictions = best_model.predict(test_x)
svc_accuracy = trainer.calculate_accuracy(svc_predictions, test_y)
print(f"Support Vector Machine Test Accuracy: {svc_accuracy!s}")

Specific Campaign Accepted: All Spending Values
Average Validation Score: 0.43
Support Vector Machine Test Accuracy: 0.40397350993377484


Accepted a Campaign or Not

In [7]:
# Binary target: 1 when CampaignAccepted is non-zero (some promotion was
# accepted), 0 otherwise. The work happens on a deep copy so marketing_data
# keeps its original labels for the multi-class experiments.
accepted_campaign = marketing_data.copy(deep=True)

# One vectorised comparison replaces a second full-frame copy plus a
# mask-assignment; casting back keeps the column's original dtype.
campaign = (
    accepted_campaign["CampaignAccepted"]
    .ne(0)
    .astype(accepted_campaign["CampaignAccepted"].dtype)
)
accepted_campaign["CampaignAccepted"] = campaign

In [8]:
# Experiment: binary target (accepted any promotion or not), all rows kept.
# ID, the target itself and Dt_Customer are excluded from the feature set.
excluded_columns = {"ID", "CampaignAccepted", "Dt_Customer"}
feature_columns = [col for col in accepted_campaign.columns if col not in excluded_columns]
x_data = accepted_campaign[feature_columns]
y_data = accepted_campaign["CampaignAccepted"]

# A fixed seed makes the split, and therefore the reported scores, identical
# on every Restart & Run All.
train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, random_state=42)

print("Campaign Accepted or Not: All Spending Values")
svc_model = SVC()
best_model = trainer.perform_cross_validation(svc_model, train_x, train_y)

# Score the model returned by cross-validation on the held-out split.
svc_predictions = best_model.predict(test_x)
svc_accuracy = trainer.calculate_accuracy(svc_predictions, test_y)
print(f"Support Vector Machine Test Accuracy: {svc_accuracy!s}")

Campaign Accepted or Not: All Spending Values
Average Validation Score: 0.76
Support Vector Machine Test Accuracy: 0.7522603978300181


In [9]:
# Experiment: multi-class target again, this time after
# preprocesser.create_spending has been applied to the frame.
# NOTE(review): marketing_data is rebound to the helper's result, so re-running
# this cell feeds already-transformed data back in — confirm create_spending
# is safe to apply twice, or restart the kernel before re-running.
marketing_data = preprocesser.create_spending(marketing_data)

# Keep only the rows whose label is non-zero (some promotion was accepted).
campaign_data = marketing_data[marketing_data["CampaignAccepted"] != 0]

# ID, the target itself and Dt_Customer are excluded from the feature set.
excluded_columns = {"ID", "CampaignAccepted", "Dt_Customer"}
feature_columns = [col for col in campaign_data.columns if col not in excluded_columns]
x_data = campaign_data[feature_columns]
y_data = campaign_data["CampaignAccepted"]

# A fixed seed makes the split, and therefore the reported scores, identical
# on every Restart & Run All.
train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, random_state=42)

print("Specific Campaign Accepted: Total Spending")
svc_model = SVC()
best_model = trainer.perform_cross_validation(svc_model, train_x, train_y)

# Score the model returned by cross-validation on the held-out split.
svc_predictions = best_model.predict(test_x)
svc_accuracy = trainer.calculate_accuracy(svc_predictions, test_y)
print(f"Support Vector Machine Test Accuracy: {svc_accuracy!s}")

Specific Campaign Accepted: Total Spending
Average Validation Score: 0.39
Support Vector Machine Test Accuracy: 0.3973509933774834


In [10]:
# Binary-target frame after preprocesser.create_spending has been applied.
# NOTE(review): accepted_campaign is rebound to the helper's result, so
# re-running this cell applies create_spending to already-transformed data —
# confirm that is safe, or restart the kernel before re-running.
accepted_campaign = preprocesser.create_spending(accepted_campaign)

# ID, the target itself and Dt_Customer are excluded from the feature set.
excluded_columns = {"ID", "CampaignAccepted", "Dt_Customer"}
feature_columns = [col for col in accepted_campaign.columns if col not in excluded_columns]
x_data = accepted_campaign[feature_columns]
y_data = accepted_campaign["CampaignAccepted"]

# A fixed seed makes the split, and therefore the scores reported in the next
# cell, identical on every Restart & Run All.
train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, random_state=42)

In [11]:
# Experiment: binary target on the frame produced by create_spending.
# The heading is printed first because perform_cross_validation writes its
# own score line to the output before the test accuracy is reported.
print("Campaign Accepted or Not: Total Spending")

# NOTE(review): this is the only experiment that enables probability
# estimates, yet only predict() is called in this cell — confirm whether a
# later cell needs predict_proba; if not, the flag only slows down fitting.
svc_model = SVC(probability=True)
best_model = trainer.perform_cross_validation(svc_model, train_x, train_y)

# Score the model returned by cross-validation on the held-out split.
svc_predictions = best_model.predict(test_x)
svc_accuracy = trainer.calculate_accuracy(svc_predictions, test_y)
print(f"Support Vector Machine Test Accuracy: {svc_accuracy!s}")


Campaign Accepted or Not: Total Spending
Average Validation Score: 0.75
Support Vector Machine Test Accuracy: 0.7739602169981917
