In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn as sk
import random  
from sklearn.mixture import GaussianMixture    
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, mean_squared_error, confusion_matrix
from sklearn.preprocessing import (LabelEncoder, FunctionTransformer, StandardScaler, 
                                   MinMaxScaler, RobustScaler, OneHotEncoder)
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA, FastICA
from sklearn.random_projection import GaussianRandomProjection
from sklearn.model_selection import (train_test_split, learning_curve, GridSearchCV, 
                                     LeaveOneOut)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers 
from tensorflow.keras.initializers import HeNormal
from scipy import stats
import time


Preproccesing

In [239]:

marketing_df=pd.read_csv("marketing_campaign.csv",sep="\t") 
random.seed(42)

In [240]:
#Marketing Campaign 
# Creating are "target class"
marketing_df['AcceptedAny'] = (marketing_df[['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5',"Response"]].sum(axis=1) > 0).astype(int)

# One hot encoding marital status
relationship_dict = {
    'Single': 0,
    'Together': 1,
    'Married': 1,
    'Divorced':0,
    'Widow': 0,
    'Alone': 0,
    'Absurd': 0,
    'YOLO': 0
}

marketing_df['Marital_Status'] = marketing_df['Marital_Status'].map(relationship_dict)


# Target Encoding for Education 
education_mapping = {
    'Basic': 0, 
    'Graduation': 1, 
    "2n Cycle":2,
    "Master":2,
    "Phd":3}


# transforming thet amount to being what is the distribution of their porchases money wise
marketing_df['Education'] = marketing_df['Education'].map(education_mapping)


marketing_df["Kidhome"]=marketing_df["Kidhome"]+marketing_df['Teenhome']


# Creating the 'birth_eras' column based on the bins
marketing_df['birth_eras'] =  2014 - marketing_df['Year_Birth']




marketing_df['NumAllPurchases'] = marketing_df['NumWebPurchases']+marketing_df['NumCatalogPurchases']+marketing_df['NumStorePurchases']



In [241]:


#Marketing Removal Of Uneeded 
marketing_df.drop(columns=["ID",'NumDealsPurchases', 'NumWebPurchases',
       'NumCatalogPurchases', 'NumStorePurchases', "Year_Birth","Teenhome","Recency","Dt_Customer",'AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5','Response',"Z_CostContact","Z_Revenue"],axis =1,inplace=True) 
marketing_df.dropna(inplace=True)
 




In [None]:
columns_to_check = ["Income"]

# Calculate Z-scores only for the selected columns
z_scores = np.abs(stats.zscore(marketing_df[columns_to_check]))


threshold = 3

# Keep only rows where the Z-scores for the selected columns are below the threshold
marketing_df = marketing_df[(z_scores < threshold).all(axis=1)]


In [243]:
x_marketing= marketing_df.drop(columns=['AcceptedAny'])
y_marketing=marketing_df['AcceptedAny']


In [None]:
# Creates the Scaling Preproccesor for X Values
standard_categories = ['bpm']

percentage_categories = [ 'danceability_%', 'energy_%', 'acousticness_%', 
                      'instrumentalness_%', 'liveness_%']

# Define the transformers

def divide_by_100(X):
    return X / 100 

scaling_pipeline = Pipeline(steps=[
    ('standard_scaling', RobustScaler()),  
    ('min_max_scaling', MinMaxScaler())     
])

preprocessor_spotify = ColumnTransformer(
    transformers=[
        ('standard', scaling_pipeline, standard_categories), 
        ('percentage', scaling_pipeline, percentage_categories), 
         
    ], 
    remainder='passthrough', 
    force_int_remainder_cols=False  
)

# Scaling categories (Based on Training/Testing)
standard_categories = ['Education', 'Income', 'Kidhome', 'MntWines',
       'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
       'MntGoldProds', 'NumAllPurchases', 'NumWebVisitsMonth',"birth_eras"
       ]  

preprocessor_marketing = ColumnTransformer(
    transformers=[
        ('standard', scaling_pipeline, standard_categories),   
           
    ], 
    remainder='passthrough', 
    force_int_remainder_cols=False
)



Base Neural Network

In [245]:

def create_marketing_model(n_features=25, activation_function="relu", learning_rate=.0001, num_neurons=128, layers=5):
    model = Sequential()

    
    model.add(Dense(num_neurons, activation=activation_function, input_shape=(n_features,), kernel_initializer=HeNormal())) 

    for a in range(layers - 1): 
        # Prevent num_neurons from becoming too small (<= 1)
        if num_neurons > 8:
            num_neurons = max(num_neurons // 2, 2)  # Ensure neurons don't go below 2
        model.add(Dense(num_neurons, activation=activation_function, kernel_initializer=HeNormal()))  # Apply He Normal
        
    model.add(Dense(1, activation='sigmoid', kernel_initializer=HeNormal()))  # Sigmoid output layer

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, 
                  loss='binary_crossentropy',  
                  metrics=["accuracy","precision","recall"])

    return model


In [None]:

X_train, X_test, y_train, y_test = train_test_split(x_marketing, y_marketing, test_size=0.2, random_state=42)


X_train = preprocessor_marketing.fit_transform(X_train)
X_test = preprocessor_marketing.transform(X_test)




marketing_model_swish = create_marketing_model(n_features=len(X_train[0]),activation_function="swish",num_neurons=256)



marketing_model_swish.fit(X_train, y_train, epochs=200, batch_size=64, 
                                            verbose=0) 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<keras.src.callbacks.history.History at 0x18794881b40>

In [247]:
y_pred=marketing_model_swish.predict(X_test) 
y_pred = (y_pred > 0.5).astype(int)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [248]:
base_confusion=confusion_matrix(y_test, y_pred)   
base_confusion

array([[229,  19],
       [ 46,  53]])

Neural Network With Dimension reduction

In [None]:
# Function to create a pipeline and get components
def get_components(preproccessor,dim_reduction_method, X):
    # Apply preprocessing and dimensionality reduction
    pipeline = Pipeline(steps=[
        ('preprocessor', preproccessor),  
        ('dim_reduction', dim_reduction_method)  
    ])
    

    pipeline.fit(X)
    

    transformed_data = pipeline.transform(X)
    return transformed_data 

 
pca = PCA(n_components=8,random_state=42)  
rp = GaussianRandomProjection(n_components=11,random_state=42)  
ica = FastICA(n_components=4,random_state=42) 

transformed_pca_marketing = get_components(preprocessor_marketing,pca, x_marketing)
transformed_rp_marketing = get_components(preprocessor_marketing,rp, x_marketing)
transformed_ica_marketing = get_components(preprocessor_marketing,ica, x_marketing) 

In [None]:
#PCA
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(transformed_pca_marketing, y_marketing, test_size=0.2, random_state=42)






# Create models for each activation function

marketing_model_swish_pca = create_marketing_model(n_features=len(X_train[0]),activation_function="swish",num_neurons=256)



# Train models with early stopping

marketing_model_swish_pca.fit(X_train, y_train, epochs=200, batch_size=64, 
                                            verbose=0)  

y_pred=marketing_model_swish_pca.predict(X_test) 
y_pred = (y_pred > 0.5).astype(int) 
pca_confusion=confusion_matrix(y_test, y_pred)   
pca_confusion

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


array([[230,  18],
       [ 53,  46]])

In [251]:
#ICA
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(transformed_ica_marketing, y_marketing, test_size=0.2, random_state=42)




# Create models for each activation function

marketing_model_swish_ica = create_marketing_model(n_features=len(X_train[0]),activation_function="swish",num_neurons=256)



# Train models with early stopping

marketing_model_swish_ica.fit(X_train, y_train, epochs=200, batch_size=64, 
                                            verbose=0)  

y_pred=marketing_model_swish_ica.predict(X_test) 
y_pred = (y_pred > 0.5).astype(int) 
ica_confusion=confusion_matrix(y_test, y_pred)   
ica_confusion

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


array([[229,  19],
       [ 60,  39]])

In [252]:
#rp
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(transformed_rp_marketing, y_marketing, test_size=0.2, random_state=42)


# Create models for each activation function

marketing_model_swish_rp = create_marketing_model(n_features=len(X_train[0]),activation_function="swish",num_neurons=256)



# Train models with early stopping

marketing_model_swish_rp.fit(X_train, y_train, epochs=200, batch_size=64, 
                                            verbose=0)  

y_pred=marketing_model_swish_rp.predict(X_test) 
y_pred = (y_pred > 0.5).astype(int) 
rp_confusion=confusion_matrix(y_test, y_pred)   
rp_confusion

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


array([[231,  17],
       [ 57,  42]])

adding clusters

In [None]:
#getting cluster labels 

pipeline_kmeans_marketing = Pipeline([
    ('preprocessor', preprocessor_marketing),  
    ('kmeans', KMeans(n_clusters=3,init='k-means++',max_iter=200, n_init=50,random_state=42,algorithm="elkan"))  
]) 

pipeline_gmm_marketing = Pipeline([
    ('preprocessor', preprocessor_marketing),  
    ('gmm', GaussianMixture(n_components=3,init_params='kmeans',max_iter=200,n_init=50,random_state=42))  
])


In [254]:
pipeline_kmeans_marketing.fit(x_marketing) 
labels_kmeans=pipeline_kmeans_marketing.predict(x_marketing)


pipeline_gmm_marketing.fit(x_marketing) 
labels_gmm=pipeline_gmm_marketing.predict(x_marketing)


In [255]:
np.unique(labels_kmeans,return_counts=True)

(array([0, 1, 2], dtype=int32), array([610, 711, 410]))

In [256]:
np.unique(labels_gmm,return_counts=True)

(array([0, 1, 2]), array([570, 610, 551]))

In [257]:
labels_kmeans = labels_kmeans.reshape(-1)  
labels_gmm = labels_gmm.reshape(-1) 
 

 # Convert numpy array to pandas Series
df_kmeans = pd.Series(labels_kmeans)
df_gmm = pd.Series(labels_gmm)

# One-hot encode using get_dummies
one_hot_kmeans_df = pd.get_dummies(df_kmeans, prefix='cluster').astype(int)
one_hot_gmm_df = pd.get_dummies(df_gmm, prefix='cluster').astype(int)
  

print(marketing_df.shape)
print(one_hot_gmm_df.shape)

marketing_df_kmeans = pd.concat([x_marketing.reset_index(drop=True), one_hot_kmeans_df], axis=1)

marketing_df_gmm = pd.concat([x_marketing.reset_index(drop=True), one_hot_gmm_df], axis=1)

(1731, 15)
(1731, 3)


In [None]:
#gmm

X_train, X_test, y_train, y_test = train_test_split(marketing_df_gmm, y_marketing, test_size=0.2, random_state=42)

X_train = preprocessor_marketing.fit_transform(X_train)
X_test = preprocessor_marketing.transform(X_test)


marketing_model_swish_gmm = create_marketing_model(n_features=len(X_train[0]),activation_function="swish",num_neurons=256)





marketing_model_swish_gmm.fit(X_train, y_train, epochs=200, batch_size=64, 
                                            verbose=0)  

y_pred=marketing_model_swish_gmm.predict(X_test) 
y_pred = (y_pred > 0.5).astype(int) 
gmm_confusion=confusion_matrix(y_test, y_pred)   
gmm_confusion

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


array([[230,  18],
       [ 53,  46]])

In [259]:
#kmeans
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(marketing_df_kmeans, y_marketing, test_size=0.2, random_state=42)

X_train = preprocessor_marketing.fit_transform(X_train)
X_test = preprocessor_marketing.transform(X_test)

# Create models for each activation function

marketing_model_swish_kmeans = create_marketing_model(n_features=len(X_train[0]),activation_function="swish",num_neurons=256)



# Train models with early stopping

marketing_model_swish_kmeans.fit(X_train, y_train, epochs=200, batch_size=64, 
                                            verbose=0)  

y_pred=marketing_model_swish_kmeans.predict(X_test) 
y_pred = (y_pred > 0.5).astype(int) 
kmeans_confusion=confusion_matrix(y_test, y_pred)   
kmeans_confusion

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


array([[236,  12],
       [ 55,  44]])