#### 0.Data

In [34]:
import pandas as pd
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
import numpy as np
import itertools

In [2]:
coupon = pd.read_excel("http://byungwan.com/class/Coupon_Assignment2.xlsx")
coupon.head()

Unnamed: 0,CID,Gender,Age,CClass,Discount,Visits,Email,SMS,MStatus,Purchases,Sales
0,1,Male,25,2,Coupon Not Used,1,Opt-in,Opt-in,Married,1,0.505
1,2,Female,17,2,Coupon Not Used,1,Opt-in,Opt-in,Married,1,0.33
2,3,Female,38,2,Coupon Not Used,3,Opt-in,Opt-in,Married,1,0.255
3,4,Female,36,2,Coupon Not Used,6,Opt-in,Opt-in,Single,3,1.8762
4,5,Female,37,2,Coupon Not Used,7,Opt-in,Opt-in,Single,5,6.094


In [3]:
coupon = pd.get_dummies(coupon, columns=['Gender','Discount','Email','SMS','MStatus'], drop_first=True, dtype=int)

In [4]:
coupon.head()

Unnamed: 0,CID,Age,CClass,Visits,Purchases,Sales,Gender_Male,Discount_Coupon Used,Email_Opt-out,SMS_Opt-out,MStatus_Single
0,1,25,2,1,1,0.505,1,0,0,0,0
1,2,17,2,1,1,0.33,0,0,0,0,0
2,3,38,2,3,1,0.255,0,0,0,0,0
3,4,36,2,6,3,1.8762,0,0,0,0,1
4,5,37,2,7,5,6.094,0,0,0,0,1


In [5]:
coupon['Discount_Coupon Used'].value_counts()

Discount_Coupon Used
0    927
1     73
Name: count, dtype: int64

#### 1. Balance Data (Downsampling)

In [6]:
coupon0 = coupon[coupon['Discount_Coupon Used']==0]
coupon1 = coupon[coupon['Discount_Coupon Used']==1]

In [7]:
coupon0_downsampled = resample(coupon0, replace=False, n_samples=73, random_state=0)

In [8]:
coupon_downsampled = pd.concat([coupon0_downsampled, coupon1], axis=0)
coupon_downsampled['Discount_Coupon Used'].value_counts()

Discount_Coupon Used
0    73
1    73
Name: count, dtype: int64

In [9]:
coupon_downsampled.head()

Unnamed: 0,CID,Age,CClass,Visits,Purchases,Sales,Gender_Male,Discount_Coupon Used,Email_Opt-out,SMS_Opt-out,MStatus_Single
339,340,24,2,50,11,5.0964,0,0,0,0,0
386,387,40,2,13,9,6.5546,0,0,0,0,0
730,731,31,2,2,1,0.4,1,0,0,0,0
984,985,24,1,2,0,0.0,1,0,0,0,0
886,887,25,1,0,0,0.0,0,0,0,0,0


In [10]:
coupon_downsampled.info()

<class 'pandas.core.frame.DataFrame'>
Index: 146 entries, 339 to 970
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   CID                   146 non-null    int64  
 1   Age                   146 non-null    int64  
 2   CClass                146 non-null    int64  
 3   Visits                146 non-null    int64  
 4   Purchases             146 non-null    int64  
 5   Sales                 146 non-null    float64
 6   Gender_Male           146 non-null    int64  
 7   Discount_Coupon Used  146 non-null    int64  
 8   Email_Opt-out         146 non-null    int64  
 9   SMS_Opt-out           146 non-null    int64  
 10  MStatus_Single        146 non-null    int64  
dtypes: float64(1), int64(10)
memory usage: 13.7 KB


#### 2. Preprocessing

In [11]:
y = coupon_downsampled.iloc[:, 7]
x = coupon_downsampled.iloc[:, [1,2,3,4,5,6,8,9,10]]

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

#### 3. Scaling

In [13]:
scaler = StandardScaler()
scaler_model = scaler.fit(x)
scaled_x_train = scaler_model.transform(x_train)
scaled_x_test = scaler_model.transform(x_test)

#### 4. Neural Networks (Downsampling)

In [None]:
# 1. MLP with 1 hidden layer, 1 node
mlp = MLPClassifier(activation='relu', hidden_layer_sizes=(1), max_iter=2000, random_state=0)

In [16]:
mlp_model = mlp.fit(scaled_x_train, y_train)

In [17]:
y_pred = mlp_model.predict(scaled_x_test)

In [19]:
metrics.accuracy_score(y_test, y_pred)

0.6136363636363636

In [47]:
# 1.1 MLP with 1 hidden layer, n nodes
nums = list(range(1,11))

In [49]:
for n in nums:
    mlp_n = MLPClassifier(activation='relu', hidden_layer_sizes=(n), max_iter=2000, random_state=0)
    scores = cross_val_score(mlp_n, scaler_x, y, cv=5)
    print(f'Hidden layer nodes: {n} , Cross-validated accuracy: {np.mean(scores):.4f}')

Hidden layer nodes: 1 , Cross-validated accuracy: 0.6984
Hidden layer nodes: 2 , Cross-validated accuracy: 0.6782
Hidden layer nodes: 3 , Cross-validated accuracy: 0.6779
Hidden layer nodes: 4 , Cross-validated accuracy: 0.7117
Hidden layer nodes: 5 , Cross-validated accuracy: 0.7257
Hidden layer nodes: 6 , Cross-validated accuracy: 0.7193
Hidden layer nodes: 7 , Cross-validated accuracy: 0.7600
Hidden layer nodes: 8 , Cross-validated accuracy: 0.6986
Hidden layer nodes: 9 , Cross-validated accuracy: 0.7395
Hidden layer nodes: 10 , Cross-validated accuracy: 0.7393


In [25]:
# 2. MLP with 3 hidden layer
a = b = c= range(1,4)

In [29]:
abc = list(itertools.product(a, b, c))
print(abc)

[(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 2, 1), (1, 2, 2), (1, 2, 3), (1, 3, 1), (1, 3, 2), (1, 3, 3), (2, 1, 1), (2, 1, 2), (2, 1, 3), (2, 2, 1), (2, 2, 2), (2, 2, 3), (2, 3, 1), (2, 3, 2), (2, 3, 3), (3, 1, 1), (3, 1, 2), (3, 1, 3), (3, 2, 1), (3, 2, 2), (3, 2, 3), (3, 3, 1), (3, 3, 2), (3, 3, 3)]


In [31]:
scaled_x = scaler_model.transform(x)

In [35]:
for n in abc:
    mlp2 = MLPClassifier(activation='relu', hidden_layer_sizes=n, max_iter=2000, random_state=0)
    scores = cross_val_score(mlp2, scaled_x, y, cv=5)
    mean_score = np.mean(scores)
    print(f'Hidden layers: {n}, Cross-validated accuracy: {mean_score:.4f}')

Hidden layers: (1, 1, 1), Cross-validated accuracy: 0.5000
Hidden layers: (1, 1, 2), Cross-validated accuracy: 0.5483
Hidden layers: (1, 1, 3), Cross-validated accuracy: 0.5345
Hidden layers: (1, 2, 1), Cross-validated accuracy: 0.5000
Hidden layers: (1, 2, 2), Cross-validated accuracy: 0.4940
Hidden layers: (1, 2, 3), Cross-validated accuracy: 0.4862
Hidden layers: (1, 3, 1), Cross-validated accuracy: 0.5000
Hidden layers: (1, 3, 2), Cross-validated accuracy: 0.6497
Hidden layers: (1, 3, 3), Cross-validated accuracy: 0.4529
Hidden layers: (2, 1, 1), Cross-validated accuracy: 0.6434
Hidden layers: (2, 1, 2), Cross-validated accuracy: 0.6713
Hidden layers: (2, 1, 3), Cross-validated accuracy: 0.5890
Hidden layers: (2, 2, 1), Cross-validated accuracy: 0.5207
Hidden layers: (2, 2, 2), Cross-validated accuracy: 0.7053
Hidden layers: (2, 2, 3), Cross-validated accuracy: 0.6784
Hidden layers: (2, 3, 1), Cross-validated accuracy: 0.6920




Hidden layers: (2, 3, 2), Cross-validated accuracy: 0.6846
Hidden layers: (2, 3, 3), Cross-validated accuracy: 0.6161
Hidden layers: (3, 1, 1), Cross-validated accuracy: 0.5000
Hidden layers: (3, 1, 2), Cross-validated accuracy: 0.6306
Hidden layers: (3, 1, 3), Cross-validated accuracy: 0.6986
Hidden layers: (3, 2, 1), Cross-validated accuracy: 0.6841




Hidden layers: (3, 2, 2), Cross-validated accuracy: 0.7322
Hidden layers: (3, 2, 3), Cross-validated accuracy: 0.5897
Hidden layers: (3, 3, 1), Cross-validated accuracy: 0.7048
Hidden layers: (3, 3, 2), Cross-validated accuracy: 0.6090
Hidden layers: (3, 3, 3), Cross-validated accuracy: 0.5623


--------------------------

#### 1. Balance Data (Upsampling)

In [37]:
coupon['Discount_Coupon Used'].value_counts()

Discount_Coupon Used
0    927
1     73
Name: count, dtype: int64

In [38]:
coupon1_upsampled = resample(coupon1, replace=True, n_samples=927, random_state=0)

In [39]:
coupon_upsampled = pd.concat([coupon0, coupon1_upsampled], axis=0)
coupon_upsampled['Discount_Coupon Used'].value_counts()

Discount_Coupon Used
0    927
1    927
Name: count, dtype: int64

#### 2. Preprocessing

In [40]:
y_up = coupon_upsampled.iloc[:, 7]
x_up = coupon_upsampled.iloc[:, [1,2,3,4,5,6,8,9,10]]

In [43]:
x_up_train, x_up_test, y_up_train, y_up_test = train_test_split(x_up, y_up, test_size=0.3, random_state=0)

#### 3. Scaling

In [44]:
scaler = StandardScaler()
scaler_model = scaler.fit(x)
scaler_x_up = scaler_model.transform(x_up)
scaler_x_up_train = scaler_model.transform(x_up_train)
scaler_x_up_test = scaler_model.transform(x_up_test)

#### 4. Neural Networks (Upsampling)

In [46]:
### 1. MLP with 1 hidden layer, 1 node
mlp_up = MLPClassifier(activation='relu', hidden_layer_sizes=(1), max_iter=2000, random_state=0)

In [51]:
mlp_up_model = mlp_up.fit(scaler_x_up_train, y_up_train)
y_up_pred = mlp_up_model.predict(scaler_x_up_test)
metrics.accuracy_score(y_up_test, y_up_pred)

0.748653500897666

In [52]:
### 1.1 MLP with 1 hidden layer, n nodes
nums = list(range(1,11))
for n in nums:
    mlp_n = MLPClassifier(activation='relu', hidden_layer_sizes=(n), max_iter=2000, random_state=0)
    scores = cross_val_score(mlp_n, scaler_x_up, y_up, cv=5)
    print(f'Hidden layer nodes: {n}, Cross-validated accuracy: {np.mean(scores):.4f}')

Hidden layer nodes: 1, Cross-validated accuracy: 0.7741
Hidden layer nodes: 2, Cross-validated accuracy: 0.7697
Hidden layer nodes: 3, Cross-validated accuracy: 0.7902
Hidden layer nodes: 4, Cross-validated accuracy: 0.8134
Hidden layer nodes: 5, Cross-validated accuracy: 0.8339
Hidden layer nodes: 6, Cross-validated accuracy: 0.8312
Hidden layer nodes: 7, Cross-validated accuracy: 0.8350
Hidden layer nodes: 8, Cross-validated accuracy: 0.8501
Hidden layer nodes: 9, Cross-validated accuracy: 0.8571
Hidden layer nodes: 10, Cross-validated accuracy: 0.8334


In [53]:
### 2. MLP with 3 hidden layer
a = b = c = range(1,4)
abc = list(itertools.product(a,b,c))

for n in abc:
    mlp2_up = MLPClassifier(activation='relu', hidden_layer_sizes=n, max_iter=2000, random_state=0)
    scores = cross_val_score(mlp2_up, scaler_x_up, y_up, cv=5)
    print(f'Hidden layers: {n}, Cross-validated accuracy: {np.mean(scores):.4f}')

Hidden layers: (1, 1, 1), Cross-validated accuracy: 0.7649
Hidden layers: (1, 1, 2), Cross-validated accuracy: 0.6629
Hidden layers: (1, 1, 3), Cross-validated accuracy: 0.6613
Hidden layers: (1, 2, 1), Cross-validated accuracy: 0.5000
Hidden layers: (1, 2, 2), Cross-validated accuracy: 0.6129
Hidden layers: (1, 2, 3), Cross-validated accuracy: 0.6937
Hidden layers: (1, 3, 1), Cross-validated accuracy: 0.5000
Hidden layers: (1, 3, 2), Cross-validated accuracy: 0.7719
Hidden layers: (1, 3, 3), Cross-validated accuracy: 0.7724
Hidden layers: (2, 1, 1), Cross-validated accuracy: 0.7735
Hidden layers: (2, 1, 2), Cross-validated accuracy: 0.7800
Hidden layers: (2, 1, 3), Cross-validated accuracy: 0.7045
Hidden layers: (2, 2, 1), Cross-validated accuracy: 0.8037
Hidden layers: (2, 2, 2), Cross-validated accuracy: 0.7794
Hidden layers: (2, 2, 3), Cross-validated accuracy: 0.6920
Hidden layers: (2, 3, 1), Cross-validated accuracy: 0.7757
Hidden layers: (2, 3, 2), Cross-validated accuracy: 0.74

### Summary
* The target variable was highly imbalanced, so I evaluated the performance of both downsampling and upsampling using 5-fold cross-validation. The results show a clear difference between the two approaches.

* With downsampling, single-hidden-layer models achieved moderate accuracy while multi-layer models showed unstable performance and in many cases dropped to 40~50%. 

* In contrast, upsampling greatly improved model stability and predictive power. The best single-layer model reached 0.857 accuracy, and the best 3-hidden-layer architecture achieved 0.8614 making upsampling the superior method for this dataset. 

* Overall, small multi-layer networks performed competitively, but the most consistent improvements came from balancing the data through upsampling.

#### Key Findings

Upsampling significantly outperformed downsampling

* Best accuracies:

- Single hidden layer: 0.8571 (upsampled) -> 9 nodes

- Three hidden layers: 0.8614 (upsampled) -> (3, 2, 2)