# 4. Classification (Campaign response)

In [218]:
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [219]:
path = "https://raw.githubusercontent.com/ekaratnida/Automated_Tools-/keep/Mini-bads3/cust_sensitivity.csv"
df = pd.read_csv(path)
df.head()

Unnamed: 0,CUST_CODE,BASKET_ID,PROD_CODE,STORE_CODE,price_sensitivity
0,CUST0000001098,2,8,1,1
1,CUST0000001392,1,17,1,0
2,CUST0000001437,1,2,1,0
3,CUST0000002218,1,8,1,0
4,CUST0000002678,1,2,1,0


In [220]:
x = df.iloc[:,1:-1]
print(x.shape)

(1860, 3)


In [221]:
y = df.iloc[:,-1:]
print(y.shape)

(1860, 1)


In [222]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
print(y_train.value_counts())
print(y_test.value_counts())

price_sensitivity
0                    1169
1                     319
dtype: int64
price_sensitivity
0                    292
1                     80
dtype: int64


## Apply negative downsampling

In [223]:
y_train_pos = y_train[y_train['price_sensitivity']==1]
y_train_neg = y_train[y_train['price_sensitivity']==0]
#print("origin ", y_train_neg)
y_train_neg = y_train_neg.sample(int(y_train_neg.shape[0] * 0.30), random_state=42)
#print("down ", y_train_neg)
y_train_new = pd.concat([y_train_pos, y_train_neg])
x_train_new = x_train.loc[y_train_new.index]

print(y_train_new.value_counts())
print(y_test.value_counts())

price_sensitivity
0                    350
1                    319
dtype: int64
price_sensitivity
0                    292
1                     80
dtype: int64


<H1> Train model

1. Logistic Regression

In [224]:
model = LogisticRegression()
train_result = model.fit(x_train_new, y_train_new)

#Evaluation
y_pred = train_result.predict(x_test)
#print(y_pred)

confusion_matrix_train = confusion_matrix(y_test, y_pred)
print(confusion_matrix_train)

TruePositive = confusion_matrix_train[1,1]
print("TP=",TruePositive)

FalseNegative = confusion_matrix_train[1,0]
print("FN=",FalseNegative)

TrueNegative = confusion_matrix_train[0,0]
print("TN=",TrueNegative)

FalsePositive = confusion_matrix_train[0,1]
print("FP=",FalsePositive)

print(classification_report(y_test, y_pred, target_names=target_names))

[[143 149]
 [ 41  39]]
TP= 39
FN= 41
TN= 143
FP= 149
                     precision    recall  f1-score   support

not_sensitivity (0)       0.78      0.49      0.60       292
    sensitivity (1)       0.21      0.49      0.29        80

           accuracy                           0.49       372
          macro avg       0.49      0.49      0.45       372
       weighted avg       0.65      0.49      0.53       372



2. Grid search

In [225]:
from sklearn.model_selection import GridSearchCV
grid={'C': [0.0001,0.001,0.01,0.1,1,10,1e2,1e3,1e6,1e9,1e12], "penalty":["l1","l2"], "solver":["newton-cg", "lbfgs", "liblinear", "sag", "saga"]}# l1 lasso l2 ridge
logreg=LogisticRegression()
model=GridSearchCV(logreg, grid, cv=11)
train_result = model.fit(x_train_new,y_train_new)
print("tuned hpyerparameters :(best parameters) ",train_result.best_params_)
print("accuracy :",train_result.best_score_)

#Evaluation
y_pred = train_result.predict(x_test)
#print(y_pred)

confusion_matrix_train = confusion_matrix(y_test, y_pred)
print(confusion_matrix_train)

TruePositive = confusion_matrix_train[1,1]
print("TP=",TruePositive)

FalseNegative = confusion_matrix_train[1,0]
print("FN=",FalseNegative)

TrueNegative = confusion_matrix_train[0,0]
print("TN=",TrueNegative)

FalsePositive = confusion_matrix_train[0,1]
print("FP=",FalsePositive)

print(classification_report(y_test, y_pred, target_names=target_names))

tuned hpyerparameters :(best parameters)  {'C': 10, 'penalty': 'l2', 'solver': 'newton-cg'}
accuracy : 0.5696224540486835
[[142 150]
 [ 40  40]]
TP= 40
FN= 40
TN= 142
FP= 150
                     precision    recall  f1-score   support

not_sensitivity (0)       0.78      0.49      0.60       292
    sensitivity (1)       0.21      0.50      0.30        80

           accuracy                           0.49       372
          macro avg       0.50      0.49      0.45       372
       weighted avg       0.66      0.49      0.53       372



Grid search for multiple algorithms

In [234]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# Just initialize the pipeline with any estimator you like
pipe = Pipeline(steps=[('estimator', SVC())])

# Add a dict of estimator and estimator related parameters in this list
params_grid = [{
'estimator':[SVC()],
'estimator__C': [1, 10, 100, 1000],
'estimator__gamma': [0.001, 0.0001],
},
{
'estimator': [DecisionTreeClassifier()],
'estimator__max_depth': [1,2,3,4,5],
'estimator__max_features': [None, "auto", "sqrt", "log2"],
}
# {'estimator':[Any_other_estimator_you_want],
# 'estimator__valid_param_of_your_estimator':[valid_values]

 ]

model = GridSearchCV(pipe, params_grid)

train_result = model.fit(x_train_new,y_train_new)
print("tuned hpyerparameters :(best parameters) ",train_result.best_params_)
print("accuracy :",train_result.best_score_)

#Evaluation
y_pred = train_result.predict(x_test)
#print(y_pred)

confusion_matrix_train = confusion_matrix(y_test, y_pred)
print(confusion_matrix_train)

TruePositive = confusion_matrix_train[1,1]
print("TP=",TruePositive)

FalseNegative = confusion_matrix_train[1,0]
print("FN=",FalseNegative)

TrueNegative = confusion_matrix_train[0,0]
print("TN=",TrueNegative)

FalsePositive = confusion_matrix_train[0,1]
print("FP=",FalsePositive)

print(classification_report(y_test, y_pred, target_names=target_names))

tuned hpyerparameters :(best parameters)  {'estimator': DecisionTreeClassifier(max_depth=4, max_features='log2'), 'estimator__max_depth': 4, 'estimator__max_features': 'log2'}
accuracy : 0.5875434855796207
[[154 138]
 [ 45  35]]
TP= 35
FN= 45
TN= 154
FP= 138
                     precision    recall  f1-score   support

not_sensitivity (0)       0.77      0.53      0.63       292
    sensitivity (1)       0.20      0.44      0.28        80

           accuracy                           0.51       372
          macro avg       0.49      0.48      0.45       372
       weighted avg       0.65      0.51      0.55       372



# Exercise and Homework
# Dataset
https://raw.githubusercontent.com/ekaratnida/Applied-machine-learning/master/Dataset/heart.csv