In [4]:
from numpy import mean
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [5]:
# Build a synthetic two-feature binary classification problem with a heavy
# class imbalance (weights=[0.99] gives the first class ~99% of the samples).
# No label noise is injected (flip_y=0.0) and the seed is fixed so the same
# dataset is produced on every run.
X, y = make_classification(
    n_samples=10000,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.99],
    flip_y=0.0,
    random_state=1,
)

# Cost Sensitive Logistic Regression

In [6]:
# Baseline: plain (unweighted) logistic regression, scored by ROC AUC with
# cross_val_score's default cross-validation splitting.
logreg = LogisticRegression(solver='lbfgs')
scores = cross_val_score(estimator=logreg, X=X, y=y, scoring='roc_auc')
print("Average cross-validation score: {:.2f}".format(mean(scores)))

Average cross-validation score: 0.94


In [7]:
# Same evaluation as the unweighted model, but with class_weight='balanced'
# so the estimator reweights the classes instead of treating them equally.
logreg = LogisticRegression(class_weight='balanced', solver='lbfgs')
scores = cross_val_score(estimator=logreg, X=X, y=y, scoring='roc_auc')
print("Average cross-validation score : {:.2f}".format(mean(scores)))

Average cross-validation score : 0.95


In [8]:
# Grid-search the per-class weights of a logistic regression model.
# Each candidate maps class label -> weight, ranging from strongly favouring
# class 0 (100:1) to strongly favouring class 1 (1:100).
logreg = LogisticRegression(solver='lbfgs')
param_grid = {
    'class_weight': [
        {0: 100, 1: 1},
        {0: 10, 1: 1},
        {0: 1, 1: 1},
        {0: 1, 1: 10},
        {0: 1, 1: 100},
    ],
}

# 5-fold cross-validated search; candidates are ranked by ROC AUC.
grid_logreg = GridSearchCV(
    estimator=logreg,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=2,
)
grid_logreg.fit(X, y)

# Report the winning weighting and its mean cross-validated ROC AUC.
print(
    "Best parameters: {}".format(grid_logreg.best_params_),
    "Best Mean cross-validation score: {:.2f}".format(grid_logreg.best_score_),
    sep="\n",
)

Best parameters: {'class_weight': {0: 1, 1: 100}}
Best Mean cross-validation score: 0.95


In [9]:
# NOTE(review): this cell uses the same estimator, the same class_weight grid
# and the same search settings as the logistic-regression grid search in the
# preceding cell, so it recomputes an identical result. Consider removing it
# or changing it to explore something new.
logreg = LogisticRegression(solver='lbfgs')

# Candidate class-weight mappings (class label -> weight).
param_grid = {
    'class_weight': [
        {0: 100, 1: 1},
        {0: 10, 1: 1},
        {0: 1, 1: 1},
        {0: 1, 1: 10},
        {0: 1, 1: 100},
    ]
}

# 5-fold cross-validated search; candidates are ranked by ROC AUC.
grid_logreg = GridSearchCV(
    estimator=logreg,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=2,
)
grid_logreg.fit(X, y)

print(
    "Best parameters: {}".format(grid_logreg.best_params_),
    "Best Mean cross-validation score: {:.2f}".format(grid_logreg.best_score_),
    sep="\n",
)

Best parameters: {'class_weight': {0: 1, 1: 100}}
Best Mean cross-validation score: 0.95


# Cost Sensitive SVM

In [10]:
# Grid-search the per-class weights of a support vector classifier built
# with its default settings.
svc = SVC()

# Candidate class-weight mappings (class label -> weight), from strongly
# favouring class 0 (100:1) to strongly favouring class 1 (1:100).
param_grid = {
    'class_weight': [
        {0: 100, 1: 1},
        {0: 10, 1: 1},
        {0: 1, 1: 1},
        {0: 1, 1: 10},
        {0: 1, 1: 100},
    ],
}

# 5-fold cross-validated search; candidates are ranked by ROC AUC.
grid_svc = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=2,
)
grid_svc.fit(X, y)

print(
    "Best parameters: {}".format(grid_svc.best_params_),
    "Best Mean cross-validation score: {:.2f}".format(grid_svc.best_score_),
    sep="\n",
)

Best parameters: {'class_weight': {0: 1, 1: 100}}
Best Mean cross-validation score: 0.95


# Cost Sensitive Decision Tree

In [11]:
# Grid-search the per-class weights of a decision tree classifier.
#
# The tree is seeded explicitly: scikit-learn randomly permutes the features
# at every split, so an unseeded DecisionTreeClassifier can pick different
# splits (and hence report different scores / best parameters) from one run
# to the next. Fixing random_state makes this cell reproducible under
# "Restart Kernel -> Run All".
# NOTE: seeding may shift the scores slightly relative to earlier unseeded
# runs of this cell.
dtree = DecisionTreeClassifier(random_state=42)

# Candidate class-weight mappings (class label -> weight), from strongly
# favouring class 0 (100:1) to strongly favouring class 1 (1:100).
param_grid = {
    'class_weight': [
        {0: 100, 1: 1},
        {0: 10, 1: 1},
        {0: 1, 1: 1},
        {0: 1, 1: 10},
        {0: 1, 1: 100},
    ],
}

# 5-fold cross-validated search; candidates are ranked by ROC AUC.
grid_dtree = GridSearchCV(dtree, param_grid, cv=5, n_jobs=2, scoring='roc_auc')
grid_dtree.fit(X, y)

# Report the winning weighting and its mean cross-validated ROC AUC.
print("Best parameters: {}".format(grid_dtree.best_params_))
print("Best Mean cross-validation score: {:.2f}".format(grid_dtree.best_score_))

Best parameters: {'class_weight': {0: 100, 1: 1}}
Best Mean cross-validation score: 0.79


# Cost Sensitive Random Forest Classifier

In [13]:
# Grid-search the per-class weights of a random forest; the forest is seeded
# (random_state=42) so repeated runs build the same trees.
rf = RandomForestClassifier(random_state=42)

# Candidate class-weight mappings (class label -> weight), from strongly
# favouring class 0 (100:1) to strongly favouring class 1 (1:100).
param_grid = {
    'class_weight': [
        {0: 100, 1: 1},
        {0: 10, 1: 1},
        {0: 1, 1: 1},
        {0: 1, 1: 10},
        {0: 1, 1: 100},
    ],
}

# 5-fold cross-validated search; candidates are ranked by ROC AUC.
grid_rf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=2,
)
grid_rf.fit(X, y)

print(
    "Best parameters: {}".format(grid_rf.best_params_),
    "Best Mean cross-validation score: {:.2f}".format(grid_rf.best_score_),
    sep="\n",
)

Best parameters: {'class_weight': {0: 1, 1: 1}}
Best Mean cross-validation score: 0.90
