In [5]:
%matplotlib inline

# Support
import numpy as np
# Data
from sklearn.datasets import make_classification

# Sampling
from sklearn.cross_validation import train_test_split, StratifiedKFold

# Models
# Linear
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.svm import SVC
#from sklearn.naive_bayes import GaussianNB
# Non-Linear
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

# Evaluation
from sklearn.metrics import precision_score, recall_score

# Plotting
import matplotlib.pyplot as plt

In [51]:
# Synthetic binary classification problem used by every experiment below.
# random_state is fixed (same seed as the StratifiedKFold splitter) so that
# re-running the notebook regenerates exactly the same data set.
X, y = make_classification(
    n_samples=10000,
    n_features=200,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=2016
)

In [52]:
def init(names=None):
    """Create empty per-model result containers.

    Parameters
    ----------
    names : iterable of hashable, optional
        Model labels to use as dictionary keys. When omitted, the
        module-level ``names`` list is looked up at *call* time. The
        previous ``names=names`` default was evaluated when the function
        was defined, which fails on a fresh kernel because ``names`` is
        only created in a later cell.

    Returns
    -------
    (recalls, precisions) : tuple of dict
        Two independent dictionaries mapping every name to its own new
        empty list.

    Raises
    ------
    NameError
        If ``names`` is omitted and no module-level ``names`` exists yet.
    """
    if names is None:
        try:
            names = globals()["names"]
        except KeyError:
            raise NameError("name 'names' is not defined")

    # Materialise once so a one-shot iterator can feed both dictionaries.
    labels = list(names)
    recalls = {label: [] for label in labels}
    precisions = {label: [] for label in labels}
    return recalls, precisions

In [53]:
# Candidate weights for the positive class: 1, 6, 11, ..., 96.
costs = np.arange(1, 100, 5)

# Each display label is written next to the estimator class it refers to and
# then split into the two parallel lists consumed by cost_sensitive().
# "Linear SVC" and "Non-linear SVC" both map to SVC; cost_sensitive() switches
# the kernel to linear based on the label.
names, estimators = (
    list(column)
    for column in zip(
        ("Logistic", LogisticRegression),
        ("Perceptron", Perceptron),
        ("Linear SVC", SVC),
        ("Decision Tree", DecisionTreeClassifier),
        ("Random Forest", RandomForestClassifier),
        ("Non-linear SVC", SVC),
    )
)

# Seeded, shuffled 5-fold stratified splitter over the labels y
# (legacy sklearn.cross_validation API: the object itself is iterated).
sss = StratifiedKFold(y, n_folds=5, shuffle=True, random_state=2016)

In [59]:
def cost_sensitive(estimators, names, costs):
    """Cross-validate each estimator under a range of positive-class weights.

    For every (name, estimator class) pair and every value in ``costs`` a
    fresh model is built, its ``class_weight`` is set to ``{1: cost}`` and it
    is fitted and scored on each fold of the module-level splitter ``sss``
    over the module-level data ``X`` / ``y``.

    Parameters
    ----------
    estimators : sequence of callables
        Zero-argument factories (estimator classes) producing a model.
    names : sequence of str
        Labels paired positionally with ``estimators``. The label
        ``'Linear SVC'`` additionally switches the model's kernel to linear.
    costs : iterable
        Weights to assign to class 1.

    Returns
    -------
    (precisions, recalls) : tuple of dict
        Each maps a name to a list with one entry per cost; every entry is
        the list of per-fold scores for that cost. Models without a
        ``class_weight`` attribute keep an empty list.
    """
    recalls, precisions = init(names)

    for name, estimator in zip(names, estimators):
        for cost in costs:
            model = estimator()
            if not hasattr(model, 'class_weight'):
                print("It doesn't support cost-sensitive learning")
                continue

            if name == 'Linear SVC':
                model.kernel = 'linear'
            # Only class 1 is re-weighted; the other class keeps weight 1.
            model.class_weight = {1: cost}

            pres = []
            recs = []
            for train_index, test_index in sss:
                # Index with the fold's own indices (the former X[train] /
                # X[test] referred to names that are never defined here).
                X_train, y_train = X[train_index], y[train_index]
                X_test, y_test = X[test_index], y[test_index]
                model.fit(X_train, y_train)
                # Predict once and reuse for both metrics.
                y_pred = model.predict(X_test)
                pres.append(precision_score(y_test, y_pred))
                recs.append(recall_score(y_test, y_pred))
            precisions[name].append(pres)
            recalls[name].append(recs)
    return precisions, recalls

In [60]:
# Run the full sweep: every configured model at every class-1 weight.
precisions, recalls = cost_sensitive(
    estimators=estimators,
    names=names,
    costs=costs
)

KeyboardInterrupt: 

In [56]:
# Display the collected precision scores (dict keyed by model name).
precisions

{'Decision Tree': [0.94899999999999995,
  0.94925373134328361,
  0.94543650793650791,
  0.95099999999999996,
  0.94317048853439678],
 'Linear SVC': [0.68611111111111112,
  0.68611111111111112,
  0.68611111111111112,
  0.68611111111111112,
  0.68611111111111112],
 'Logistic': [0.67595108695652173,
  0.67595108695652173,
  0.67595108695652173,
  0.67595108695652173,
  0.67595108695652173],
 'Non-linear SVC': [0.95854398382204242,
  0.95854398382204242,
  0.95854398382204242,
  0.95854398382204242,
  0.95854398382204242],
 'Perceptron': [0.6071863580998782,
  0.6071863580998782,
  0.6071863580998782,
  0.6071863580998782,
  0.6071863580998782],
 'Random Forest': [0.96649746192893404,
  0.97643442622950816,
  0.97553516819571862,
  0.97046843177189412,
  0.96780684104627768]}

In [57]:
# Display the collected recall scores (dict keyed by model name).
recalls

{'Decision Tree': [0.94146825396825395,
  0.9464285714285714,
  0.94543650793650791,
  0.94345238095238093,
  0.93849206349206349],
 'Linear SVC': [0.98015873015873012,
  0.98015873015873012,
  0.98015873015873012,
  0.98015873015873012,
  0.98015873015873012],
 'Logistic': [0.98710317460317465,
  0.98710317460317465,
  0.98710317460317465,
  0.98710317460317465,
  0.98710317460317465],
 'Non-linear SVC': [0.94047619047619047,
  0.94047619047619047,
  0.94047619047619047,
  0.94047619047619047,
  0.94047619047619047],
 'Perceptron': [0.98908730158730163,
  0.98908730158730163,
  0.98908730158730163,
  0.98908730158730163,
  0.98908730158730163],
 'Random Forest': [0.94444444444444442,
  0.94543650793650791,
  0.94940476190476186,
  0.94543650793650791,
  0.95436507936507942]}