In [5]:
%matplotlib inline

# Support
import numpy as np
# Data
from sklearn.datasets import make_classification

# Sampling
from sklearn.cross_validation import train_test_split, StratifiedKFold

# Models
# Linear
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.svm import SVC
#from sklearn.naive_bayes import GaussianNB
# Non-Linear
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

# Evaluation
from sklearn.metrics import precision_score, recall_score

# Plotting
import matplotlib.pyplot as plt

In [6]:
# Fixed seed so the synthetic dataset -- and every score computed from it --
# is identical on each Restart & Run All.
RANDOM_STATE = 42

# Binary classification problem: 10,000 samples, 10 features, one cluster per class.
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=RANDOM_STATE,
)

In [17]:
def init(names=None):
    """Create empty per-model score containers.

    Parameters
    ----------
    names : iterable of hashable, optional
        Model labels to create entries for. When omitted, the notebook-level
        ``names`` list is looked up at call time (not at definition time, so
        this cell can be run before the cell that defines ``names``).

    Returns
    -------
    (recalls, precisions) : tuple of dict
        Two independent dicts mapping every label to its own empty list.

    Raises
    ------
    NameError
        If ``names`` is omitted and no notebook-level ``names`` exists yet.
    """
    if names is None:
        # Late binding: resolve the global only when the caller did not pass one.
        try:
            names = globals()["names"]
        except KeyError:
            raise NameError(
                "init() was called without `names` before the notebook-level "
                "`names` list was defined"
            )

    # Materialise once so a one-shot iterator can feed both dicts.
    labels = list(names)
    recalls = {label: [] for label in labels}
    precisions = {label: [] for label in labels}
    return recalls, precisions

In [35]:
# Class-1 weights to sweep: 1, 6, 11, ..., 96.
costs = np.arange(1, 100, 5)

# Display labels. names[i] labels estimators[i], so both lists must keep the
# same length and order (zip() would silently drop unmatched entries).
names = [
    "Logistic",
    "Perceptron",
    "Linear SVC",
    "Decision Tree",
    "Random Forest",
    "Non-linear SVC"
]

# Estimator classes, instantiated with no arguments by cost_sensitive.
estimators = [
    LogisticRegression,
    Perceptron,
    SVC,  # cost_sensitive switches the "Linear SVC" entry to kernel='linear'
    DecisionTreeClassifier,
    RandomForestClassifier,
    SVC,  # left on its default kernel; this is the "Non-linear SVC" entry
]

assert len(names) == len(estimators), "names and estimators must be parallel lists"

# 5-fold stratified splitter built with the legacy sklearn.cross_validation
# API: the object is iterated directly to get (train_indices, test_indices).
sss = StratifiedKFold(y = y, n_folds = 5)

In [36]:
def cost_sensitive(estimators, names, costs):
    """Sweep the class-1 weight for each estimator and record cross-validated scores.

    For every (name, estimator) pair and every value in ``costs``, a fresh
    model is created with ``class_weight={0: 1, 1: cost}``, fitted on each
    training fold produced by the notebook-level splitter ``sss`` and scored
    on the matching test fold of the notebook-level data ``X`` / ``y``.

    Parameters
    ----------
    estimators : iterable of callables
        Zero-argument constructors returning an object with ``fit``,
        ``predict`` and ``set_params``.
    names : iterable of str
        Labels parallel to ``estimators``. The entry labelled
        ``'Linear SVC'`` is configured with ``kernel='linear'``.
    costs : iterable of numbers
        Weights to assign to class 1; class 0 is always weighted 1.

    Returns
    -------
    (precisions, recalls) : tuple of dict
        Each maps a label to a list with one entry per value in ``costs``
        (same order): the mean score across the folds. Estimators without a
        ``class_weight`` attribute are reported once and keep an empty list.
    """
    recalls, precisions = init(names)

    for name, estimator in zip(names, estimators):
        # Support for class weights does not depend on the cost, so check once.
        if not hasattr(estimator(), 'class_weight'):
            print("It doesn't support cost-sensitive learning")
            continue

        for cost in costs:
            model = estimator()
            if name == 'Linear SVC':
                model.set_params(kernel='linear')
            model.set_params(class_weight={0: 1, 1: cost})

            fold_precisions = []
            fold_recalls = []
            for train_index, test_index in sss:
                model.fit(X[train_index], y[train_index])
                # Predict once per fold and reuse the result for both metrics.
                y_pred = model.predict(X[test_index])
                y_true = y[test_index]
                fold_precisions.append(precision_score(y_true, y_pred))
                fold_recalls.append(recall_score(y_true, y_pred))

            # One aggregated point per cost, so results line up with ``costs``.
            precisions[name].append(np.mean(fold_precisions))
            recalls[name].append(np.mean(fold_recalls))

    return precisions, recalls

In [None]:
# Run the cost sweep for every configured estimator; cost_sensitive returns
# its two result dicts in the order (precisions, recalls).
precisions, recalls = cost_sensitive(
    estimators=estimators,
    names=names,
    costs=costs,
)