/
exploringPipelineBuild.py
37 lines (29 loc) · 1.12 KB
/
exploringPipelineBuild.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from sklearn import linear_model
import sklearn.cross_validation
import sklearn.metrics
import sklearn.ensemble
import sklearn.preprocessing
import sklearn.pipeline
import modelTools
import trainingData
# Load the training data, then let modelTools add its alive-or-dead column.
# Order matters: the column is added before predictors and target are built.
byRepo = trainingData.load()
modelTools.addAliveOrDeadColumn(byRepo)

# Predictors (returned together with their column names) and the target.
X, colNames = modelTools.makePredictors(byRepo)
y = modelTools.makeTarget(byRepo)

# Evaluator bound to this X/y pair; it is called with a learner below.
evaluateModel = modelTools.makeModelEvaluator(X, y)
# Baseline: logistic regression handed to the evaluator without a scaling step.
# NOTE(review): reportLogistic is computed but never printed in this script.
logistic = linear_model.LogisticRegression()
reportLogistic = evaluateModel(learner=logistic)

# Same kind of learner, wrapped in a pipeline that runs StandardScaler first.
# The step name 'logistic' is what parameter paths such as logistic__C refer to.
scaledLogistic = sklearn.pipeline.Pipeline([
    ('scaler', sklearn.preprocessing.StandardScaler()),
    ('logistic', linear_model.LogisticRegression()),
])
reportScaledLogistic = evaluateModel(scaledLogistic)

# Single-argument print(...) emits the same text under Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(reportScaledLogistic.classification_report())
# Disabled experiment: sweep the pipeline's logistic__C parameter and print AUC per value.
# Cvalues = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
# results = []
# for C in Cvalues:
#     scaledLogistic.set_params(logistic__C=C)
#     results.append(evaluateModel(scaledLogistic))
# for c, result in zip(Cvalues, results):
#     print c, ': auc=', result.auc()
# Random forest built with no constructor arguments (library defaults),
# evaluated through the same evaluateModel callable as the other learners.
reportForest = evaluateModel(learner=sklearn.ensemble.RandomForestClassifier())

# Single-argument print(...) emits the same text under Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(reportForest.classification_report())