Skip to content

Commit

Permalink
bugfix + added basic regression importance
Browse files Browse the repository at this point in the history
  • Loading branch information
benhamner committed Jan 21, 2015
1 parent de0f606 commit 27dc415
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
25 changes: 21 additions & 4 deletions src/importance.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ type ImportanceResults
best_scores::Vector{Float64}
end

function importances(x::Matrix{Float64}, y::Vector, opts::ClassificationModelOptions)
function importances(x::Matrix{Float64}, y::Vector, opts::SupervisedModelOptions)
num_features = size(x, 2)
num_splits = 5
imps = zeros(num_splits, num_features)
Expand All @@ -22,8 +22,8 @@ function importances(x::Matrix{Float64}, y::Vector, opts::ClassificationModelOpt
end

function single_importances(split::TrainTestSplit, opts::ClassificationModelOptions)
x_train, y_train = train_set(split)
x_test, y_test = test_set(split)
x_train, y_train = train_set_x_y(split)
x_test, y_test = test_set_x_y(split)
num_features = size(x_train, 2)
model = fit(x_train, y_train, opts)
predictions = vec(predict_probs(model, x_test)[:,2])
Expand All @@ -38,7 +38,24 @@ function single_importances(split::TrainTestSplit, opts::ClassificationModelOpti
importance, best_score
end

function importances(df::DataFrame, target_column::Symbol, opts::ClassificationModelOptions)
# Permutation-style feature importances for a regression model on one split.
#
# Unpacks the split with `train_set_x_y` / `test_set_x_y`, fits a model on the
# training pair with `fit(x_train, y_train, opts)`, and scores it on the test
# pair using root-mean-squared error (RMSE) of `predict(model, ...)`.
#
# For each feature column, the column of the test matrix is shuffled (all other
# columns left untouched), the model re-predicts, and the importance recorded
# for that feature is the resulting RMSE minus the unshuffled RMSE.
#
# Returns the tuple `(importance, best_score)` where `importance` has one entry
# per column of `x_train` and `best_score` is the unshuffled test RMSE.
function single_importances(split::TrainTestSplit, opts::RegressionModelOptions)
    x_train, y_train = train_set_x_y(split)
    x_test, y_test = test_set_x_y(split)
    model = fit(x_train, y_train, opts)

    # RMSE of the fitted model's predictions on `features` against `y_test`.
    rmse(features) = sqrt(mean((y_test - predict(model, features)).^2))

    # Baseline error on the untouched test features.
    best_score = rmse(x_test)

    num_features = size(x_train, 2)
    importance = zeros(num_features)
    for feature in 1:num_features
        # Work on a fresh copy so only this one column is permuted.
        shuffled_x = copy(x_test)
        shuffled_x[:, feature] = shuffle(x_test[:, feature])
        # Increase in error caused by destroying this feature's information.
        importance[feature] = rmse(shuffled_x) - best_score
    end
    return importance, best_score
end

function importances(df::DataFrame, target_column::Symbol, opts::SupervisedModelOptions)
y = array(df[target_column])
if typeof(opts) <: RegressionModelOptions
y *= 1.0
Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ tests = [
"classification",
"common",
"decision_tree",
"importance",
"metrics",
"neural_net",
"pipeline",
Expand Down

0 comments on commit 27dc415

Please sign in to comment.