forked from myui/hivemall
-
Notifications
You must be signed in to change notification settings - Fork 0
news20 binary classification #2 (CW, AROW, SCW)
Makoto YUI edited this page Jul 2, 2014
·
8 revisions
-- Session setup: switch to the news20 database, then reload the Hivemall jar and run its definitions script.
use news20;
-- Remove the jar from the session first so that the add below picks up the current file.
delete jar /home/myui/tmp/hivemall.jar;
add jar /home/myui/tmp/hivemall.jar;
-- NOTE(review): presumably this script registers the Hivemall functions used below (train_*, addBias, voted_avg) - confirm.
source /home/myui/tmp/define-all.hive;
-- Build the CW model table for the news20 binary task.
-- The inner query runs train_cw over news20b_train_x3 and emits (feature, weight, covar) rows.
-- The outer query reduces them to a single float weight per feature via voted_avg.
-- The if exists clause keeps the script re-runnable even when the table is absent and
-- hive.exec.drop.ignorenonexistent is disabled.
drop table if exists news20b_cw_model1;

create table news20b_cw_model1 as
select
  feature,
  cast(voted_avg(weight) as float) as weight
from (
  select
    -- hivemall v0.1 emits two columns only:
    --   train_cw(addBias(features), label) as (feature, weight)
    -- hivemall v0.2 or later emits a third column (covar):
    train_cw(addBias(features), label) as (feature, weight, covar)
  from
    news20b_train_x3
) t
group by
  feature;
-- Score every test row with the CW model.
-- total_weight is the sum, over the features of a row, of model weight times feature value.
-- label is 1 when that sum is positive and -1 otherwise.
-- The left outer join keeps test features that have no model entry. Their product is NULL
-- and sum() skips NULL inputs.
create or replace view news20b_cw_predict1 as
select
  tst.rowid,
  sum(mdl.weight * tst.value) as total_weight,
  case
    when sum(mdl.weight * tst.value) > 0.0 then 1
    else -1
  end as label
from
  news20b_test_exploded tst
  left outer join news20b_cw_model1 mdl
    on (tst.feature = mdl.feature)
group by
  tst.rowid;
-- Pair the label stored in news20b_test (actual) with the label produced by
-- news20b_cw_predict1 (predicted), matched on rowid.
create or replace view news20b_cw_submit1 as
select
  truth.rowid,
  truth.label as actual,
  pred.label as predicted
from
  news20b_test truth
  join news20b_cw_predict1 pred
    on (truth.rowid = pred.rowid);
-- Accuracy of the CW model: number of rows whose prediction matches, divided by 4996.
-- NOTE(review): 4996 is hard-coded, presumably the row count of news20b_test - confirm.
select
  count(1) / 4996
from
  news20b_cw_submit1
where
  predicted = actual;
Accuracy (CW): 0.9655724579663731
-- Remove the CW artifacts. The views are dropped first because they read from the model table.
-- The if exists clause turns each drop into a no-op when the object is already gone.
drop view if exists news20b_cw_submit1;
drop view if exists news20b_cw_predict1;
drop table if exists news20b_cw_model1;
-- Build the AROW model table for the news20 binary task.
-- The inner query runs train_arow over news20b_train_x3 and emits (feature, weight, covar) rows.
-- The outer query reduces them to a single float weight per feature via voted_avg.
-- The if exists clause keeps the script re-runnable even when the table is absent and
-- hive.exec.drop.ignorenonexistent is disabled.
drop table if exists news20b_arow_model1;

create table news20b_arow_model1 as
select
  feature,
  cast(voted_avg(weight) as float) as weight
from (
  select
    -- hivemall v0.1 emits two columns only:
    --   train_arow(addBias(features), label) as (feature, weight)
    -- hivemall v0.2 or later emits a third column (covar):
    train_arow(addBias(features), label) as (feature, weight, covar)
  from
    news20b_train_x3
) t
group by
  feature;
-- Score every test row with the AROW model.
-- total_weight is the sum, over the features of a row, of model weight times feature value.
-- label is 1 when that sum is positive and -1 otherwise.
-- The left outer join keeps test features that have no model entry. Their product is NULL
-- and sum() skips NULL inputs.
create or replace view news20b_arow_predict1 as
select
  tst.rowid,
  sum(mdl.weight * tst.value) as total_weight,
  case
    when sum(mdl.weight * tst.value) > 0.0 then 1
    else -1
  end as label
from
  news20b_test_exploded tst
  left outer join news20b_arow_model1 mdl
    on (tst.feature = mdl.feature)
group by
  tst.rowid;
-- Pair the label stored in news20b_test (actual) with the label produced by
-- news20b_arow_predict1 (predicted), matched on rowid.
create or replace view news20b_arow_submit1 as
select
  truth.rowid,
  truth.label as actual,
  pred.label as predicted
from
  news20b_test truth
  join news20b_arow_predict1 pred
    on (truth.rowid = pred.rowid);
-- Accuracy of the AROW model: number of rows whose prediction matches, divided by 4996.
-- NOTE(review): 4996 is hard-coded, presumably the row count of news20b_test - confirm.
select
  count(1) / 4996
from
  news20b_arow_submit1
where
  predicted = actual;
Accuracy (AROW): 0.9659727782225781
-- Remove the AROW artifacts. The views are dropped first because they read from the model table.
-- The if exists clause turns each drop into a no-op when the object is already gone.
drop view if exists news20b_arow_submit1;
drop view if exists news20b_arow_predict1;
drop table if exists news20b_arow_model1;
-- Build the SCW model table for the news20 binary task.
-- The inner query runs train_scw over news20b_train_x3 and emits (feature, weight, covar) rows.
-- The outer query reduces them to a single float weight per feature via voted_avg.
-- The if exists clause keeps the script re-runnable even when the table is absent and
-- hive.exec.drop.ignorenonexistent is disabled.
drop table if exists news20b_scw_model1;

create table news20b_scw_model1 as
select
  feature,
  cast(voted_avg(weight) as float) as weight
from (
  select
    -- hivemall v0.1 emits two columns only:
    --   train_scw(addBias(features), label) as (feature, weight)
    -- hivemall v0.2 or later emits a third column (covar):
    train_scw(addBias(features), label) as (feature, weight, covar)
  from
    news20b_train_x3
) t
group by
  feature;
-- Score every test row with the SCW model.
-- total_weight is the sum, over the features of a row, of model weight times feature value.
-- label is 1 when that sum is positive and -1 otherwise.
-- The left outer join keeps test features that have no model entry. Their product is NULL
-- and sum() skips NULL inputs.
create or replace view news20b_scw_predict1 as
select
  tst.rowid,
  sum(mdl.weight * tst.value) as total_weight,
  case
    when sum(mdl.weight * tst.value) > 0.0 then 1
    else -1
  end as label
from
  news20b_test_exploded tst
  left outer join news20b_scw_model1 mdl
    on (tst.feature = mdl.feature)
group by
  tst.rowid;
-- Pair the label stored in news20b_test (actual) with the label produced by
-- news20b_scw_predict1 (predicted), matched on rowid.
create or replace view news20b_scw_submit1 as
select
  truth.rowid,
  truth.label as actual,
  pred.label as predicted
from
  news20b_test truth
  join news20b_scw_predict1 pred
    on (truth.rowid = pred.rowid);
-- Accuracy of the SCW model: number of rows whose prediction matches, divided by 4996.
-- NOTE(review): 4996 is hard-coded, presumably the row count of news20b_test - confirm.
select
  count(1) / 4996
from
  news20b_scw_submit1
where
  predicted = actual;
Accuracy (SCW1): 0.9661729383506805
-- Remove the SCW artifacts. The views are dropped first because they read from the model table.
-- The if exists clause turns each drop into a no-op when the object is already gone.
drop view if exists news20b_scw_submit1;
drop view if exists news20b_scw_predict1;
drop table if exists news20b_scw_model1;
-- Build the SCW2 model table for the news20 binary task.
-- The inner query runs train_scw2 over news20b_train_x3 and emits (feature, weight, covar) rows.
-- The outer query reduces them to a single float weight per feature via voted_avg.
-- The if exists clause keeps the script re-runnable even when the table is absent and
-- hive.exec.drop.ignorenonexistent is disabled.
drop table if exists news20b_scw2_model1;

create table news20b_scw2_model1 as
select
  feature,
  cast(voted_avg(weight) as float) as weight
from (
  select
    -- hivemall v0.1 emits two columns only:
    --   train_scw2(addBias(features), label) as (feature, weight)
    -- hivemall v0.2 or later emits a third column (covar):
    train_scw2(addBias(features), label) as (feature, weight, covar)
  from
    news20b_train_x3
) t
group by
  feature;
-- Score every test row with the SCW2 model.
-- total_weight is the sum, over the features of a row, of model weight times feature value.
-- label is 1 when that sum is positive and -1 otherwise.
-- The left outer join keeps test features that have no model entry. Their product is NULL
-- and sum() skips NULL inputs.
create or replace view news20b_scw2_predict1 as
select
  tst.rowid,
  sum(mdl.weight * tst.value) as total_weight,
  case
    when sum(mdl.weight * tst.value) > 0.0 then 1
    else -1
  end as label
from
  news20b_test_exploded tst
  left outer join news20b_scw2_model1 mdl
    on (tst.feature = mdl.feature)
group by
  tst.rowid;
-- Pair the label stored in news20b_test (actual) with the label produced by
-- news20b_scw2_predict1 (predicted), matched on rowid.
create or replace view news20b_scw2_submit1 as
select
  truth.rowid,
  truth.label as actual,
  pred.label as predicted
from
  news20b_test truth
  join news20b_scw2_predict1 pred
    on (truth.rowid = pred.rowid);
-- Accuracy of the SCW2 model: number of rows whose prediction matches, divided by 4996.
-- NOTE(review): 4996 is hard-coded, presumably the row count of news20b_test - confirm.
select
  count(1) / 4996
from
  news20b_scw2_submit1
where
  predicted = actual;
Accuracy (SCW2): 0.9579663730984788
-- Remove the SCW2 artifacts. The views are dropped first because they read from the model table.
-- The if exists clause turns each drop into a no-op when the object is already gone.
drop view if exists news20b_scw2_submit1;
drop view if exists news20b_scw2_predict1;
drop table if exists news20b_scw2_model1;
--
Algorithm | Accuracy |
---|---|
Perceptron | 0.9459567654123299 |
SCW2 | 0.9579663730984788 |
PA2 | 0.9597678142514011 |
PA1 | 0.9601681345076061 |
PA | 0.9603682946357086 |
CW | 0.9655724579663731 |
AROW | 0.9659727782225781 |
SCW1 | 0.9661729383506805 |
My recommendation is AROW for classification.