In [1]:
import pandas as pd
import numpy as np
from scipy.stats import mode
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score

In [2]:
def load_true_values(n_folds=10):
    """Load the "sarc" column of every per-fold test file.

    Reads data_test_kf_<i>.csv (UTF-8) from the current working directory
    for i in 0 .. n_folds - 1.

    Parameters
    ----------
    n_folds : int, optional
        Number of fold files to read. Defaults to 10, the value that was
        previously hard-coded.

    Returns
    -------
    list of pandas.Series
        One "sarc" column per fold, in fold order.
    """
    return [
        pd.read_csv("data_test_kf_" + str(fold) + ".csv", encoding="utf-8")["sarc"]
        for fold in range(n_folds)
    ]

In [3]:
def load_probas(alg, n_folds=10):
    """Load the per-fold probability files written for classifier `alg`.

    Reads <alg>_y_prb_kf_<i>.csv for i in 0 .. n_folds - 1. Each file is
    parsed as header-less CSV whose first column is the index and whose
    second column holds the values (the layout Series.from_csv expected).

    pd.Series.from_csv was deprecated and later removed from pandas, so the
    equivalent pd.read_csv call is used; only the values are kept, hence the
    index parsing options of from_csv are irrelevant here.

    Parameters
    ----------
    alg : str
        File-name prefix identifying the classifier (e.g. "bm").
    n_folds : int, optional
        Number of fold files to read. Defaults to 10.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array of values per fold, in fold order.
    """
    y_probas = []
    for fold in range(n_folds):
        path = alg + "_y_prb_kf_" + str(fold) + ".csv"
        frame = pd.read_csv(path, header=None, index_col=0)
        # First remaining column == the Series that from_csv used to return.
        y_probas.append(frame.iloc[:, 0].values)
    return y_probas

In [4]:
def load_predicts(alg, n_folds=10):
    """Load the per-fold prediction files written for classifier `alg`.

    Reads <alg>_y_prd_kf_<i>.csv for i in 0 .. n_folds - 1. Each file is
    parsed as header-less CSV whose first column is the index and whose
    second column holds the predicted values (the layout Series.from_csv
    expected).

    pd.Series.from_csv was deprecated and later removed from pandas, so the
    equivalent pd.read_csv call is used; only the values are kept.

    Parameters
    ----------
    alg : str
        File-name prefix identifying the classifier (e.g. "new").
    n_folds : int, optional
        Number of fold files to read. Defaults to 10.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array of predictions per fold, in fold order.
    """
    y_predicts = []
    for fold in range(n_folds):
        path = alg + "_y_prd_kf_" + str(fold) + ".csv"
        frame = pd.read_csv(path, header=None, index_col=0)
        # First remaining column == the Series that from_csv used to return.
        y_predicts.append(frame.iloc[:, 0].values)
    return y_predicts

In [5]:
# True "sarc" labels: one Series per data_test_kf_<i>.csv file.
y_tests = load_true_values()


In [6]:
# Per-fold outputs stored under the "bm" file prefix.
# NOTE(review): y_proba_bm is not referenced by any cell visible here.
y_proba_bm = load_probas("bm")
y_pr_bm = load_predicts("bm")

In [7]:
# Per-fold outputs stored under the "con" file prefix.
# NOTE(review): y_proba_con is not referenced by any cell visible here.
y_proba_con = load_probas("con")
y_pr_con = load_predicts("con")

In [8]:
# Per-fold outputs stored under the "bag" file prefix.
# NOTE(review): y_proba_bag is not referenced by any cell visible here.
y_proba_bag = load_probas("bag")
y_pr_bag = load_predicts("bag")

In [9]:
# Per-fold outputs stored under the "nov" file prefix.
# NOTE(review): y_proba_nov is not referenced by any cell visible here.
y_proba_nov = load_probas("nov")
y_pr_nov = load_predicts("nov")

In [10]:
# Per-fold predictions stored under the "new" file prefix (no probabilities
# are loaded for this prefix).
y_pr_new = load_predicts("new")

In [11]:
def and_group(l):
    """Logical AND over votes: 0 if any vote equals 0, otherwise 1.

    An empty iterable yields 1. Values other than 0 are treated as
    "not a veto", exactly like 1.
    """
    return 0 if any(vote == 0 for vote in l) else 1

def or_group(l):
    """Logical OR over votes: 1 if any vote equals 1, otherwise 0.

    An empty iterable yields 0. Only the exact value 1 counts as a
    positive vote.
    """
    return 1 if any(vote == 1 for vote in l) else 0

In [12]:
def get_and_predictions(l):
    """Combine several classifiers' fold predictions with a strict AND.

    Parameters
    ----------
    l : list
        One entry per classifier; each entry is a list of per-fold
        prediction arrays. Within a fold all classifiers must supply arrays
        of the same length.

    Returns
    -------
    list of numpy.ndarray
        One int64 array per fold. A sample is 0 if any classifier voted 0
        and 1 otherwise (the same rule as and_group).
    """
    predictions = []
    for fold_votes in zip(*l):
        # Rows are classifiers, columns are samples.
        votes = np.array(fold_votes)
        # Vectorised reduction instead of a Python callback per sample.
        predictions.append((votes != 0).all(axis=0).astype(np.int64))
    return predictions

def get_or_predictions(l):
    """Combine several classifiers' fold predictions with a logical OR.

    Parameters
    ----------
    l : list
        One entry per classifier; each entry is a list of per-fold
        prediction arrays. Within a fold all classifiers must supply arrays
        of the same length.

    Returns
    -------
    list of numpy.ndarray
        One int64 array per fold. A sample is 1 if any classifier voted
        exactly 1 and 0 otherwise (the same rule as or_group).
    """
    predictions = []
    for fold_votes in zip(*l):
        # Rows are classifiers, columns are samples.
        votes = np.array(fold_votes)
        # Vectorised reduction instead of a Python callback per sample.
        predictions.append((votes == 1).any(axis=0).astype(np.int64))
    return predictions

In [13]:
def get_majority_predictions(l):
    """Combine several classifiers' fold predictions by unweighted majority.

    Parameters
    ----------
    l : list
        One entry per classifier; each entry is a list of per-fold
        prediction arrays. Within a fold all classifiers must supply arrays
        of the same length.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per fold holding the most frequent vote per sample.
        On a tie scipy.stats.mode reports the smallest value, so with 0/1
        labels a tie resolves to 0.
    """
    predictions = []
    for fold_votes in zip(*l):
        # Rows are classifiers, columns are samples.
        votes = np.array(fold_votes)
        result = mode(votes, axis=0)
        # Older SciPy returns the mode with shape (1, n_samples), newer
        # releases return (n_samples,). Indexing with [0] would yield a
        # single scalar on the latter, so flatten instead.
        predictions.append(np.ravel(result.mode))
    return predictions

In [33]:
def w_v(l, vote_weights=None):
    """Weighted vote over one sample's predictions.

    Each vote is counted `weight` times and the most frequent value wins.
    On a tie scipy.stats.mode reports the smallest value, so with 0/1
    labels a tie resolves to 0.

    Parameters
    ----------
    l : iterable
        One vote per classifier.
    vote_weights : sequence of int, optional
        Non-negative integer weight per classifier, paired with `l` by
        position (extra entries on either side are ignored, as with zip).
        When omitted, the notebook-level global `weights` is used, which
        keeps `DataFrame.apply(w_v)` working unchanged.

    Returns
    -------
    The winning vote (a NumPy scalar).
    """
    if vote_weights is None:
        # Backward-compatible fallback to the global set by the calling cell.
        vote_weights = weights
    ballot = []
    for weight, vote in zip(vote_weights, l):
        ballot.extend([vote] * weight)
    # With axis=None newer SciPy returns a 0-d mode, older releases a
    # length-1 array; ravel makes the [0] lookup valid for both.
    return np.ravel(mode(ballot, axis=None).mode)[0]

def get_weighted_voting_predictions(l, vote_weights=None):
    """Combine several classifiers' fold predictions by weighted majority.

    Every classifier's vote is counted `weight` times and the most frequent
    value per sample wins. On a tie scipy.stats.mode reports the smallest
    value, so with 0/1 labels a tie resolves to 0.

    Parameters
    ----------
    l : list
        One entry per classifier; each entry is a list of per-fold
        prediction arrays. Within a fold all classifiers must supply arrays
        of the same length.
    vote_weights : sequence of int, optional
        Non-negative integer weight per classifier, in the same order as
        `l`. When omitted, the notebook-level global `weights` is used (the
        previous behaviour). Weights and classifiers are paired by position;
        surplus entries on either side are ignored, as zip did before.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array of winning votes per fold.
    """
    if vote_weights is None:
        # Backward-compatible fallback to the global set by the calling cell.
        vote_weights = weights
    vote_weights = list(vote_weights)

    predictions = []
    for fold_votes in zip(*l):
        # Rows are classifiers, columns are samples.
        votes = np.array(fold_votes)
        paired = min(len(vote_weights), votes.shape[0])
        # Repeat each classifier's row `weight` times, then take the
        # per-sample mode in one vectorised call.
        ballot = np.repeat(votes[:paired], vote_weights[:paired], axis=0)
        # ravel: the mode has shape (1, n) on older SciPy, (n,) on newer.
        predictions.append(np.ravel(mode(ballot, axis=0).mode))
    return predictions

In [15]:
def get_scores(y_tests, y_predicts):
    """Score predictions fold by fold.

    `y_tests` and `y_predicts` are walked in parallel (stopping at the
    shorter one) and each pair is passed to f1_score, recall_score,
    precision_score and accuracy_score.

    Returns a 4-tuple of lists, one value per fold each:
    (f1 scores, recall scores, precision scores, accuracy scores).
    """
    metric_fns = (f1_score, recall_score, precision_score, accuracy_score)
    per_metric = ([], [], [], [])
    for truth, guess in zip(y_tests, y_predicts):
        for bucket, metric in zip(per_metric, metric_fns):
            bucket.append(metric(truth, guess))
    return per_metric

In [25]:
# AND / OR ensembles of the "new" and "con" predictions.
# NOTE(review): new_con_and is built but only new_con_or is scored here.
new_con_and = get_and_predictions([y_pr_new, y_pr_con])
new_con_or = get_or_predictions([y_pr_new, y_pr_con])
f1, recall, pr, acc = get_scores(y_tests, new_con_or)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.784467215468
avg recall: 0.873169895644
precision: 0.712465705824
accuracy: 0.760319704928


In [38]:
# Global read by w_v: one vote weight per classifier, in list order.
weights = [2, 1, 1]
con_new_nov_maj = get_majority_predictions([y_pr_new, y_pr_nov, y_pr_con])
con_new_nov_w_v = get_weighted_voting_predictions([y_pr_new, y_pr_nov,  y_pr_con])
# This cell scores the unweighted majority ensemble only.
f1, recall, pr, acc = get_scores(y_tests, con_new_nov_maj)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.786730590526
avg recall: 0.823821568216
precision: 0.753220275503
accuracy: 0.776830407375


In [39]:
# Scores for the weighted-voting ensemble of new / nov / con.
f1, recall, pr, acc = get_scores(y_tests, con_new_nov_w_v)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.779619108581
avg recall: 0.77257880854
precision: 0.787204862967
accuracy: 0.781692450724


In [41]:
# Unweighted majority ensemble of new / nov / bag.
bag_new_nov_maj = get_majority_predictions([y_pr_new, y_pr_nov, y_pr_bag])
f1, recall, pr, acc = get_scores(y_tests, bag_new_nov_maj)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.788711841136
avg recall: 0.821450935609
precision: 0.758855180164
accuracy: 0.780075245133


In [43]:
# Global read by w_v: one vote weight per classifier, in list order.
weights = [2, 1, 1]
# Weighted-voting ensemble of new / nov / bag.
bag_new_nov_w_v = get_weighted_voting_predictions([y_pr_new, y_pr_nov,  y_pr_bag])
f1, recall, pr, acc = get_scores(y_tests, bag_new_nov_w_v)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.78051565122
avg recall: 0.777912354708
precision: 0.783631727115
accuracy: 0.781398551575


In [52]:
# Global read by w_v: one vote weight per classifier, in list order.
weights = [3, 1, 1, 1]
# Weighted-voting ensemble of new / nov / bag / con.
# NOTE(review): this rebinds bag_new_nov_w_v although the ensemble also
# contains "con"; a distinct name would avoid stale results on re-runs.
bag_new_nov_w_v = get_weighted_voting_predictions([y_pr_new, y_pr_nov,  y_pr_bag, y_pr_con])
f1, recall, pr, acc = get_scores(y_tests, bag_new_nov_w_v)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.782751571795
avg recall: 0.784924867608
precision: 0.781043915066
accuracy: 0.782282204005


In [59]:
# Global read by w_v: one vote weight per classifier, in list order.
weights = [2, 1, 1, 1]
# Weighted-voting ensemble of new / nov / bm / con.
# NOTE(review): the name bag_new_nov_w_v is misleading here - "bag" is not
# part of this ensemble; the variable is simply being reused.
bag_new_nov_w_v = get_weighted_voting_predictions([y_pr_new, y_pr_nov,  y_pr_bm, y_pr_con])
f1, recall, pr, acc = get_scores(y_tests, bag_new_nov_w_v)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.789002933825
avg recall: 0.827970223497
precision: 0.753930847571
accuracy: 0.778746508183


In [60]:
# Global read by w_v: one vote weight per classifier, in list order.
weights = [2, 1, 1, 1]
# Weighted-voting ensemble of new / nov / bag / bm.
# NOTE(review): this rebinds bag_new_nov_w_v although the ensemble also
# contains "bm"; a distinct name would avoid stale results on re-runs.
bag_new_nov_w_v = get_weighted_voting_predictions([y_pr_new, y_pr_nov,  y_pr_bag, y_pr_bm])
f1, recall, pr, acc = get_scores(y_tests, bag_new_nov_w_v)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.790030407184
avg recall: 0.825283730311
precision: 0.758078166179
accuracy: 0.780810970497


In [67]:
# Global read by w_v: one vote weight per classifier, in list order.
weights = [3, 1, 1, 1, 1]
# Weighted-voting ensemble of new / nov / bag / bm / con.
# NOTE(review): this rebinds bag_new_nov_w_v although the ensemble also
# contains "bm" and "con"; a distinct name would avoid stale results.
bag_new_nov_w_v = get_weighted_voting_predictions([y_pr_new, y_pr_nov,  y_pr_bag, y_pr_bm, y_pr_con])
f1, recall, pr, acc = get_scores(y_tests, bag_new_nov_w_v)
# Single-argument print(...) emits the same text on Python 2 and also
# parses on Python 3 (the bare print statement does not).
print("avg f1: %s" % np.mean(f1))
print("avg recall: %s" % np.mean(recall))
print("precision: %s" % np.mean(pr))
print("accuracy: %s" % np.mean(acc))

avg f1: 0.788901396696
avg recall: 0.821965003781
precision: 0.758718590045
accuracy: 0.780220131114
