In [32]:
import pandas as pd
import math
import numpy as np

# Datasets

In [5]:
# Dataset 1: base file name (reused later to locate its metrics files) and
# the name of the label column.
filename1 = 'online_shoppers_intentions'
target1 = 'Revenue'
df1 = pd.read_csv(f"../data/processed/{filename1}.csv", index_col=0)

# Quick sanity check: frame dimensions, then the label distribution.
print(df1.shape, df1[target1].value_counts(), sep="\n")

(12330, 18)
0    10422
1     1908
Name: Revenue, dtype: int64


In [9]:
# Dataset 2: base file name (reused later to locate its metrics files) and
# the name of the label column.
filename2 = 'marketing_campaign'
target2 = 'Teenhome'
df2 = pd.read_csv(f"../data/processed/{filename2}.csv", index_col=0)

# Quick sanity check: frame dimensions, then the label distribution.
print(df2.shape, df2[target2].value_counts(), sep="\n")

(2240, 26)
0    1158
1    1082
Name: Teenhome, dtype: int64


In [10]:
# Dataset 3: base file name (reused later to locate its metrics files) and
# the name of the label column.
filename3 = 'heart'
target3 = 'target'
df3 = pd.read_csv(f"../data/processed/{filename3}.csv", index_col=0)

# Quick sanity check: frame dimensions, then the label distribution.
print(df3.shape, df3[target3].value_counts(), sep="\n")

(303, 14)
1    165
0    138
Name: target, dtype: int64


# Experimental results

In [57]:
# Load the saved metrics tables: one CSV per (method, dataset) pair, stored as
# ../pred/<method>/<dataset>_metrics.csv with the first column as the index.
# Prefixes: st = selftraining, sb = semiboost, lb = labelpropagation.
st_df1, st_df2, st_df3 = (
    pd.read_csv(f"../pred/selftraining/{dataset}_metrics.csv", index_col=0)
    for dataset in (filename1, filename2, filename3)
)

sb_df1, sb_df2, sb_df3 = (
    pd.read_csv(f"../pred/semiboost/{dataset}_metrics.csv", index_col=0)
    for dataset in (filename1, filename2, filename3)
)

lb_df1, lb_df2, lb_df3 = (
    pd.read_csv(f"../pred/labelpropagation/{dataset}_metrics.csv", index_col=0)
    for dataset in (filename1, filename2, filename3)
)

In [58]:
def StringificationResults(df):
    r"""Print the metrics stored in ``df`` as the body rows of a LaTeX table.

    One row is emitted per (resampling strategy, label level) pair, iterating
    strategies None / Under / Over and, inside each, label levels
    0, 10, 20, 50, 90, 95. Every row lists the label level followed by the
    f1, accuracy, auc and time values, each rounded to two decimals.
    The first row of a strategy opens a ``\multirow`` cell carrying the
    strategy caption; the last row of a strategy is closed with ``\hline``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must be addressable through ``df.loc['<metric>_<level>', <column>]``
        where metric is one of ``f1``, ``accuracy``, ``auc``, ``time``, level
        is one of the label levels above, and column is one of
        ``noresampling``, ``undersampling``, ``oversampling``.

    Returns
    -------
    None
        The rows are written to stdout with ``print``; nothing is returned.

    Raises
    ------
    KeyError
        Propagated from ``df.loc`` when an expected row or column is missing.
    """
    resamplings = [('noresampling', 'None'), ('undersampling', 'Under'), ('oversampling', 'Over')]
    label_levels = ['0', '10', '20', '50', '90', '95']
    metrics = ['f1', 'accuracy', 'auc', 'time']

    # Raw strings keep the LaTeX backslashes literal. The former "\\\ " and
    # "\h" spellings relied on invalid escape sequences, which Python 3.12+
    # reports as SyntaxWarning. The emitted characters are unchanged.
    row_end = r" \\ " + "\n"
    block_end = r" \\ \hline " + "\n"
    # The multirow cell spans every label-level row of one resampling block.
    span = str(len(label_levels))

    rows = []
    for column, caption in resamplings:
        for label_level in label_levels:
            values = []
            for metric in metrics:
                values.append(str(np.round(df.loc[metric + '_' + label_level, column], 2)))

            # Only the first row of a block names the strategy; the others
            # leave the multirow column empty.
            if label_level == label_levels[0]:
                lead = r"\multirow{" + span + r"}{*}{" + caption + "}"
            else:
                lead = "{}"

            terminator = block_end if label_level == label_levels[-1] else row_end
            rows.append(" & ".join([lead, label_level] + values) + terminator)

    print("".join(rows))

In [59]:
# Self-training: for each dataset, print a LaTeX \multicolumn header row
# followed by the table rows built from its metrics frame.
# The headers are raw strings so every backslash is literal; the earlier
# "\\\ " spelling depended on an invalid escape sequence (SyntaxWarning on
# Python 3.12+). The printed text is unchanged.

print(r"\multicolumn{6}{| c | }{Online\_Shopping\_Intention} \\ \hline")
StringificationResults(st_df1)
print(r"\multicolumn{6}{| c | }{Marketing\_Campaign} \\ \hline")
StringificationResults(st_df2)
print(r"\multicolumn{6}{| c | }{Heart} \\ \hline")
StringificationResults(st_df3)

\multicolumn{6}{| c | }{Online\_Shopping\_Intention} \\ \hline
\multirow{6}{*}{None} & 0 & 88.38 & 88.24 & 89.54 & 0.08 \\ 
{} & 10 & 77.44 & 84.51 & 35.58 & 0.1 \\ 
{} & 20 & 77.45 & 84.54 & 42.42 & 0.1 \\ 
{} & 50 & 77.43 & 84.48 & 61.21 & 0.1 \\ 
{} & 90 & 78.21 & 84.16 & 50.97 & 0.1 \\ 
{} & 95 & 77.46 & 84.05 & 49.3 & 0.08 \\ \hline 
\multirow{6}{*}{Under} & 0 & 87.74 & 86.64 & 92.5 & 0.67 \\ 
{} & 10 & 40.94 & 35.88 & 50.43 & 0.75 \\ 
{} & 20 & 22.95 & 24.38 & 50.33 & 0.67 \\ 
{} & 50 & 12.91 & 19.57 & 60.75 & 0.66 \\ 
{} & 90 & 26.6 & 26.76 & 56.06 & 0.52 \\ 
{} & 95 & 41.75 & 36.74 & 50.2 & 0.47 \\ \hline 
\multirow{6}{*}{Over} & 0 & 88.38 & 86.16 & 91.82 & 1.98 \\ 
{} & 10 & 77.44 & 20.28 & 42.52 & 2.77 \\ 
{} & 20 & 77.45 & 16.17 & 53.84 & 1.73 \\ 
{} & 50 & 77.43 & 15.79 & 56.2 & 1.29 \\ 
{} & 90 & 78.21 & 15.46 & 52.11 & 0.7 \\ 
{} & 95 & 77.46 & 17.98 & 30.58 & 0.64 \\ \hline 

\multicolumn{6}{| c | }{Marketing\_Campaign\} \\ \hline
\multirow{6}{*}{None} & 0 & 91.37 & 91.3

In [47]:
# SemiBoost: for each dataset, print a LaTeX \multicolumn header row
# followed by the table rows built from its metrics frame.
# The headers are raw strings so every backslash is literal; the earlier
# "\\\ " spelling depended on an invalid escape sequence (SyntaxWarning on
# Python 3.12+). The printed text is unchanged.

print(r"\multicolumn{6}{| c | }{Online\_Shopping\_Intention} \\ \hline")
StringificationResults(sb_df1)
print(r"\multicolumn{6}{| c | }{Marketing\_Campaign} \\ \hline")
StringificationResults(sb_df2)
print(r"\multicolumn{6}{| c | }{Heart} \\ \hline")
StringificationResults(sb_df3)

______________________________________ Online Shopping Intention
\multirow{6}{*}{None} & 0 & 89.62 & 90.19 & 92.77 & 16.84 \\ 
{} & 10 & 4.14 & 15.46 & 50.61 & 167.43 \\ 
{} & 20 & 4.14 & 15.46 & 48.19 & 186.88 \\ 
{} & 50 & 4.14 & 15.46 & 51.95 & 197.68 \\ 
{} & 90 & 4.14 & 15.46 & 48.4 & 190.14 \\ 
{} & 95 & 4.14 & 15.46 & 47.0 & 199.5 \\ \hline 
\multirow{6}{*}{Under} & 0 & 89.97 & 88.97 & 97.22 & 1.8 \\ 
{} & 10 & 4.14 & 15.46 & 46.82 & 14.19 \\ 
{} & 20 & 4.14 & 15.46 & 56.22 & 24.17 \\ 
{} & 50 & 4.14 & 15.46 & 34.18 & 19.51 \\ 
{} & 90 & 4.14 & 15.46 & 56.32 & 22.51 \\ 
{} & 95 & 4.14 & 15.46 & 54.89 & 21.52 \\ \hline 
\multirow{6}{*}{Over} & 0 & 89.62 & 89.78 & 92.88 & 111.85 \\ 
{} & 10 & 4.14 & 15.46 & 52.61 & 776.52 \\ 
{} & 20 & 4.14 & 15.46 & 49.95 & 635.3 \\ 
{} & 50 & 4.14 & 15.46 & 47.52 & 965.95 \\ 
{} & 90 & 4.14 & 15.46 & 47.27 & 963.41 \\ 
{} & 95 & 4.14 & 15.46 & 41.17 & 924.91 \\ \hline 

______________________________________ Marketing Campaign
\multirow{6}{*}{No

In [48]:
# Label Propagation: for each dataset, print a LaTeX \multicolumn header row
# followed by the table rows built from its metrics frame.
# The headers are raw strings so every backslash is literal; the earlier
# "\\\ " spelling depended on an invalid escape sequence (SyntaxWarning on
# Python 3.12+). The printed text is unchanged.

print(r"\multicolumn{6}{| c | }{Online\_Shopping\_Intention} \\ \hline")
StringificationResults(lb_df1)
print(r"\multicolumn{6}{| c | }{Marketing\_Campaign} \\ \hline")
StringificationResults(lb_df2)
print(r"\multicolumn{6}{| c | }{Heart} \\ \hline")
StringificationResults(lb_df3)

______________________________________ Online Shopping Intention
\multirow{6}{*}{None} & 0 & 77.45 & 84.54 & 75.45 & 9.96 \\ 
{} & 10 & 77.45 & 84.54 & 39.08 & 10.82 \\ 
{} & 20 & 77.45 & 84.54 & 51.56 & 10.36 \\ 
{} & 50 & 77.45 & 84.54 & 54.83 & 10.49 \\ 
{} & 90 & 77.45 & 84.54 & 39.99 & 14.46 \\ 
{} & 95 & 77.45 & 84.54 & 36.76 & 20.78 \\ \hline 
\multirow{6}{*}{Under} & 0 & 65.78 & 60.29 & 73.97 & 3.68 \\ 
{} & 10 & 4.14 & 15.46 & 42.73 & 3.72 \\ 
{} & 20 & 4.14 & 15.46 & 57.29 & 3.75 \\ 
{} & 50 & 4.14 & 15.46 & 62.69 & 3.97 \\ 
{} & 90 & 4.14 & 15.46 & 48.02 & 4.24 \\ 
{} & 95 & 4.14 & 15.46 & 57.02 & 6.32 \\ \hline 
\multirow{6}{*}{Over} & 0 & 77.45 & 72.97 & 79.42 & 16.12 \\ 
{} & 10 & 77.45 & 16.27 & 41.39 & 19.06 \\ 
{} & 20 & 77.45 & 15.44 & 53.22 & 16.65 \\ 
{} & 50 & 77.45 & 15.46 & 49.82 & 17.74 \\ 
{} & 90 & 77.45 & 15.46 & 60.52 & 27.49 \\ 
{} & 95 & 77.45 & 15.46 & 47.76 & 36.72 \\ \hline 

______________________________________ Marketing Campaign
\multirow{6}{*}{None